pacemaker  3.0.0-d8340737c4
Scalable High-Availability cluster resource manager
pcmk_sched_primitive.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2024 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <stdbool.h>
13 #include <stdint.h> // uint8_t, uint32_t
14 
15 #include <crm/common/xml.h>
16 #include <pacemaker-internal.h>
17 
18 #include "libpacemaker_private.h"
19 
/* Forward declarations of the role-transition handlers that populate
 * rsc_action_matrix. They are declared here because the table refers to them
 * before their definitions. All five take the same arguments: the resource,
 * a node, and an "optional" flag.
 */
20 static void stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
21  bool optional);
22 static void start_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
23  bool optional);
24 static void demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
25  bool optional);
26 static void promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node,
27  bool optional);
28 static void assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node,
29  bool optional);
30 
/* One past pcmk_role_promoted; used as the size of both dimensions of
 * rsc_state_matrix and rsc_action_matrix.
 * NOTE(review): this assumes pcmk_role_promoted is the highest role value --
 * confirm against the enum definition.
 */
31 #define RSC_ROLE_MAX (pcmk_role_promoted + 1)
32 
33 static enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
34  /* This array lists the immediate next role when transitioning from one role
35  * to a target role. For example, when going from Stopped to Promoted, the
36  * next role is Unpromoted, because the resource must be started before it
37  * can be promoted. The current state then becomes Unpromoted, which is fed
38  * into this array again, giving a next role of Promoted.
39  *
 * The table is indexed as [current role][final target role]; each entry is
 * the immediate next role.
 *
40  * Current role Immediate next role Final target role
41  * ------------ ------------------- -----------------
42  */
43  /* Unknown */ { pcmk_role_unknown, /* Unknown */
44  pcmk_role_stopped, /* Stopped */
45  pcmk_role_stopped, /* Started */
46  pcmk_role_stopped, /* Unpromoted */
47  pcmk_role_stopped, /* Promoted */
48  },
49  /* Stopped */ { pcmk_role_stopped, /* Unknown */
50  pcmk_role_stopped, /* Stopped */
51  pcmk_role_started, /* Started */
52  pcmk_role_unpromoted, /* Unpromoted */
53  pcmk_role_unpromoted, /* Promoted */
54  },
55  /* Started */ { pcmk_role_stopped, /* Unknown */
56  pcmk_role_stopped, /* Stopped */
57  pcmk_role_started, /* Started */
58  pcmk_role_unpromoted, /* Unpromoted */
59  pcmk_role_promoted, /* Promoted */
60  },
61  /* Unpromoted */ { pcmk_role_stopped, /* Unknown */
62  pcmk_role_stopped, /* Stopped */
63  pcmk_role_stopped, /* Started */
64  pcmk_role_unpromoted, /* Unpromoted */
65  pcmk_role_promoted, /* Promoted */
66  },
67  /* Promoted */ { pcmk_role_stopped, /* Unknown */
68  pcmk_role_unpromoted, /* Stopped */
69  pcmk_role_unpromoted, /* Started */
70  pcmk_role_unpromoted, /* Unpromoted */
71  pcmk_role_promoted, /* Promoted */
72  },
73 };
74 
/* Pointer to a role-transition handler, the element type of
 * rsc_action_matrix. Callers in this file pass the resource, the node to act
 * on, and a flag that is true when the action is optional rather than
 * required.
 */
83 typedef void (*rsc_transition_fn)(pcmk_resource_t *rsc, pcmk_node_t *node,
84  bool optional);
85 
86 static rsc_transition_fn rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
87  /* This array lists the function needed to transition directly from one role
88  * to another. NULL indicates that nothing is needed.
 *
 * The table is indexed as [current role][next role], where the next role is
 * normally an entry taken from rsc_state_matrix. Callers in this file stop
 * walking the roles as soon as they hit a NULL entry.
89  *
90  * Current role Transition function Next role
91  * ------------ ------------------- ----------
92  */
93  /* Unknown */ { assert_role_error, /* Unknown */
94  stop_resource, /* Stopped */
95  assert_role_error, /* Started */
96  assert_role_error, /* Unpromoted */
97  assert_role_error, /* Promoted */
98  },
99  /* Stopped */ { assert_role_error, /* Unknown */
100  NULL, /* Stopped */
101  start_resource, /* Started */
102  start_resource, /* Unpromoted */
103  assert_role_error, /* Promoted */
104  },
105  /* Started */ { assert_role_error, /* Unknown */
106  stop_resource, /* Stopped */
107  NULL, /* Started */
108  NULL, /* Unpromoted */
109  promote_resource, /* Promoted */
110  },
111  /* Unpromoted */ { assert_role_error, /* Unknown */
112  stop_resource, /* Stopped */
113  stop_resource, /* Started */
114  NULL, /* Unpromoted */
115  promote_resource, /* Promoted */
116  },
117  /* Promoted */ { assert_role_error, /* Unknown */
118  demote_resource, /* Stopped */
119  demote_resource, /* Started */
120  demote_resource, /* Unpromoted */
121  NULL, /* Promoted */
122  },
123 };
124 
133 static GList *
134 sorted_allowed_nodes(const pcmk_resource_t *rsc)
135 {
136  if (rsc->priv->allowed_nodes != NULL) {
137  GList *nodes = g_hash_table_get_values(rsc->priv->allowed_nodes);
138 
139  if (nodes != NULL) {
140  return pcmk__sort_nodes(nodes, pcmk__current_node(rsc));
141  }
142  }
143  return NULL;
144 }
145 
165 static bool
166 assign_best_node(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
167  bool stop_if_fail)
168 {
169  GList *nodes = NULL;
170  pcmk_node_t *chosen = NULL;
171  pcmk_node_t *best = NULL;
172  const pcmk_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc);
173 
174  if (prefer == NULL) {
175  prefer = most_free_node;
176  }
177 
178  if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) {
179  // We've already finished assignment of resources to nodes
180  return rsc->priv->assigned_node != NULL;
181  }
182 
183  // Sort allowed nodes by score
184  nodes = sorted_allowed_nodes(rsc);
185  if (nodes != NULL) {
186  best = (pcmk_node_t *) nodes->data; // First node has best score
187  }
188 
189  if ((prefer != NULL) && (nodes != NULL)) {
190  // Get the allowed node version of prefer
191  chosen = g_hash_table_lookup(rsc->priv->allowed_nodes,
192  prefer->priv->id);
193 
194  if (chosen == NULL) {
195  pcmk__rsc_trace(rsc, "Preferred node %s for %s was unknown",
196  pcmk__node_name(prefer), rsc->id);
197 
198  /* Favor the preferred node as long as its score is at least as good as
199  * the best allowed node's.
200  *
201  * An alternative would be to favor the preferred node even if the best
202  * node is better, when the best node's score is less than INFINITY.
203  */
204  } else if (chosen->assign->score < best->assign->score) {
205  pcmk__rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
206  pcmk__node_name(chosen), rsc->id);
207  chosen = NULL;
208 
209  } else if (!pcmk__node_available(chosen, true, false)) {
210  pcmk__rsc_trace(rsc, "Preferred node %s for %s was unavailable",
211  pcmk__node_name(chosen), rsc->id);
212  chosen = NULL;
213 
214  } else {
215  pcmk__rsc_trace(rsc,
216  "Chose preferred node %s for %s "
217  "(ignoring %d candidates)",
218  pcmk__node_name(chosen), rsc->id,
219  g_list_length(nodes));
220  }
221  }
222 
223  if ((chosen == NULL) && (best != NULL)) {
224  /* Either there is no preferred node, or the preferred node is not
225  * suitable, but another node is allowed to run the resource.
226  */
227 
228  chosen = best;
229 
230  if (!pcmk__is_unique_clone(rsc->priv->parent)
231  && (chosen->assign->score > 0) // Zero not acceptable
232  && pcmk__node_available(chosen, false, false)) {
233  /* If the resource is already running on a node, prefer that node if
234  * it is just as good as the chosen node.
235  *
236  * We don't do this for unique clone instances, because
237  * pcmk__assign_instances() has already assigned instances to their
238  * running nodes when appropriate, and if we get here, we don't want
239  * remaining unassigned instances to prefer a node that's already
240  * running another instance.
241  */
242  pcmk_node_t *running = pcmk__current_node(rsc);
243 
244  if (running == NULL) {
245  // Nothing to do
246 
247  } else if (!pcmk__node_available(running, true, false)) {
248  pcmk__rsc_trace(rsc,
249  "Current node for %s (%s) can't run resources",
250  rsc->id, pcmk__node_name(running));
251 
252  } else {
253  int nodes_with_best_score = 1;
254 
255  for (GList *iter = nodes->next; iter; iter = iter->next) {
256  pcmk_node_t *allowed = (pcmk_node_t *) iter->data;
257 
258  if (allowed->assign->score != chosen->assign->score) {
259  // The nodes are sorted by score, so no more are equal
260  break;
261  }
262  if (pcmk__same_node(allowed, running)) {
263  // Scores are equal, so prefer the current node
264  chosen = allowed;
265  }
266  nodes_with_best_score++;
267  }
268 
269  if (nodes_with_best_score > 1) {
270  uint8_t log_level = LOG_INFO;
271 
272  if (chosen->assign->score >= PCMK_SCORE_INFINITY) {
273  log_level = LOG_WARNING;
274  }
275  do_crm_log(log_level,
276  "Chose %s for %s from %d nodes with score %s",
277  pcmk__node_name(chosen), rsc->id,
278  nodes_with_best_score,
279  pcmk_readable_score(chosen->assign->score));
280  }
281  }
282  }
283 
284  pcmk__rsc_trace(rsc, "Chose %s for %s from %d candidates",
285  pcmk__node_name(chosen), rsc->id, g_list_length(nodes));
286  }
287 
288  pcmk__assign_resource(rsc, chosen, false, stop_if_fail);
289  g_list_free(nodes);
290  return rsc->priv->assigned_node != NULL;
291 }
292 
/* Apply one colocation constraint to rsc's allowed node scores, where the
 * other resource involved is colocation->primary.
 *
 * If the primary is still unassigned, it is assigned first. The score is then
 * applied through rsc's apply_coloc_score() method. For some colocations (see
 * the first condition below) a copy of rsc's allowed-node table is taken
 * beforehand so that the scores can be put back afterward.
 *
 * NOTE(review): this listing is missing its line 326, so the exact condition
 * under which the scores are reverted is not visible here.
 */
300 static void
301 apply_this_with(pcmk__colocation_t *colocation, pcmk_resource_t *rsc)
302 {
303  GHashTable *archive = NULL;
304  pcmk_resource_t *other = colocation->primary;
305 
306  // In certain cases, we will need to revert the node scores
 // (dependent role at least promoted, or a finite negative score)
307  if ((colocation->dependent_role >= pcmk_role_promoted)
308  || ((colocation->score < 0)
309  && (colocation->score > -PCMK_SCORE_INFINITY))) {
310  archive = pcmk__copy_node_table(rsc->priv->allowed_nodes);
311  }
312 
313  if (pcmk_is_set(other->flags, pcmk__rsc_unassigned)) {
 // NOTE(review): the two literals below join as "first(score=" with no
 // space between them
314  pcmk__rsc_trace(rsc,
315  "%s: Assigning colocation %s primary %s first"
316  "(score=%d role=%s)",
317  rsc->id, colocation->id, other->id,
318  colocation->score,
319  pcmk_role_text(colocation->dependent_role));
320  other->priv->cmds->assign(other, NULL, true);
321  }
322 
323  // Apply the colocation score to this resource's allowed node scores
324  rsc->priv->cmds->apply_coloc_score(rsc, other, colocation, true);
 // NOTE(review): listing line 326 (the rest of this condition) is absent
 // from this extract; judging by the log message it tests that no allowed
 // node is left -- confirm against the complete file.
325  if ((archive != NULL)
327  pcmk__rsc_info(rsc,
328  "%s: Reverting scores from colocation with %s "
329  "because no nodes allowed",
330  rsc->id, other->id);
 // Swap the saved table back in; rsc now owns it, so clear the local
 // pointer to keep the cleanup below from destroying it
331  g_hash_table_destroy(rsc->priv->allowed_nodes);
332  rsc->priv->allowed_nodes = archive;
333  archive = NULL;
334  }
 // The saved copy was not needed after all
335  if (archive != NULL) {
336  g_hash_table_destroy(archive);
337  }
338 }
339 
346 static void
347 remote_connection_assigned(const pcmk_resource_t *connection)
348 {
349  pcmk_node_t *remote_node = pcmk_find_node(connection->priv->scheduler,
350  connection->id);
351 
352  CRM_CHECK(remote_node != NULL, return);
353 
354  if ((connection->priv->assigned_node != NULL)
355  && (connection->priv->next_role != pcmk_role_stopped)) {
356 
357  crm_trace("Pacemaker Remote node %s will be online",
358  remote_node->priv->id);
359  remote_node->details->online = TRUE;
360  if (!pcmk_is_set(remote_node->priv->flags, pcmk__node_seen)) {
361  // Avoid unnecessary fence, since we will attempt connection
362  remote_node->details->unclean = FALSE;
363  }
364 
365  } else {
366  crm_trace("Pacemaker Remote node %s will be shut down "
367  "(%sassigned connection's next role is %s)",
368  remote_node->priv->id,
369  ((connection->priv->assigned_node == NULL)? "un" : ""),
370  pcmk_role_text(connection->priv->next_role));
371  remote_node->details->shutdown = TRUE;
372  }
373 }
374 
393 pcmk_node_t *
395  bool stop_if_fail)
396 {
397  GList *this_with_colocations = NULL;
398  GList *with_this_colocations = NULL;
399  GList *iter = NULL;
400  pcmk_resource_t *parent = NULL;
401  pcmk__colocation_t *colocation = NULL;
402  pcmk_scheduler_t *scheduler = NULL;
403 
404  pcmk__assert(pcmk__is_primitive(rsc));
405  scheduler = rsc->priv->scheduler;
406  parent = rsc->priv->parent;
407 
408  // Never assign a child without parent being assigned first
409  if ((parent != NULL) && !pcmk_is_set(parent->flags, pcmk__rsc_assigning)) {
410  pcmk__rsc_debug(rsc, "%s: Assigning parent %s first",
411  rsc->id, parent->id);
412  parent->priv->cmds->assign(parent, prefer, stop_if_fail);
413  }
414 
415  if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) {
416  // Assignment has already been done
417  const char *node_name = "no node";
418 
419  if (rsc->priv->assigned_node != NULL) {
420  node_name = pcmk__node_name(rsc->priv->assigned_node);
421  }
422  pcmk__rsc_debug(rsc, "%s: pre-assigned to %s", rsc->id, node_name);
423  return rsc->priv->assigned_node;
424  }
425 
426  // Ensure we detect assignment loops
428  pcmk__rsc_debug(rsc, "Breaking assignment loop involving %s", rsc->id);
429  return NULL;
430  }
432 
433  pe__show_node_scores(true, rsc, "Pre-assignment",
434  rsc->priv->allowed_nodes, scheduler);
435 
436  this_with_colocations = pcmk__this_with_colocations(rsc);
437  with_this_colocations = pcmk__with_this_colocations(rsc);
438 
439  // Apply mandatory colocations first, to satisfy as many as possible
440  for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
441  colocation = iter->data;
442 
443  if ((colocation->score <= -PCMK_SCORE_INFINITY)
444  || (colocation->score >= PCMK_SCORE_INFINITY)) {
445  apply_this_with(colocation, rsc);
446  }
447  }
448  for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
449  colocation = iter->data;
450 
451  if ((colocation->score <= -PCMK_SCORE_INFINITY)
452  || (colocation->score >= PCMK_SCORE_INFINITY)) {
453  pcmk__add_dependent_scores(colocation, rsc);
454  }
455  }
456 
457  pe__show_node_scores(true, rsc, "Mandatory-colocations",
458  rsc->priv->allowed_nodes, scheduler);
459 
460  // Then apply optional colocations
461  for (iter = this_with_colocations; iter != NULL; iter = iter->next) {
462  colocation = iter->data;
463 
464  if ((colocation->score > -PCMK_SCORE_INFINITY)
465  && (colocation->score < PCMK_SCORE_INFINITY)) {
466  apply_this_with(colocation, rsc);
467  }
468  }
469  for (iter = with_this_colocations; iter != NULL; iter = iter->next) {
470  colocation = iter->data;
471 
472  if ((colocation->score > -PCMK_SCORE_INFINITY)
473  && (colocation->score < PCMK_SCORE_INFINITY)) {
474  pcmk__add_dependent_scores(colocation, rsc);
475  }
476  }
477 
478  g_list_free(this_with_colocations);
479  g_list_free(with_this_colocations);
480 
481  if (rsc->priv->next_role == pcmk_role_stopped) {
482  pcmk__rsc_trace(rsc,
483  "Banning %s from all nodes because it will be stopped",
484  rsc->id);
487 
488  } else if ((rsc->priv->next_role > rsc->priv->orig_role)
491  crm_notice("Resource %s cannot be elevated from %s to %s due to "
493  rsc->id, pcmk_role_text(rsc->priv->orig_role),
494  pcmk_role_text(rsc->priv->next_role));
495  pe__set_next_role(rsc, rsc->priv->orig_role,
497  }
498 
501  rsc, __func__, rsc->priv->allowed_nodes, scheduler);
502 
503  // Unmanage resource if fencing is enabled but no device is configured
507  }
508 
509  if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
510  // Unmanaged resources stay on their current node
511  const char *reason = NULL;
512  pcmk_node_t *assign_to = NULL;
513 
514  pe__set_next_role(rsc, rsc->priv->orig_role, "unmanaged");
515  assign_to = pcmk__current_node(rsc);
516  if (assign_to == NULL) {
517  reason = "inactive";
518  } else if (rsc->priv->orig_role == pcmk_role_promoted) {
519  reason = "promoted";
520  } else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
521  reason = "failed";
522  } else {
523  reason = "active";
524  }
525  pcmk__rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id,
526  (assign_to? assign_to->priv->name : "no node"),
527  reason);
528  pcmk__assign_resource(rsc, assign_to, true, stop_if_fail);
529 
531  // Must stop at some point, but be consistent with stop_if_fail
532  if (stop_if_fail) {
533  pcmk__rsc_debug(rsc,
534  "Forcing %s to stop: " PCMK_OPT_STOP_ALL_RESOURCES,
535  rsc->id);
536  }
537  pcmk__assign_resource(rsc, NULL, true, stop_if_fail);
538 
539  } else if (!assign_best_node(rsc, prefer, stop_if_fail)) {
540  // Assignment failed
541  if (!pcmk_is_set(rsc->flags, pcmk__rsc_removed)) {
542  pcmk__rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
543  } else if ((rsc->priv->active_nodes != NULL) && stop_if_fail) {
544  pcmk__rsc_info(rsc, "Stopping removed resource %s", rsc->id);
545  }
546  }
547 
549 
551  remote_connection_assigned(rsc);
552  }
553 
554  return rsc->priv->assigned_node;
555 }
556 
/* Schedule the actions that take a resource from its original role down to
 * Stopped on its current node and then back up to that same original role on
 * its assigned node, walking rsc_state_matrix / rsc_action_matrix one step at
 * a time.
 *
 * rsc:          resource to schedule actions for
 * current:      node the "down" actions are created on
 * need_stop:    if true, the actions are created as required rather than
 *               optional
 * need_promote: if true, a step whose next role is Promoted is created as
 *               required even when need_stop is false
 *
 * NOTE(review): this listing is missing its lines 576 and 613 (one statement
 * before the first loop and one after the second); they are not visible here.
 */
568 static void
569 schedule_restart_actions(pcmk_resource_t *rsc, pcmk_node_t *current,
570  bool need_stop, bool need_promote)
571 {
572  enum rsc_role_e role = rsc->priv->orig_role;
573  enum rsc_role_e next_role;
574  rsc_transition_fn fn = NULL;
575 
 // NOTE(review): listing line 576 is absent from this extract
577 
578  // Bring resource down to a stop on its current node
579  while (role != pcmk_role_stopped) {
580  next_role = rsc_state_matrix[role][pcmk_role_stopped];
581  pcmk__rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
582  (need_stop? "required" : "optional"), rsc->id,
583  pcmk_role_text(role), pcmk_role_text(next_role));
584  fn = rsc_action_matrix[role][next_role];
585  if (fn == NULL) {
 // No handler for this step: stop descending (role keeps its value)
586  break;
587  }
588  fn(rsc, current, !need_stop);
589  role = next_role;
590  }
591 
592  // Bring resource back up to its original role on its assigned node (only if the next role is not lower than the original role and the resource is not blocked)
593  while ((rsc->priv->orig_role <= rsc->priv->next_role)
594  && (role != rsc->priv->orig_role)
595  && !pcmk_is_set(rsc->flags, pcmk__rsc_blocked)) {
596  bool required = need_stop;
597 
598  next_role = rsc_state_matrix[role][rsc->priv->orig_role];
599  if ((next_role == pcmk_role_promoted) && need_promote) {
600  required = true;
601  }
602  pcmk__rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
603  (required? "required" : "optional"), rsc->id,
604  pcmk_role_text(role), pcmk_role_text(next_role));
605  fn = rsc_action_matrix[role][next_role];
606  if (fn == NULL) {
607  break;
608  }
609  fn(rsc, rsc->priv->assigned_node, !required);
610  role = next_role;
611  }
612 
 // NOTE(review): listing line 613 is absent from this extract
614 }
615 
624 static const char *
625 set_default_next_role(pcmk_resource_t *rsc)
626 {
627  if (rsc->priv->next_role != pcmk_role_unknown) {
628  return "explicit";
629  }
630 
631  if (rsc->priv->assigned_node == NULL) {
632  pe__set_next_role(rsc, pcmk_role_stopped, "assignment");
633  } else {
634  pe__set_next_role(rsc, pcmk_role_started, "assignment");
635  }
636  return "implicit";
637 }
638 
/* Create a start action for rsc on its assigned node, standing in for a start
 * that (per the trace message) is already pending.
 *
 * NOTE(review): this listing is missing its line 654, so what is done with
 * the returned action after it is created is not visible here.
 */
645 static void
646 create_pending_start(pcmk_resource_t *rsc)
647 {
648  pcmk_action_t *start = NULL;
649 
650  pcmk__rsc_trace(rsc,
651  "Creating action for %s to represent already pending start",
652  rsc->id);
653  start = start_action(rsc, rsc->priv->assigned_node, TRUE);
 // NOTE(review): listing line 654 is absent from this extract
655 }
656 
663 static void
664 schedule_role_transition_actions(pcmk_resource_t *rsc)
665 {
666  enum rsc_role_e role = rsc->priv->orig_role;
667 
668  while (role != rsc->priv->next_role) {
669  enum rsc_role_e next_role =
670  rsc_state_matrix[role][rsc->priv->next_role];
671  rsc_transition_fn fn = NULL;
672 
673  pcmk__rsc_trace(rsc,
674  "Creating action to take %s from %s to %s "
675  "(ending at %s)",
676  rsc->id, pcmk_role_text(role),
677  pcmk_role_text(next_role),
678  pcmk_role_text(rsc->priv->next_role));
679  fn = rsc_action_matrix[role][next_role];
680  if (fn == NULL) {
681  break;
682  }
683  fn(rsc, rsc->priv->assigned_node, false);
684  role = next_role;
685  }
686 }
687 
694 void
696 {
697  bool need_stop = false;
698  bool need_promote = false;
699  bool is_moving = false;
700  bool allow_migrate = false;
701  bool multiply_active = false;
702 
703  pcmk_node_t *current = NULL;
704  pcmk_node_t *migration_target = NULL;
705  unsigned int num_all_active = 0;
706  unsigned int num_clean_active = 0;
707  const char *next_role_source = NULL;
708 
709  pcmk__assert(pcmk__is_primitive(rsc));
710 
711  next_role_source = set_default_next_role(rsc);
712  pcmk__rsc_trace(rsc,
713  "Creating all actions for %s transition from %s to %s "
714  "(%s) on %s",
715  rsc->id, pcmk_role_text(rsc->priv->orig_role),
716  pcmk_role_text(rsc->priv->next_role), next_role_source,
717  pcmk__node_name(rsc->priv->assigned_node));
718 
719  current = rsc->priv->fns->active_node(rsc, &num_all_active,
720  &num_clean_active);
721 
722  g_list_foreach(rsc->priv->dangling_migration_sources,
724 
725  if ((current != NULL) && (rsc->priv->assigned_node != NULL)
726  && !pcmk__same_node(current, rsc->priv->assigned_node)
727  && (rsc->priv->next_role >= pcmk_role_started)) {
728 
729  pcmk__rsc_trace(rsc, "Moving %s from %s to %s",
730  rsc->id, pcmk__node_name(current),
731  pcmk__node_name(rsc->priv->assigned_node));
732  is_moving = true;
733  allow_migrate = pcmk__rsc_can_migrate(rsc, current);
734 
735  // This is needed even if migrating (though I'm not sure why ...)
736  need_stop = true;
737  }
738 
739  // Check whether resource is partially migrated and/or multiply active
740  migration_target = rsc->priv->partial_migration_target;
741  if ((rsc->priv->partial_migration_source != NULL)
742  && (migration_target != NULL) && allow_migrate && (num_all_active == 2)
743  && pcmk__same_node(current, rsc->priv->partial_migration_source)
744  && pcmk__same_node(rsc->priv->assigned_node, migration_target)) {
745  /* A partial migration is in progress, and the migration target remains
746  * the same as when the migration began.
747  */
748  pcmk__rsc_trace(rsc,
749  "Partial migration of %s from %s to %s will continue",
750  rsc->id,
751  pcmk__node_name(rsc->priv->partial_migration_source),
752  pcmk__node_name(migration_target));
753 
754  } else if ((rsc->priv->partial_migration_source != NULL)
755  || (migration_target != NULL)) {
756  // A partial migration is in progress but can't be continued
757 
758  if (num_all_active > 2) {
759  // The resource is migrating *and* multiply active!
760  crm_notice("Forcing recovery of %s because it is migrating "
761  "from %s to %s and possibly active elsewhere",
762  rsc->id,
763  pcmk__node_name(rsc->priv->partial_migration_source),
764  pcmk__node_name(migration_target));
765  } else {
766  // The migration source or target isn't available
767  crm_notice("Forcing recovery of %s because it can no longer "
768  "migrate from %s to %s",
769  rsc->id,
770  pcmk__node_name(rsc->priv->partial_migration_source),
771  pcmk__node_name(migration_target));
772  }
773  need_stop = true;
774  rsc->priv->partial_migration_source = NULL;
775  rsc->priv->partial_migration_target = NULL;
776  allow_migrate = false;
777 
778  } else if (pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing)) {
779  multiply_active = (num_all_active > 1);
780  } else {
781  /* If a resource has PCMK_META_REQUIRES set to PCMK_VALUE_NOTHING or
782  * PCMK_VALUE_QUORUM, don't consider it active on unclean nodes (similar
783  * to how all resources behave when PCMK_OPT_STONITH_ENABLED is false).
784  * We can start such resources elsewhere before fencing completes, and
785  * if we considered the resource active on the failed node, we would
786  * attempt recovery for being active on multiple nodes.
787  */
788  multiply_active = (num_clean_active > 1);
789  }
790 
791  if (multiply_active) {
792  const char *class = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS);
793 
794  // Resource was (possibly) incorrectly multiply active
795  pcmk__sched_err(rsc->priv->scheduler,
796  "%s resource %s might be active on %u nodes (%s)",
797  pcmk__s(class, "Untyped"), rsc->id, num_all_active,
799  crm_notice("For more information, see \"What are multiply active "
800  "resources?\" at "
801  "https://projects.clusterlabs.org/w/clusterlabs/faq/");
802 
803  switch (rsc->priv->multiply_active_policy) {
805  need_stop = true;
806  break;
808  need_stop = true; // stop_resource() will skip expected node
810  break;
811  default:
812  break;
813  }
814 
815  } else {
817  }
818 
820  create_pending_start(rsc);
821  }
822 
823  if (is_moving) {
824  // Remaining tests are only for resources staying where they are
825 
826  } else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
828  need_stop = true;
829  pcmk__rsc_trace(rsc, "Recovering %s", rsc->id);
830  } else {
831  pcmk__rsc_trace(rsc, "Recovering %s by demotion", rsc->id);
832  if (rsc->priv->next_role == pcmk_role_promoted) {
833  need_promote = true;
834  }
835  }
836 
837  } else if (pcmk_is_set(rsc->flags, pcmk__rsc_blocked)) {
838  pcmk__rsc_trace(rsc, "Blocking further actions on %s", rsc->id);
839  need_stop = true;
840 
841  } else if ((rsc->priv->orig_role > pcmk_role_started)
842  && (current != NULL)
843  && (rsc->priv->assigned_node != NULL)) {
844  pcmk_action_t *start = NULL;
845 
846  pcmk__rsc_trace(rsc, "Creating start action for promoted resource %s",
847  rsc->id);
848  start = start_action(rsc, rsc->priv->assigned_node, TRUE);
849  if (!pcmk_is_set(start->flags, pcmk__action_optional)) {
850  // Recovery of a promoted resource
851  pcmk__rsc_trace(rsc, "%s restart is required for recovery", rsc->id);
852  need_stop = true;
853  }
854  }
855 
856  // Create any actions needed to bring resource down and back up to same role
857  schedule_restart_actions(rsc, current, need_stop, need_promote);
858 
859  // Create any actions needed to take resource from this role to the next
860  schedule_role_transition_actions(rsc);
861 
863 
864  if (allow_migrate) {
865  pcmk__create_migration_actions(rsc, current);
866  }
867 }
868 
875 static void
876 rsc_avoids_remote_nodes(const pcmk_resource_t *rsc)
877 {
878  GHashTableIter iter;
879  pcmk_node_t *node = NULL;
880 
881  g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes);
882  while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
883  if (node->priv->remote != NULL) {
884  node->assign->score = -PCMK_SCORE_INFINITY;
885  }
886  }
887 }
888 
902 static GList *
903 allowed_nodes_as_list(const pcmk_resource_t *rsc)
904 {
905  GList *allowed_nodes = NULL;
906 
907  if (rsc->priv->allowed_nodes != NULL) {
908  allowed_nodes = g_hash_table_get_values(rsc->priv->allowed_nodes);
909  }
910 
911  if (!pcmk__is_daemon) {
912  allowed_nodes = g_list_sort(allowed_nodes, pe__cmp_node_name);
913  }
914 
915  return allowed_nodes;
916 }
917 
924 void
926 {
927  GList *allowed_nodes = NULL;
928  bool check_unfencing = false;
929  bool check_utilization = false;
930  pcmk_scheduler_t *scheduler = NULL;
931 
932  pcmk__assert(pcmk__is_primitive(rsc));
933  scheduler = rsc->priv->scheduler;
934 
935  if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
936  pcmk__rsc_trace(rsc,
937  "Skipping implicit constraints for unmanaged resource "
938  "%s", rsc->id);
939  return;
940  }
941 
942  // Whether resource requires unfencing
943  check_unfencing = !pcmk_is_set(rsc->flags, pcmk__rsc_fence_device)
947 
948  // Whether a non-default placement strategy is used
949  check_utilization = (g_hash_table_size(rsc->priv->utilization) > 0)
950  && !pcmk__str_eq(scheduler->priv->placement_strategy,
952 
953  // Order stops before starts (i.e. restart)
954  pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0), NULL,
955  rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0), NULL,
959 
960  // Promotable ordering: demote before stop, start before promote
961  if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
963  || (rsc->priv->orig_role > pcmk_role_unpromoted)) {
964 
966  NULL,
967  rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
968  NULL,
970 
972  NULL,
973  rsc, pcmk__op_key(rsc->id, PCMK_ACTION_PROMOTE, 0),
974  NULL,
976  }
977 
978  // Don't clear resource history if probing on same node
980  NULL, rsc,
982  NULL,
984  scheduler);
985 
986  // Certain checks need allowed nodes
987  if (check_unfencing || check_utilization
988  || (rsc->priv->launcher != NULL)) {
989 
990  allowed_nodes = allowed_nodes_as_list(rsc);
991  }
992 
993  if (check_unfencing) {
994  g_list_foreach(allowed_nodes, pcmk__order_restart_vs_unfence, rsc);
995  }
996 
997  if (check_utilization) {
998  pcmk__create_utilization_constraints(rsc, allowed_nodes);
999  }
1000 
1001  if (rsc->priv->launcher != NULL) {
1002  pcmk_resource_t *remote_rsc = NULL;
1003 
1005  // rsc is the implicit remote connection for a guest or bundle node
1006 
1007  /* Guest resources are not allowed to run on Pacemaker Remote nodes,
1008  * to avoid nesting remotes. However, bundles are allowed.
1009  */
1011  rsc_avoids_remote_nodes(rsc->priv->launcher);
1012  }
1013 
1014  /* If someone cleans up a guest or bundle node's launcher, we will
1015  * likely schedule a (re-)probe of the launcher and recovery of the
1016  * connection. Order the connection stop after the launcher probe,
1017  * so that if we detect the launcher running, we will trigger a new
1018  * transition and avoid the unnecessary recovery.
1019  */
1022  rsc, PCMK_ACTION_STOP,
1024 
1025  /* A user can specify that a resource must start on a Pacemaker Remote
1026  * node by explicitly configuring it with the PCMK__META_CONTAINER
1027  * meta-attribute. This is of questionable merit, since location
1028  * constraints can accomplish the same thing. But we support it, so here
1029  * we check whether a resource (that is not itself a remote connection)
1030  * has PCMK__META_CONTAINER set to a remote node or guest node resource.
1031  */
1032  } else if (pcmk_is_set(rsc->priv->launcher->flags,
1034  remote_rsc = rsc->priv->launcher;
1035  } else {
1036  remote_rsc =
1038  rsc->priv->launcher);
1039  }
1040 
1041  if (remote_rsc != NULL) {
1042  /* Force the resource on the Pacemaker Remote node instead of
1043  * colocating the resource with the launcher.
1044  */
1045  for (GList *item = allowed_nodes; item; item = item->next) {
1046  pcmk_node_t *node = item->data;
1047 
1048  if (node->priv->remote != remote_rsc) {
1049  node->assign->score = -PCMK_SCORE_INFINITY;
1050  }
1051  }
1052 
1053  } else {
1054  /* This resource is either launched by a resource that does NOT
1055  * represent a Pacemaker Remote node, or a Pacemaker Remote
1056  * connection resource for a guest node or bundle.
1057  */
1058  int score;
1059 
1060  crm_trace("Order and colocate %s relative to its launcher %s",
1061  rsc->id, rsc->priv->launcher->id);
1062 
1064  pcmk__op_key(rsc->priv->launcher->id,
1065  PCMK_ACTION_START, 0),
1066  NULL, rsc,
1067  pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
1068  NULL,
1071 
1072  pcmk__new_ordering(rsc,
1073  pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
1074  NULL,
1075  rsc->priv->launcher,
1076  pcmk__op_key(rsc->priv->launcher->id,
1077  PCMK_ACTION_STOP, 0),
1079 
1081  score = 10000; /* Highly preferred but not essential */
1082  } else {
1083  score = PCMK_SCORE_INFINITY; // Force to run on same host
1084  }
1085  pcmk__new_colocation("#resource-with-container", NULL, score, rsc,
1086  rsc->priv->launcher, NULL, NULL,
1088  }
1089  }
1090 
1093  /* Remote connections and fencing devices are not allowed to run on
1094  * Pacemaker Remote nodes
1095  */
1096  rsc_avoids_remote_nodes(rsc);
1097  }
1098  g_list_free(allowed_nodes);
1099 }
1100 
1116 int
1118  const pcmk_resource_t *primary,
1119  const pcmk__colocation_t *colocation,
1120  bool for_dependent)
1121 {
1122  enum pcmk__coloc_affects filter_results;
1123 
1124  pcmk__assert((dependent != NULL) && (primary != NULL)
1125  && (colocation != NULL));
1126 
1127  if (for_dependent) {
1128  // Always process on behalf of primary resource
1129  return primary->priv->cmds->apply_coloc_score(dependent, primary,
1130  colocation, false);
1131  }
1132 
1133  filter_results = pcmk__colocation_affects(dependent, primary, colocation,
1134  false);
1135  pcmk__rsc_trace(dependent, "%s %s with %s (%s, score=%d, filter=%d)",
1136  ((colocation->score > 0)? "Colocating" : "Anti-colocating"),
1137  dependent->id, primary->id, colocation->id,
1138  colocation->score,
1139  filter_results);
1140 
1141  switch (filter_results) {
1143  return pcmk__apply_coloc_to_priority(dependent, primary,
1144  colocation);
1145 
1147  pcmk__apply_coloc_to_scores(dependent, primary, colocation);
1148  return 0;
1149 
1150  default: // pcmk__coloc_affects_nothing
1151  return 0;
1152  }
1153 }
1154 
1155 /* Primitive implementation of
1156  * pcmk__assignment_methods_t:with_this_colocations()
1157  */
/* The cross-reference index of this page names this function
 * pcmk__with_primitive_colocations(); the listing line carrying the name and
 * the "rsc" parameter is not visible here.
 *
 * rsc must be a primitive and list must be non-NULL (both asserted).
 *
 * When rsc is orig_rsc itself, colocations are added for the resource and
 * then the parent (if any) is asked for its with_this_colocations(). When
 * rsc is some other resource, only the entries of
 * rsc->priv->with_this_colocations that carry pcmk__coloc_explicit are added.
 */
1158 void
1160  const pcmk_resource_t *orig_rsc, GList **list)
1161 {
1162  const pcmk_resource_t *parent = NULL;
1163 
1164  pcmk__assert(pcmk__is_primitive(rsc) && (list != NULL));
1165  parent = rsc->priv->parent;
1166 
1167  if (rsc == orig_rsc) {
1168  /* For the resource itself, add all of its own colocations and relevant
1169  * colocations from its parent (if any).
1170  */
/* NOTE(review): listing line 1171 (the start of the call that ends with
 * "orig_rsc);" below) is not visible here; presumably it adds the resource's
 * own list - confirm against the original file.
 */
1172  orig_rsc);
1173  if (parent != NULL) {
1174  parent->priv->cmds->with_this_colocations(parent, orig_rsc, list);
1175  }
1176  } else {
1177  // For an ancestor, add only explicitly configured constraints
1178  for (GList *iter = rsc->priv->with_this_colocations;
1179  iter != NULL; iter = iter->next) {
1180  pcmk__colocation_t *colocation = iter->data;
1181 
1182  if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
1183  pcmk__add_with_this(list, colocation, orig_rsc);
1184  }
1185  }
1186  }
1187 }
1188 
1189 /* Primitive implementation of
1190  * pcmk__assignment_methods_t:this_with_colocations()
1191  */
/* The cross-reference index of this page names this function
 * pcmk__primitive_with_colocations(); the listing line carrying the name and
 * the "rsc" parameter is not visible here.
 *
 * rsc must be a primitive and list must be non-NULL (both asserted).
 *
 * When rsc is orig_rsc itself, colocations are added for the resource and
 * then the parent (if any) is asked for its this_with_colocations(). When
 * rsc is some other resource, only the entries of
 * rsc->priv->this_with_colocations that carry pcmk__coloc_explicit are added.
 */
1192 void
1194  const pcmk_resource_t *orig_rsc, GList **list)
1195 {
1196  const pcmk_resource_t *parent = NULL;
1197 
1198  pcmk__assert(pcmk__is_primitive(rsc) && (list != NULL));
1199  parent = rsc->priv->parent;
1200 
1201  if (rsc == orig_rsc) {
1202  /* For the resource itself, add all of its own colocations and relevant
1203  * colocations from its parent (if any).
1204  */
/* NOTE(review): listing line 1205 (the start of the call that ends with
 * "orig_rsc);" below) is not visible here; presumably it adds the resource's
 * own list - confirm against the original file.
 */
1206  orig_rsc);
1207  if (parent != NULL) {
1208  parent->priv->cmds->this_with_colocations(parent, orig_rsc, list);
1209  }
1210  } else {
1211  // For an ancestor, add only explicitly configured constraints
1212  for (GList *iter = rsc->priv->this_with_colocations;
1213  iter != NULL; iter = iter->next) {
1214  pcmk__colocation_t *colocation = iter->data;
1215 
1216  if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) {
1217  pcmk__add_this_with(list, colocation, orig_rsc);
1218  }
1219  }
1220  }
1221 }
1222 
/* Return an action's flags as a uint32_t.
 *
 * The cross-reference index of this page names this function
 * pcmk__primitive_action_flags(action, node); the listing line carrying the
 * name and parameter list is not visible here. action is asserted to be
 * non-NULL; nothing other than action is referenced in the visible body.
 *
 * NOTE(review): the explicit cast would drop high bits if action->flags is
 * wider than 32 bits - confirm the field's type.
 */
1232 uint32_t
1234 {
1235  pcmk__assert(action != NULL);
1236  return (uint32_t) action->flags;
1237 }
1238 
/* Check whether a node is the "expected node" for a resource.
 *
 * Returns true only when all three conditions hold:
 *  - every flag passed to pcmk_all_flags_set() is set in rsc->flags,
 *  - rsc's next role is greater than pcmk_role_stopped, and
 *  - node is the same node as rsc's assigned node.
 *
 * NOTE(review): the flag list (listing line 1257) is not visible here.
 * Callers' comments describe the match as the node where a multiply active
 * resource should not be stopped - confirm the exact flags against the
 * original file.
 */
1253 static bool
1254 is_expected_node(const pcmk_resource_t *rsc, const pcmk_node_t *node)
1255 {
1256  return pcmk_all_flags_set(rsc->flags,
1258  && (rsc->priv->next_role > pcmk_role_stopped)
1259  && pcmk__same_node(rsc->priv->assigned_node, node);
1260 }
1261 
/* Schedule stop actions for a resource on the nodes where it is active.
 *
 * Every entry of rsc->priv->active_nodes is visited. A node is skipped when
 * is_expected_node() accepts it, or when it is both the partial migration
 * target and the assigned node. For every other node a stop action is
 * created with stop_action() and given a reason where one applies.
 *
 * rsc       Resource to stop
 * node      Not referenced in the lines visible here
 * optional  Whether the created stops are optional; may be overridden inside
 *           the loop (see below)
 *
 * NOTE(review): listing lines 1316, 1325, 1328, 1333 and 1335 are not visible
 * here, so the unmanaged-resource and unfencing handling near the end of the
 * loop is incomplete in this listing.
 */
1270 static void
1271 stop_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
1272 {
1273  for (GList *iter = rsc->priv->active_nodes;
1274  iter != NULL; iter = iter->next) {
1275 
1276  pcmk_node_t *current = (pcmk_node_t *) iter->data;
1277  pcmk_action_t *stop = NULL;
1278 
1279  if (is_expected_node(rsc, current)) {
1280  /* We are scheduling restart actions for a multiply active resource
1281  * with PCMK_META_MULTIPLE_ACTIVE=PCMK_VALUE_STOP_UNEXPECTED, and
1282  * this is where it should not be stopped.
1283  */
1284  pcmk__rsc_trace(rsc,
1285  "Skipping stop of multiply active resource %s "
1286  "on expected node %s",
1287  rsc->id, pcmk__node_name(current));
1288  continue;
1289  }
1290 
1291  if (rsc->priv->partial_migration_target != NULL) {
1292  // Continue migration if node originally was and remains target
1293  if (pcmk__same_node(current, rsc->priv->partial_migration_target)
1294  && pcmk__same_node(current, rsc->priv->assigned_node)) {
1295  pcmk__rsc_trace(rsc,
1296  "Skipping stop of %s on %s "
1297  "because partial migration there will continue",
1298  rsc->id, pcmk__node_name(current));
1299  continue;
1300  } else {
1301  pcmk__rsc_trace(rsc,
1302  "Forcing stop of %s on %s "
1303  "because migration target changed",
1304  rsc->id, pcmk__node_name(current));
/* This overwrites the parameter itself, so the stop is also non-optional for
 * every node visited later in this loop.
 */
1305  optional = false;
1306  }
1307  }
1308 
1309  pcmk__rsc_trace(rsc, "Scheduling stop of %s on %s",
1310  rsc->id, pcmk__node_name(current));
1311  stop = stop_action(rsc, current, optional);
1312 
// With no assigned node, the stop is attributed to node availability
1313  if (rsc->priv->assigned_node == NULL) {
1314  pe_action_set_reason(stop, "node availability", true);
1315  } else if (pcmk_all_flags_set(rsc->flags, pcmk__rsc_restarting
1317  /* We are stopping a multiply active resource on a node that is
1318  * not its expected node, and we are still scheduling restart
1319  * actions, so the stop is for being multiply active.
1320  */
1321  pe_action_set_reason(stop, "being multiply active", true);
1322  }
1323 
/* NOTE(review): the body of this branch (listing line 1325) is not visible
 * here.
 */
1324  if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) {
1326  }
1327 
/* NOTE(review): the condition that guards this unfencing section (listing
 * line 1328) and the statements on listing lines 1333 and 1335 are not
 * visible here. The visible part obtains a fence "on" operation for the node
 * and emits a message when the node is not unfenced.
 */
1329  pcmk_action_t *unfence = pe_fence_op(current, PCMK_ACTION_ON, true,
1330  NULL, false,
1331  rsc->priv->scheduler);
1332 
1334  if (!pcmk__node_unfenced(current)) {
1336  "Stopping %s until %s can be unfenced",
1337  rsc->id, pcmk__node_name(current));
1338  }
1339  }
1340  }
1341 }
1342 
/* Schedule a start action for a resource on a node.
 *
 * rsc       Resource to start
 * node      Node to start it on (asserted to be non-NULL)
 * optional  Whether the start is optional; used in the trace message and in
 *           the runnable check below
 *
 * NOTE(review): listing lines 1363, 1366 and 1377 are not visible here, so
 * the statements that act on the created action are incomplete in this
 * listing.
 */
1351 static void
1352 start_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
1353 {
1354  pcmk_action_t *start = NULL;
1355 
1356  pcmk__assert(node != NULL);
1357 
1358  pcmk__rsc_trace(rsc, "Scheduling %s start of %s on %s (score %d)",
1359  (optional? "optional" : "required"), rsc->id,
1360  pcmk__node_name(node), node->assign->score);
// The action is always created as optional (TRUE), whatever "optional" is
1361  start = start_action(rsc, node, TRUE);
1362 
1364 
/* NOTE(review): the body of this branch (listing line 1366) is not visible
 * here; presumably it makes a runnable, required start non-optional -
 * confirm against the original file.
 */
1365  if (pcmk_is_set(start->flags, pcmk__action_runnable) && !optional) {
1367  }
1368 
1369  if (is_expected_node(rsc, node)) {
1370  /* This could be a problem if the start becomes necessary for other
1371  * reasons later.
1372  */
/* NOTE(review): "resouce" in the trace message below is a typo for
 * "resource". The statement after the trace (listing line 1377) is not
 * visible here.
 */
1373  pcmk__rsc_trace(rsc,
1374  "Start of multiply active resouce %s "
1375  "on expected node %s will be a pseudo-action",
1376  rsc->id, pcmk__node_name(node));
1378  }
1379 }
1380 
/* Schedule a promote action for a resource on a node, if it can run.
 *
 * The promotion is treated as runnable only when every start action found
 * for rsc on node has pcmk__action_runnable set. If so, a promote action is
 * created; otherwise each existing promote action for rsc on node is visited.
 *
 * rsc       Resource to promote
 * node      Node to promote it on (asserted to be non-NULL)
 * optional  Whether the promote action is optional
 *
 * NOTE(review): listing lines 1424 and 1434 are not visible here, so the
 * statements applied to the promote actions are incomplete in this listing.
 */
1389 static void
1390 promote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
1391 {
1392  GList *iter = NULL;
1393  GList *action_list = NULL;
1394  bool runnable = true;
1395 
1396  pcmk__assert(node != NULL);
1397 
1398  // Any start must be runnable for promotion to be runnable
1399  action_list = pe__resource_actions(rsc, node, PCMK_ACTION_START, true);
1400  for (iter = action_list; iter != NULL; iter = iter->next) {
1401  pcmk_action_t *start = (pcmk_action_t *) iter->data;
1402 
1403  if (!pcmk_is_set(start->flags, pcmk__action_runnable)) {
1404  runnable = false;
1405  }
1406  }
// Only the list itself is freed here, not the actions it points to
1407  g_list_free(action_list);
1408 
1409  if (runnable) {
1410  pcmk_action_t *promote = promote_action(rsc, node, optional);
1411 
1412  pcmk__rsc_trace(rsc, "Scheduling %s promotion of %s on %s",
1413  (optional? "optional" : "required"), rsc->id,
1414  pcmk__node_name(node));
1415 
1416  if (is_expected_node(rsc, node)) {
1417  /* This could be a problem if the promote becomes necessary for
1418  * other reasons later.
1419  */
/* NOTE(review): "resouce" in the trace message below is a typo for
 * "resource". The statement after the trace (listing line 1424) is not
 * visible here.
 */
1420  pcmk__rsc_trace(rsc,
1421  "Promotion of multiply active resouce %s "
1422  "on expected node %s will be a pseudo-action",
1423  rsc->id, pcmk__node_name(node));
1425  }
1426  } else {
1427  pcmk__rsc_trace(rsc, "Not promoting %s on %s: start unrunnable",
1428  rsc->id, pcmk__node_name(node));
1429  action_list = pe__resource_actions(rsc, node, PCMK_ACTION_PROMOTE,
1430  true);
1431  for (iter = action_list; iter != NULL; iter = iter->next) {
1432  pcmk_action_t *promote = (pcmk_action_t *) iter->data;
1433 
/* NOTE(review): the statement applied to each promote action (listing line
 * 1434) is not visible here.
 */
1435  }
1436  g_list_free(action_list);
1437  }
1438 }
1439 
1448 static void
1449 demote_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
1450 {
1451  /* Since this will only be called for a primitive (possibly as an instance
1452  * of a collective resource), the resource is multiply active if it is
1453  * running on more than one node, so we want to demote on all of them as
1454  * part of recovery, regardless of which one is the desired node.
1455  */
1456  for (GList *iter = rsc->priv->active_nodes;
1457  iter != NULL; iter = iter->next) {
1458 
1459  pcmk_node_t *current = (pcmk_node_t *) iter->data;
1460 
1461  if (is_expected_node(rsc, current)) {
1462  pcmk__rsc_trace(rsc,
1463  "Skipping demote of multiply active resource %s "
1464  "on expected node %s",
1465  rsc->id, pcmk__node_name(current));
1466  } else {
1467  pcmk__rsc_trace(rsc, "Scheduling %s demotion of %s on %s",
1468  (optional? "optional" : "required"), rsc->id,
1469  pcmk__node_name(current));
1470  demote_action(rsc, current, optional);
1471  }
1472  }
1473 }
1474 
1475 static void
1476 assert_role_error(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
1477 {
1478  pcmk__assert(false);
1479 }
1480 
/* Schedule a clean-up (delete) action for a resource on a node.
 *
 * The cross-reference index of this page names this function
 * pcmk__schedule_cleanup(rsc, node, optional); the listing line carrying the
 * name and first parameters is not visible here.
 *
 * Nothing is scheduled when rsc or node is NULL (CRM_CHECK), when the
 * resource has pcmk__rsc_failed set, or when the node is unclean or not
 * online. Otherwise a notice is logged and a delete action is created with
 * the given "optional" value.
 *
 * NOTE(review): listing lines 1519 and 1521, where the two ordering calls
 * begin, are not visible here; only their trailing arguments are.
 */
1489 void
1491  bool optional)
1492 {
1493  /* If the cleanup is required, its orderings are optional, because they're
1494  * relevant only if both actions are required. Conversely, if the cleanup is
1495  * optional, the orderings make the then action required if the first action
1496  * becomes required.
1497  */
1498  uint32_t flag = optional? pcmk__ar_first_implies_then : pcmk__ar_ordered;
1499 
1500  CRM_CHECK((rsc != NULL) && (node != NULL), return);
1501 
1502  if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
1503  pcmk__rsc_trace(rsc, "Skipping clean-up of %s on %s: resource failed",
1504  rsc->id, pcmk__node_name(node));
1505  return;
1506  }
1507 
1508  if (node->details->unclean || !node->details->online) {
1509  pcmk__rsc_trace(rsc, "Skipping clean-up of %s on %s: node unavailable",
1510  rsc->id, pcmk__node_name(node));
1511  return;
1512  }
1513 
1514  crm_notice("Scheduling clean-up of %s on %s",
1515  rsc->id, pcmk__node_name(node));
1516  delete_action(rsc, node, optional);
1517 
1518  // stop -> clean-up -> start
1520  rsc, PCMK_ACTION_DELETE, flag);
1522  rsc, PCMK_ACTION_START, flag);
1523 }
1524 
/* Copy selected meta-attributes of a primitive into an XML element.
 *
 * The cross-reference index of this page names this function
 * pcmk__primitive_add_graph_meta(rsc, xml); the listing line carrying the
 * name and parameter list is not visible here.
 *
 * rsc must be a primitive and xml must be non-NULL (both asserted). The
 * values come from the rsc->priv->meta table, from the launcher of rsc or of
 * one of its ancestors, and from the "external-ip" meta-attribute.
 *
 * NOTE(review): listing lines 1547 and 1555 (where "name" is assigned before
 * being used and freed) and listing line 1565 (the start of the call inside
 * the launcher loop) are not visible here.
 */
1532 void
1534 {
1535  char *name = NULL;
1536  char *value = NULL;
1537  const pcmk_resource_t *parent = NULL;
1538 
1539  pcmk__assert(pcmk__is_primitive(rsc) && (xml != NULL));
1540 
1541  /* Clone instance numbers get set internally as meta-attributes, and are
1542  * needed in the transition graph (for example, to tell unique clone
1543  * instances apart).
1544  */
1545  value = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CLONE);
1546  if (value != NULL) {
1548  crm_xml_add(xml, name, value);
1549  free(name);
1550  }
1551 
1552  // Not sure if this one is really needed ...
1553  value = g_hash_table_lookup(rsc->priv->meta, PCMK_META_REMOTE_NODE);
1554  if (value != NULL) {
1556  crm_xml_add(xml, name, value);
1557  free(name);
1558  }
1559 
1560  /* The PCMK__META_CONTAINER meta-attribute can be set on the primitive
1561  * itself or one of its ancestors, so check them all and keep the highest.
1562  */
// The walk starts at rsc itself and follows priv->parent until it is NULL
1563  for (parent = rsc; parent != NULL; parent = parent->priv->parent) {
1564  if (parent->priv->launcher != NULL) {
1566  parent->priv->launcher->id);
1567  }
1568  }
1569 
1570  /* Bundle replica children will get their external-ip set internally as a
1571  * meta-attribute. The graph action needs it, but under a different naming
1572  * convention than other meta-attributes.
1573  */
1574  value = g_hash_table_lookup(rsc->priv->meta, "external-ip");
1575  if (value != NULL) {
1576  crm_xml_add(xml, "pcmk_external_ip", value);
1577  }
1578 }
1579 
1580 // Primitive implementation of pcmk__assignment_methods_t:add_utilization()
/* The cross-reference index of this page names this function
 * pcmk__primitive_add_utilization(); the listing line carrying the name and
 * the "rsc" parameter is not visible here.
 *
 * rsc must be a primitive, and orig_rsc and utilization must be non-NULL
 * (all asserted). Nothing is done unless rsc has pcmk__rsc_unassigned set;
 * otherwise rsc and the utilization table are handed to
 * pcmk__release_node_capacity(). all_rscs is not referenced in the body.
 */
1581 void
1583  const pcmk_resource_t *orig_rsc,
1584  GList *all_rscs, GHashTable *utilization)
1585 {
1586  pcmk__assert(pcmk__is_primitive(rsc) && (orig_rsc != NULL)
1587  && (utilization != NULL));
1588 
1589  if (!pcmk_is_set(rsc->flags, pcmk__rsc_unassigned)) {
1590  return;
1591  }
1592 
1593  pcmk__rsc_trace(orig_rsc,
1594  "%s: Adding primitive %s as colocated utilization",
1595  orig_rsc->id, rsc->id);
1596  pcmk__release_node_capacity(utilization, rsc);
1597 }
1598 
/* Get the shutdown time recorded for a node.
 *
 * Reads the node's PCMK__NODE_ATTR_SHUTDOWN attribute and parses it with
 * pcmk__scan_ll(). A value that fails to parse is logged as a warning and
 * ignored.
 *
 * Returns the parsed value, or get_effective_time() for the node's scheduler
 * when the attribute is unset, fails to parse, or parses to 0.
 *
 * NOTE(review): listing line 1611 (the final argument of the
 * pcmk__node_attr() call) is not visible here.
 */
1607 static time_t
1608 shutdown_time(pcmk_node_t *node)
1609 {
1610  const char *shutdown = pcmk__node_attr(node, PCMK__NODE_ATTR_SHUTDOWN, NULL,
1612  time_t result = 0;
1613 
1614  if (shutdown != NULL) {
1615  long long result_ll;
1616  int rc = pcmk__scan_ll(shutdown, &result_ll, 0LL);
1617 
1618  if (rc == pcmk_rc_ok) {
1619  result = (time_t) result_ll;
1620  } else {
1621  crm_warn("Ignoring invalid value '%s' for %s "
1622  PCMK__NODE_ATTR_SHUTDOWN " attribute: %s",
1623  shutdown, pcmk__node_name(node), pcmk_rc_str(rc));
1624  }
1625  }
// A result of 0 means "no usable value", so the effective time is used
1626  return (result == 0)? get_effective_time(node->priv->scheduler) : result;
1627 }
1628 
/* List-iteration callback applied to each node for a locked resource.
 *
 * data       Node being visited (treated as const pcmk_node_t *)
 * user_data  Resource being locked (treated as pcmk_resource_t *)
 *
 * Acts only on nodes other than rsc->priv->lock_node.
 *
 * NOTE(review): the body of the branch (listing lines 1643-1644) is not
 * visible here; going by the function name and the comment at its call site,
 * it presumably bans the resource from the node - confirm against the
 * original file.
 */
1636 static void
1637 ban_if_not_locked(gpointer data, gpointer user_data)
1638 {
1639  const pcmk_node_t *node = (const pcmk_node_t *) data;
1640  pcmk_resource_t *rsc = (pcmk_resource_t *) user_data;
1641 
1642  if (!pcmk__same_node(node, rsc->priv->lock_node)) {
1645  }
1646 }
1647 
1648 // Primitive implementation of pcmk__assignment_methods_t:shutdown_lock()
/* The cross-reference index of this page names this function
 * pcmk__primitive_shutdown_lock(rsc); the listing line carrying the name and
 * parameter is not visible here. rsc is asserted to be a primitive.
 *
 * Steps, in order:
 *  1. Return early for resources matched by the flag check below.
 *  2. If a lock node is already recorded but the resource is active
 *     somewhere, cancel the lock.
 *  3. Otherwise, if the resource is active on exactly one node and that node
 *     is shutting down and not unclean, record that node as the lock node and
 *     record its shutdown time as the lock time.
 *  4. If there is still no lock node, return.
 *  5. Log the lock; when scheduler->priv->shutdown_lock_ms is above zero,
 *     also register a recheck time one second after the lock expiration.
 *  6. Apply ban_if_not_locked() to every node in scheduler->nodes.
 *
 * NOTE(review): listing lines 1659, 1674 and 1702 are not visible here (the
 * end of the flag check, one statement in the cancel branch, and the rest of
 * the expiration computation).
 */
1649 void
1651 {
1652  pcmk_scheduler_t *scheduler = NULL;
1653 
1654  pcmk__assert(pcmk__is_primitive(rsc));
1655  scheduler = rsc->priv->scheduler;
1656 
1657  // Fence devices and remote connections can't be locked
1658  if (pcmk_any_flags_set(rsc->flags, pcmk__rsc_fence_device
1660  return;
1661  }
1662 
1663  if (rsc->priv->lock_node != NULL) {
1664  // The lock was obtained from resource history
1665 
1666  if (rsc->priv->active_nodes != NULL) {
1667  /* The resource was started elsewhere even though it is now
1668  * considered locked. This shouldn't be possible, but as a
1669  * failsafe, we don't want to disturb the resource now.
1670  */
1671  pcmk__rsc_info(rsc,
1672  "Cancelling shutdown lock "
1673  "because %s is already active", rsc->id);
1675  rsc->priv->lock_node = NULL;
1676  rsc->priv->lock_time = 0;
1677  }
1678 
1679  // Only a resource active on exactly one node can be locked
1680  } else if (pcmk__list_of_1(rsc->priv->active_nodes)) {
1681  pcmk_node_t *node = rsc->priv->active_nodes->data;
1682 
1683  if (node->details->shutdown) {
1684  if (node->details->unclean) {
1685  pcmk__rsc_debug(rsc,
1686  "Not locking %s to unclean %s for shutdown",
1687  rsc->id, pcmk__node_name(node));
1688  } else {
1689  rsc->priv->lock_node = node;
1690  rsc->priv->lock_time = shutdown_time(node);
1691  }
1692  }
1693  }
1694 
1695  if (rsc->priv->lock_node == NULL) {
1696  // No lock needed
1697  return;
1698  }
1699 
1700  if (scheduler->priv->shutdown_lock_ms > 0U) {
1701  time_t lock_expiration = rsc->priv->lock_time
1703 
1704  pcmk__rsc_info(rsc, "Locking %s to %s due to shutdown (expires @%lld)",
1705  rsc->id, pcmk__node_name(rsc->priv->lock_node),
1706  (long long) lock_expiration);
// The pre-increment schedules the recheck one second past the expiration
1707  pe__update_recheck_time(++lock_expiration, scheduler,
1708  "shutdown lock expiration");
1709  } else {
1710  pcmk__rsc_info(rsc, "Locking %s to %s due to shutdown",
1711  rsc->id, pcmk__node_name(rsc->priv->lock_node));
1712  }
1713 
1714  // If resource is locked to one node, ban it from all other nodes
1715  g_list_foreach(scheduler->nodes, ban_if_not_locked, rsc);
1716 }
const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle)
Definition: complex.c:1043
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:213
pcmk_resource_t * pe__resource_contains_guest_node(const pcmk_scheduler_t *scheduler, const pcmk_resource_t *rsc)
Definition: remote.c:29
pcmk_node_t * pcmk_find_node(const pcmk_scheduler_t *scheduler, const char *node_name)
Find a node by name in scheduler data.
Definition: scheduler.c:100
Relation applies only if actions are on same node.
#define crm_notice(fmt, args...)
Definition: logging.h:365
'then' is runnable (and migratable) only if 'first' is runnable
#define pcmk__sched_err(scheduler, fmt...)
const char * pcmk_role_text(enum rsc_role_e role)
Get readable description of a resource role.
Definition: roles.c:23
#define PCMK__NODE_ATTR_SHUTDOWN
Do not recover resources from outside partition.
Definition: scheduler.h:39
char data[0]
Definition: cpg.c:58
pcmk__coloc_affects
#define promote_action(rsc, node, optional)
Definition: internal.h:214
G_GNUC_INTERNAL void pcmk__new_ordering(pcmk_resource_t *first_rsc, char *first_task, pcmk_action_t *first_action, pcmk_resource_t *then_rsc, char *then_task, pcmk_action_t *then_action, uint32_t flags, pcmk_scheduler_t *sched)
G_GNUC_INTERNAL void pcmk__order_vs_unfence(const pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_action_t *action, enum pcmk__action_relation_flags order)
#define stop_action(rsc, node, optional)
Definition: internal.h:206
pcmk_resource_t * parent
Stopped.
Definition: roles.h:36
const char * name
Definition: cib.c:26
G_GNUC_INTERNAL void pcmk__create_utilization_constraints(pcmk_resource_t *rsc, const GList *allowed_nodes)
const char * pcmk_readable_score(int score)
Return a displayable static string for a score value.
Definition: scores.c:102
pcmk_node_t *(* assign)(pcmk_resource_t *rsc, const pcmk_node_t *prefer, bool stop_if_fail)
G_GNUC_INTERNAL void pcmk__release_node_capacity(GHashTable *current_utilization, const pcmk_resource_t *rsc)
G_GNUC_INTERNAL bool pcmk__any_node_available(GHashTable *nodes)
gint pe__cmp_node_name(gconstpointer a, gconstpointer b)
Definition: utils.c:149
#define pcmk__rsc_trace(rsc, fmt, args...)
bool pcmk__rsc_can_migrate(const pcmk_resource_t *rsc, const pcmk_node_t *current)
#define PCMK_ACTION_ON
Definition: actions.h:55
#define delete_action(rsc, node, optional)
Definition: internal.h:202
#define pcmk__rsc_info(rsc, fmt, args...)
#define PCMK_OPT_SHUTDOWN_LOCK
Definition: options.h:60
#define pcmk__set_rsc_flags(resource, flags_to_set)
#define PCMK_ACTION_MONITOR
Definition: actions.h:51
void pcmk__schedule_cleanup(pcmk_resource_t *rsc, const pcmk_node_t *node, bool optional)
void pcmk__primitive_add_utilization(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList *all_rscs, GHashTable *utilization)
pcmk_action_t * pe_fence_op(pcmk_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pcmk_scheduler_t *scheduler)
Definition: pe_actions.c:1245
#define PCMK_VALUE_DEFAULT
Definition: options.h:143
pcmk__scheduler_private_t * priv
Definition: scheduler.h:99
void pe_action_set_reason(pcmk_action_t *action, const char *reason, bool overwrite)
Definition: pe_actions.c:1568
Promoted.
Definition: roles.h:39
uint64_t flags
Definition: scheduler.h:89
G_GNUC_INTERNAL const pcmk_node_t * pcmk__ban_insufficient_capacity(pcmk_resource_t *rsc)
void pcmk__primitive_internal_constraints(pcmk_resource_t *rsc)
gboolean shutdown
Definition: nodes.h:62
gboolean unclean
Definition: nodes.h:58
#define PCMK__META_CLONE
const pcmk__rsc_methods_t * fns
GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node)
Find all actions of given type for a resource.
Definition: pe_actions.c:1517
const char * pcmk_rc_str(int rc)
Get a user-friendly description of a return code.
Definition: results.c:609
G_GNUC_INTERNAL bool pcmk__node_available(const pcmk_node_t *node, bool consider_score, bool consider_guest)
const char * action
Definition: pcmk_fence.c:32
G_GNUC_INTERNAL bool pcmk__assign_resource(pcmk_resource_t *rsc, pcmk_node_t *node, bool force, bool stop_if_fail)
#define RSC_ROLE_MAX
G_GNUC_INTERNAL void pcmk__apply_coloc_to_scores(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation)
enum pe_quorum_policy no_quorum_policy
Definition: scheduler.h:93
pcmk_node_t * partial_migration_source
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
Definition: xml_element.c:1015
#define pcmk__rsc_debug(rsc, fmt, args...)
#define demote_action(rsc, node, optional)
Definition: internal.h:218
pcmk__node_private_t * priv
Definition: nodes.h:85
#define PCMK_ACTION_DEMOTE
Definition: actions.h:40
int pcmk__scan_ll(const char *text, long long *result, long long default_value)
Definition: strings.c:92
#define crm_warn(fmt, args...)
Definition: logging.h:362
void pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why)
Definition: complex.c:1265
G_GNUC_INTERNAL void pcmk__add_with_this_list(GList **list, GList *addition, const pcmk_resource_t *rsc)
G_GNUC_INTERNAL int pcmk__apply_coloc_to_priority(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation)
pcmk_node_t * pcmk__primitive_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer, bool stop_if_fail)
pcmk_scheduler_t * scheduler
Actions are ordered (optionally, if no other flags are set)
#define pcmk__clear_action_flags(action, flags_to_clear)
G_GNUC_INTERNAL void pcmk__create_recurring_actions(pcmk_resource_t *rsc)
#define PCMK_OPT_STOP_ALL_RESOURCES
Definition: options.h:69
void resource_location(pcmk_resource_t *rsc, const pcmk_node_t *node, int score, const char *tag, pcmk_scheduler_t *scheduler)
Definition: utils.c:365
G_GNUC_INTERNAL void pcmk__add_this_with(GList **list, const pcmk__colocation_t *colocation, const pcmk_resource_t *rsc)
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
Definition: xml_element.c:1168
#define crm_trace(fmt, args...)
Definition: logging.h:372
#define do_crm_log(level, fmt, args...)
Log a message.
Definition: logging.h:149
G_GNUC_INTERNAL void pcmk__new_colocation(const char *id, const char *node_attr, int score, pcmk_resource_t *dependent, pcmk_resource_t *primary, const char *dependent_role_spec, const char *primary_role_spec, uint32_t flags)
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:80
const char * pcmk__multiply_active_text(const pcmk_resource_t *rsc)
Get readable description of a multiply-active recovery type.
Definition: resources.c:54
#define PCMK_META_REMOTE_NODE
Definition: options.h:108
Stop unexpected instances.
#define PCMK_ACTION_START
Definition: actions.h:63
pcmk__resource_private_t * priv
Definition: resources.h:61
bool pcmk__is_daemon
Definition: logging.c:47
Unpromoted.
Definition: roles.h:38
Wrappers for and extensions to libxml2.
G_GNUC_INTERNAL void pcmk__add_with_this(GList **list, const pcmk__colocation_t *colocation, const pcmk_resource_t *rsc)
rsc_role_e
Definition: roles.h:34
void pcmk__create_migration_actions(pcmk_resource_t *rsc, const pcmk_node_t *current)
#define PCMK_META_TARGET_ROLE
Definition: options.h:113
#define PCMK_ACTION_STOP
Definition: actions.h:66
#define pcmk__clear_rsc_flags(resource, flags_to_clear)
void pcmk__primitive_create_actions(pcmk_resource_t *rsc)
pcmk_scheduler_t * scheduler
#define PCMK_ACTION_DELETE
Definition: actions.h:39
char * pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
Generate an operation key (RESOURCE_ACTION_INTERVAL)
Definition: actions.c:195
void pcmk__primitive_shutdown_lock(pcmk_resource_t *rsc)
const pcmk_node_t * lock_node
gboolean order_actions(pcmk_action_t *first, pcmk_action_t *then, uint32_t flags)
Definition: utils.c:465
G_GNUC_INTERNAL void pcmk__order_restart_vs_unfence(gpointer data, gpointer user_data)
void pe__update_recheck_time(time_t recheck, pcmk_scheduler_t *scheduler, const char *reason)
Definition: utils.c:675
#define pcmk__order_resource_actions(first_rsc, first_task, then_rsc, then_task, flags)
pcmk_resource_t * primary
#define pcmk__assert(expr)
void pcmk__with_primitive_colocations(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList **list)
#define PCMK_ACTION_LRM_DELETE
Definition: actions.h:44
G_GNUC_INTERNAL enum pcmk__coloc_affects pcmk__colocation_affects(const pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation, bool preview)
#define PCMK_XA_CLASS
Definition: xml_names.h:246
Stop on all, start on desired.
GList * nodes
Definition: scheduler.h:97
G_GNUC_INTERNAL GList * pcmk__with_this_colocations(const pcmk_resource_t *rsc)
pcmk__action_result_t result
Definition: pcmk_fence.c:37
G_GNUC_INTERNAL void pcmk__add_dependent_scores(gpointer data, gpointer user_data)
#define start_action(rsc, node, optional)
Definition: internal.h:210
G_GNUC_INTERNAL GHashTable * pcmk__copy_node_table(GHashTable *nodes)
#define CRM_META
Definition: crm.h:75
pcmk_scheduler_t * scheduler
G_GNUC_INTERNAL void pcmk__add_this_with_list(GList **list, GList *addition, const pcmk_resource_t *rsc)
pcmk_resource_t * remote
G_GNUC_INTERNAL bool pcmk__node_unfenced(const pcmk_node_t *node)
void(* rsc_transition_fn)(pcmk_resource_t *rsc, pcmk_node_t *node, bool optional)
pcmk_resource_t * launcher
G_GNUC_INTERNAL GList * pcmk__this_with_colocations(const pcmk_resource_t *rsc)
const char * pcmk__node_attr(const pcmk_node_t *node, const char *name, const char *target, enum pcmk__rsc_node node_type)
Definition: attrs.c:114
Started.
Definition: roles.h:37
guint pcmk__timeout_ms2s(guint timeout_ms)
Definition: utils.c:425
If 'then' action becomes required, 'first' becomes optional.
#define PCMK_ACTION_PROMOTE
Definition: actions.h:57
#define PCMK_OPT_NO_QUORUM_POLICY
Definition: options.h:46
#define pe__show_node_scores(level, rsc, text, nodes, scheduler)
Definition: internal.h:164
void pe__clear_resource_history(pcmk_resource_t *rsc, const pcmk_node_t *node)
Definition: pe_actions.c:1594
#define pcmk__set_action_flags(action, flags_to_set)
uint32_t pcmk__primitive_action_flags(pcmk_action_t *action, const pcmk_node_t *node)
void pcmk__primitive_add_graph_meta(const pcmk_resource_t *rsc, xmlNode *xml)
unsigned long long flags
Definition: resources.h:69
G_GNUC_INTERNAL GList * pcmk__sort_nodes(GList *nodes, pcmk_node_t *active_node)
gboolean online
Definition: nodes.h:50
#define PCMK__META_CONTAINER
void pcmk__primitive_with_colocations(const pcmk_resource_t *rsc, const pcmk_resource_t *orig_rsc, GList **list)
Resource role is unknown.
Definition: roles.h:35
#define PCMK_VALUE_FREEZE
Definition: options.h:156
const char * parent
Definition: cib.c:27
struct pcmk__node_details * details
Definition: nodes.h:82
time_t get_effective_time(pcmk_scheduler_t *scheduler)
Definition: utils.c:402
int(* apply_coloc_score)(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent)
int pcmk__primitive_apply_coloc_score(pcmk_resource_t *dependent, const pcmk_resource_t *primary, const pcmk__colocation_t *colocation, bool for_dependent)
void pcmk__abort_dangling_migration(void *data, void *user_data)
pcmk_node_t *(* active_node)(const pcmk_resource_t *rsc, unsigned int *count_all, unsigned int *count_clean)
uint64_t flags
Definition: remote.c:211
const pcmk__assignment_methods_t * cmds
pcmk_node_t * partial_migration_target
#define PCMK_SCORE_INFINITY
Integer score to use to represent "infinity".
Definition: scores.h:26
char * crm_meta_name(const char *field)
Get the environment variable equivalent of a meta-attribute name.
Definition: nvpair.c:407
struct pcmk__node_assignment * assign
Definition: nodes.h:79