pacemaker  2.1.7-0f7f88312f
Scalable High-Availability cluster resource manager
pcmk_sched_nodes.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2023 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 #include <crm/msg_xml.h>
13 #include <pacemaker-internal.h>
14 #include <pacemaker.h>
15 #include "libpacemaker_private.h"
16 
29 bool
30 pcmk__node_available(const pcmk_node_t *node, bool consider_score,
31  bool consider_guest)
32 {
33  if ((node == NULL) || (node->details == NULL) || !node->details->online
34  || node->details->shutdown || node->details->unclean
35  || node->details->standby || node->details->maintenance) {
36  return false;
37  }
38 
39  if (consider_score && (node->weight < 0)) {
40  return false;
41  }
42 
43  // @TODO Go through all callers to see which should set consider_guest
44  if (consider_guest && pe__is_guest_node(node)) {
46 
47  if (guest->fns->location(guest, NULL, FALSE) == NULL) {
48  return false;
49  }
50  }
51 
52  return true;
53 }
54 
63 GHashTable *
64 pcmk__copy_node_table(GHashTable *nodes)
65 {
66  GHashTable *new_table = NULL;
67  GHashTableIter iter;
68  pcmk_node_t *node = NULL;
69 
70  if (nodes == NULL) {
71  return NULL;
72  }
73  new_table = pcmk__strkey_table(NULL, free);
74  g_hash_table_iter_init(&iter, nodes);
75  while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
76  pcmk_node_t *new_node = pe__copy_node(node);
77 
78  g_hash_table_insert(new_table, (gpointer) new_node->details->id,
79  new_node);
80  }
81  return new_table;
82 }
83 
92 static void
93 destroy_node_tables(gpointer data)
94 {
95  g_hash_table_destroy((GHashTable *) data);
96 }
97 
113 void
114 pcmk__copy_node_tables(const pcmk_resource_t *rsc, GHashTable **copy)
115 {
116  CRM_ASSERT((rsc != NULL) && (copy != NULL));
117 
118  if (*copy == NULL) {
119  *copy = pcmk__strkey_table(NULL, destroy_node_tables);
120  }
121 
122  g_hash_table_insert(*copy, rsc->id,
124 
125  for (const GList *iter = rsc->children; iter != NULL; iter = iter->next) {
126  pcmk__copy_node_tables((const pcmk_resource_t *) iter->data, copy);
127  }
128 }
129 
144 void
146 {
147  CRM_ASSERT((rsc != NULL) && (backup != NULL));
148 
149  g_hash_table_destroy(rsc->allowed_nodes);
150 
151  // Copy to avoid danger with multiple restores
152  rsc->allowed_nodes = g_hash_table_lookup(backup, rsc->id);
154 
155  for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
156  pcmk__restore_node_tables((pcmk_resource_t *) iter->data, backup);
157  }
158 }
159 
169 GList *
170 pcmk__copy_node_list(const GList *list, bool reset)
171 {
172  GList *result = NULL;
173 
174  for (const GList *iter = list; iter != NULL; iter = iter->next) {
175  pcmk_node_t *new_node = NULL;
176  pcmk_node_t *this_node = iter->data;
177 
178  new_node = pe__copy_node(this_node);
179  if (reset) {
180  new_node->weight = 0;
181  }
182  result = g_list_prepend(result, new_node);
183  }
184  return result;
185 }
186 
201 static gint
202 compare_nodes(gconstpointer a, gconstpointer b, gpointer data)
203 {
204  const pcmk_node_t *node1 = (const pcmk_node_t *) a;
205  const pcmk_node_t *node2 = (const pcmk_node_t *) b;
206  const pcmk_node_t *preferred = (const pcmk_node_t *) data;
207 
208  int node1_score = -INFINITY;
209  int node2_score = -INFINITY;
210 
211  int result = 0;
212 
213  if (a == NULL) {
214  return 1;
215  }
216  if (b == NULL) {
217  return -1;
218  }
219 
220  // Compare node scores
221 
222  if (pcmk__node_available(node1, false, false)) {
223  node1_score = node1->weight;
224  }
225  if (pcmk__node_available(node2, false, false)) {
226  node2_score = node2->weight;
227  }
228 
229  if (node1_score > node2_score) {
230  crm_trace("%s before %s (score %d > %d)",
231  pe__node_name(node1), pe__node_name(node2),
232  node1_score, node2_score);
233  return -1;
234  }
235 
236  if (node1_score < node2_score) {
237  crm_trace("%s after %s (score %d < %d)",
238  pe__node_name(node1), pe__node_name(node2),
239  node1_score, node2_score);
240  return 1;
241  }
242 
243  // If appropriate, compare node utilization
244 
245  if (pcmk__str_eq(node1->details->data_set->placement_strategy, "minimal",
246  pcmk__str_casei)) {
247  goto equal;
248  }
249 
250  if (pcmk__str_eq(node1->details->data_set->placement_strategy, "balanced",
251  pcmk__str_casei)) {
253  if (result < 0) {
254  crm_trace("%s before %s (greater capacity by %d attributes)",
255  pe__node_name(node1), pe__node_name(node2), result * -1);
256  return -1;
257  } else if (result > 0) {
258  crm_trace("%s after %s (lower capacity by %d attributes)",
259  pe__node_name(node1), pe__node_name(node2), result);
260  return 1;
261  }
262  }
263 
264  // Compare number of resources already assigned to node
265 
267  crm_trace("%s before %s (%d resources < %d)",
268  pe__node_name(node1), pe__node_name(node2),
270  return -1;
271 
273  crm_trace("%s after %s (%d resources > %d)",
274  pe__node_name(node1), pe__node_name(node2),
276  return 1;
277  }
278 
279  // Check whether one node is already running desired resource
280 
281  if (preferred != NULL) {
282  if (pe__same_node(preferred, node1)) {
283  crm_trace("%s before %s (preferred node)",
284  pe__node_name(node1), pe__node_name(node2));
285  return -1;
286  } else if (pe__same_node(preferred, node2)) {
287  crm_trace("%s after %s (not preferred node)",
288  pe__node_name(node1), pe__node_name(node2));
289  return 1;
290  }
291  }
292 
293  // If all else is equal, prefer node with lowest-sorting name
294 equal:
295  result = strcmp(node1->details->uname, node2->details->uname);
296  if (result < 0) {
297  crm_trace("%s before %s (name)",
298  pe__node_name(node1), pe__node_name(node2));
299  return -1;
300  } else if (result > 0) {
301  crm_trace("%s after %s (name)",
302  pe__node_name(node1), pe__node_name(node2));
303  return 1;
304  }
305 
306  crm_trace("%s == %s", pe__node_name(node1), pe__node_name(node2));
307  return 0;
308 }
309 
319 GList *
320 pcmk__sort_nodes(GList *nodes, pcmk_node_t *active_node)
321 {
322  return g_list_sort_with_data(nodes, compare_nodes, active_node);
323 }
324 
334 bool
335 pcmk__any_node_available(GHashTable *nodes)
336 {
337  GHashTableIter iter;
338  const pcmk_node_t *node = NULL;
339 
340  if (nodes == NULL) {
341  return false;
342  }
343  g_hash_table_iter_init(&iter, nodes);
344  while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
345  if (pcmk__node_available(node, true, false)) {
346  return true;
347  }
348  }
349  return false;
350 }
351 
358 void
360 {
361  int base_health = 0;
362  enum pcmk__health_strategy strategy;
363  const char *strategy_str = pe_pref(scheduler->config_hash,
365 
366  strategy = pcmk__parse_health_strategy(strategy_str);
367  if (strategy == pcmk__health_strategy_none) {
368  return;
369  }
370  crm_info("Applying node health strategy '%s'", strategy_str);
371 
372  // The progressive strategy can use a base health score
373  if (strategy == pcmk__health_strategy_progressive) {
374  base_health = pe__health_score(PCMK__OPT_NODE_HEALTH_BASE, scheduler);
375  }
376 
377  for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
378  pcmk_node_t *node = (pcmk_node_t *) iter->data;
379  int health = pe__sum_node_health_scores(node, base_health);
380 
381  // An overall health score of 0 has no effect
382  if (health == 0) {
383  continue;
384  }
385  crm_info("Overall system health of %s is %d",
386  pe__node_name(node), health);
387 
388  // Use node health as a location score for each resource on the node
389  for (GList *r = scheduler->resources; r != NULL; r = r->next) {
390  pcmk_resource_t *rsc = (pcmk_resource_t *) r->data;
391 
392  bool constrain = true;
393 
394  if (health < 0) {
395  /* Negative health scores do not apply to resources with
396  * allow-unhealthy-nodes=true.
397  */
398  constrain = !crm_is_true(g_hash_table_lookup(rsc->meta,
400  }
401  if (constrain) {
402  pcmk__new_location(strategy_str, rsc, health, NULL, node);
403  } else {
404  pe_rsc_trace(rsc, "%s is immune from health ban on %s",
405  rsc->id, pe__node_name(node));
406  }
407  }
408  }
409 }
410 
421 pcmk_node_t *
423 {
424  GHashTable *allowed_nodes = NULL;
425 
426  if ((rsc == NULL) || (node == NULL)) {
427  return NULL;
428  } else if (rsc->parent == NULL) {
429  allowed_nodes = rsc->allowed_nodes;
430  } else {
431  allowed_nodes = rsc->parent->allowed_nodes;
432  }
433  return g_hash_table_lookup(allowed_nodes, node->details->id);
434 }
pcmk_node_t * pe__copy_node(const pcmk_node_t *this_node)
Definition: utils.c:89
char data[0]
Definition: cpg.c:55
#define INFINITY
Definition: crm.h:98
pcmk_node_t *(* location)(const pcmk_resource_t *rsc, GList **list, int current)
List nodes where a resource (or any of its children) is.
Definition: resources.h:339
GList * children
Resource's child resources, if any.
Definition: resources.h:475
G_GNUC_INTERNAL pe__location_t * pcmk__new_location(const char *id, pcmk_resource_t *rsc, int node_score, const char *discover_mode, pcmk_node_t *foo_node)
void pcmk__copy_node_tables(const pcmk_resource_t *rsc, GHashTable **copy)
GHashTable * meta
Resource's meta-attributes.
Definition: resources.h:471
High Level API.
pcmk_node_t node2
const char * pe_pref(GHashTable *options, const char *name)
Definition: common.c:314
GList * pcmk__sort_nodes(GList *nodes, pcmk_node_t *active_node)
enum pcmk__health_strategy pcmk__parse_health_strategy(const char *value)
Definition: health.c:41
pcmk_scheduler_t * data_set
Cluster that node is part of.
Definition: nodes.h:126
pcmk_resource_t * container
Resource containing this one, if any.
Definition: resources.h:480
Implementation of pcmk_scheduler_t.
Definition: scheduler.h:172
GList * resources
Resources in cluster.
Definition: scheduler.h:196
bool pcmk__any_node_available(GHashTable *nodes)
GList * nodes
Nodes in cluster.
Definition: scheduler.h:195
int weight
Node score for a given resource.
Definition: nodes.h:131
pcmk_resource_t * parent
Resource's parent resource, if any.
Definition: resources.h:413
Implementation of pcmk_resource_t.
Definition: resources.h:399
#define PCMK__OPT_NODE_HEALTH_BASE
#define crm_trace(fmt, args...)
Definition: logging.h:387
int num_resources
Definition: nodes.h:108
GList * pcmk__copy_node_list(const GList *list, bool reset)
struct pe_node_shared_s * details
Basic node information.
Definition: nodes.h:134
const char * uname
Node name in cluster.
Definition: nodes.h:68
GHashTable * config_hash
Cluster properties.
Definition: scheduler.h:187
gboolean standby
Whether in standby mode.
Definition: nodes.h:73
Implementation of pcmk_node_t.
Definition: nodes.h:130
void pcmk__apply_node_health(pcmk_scheduler_t *scheduler)
const char * placement_strategy
Value of placement-strategy property.
Definition: scheduler.h:180
#define PCMK__OPT_NODE_HEALTH_STRATEGY
const char * id
Node ID at the cluster layer.
Definition: nodes.h:67
G_GNUC_INTERNAL int pcmk__compare_node_capacities(const pcmk_node_t *node1, const pcmk_node_t *node2)
#define PCMK__META_ALLOW_UNHEALTHY_NODES
bool pe__is_guest_node(const pcmk_node_t *node)
Definition: remote.c:33
GHashTable * pcmk__copy_node_table(GHashTable *nodes)
GHashTable * pcmk__strkey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
Definition: strings.c:608
pcmk__action_result_t result
Definition: pcmk_fence.c:35
pcmk_rsc_methods_t * fns
Resource object methods.
Definition: resources.h:416
pcmk_scheduler_t * scheduler
#define CRM_ASSERT(expr)
Definition: results.h:42
void pcmk__restore_node_tables(pcmk_resource_t *rsc, GHashTable *backup)
pcmk_node_t * pcmk__top_allowed_node(const pcmk_resource_t *rsc, const pcmk_node_t *node)
pcmk__health_strategy
gboolean shutdown
Whether shutting down.
Definition: nodes.h:78
gboolean maintenance
Whether in maintenance mode.
Definition: nodes.h:81
bool pcmk__node_available(const pcmk_node_t *node, bool consider_score, bool consider_guest)
gboolean crm_is_true(const char *s)
Definition: strings.c:416
int pe__sum_node_health_scores(const pcmk_node_t *node, int base_health)
Definition: pe_health.c:96
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:37
pcmk_node_t node1
gboolean unclean
Whether node requires fencing.
Definition: nodes.h:76
#define crm_info(fmt, args...)
Definition: logging.h:384
gboolean online
Whether online.
Definition: nodes.h:72
pcmk_resource_t * remote_rsc
Remote connection resource for node, if it is a Pacemaker Remote node.
Definition: nodes.h:111
char * id
Resource ID in configuration.
Definition: resources.h:400
GHashTable * allowed_nodes
Nodes where resource may run (key is node ID, not name)
Definition: resources.h:466