pacemaker  2.1.1-52dc28db4
Scalable High-Availability cluster resource manager
pcmk_sched_utilization.c
Go to the documentation of this file.
1 /*
2  * Copyright 2014-2021 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU General Public License version 2
7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 #include <crm/msg_xml.h>
12 #include <pacemaker-internal.h>
13 
14 static GList *find_colocated_rscs(GList *colocated_rscs, pe_resource_t * rsc,
15  pe_resource_t * orig_rsc);
16 
17 static GList *group_find_colocated_rscs(GList *colocated_rscs, pe_resource_t * rsc,
18  pe_resource_t * orig_rsc);
19 
20 static void group_add_unallocated_utilization(GHashTable * all_utilization, pe_resource_t * rsc,
21  GList *all_rscs);
22 
23 struct compare_data {
24  const pe_node_t *node1;
25  const pe_node_t *node2;
26  int result;
27 };
28 
/*
 * Convert a utilization attribute string to an integer.
 *
 * s: attribute value as text (may be NULL)
 *
 * Returns 0 when s is NULL; otherwise returns whatever value
 * pcmk__scan_min_int() stored when scanning s with INT_MIN as the minimum.
 * The scanner's own return code is intentionally not examined here.
 */
static int
utilization_value(const char *s)
{
    int parsed = 0;

    if (s == NULL) {
        return parsed;
    }

    /* @TODO It would make sense to restrict utilization values to nonnegative
     * integers, but the documentation just says "integers" and we didn't
     * restrict them initially, so for backward compatibility, allow any
     * integer.
     */
    pcmk__scan_min_int(s, &parsed, INT_MIN);
    return parsed;
}
44 
45 static void
46 do_compare_capacity1(gpointer key, gpointer value, gpointer user_data)
47 {
48  int node1_capacity = 0;
49  int node2_capacity = 0;
50  struct compare_data *data = user_data;
51 
52  node1_capacity = utilization_value(value);
53  node2_capacity = utilization_value(g_hash_table_lookup(data->node2->details->utilization, key));
54 
55  if (node1_capacity > node2_capacity) {
56  data->result--;
57  } else if (node1_capacity < node2_capacity) {
58  data->result++;
59  }
60 }
61 
62 static void
63 do_compare_capacity2(gpointer key, gpointer value, gpointer user_data)
64 {
65  int node1_capacity = 0;
66  int node2_capacity = 0;
67  struct compare_data *data = user_data;
68 
69  if (g_hash_table_lookup_extended(data->node1->details->utilization, key, NULL, NULL)) {
70  return;
71  }
72 
73  node1_capacity = 0;
74  node2_capacity = utilization_value(value);
75 
76  if (node1_capacity > node2_capacity) {
77  data->result--;
78  } else if (node1_capacity < node2_capacity) {
79  data->result++;
80  }
81 }
82 
83 /* rc < 0 if 'node1' has more capacity remaining
84  * rc > 0 if 'node1' has less capacity remaining
85  */
86 int
87 compare_capacity(const pe_node_t * node1, const pe_node_t * node2)
88 {
89  struct compare_data data;
90 
91  data.node1 = node1;
92  data.node2 = node2;
93  data.result = 0;
94 
95  g_hash_table_foreach(node1->details->utilization, do_compare_capacity1, &data);
96  g_hash_table_foreach(node2->details->utilization, do_compare_capacity2, &data);
97 
98  return data.result;
99 }
100 
101 struct calculate_data {
102  GHashTable *current_utilization;
103  gboolean plus;
104 };
105 
106 static void
107 do_calculate_utilization(gpointer key, gpointer value, gpointer user_data)
108 {
109  const char *current = NULL;
110  char *result = NULL;
111  struct calculate_data *data = user_data;
112 
113  current = g_hash_table_lookup(data->current_utilization, key);
114  if (data->plus) {
115  result = pcmk__itoa(utilization_value(current) + utilization_value(value));
116  g_hash_table_replace(data->current_utilization, strdup(key), result);
117 
118  } else if (current) {
119  result = pcmk__itoa(utilization_value(current) - utilization_value(value));
120  g_hash_table_replace(data->current_utilization, strdup(key), result);
121  }
122 }
123 
124 /* Specify 'plus' to FALSE when allocating
125  * Otherwise to TRUE when deallocating
126  */
127 void
128 calculate_utilization(GHashTable * current_utilization,
129  GHashTable * utilization, gboolean plus)
130 {
131  struct calculate_data data;
132 
133  data.current_utilization = current_utilization;
134  data.plus = plus;
135 
136  g_hash_table_foreach(utilization, do_calculate_utilization, &data);
137 }
138 
139 
140 struct capacity_data {
141  pe_node_t *node;
142  const char *rsc_id;
143  gboolean is_enough;
144 };
145 
146 static void
147 check_capacity(gpointer key, gpointer value, gpointer user_data)
148 {
149  int required = 0;
150  int remaining = 0;
151  struct capacity_data *data = user_data;
152 
153  required = utilization_value(value);
154  remaining = utilization_value(g_hash_table_lookup(data->node->details->utilization, key));
155 
156  if (required > remaining) {
157  CRM_ASSERT(data->rsc_id);
158  CRM_ASSERT(data->node);
159 
160  crm_debug("Node %s does not have enough %s for %s: required=%d remaining=%d",
161  data->node->details->uname, (char *)key, data->rsc_id, required, remaining);
162  data->is_enough = FALSE;
163  }
164 }
165 
166 static gboolean
167 have_enough_capacity(pe_node_t * node, const char * rsc_id, GHashTable * utilization)
168 {
169  struct capacity_data data;
170 
171  data.node = node;
172  data.rsc_id = rsc_id;
173  data.is_enough = TRUE;
174 
175  g_hash_table_foreach(utilization, check_capacity, &data);
176 
177  return data.is_enough;
178 }
179 
180 
181 static void
182 native_add_unallocated_utilization(GHashTable * all_utilization, pe_resource_t * rsc)
183 {
184  if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
185  return;
186  }
187 
188  calculate_utilization(all_utilization, rsc->utilization, TRUE);
189 }
190 
191 static void
192 add_unallocated_utilization(GHashTable * all_utilization, pe_resource_t * rsc,
193  GList *all_rscs, pe_resource_t * orig_rsc)
194 {
195  if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) {
196  return;
197  }
198 
199  if (rsc->variant == pe_native) {
200  pe_rsc_trace(orig_rsc, "%s: Adding %s as colocated utilization",
201  orig_rsc->id, rsc->id);
202  native_add_unallocated_utilization(all_utilization, rsc);
203 
204  } else if (rsc->variant == pe_group) {
205  pe_rsc_trace(orig_rsc, "%s: Adding %s as colocated utilization",
206  orig_rsc->id, rsc->id);
207  group_add_unallocated_utilization(all_utilization, rsc, all_rscs);
208 
209  } else if (pe_rsc_is_clone(rsc)) {
210  GList *gIter1 = NULL;
211  gboolean existing = FALSE;
212 
213  /* Check if there's any child already existing in the list */
214  gIter1 = rsc->children;
215  for (; gIter1 != NULL; gIter1 = gIter1->next) {
216  pe_resource_t *child = (pe_resource_t *) gIter1->data;
217  GList *gIter2 = NULL;
218 
219  if (g_list_find(all_rscs, child)) {
220  existing = TRUE;
221 
222  } else {
223  /* Check if there's any child of another cloned group already existing in the list */
224  gIter2 = child->children;
225  for (; gIter2 != NULL; gIter2 = gIter2->next) {
226  pe_resource_t *grandchild = (pe_resource_t *) gIter2->data;
227 
228  if (g_list_find(all_rscs, grandchild)) {
229  pe_rsc_trace(orig_rsc, "%s: Adding %s as colocated utilization",
230  orig_rsc->id, child->id);
231  add_unallocated_utilization(all_utilization, child, all_rscs, orig_rsc);
232  existing = TRUE;
233  break;
234  }
235  }
236  }
237  }
238 
239  // rsc->children is always non-NULL but this makes static analysis happy
240  if (!existing && (rsc->children != NULL)) {
241  pe_resource_t *first_child = (pe_resource_t *) rsc->children->data;
242 
243  pe_rsc_trace(orig_rsc, "%s: Adding %s as colocated utilization",
244  orig_rsc->id, ID(first_child->xml));
245  add_unallocated_utilization(all_utilization, first_child, all_rscs, orig_rsc);
246  }
247  }
248 }
249 
250 static GHashTable *
251 sum_unallocated_utilization(pe_resource_t * rsc, GList *colocated_rscs)
252 {
253  GList *gIter = NULL;
254  GList *all_rscs = NULL;
255  GHashTable *all_utilization = pcmk__strkey_table(free, free);
256 
257  all_rscs = g_list_copy(colocated_rscs);
258  if (g_list_find(all_rscs, rsc) == FALSE) {
259  all_rscs = g_list_append(all_rscs, rsc);
260  }
261 
262  for (gIter = all_rscs; gIter != NULL; gIter = gIter->next) {
263  pe_resource_t *listed_rsc = (pe_resource_t *) gIter->data;
264 
265  if (!pcmk_is_set(listed_rsc->flags, pe_rsc_provisional)) {
266  continue;
267  }
268 
269  pe_rsc_trace(rsc, "%s: Processing unallocated colocated %s", rsc->id, listed_rsc->id);
270  add_unallocated_utilization(all_utilization, listed_rsc, all_rscs, rsc);
271  }
272 
273  g_list_free(all_rscs);
274 
275  return all_utilization;
276 }
277 
278 static GList *
279 find_colocated_rscs(GList *colocated_rscs, pe_resource_t * rsc, pe_resource_t * orig_rsc)
280 {
281  GList *gIter = NULL;
282 
283  if (rsc == NULL) {
284  return colocated_rscs;
285 
286  } else if (g_list_find(colocated_rscs, rsc)) {
287  return colocated_rscs;
288  }
289 
290  crm_trace("%s: %s is supposed to be colocated with %s", orig_rsc->id, rsc->id, orig_rsc->id);
291  colocated_rscs = g_list_append(colocated_rscs, rsc);
292 
293  for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
294  pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
295  pe_resource_t *rsc_rh = constraint->rsc_rh;
296 
297  /* Break colocation loop */
298  if (rsc_rh == orig_rsc) {
299  continue;
300  }
301 
302  if (constraint->score == INFINITY
303  && filter_colocation_constraint(rsc, rsc_rh, constraint, TRUE) == influence_rsc_location) {
304 
305  if (rsc_rh->variant == pe_group) {
306  /* Need to use group_variant_data */
307  colocated_rscs = group_find_colocated_rscs(colocated_rscs, rsc_rh, orig_rsc);
308 
309  } else {
310  colocated_rscs = find_colocated_rscs(colocated_rscs, rsc_rh, orig_rsc);
311  }
312  }
313  }
314 
315  for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
316  pcmk__colocation_t *constraint = (pcmk__colocation_t *) gIter->data;
317  pe_resource_t *rsc_lh = constraint->rsc_lh;
318 
319  /* Break colocation loop */
320  if (rsc_lh == orig_rsc) {
321  continue;
322  }
323 
324  if (pe_rsc_is_clone(rsc_lh) == FALSE && pe_rsc_is_clone(rsc)) {
325  /* We do not know if rsc_lh will be colocated with orig_rsc in this case */
326  continue;
327  }
328 
329  if (constraint->score == INFINITY
330  && filter_colocation_constraint(rsc_lh, rsc, constraint, TRUE) == influence_rsc_location) {
331 
332  if (rsc_lh->variant == pe_group) {
333  /* Need to use group_variant_data */
334  colocated_rscs = group_find_colocated_rscs(colocated_rscs, rsc_lh, orig_rsc);
335 
336  } else {
337  colocated_rscs = find_colocated_rscs(colocated_rscs, rsc_lh, orig_rsc);
338  }
339  }
340  }
341 
342  return colocated_rscs;
343 }
344 
345 void
347 {
348  CRM_CHECK(rsc && prefer && data_set, return);
349  if (!pcmk__str_eq(data_set->placement_strategy, "default", pcmk__str_casei)) {
350  GHashTableIter iter;
351  GList *colocated_rscs = NULL;
352  gboolean any_capable = FALSE;
353  pe_node_t *node = NULL;
354 
355  colocated_rscs = find_colocated_rscs(colocated_rscs, rsc, rsc);
356  if (colocated_rscs) {
357  GHashTable *unallocated_utilization = NULL;
358  char *rscs_id = crm_strdup_printf("%s and its colocated resources",
359  rsc->id);
360  pe_node_t *most_capable_node = NULL;
361 
362  unallocated_utilization = sum_unallocated_utilization(rsc, colocated_rscs);
363 
364  g_hash_table_iter_init(&iter, rsc->allowed_nodes);
365  while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
366  if (can_run_resources(node) == FALSE || node->weight < 0) {
367  continue;
368  }
369 
370  if (have_enough_capacity(node, rscs_id, unallocated_utilization)) {
371  any_capable = TRUE;
372  }
373 
374  if (most_capable_node == NULL ||
375  compare_capacity(node, most_capable_node) < 0) {
376  /* < 0 means 'node' is more capable */
377  most_capable_node = node;
378  }
379  }
380 
381  if (any_capable) {
382  g_hash_table_iter_init(&iter, rsc->allowed_nodes);
383  while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
384  if (can_run_resources(node) == FALSE || node->weight < 0) {
385  continue;
386  }
387 
388  if (have_enough_capacity(node, rscs_id, unallocated_utilization) == FALSE) {
389  pe_rsc_debug(rsc,
390  "Resource %s and its colocated resources"
391  " cannot be allocated to node %s: not enough capacity",
392  rsc->id, node->details->uname);
393  resource_location(rsc, node, -INFINITY, "__limit_utilization__", data_set);
394  }
395  }
396 
397  } else if (*prefer == NULL) {
398  *prefer = most_capable_node;
399  }
400 
401  if (unallocated_utilization) {
402  g_hash_table_destroy(unallocated_utilization);
403  }
404 
405  g_list_free(colocated_rscs);
406  free(rscs_id);
407  }
408 
409  if (any_capable == FALSE) {
410  g_hash_table_iter_init(&iter, rsc->allowed_nodes);
411  while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
412  if (can_run_resources(node) == FALSE || node->weight < 0) {
413  continue;
414  }
415 
416  if (have_enough_capacity(node, rsc->id, rsc->utilization) == FALSE) {
417  pe_rsc_debug(rsc,
418  "Resource %s cannot be allocated to node %s:"
419  " not enough capacity",
420  rsc->id, node->details->uname);
421  resource_location(rsc, node, -INFINITY, "__limit_utilization__", data_set);
422  }
423  }
424  }
425  pe__show_node_weights(true, rsc, "Post-utilization", rsc->allowed_nodes, data_set);
426  }
427 }
428 
429 #define VARIANT_GROUP 1
430 #include <lib/pengine/variant.h>
431 
432 GList *
433 group_find_colocated_rscs(GList *colocated_rscs, pe_resource_t * rsc, pe_resource_t * orig_rsc)
434 {
435  group_variant_data_t *group_data = NULL;
436 
437  get_group_variant_data(group_data, rsc);
438  if (group_data->colocated || pe_rsc_is_clone(rsc->parent)) {
439  GList *gIter = rsc->children;
440 
441  for (; gIter != NULL; gIter = gIter->next) {
442  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
443 
444  colocated_rscs = find_colocated_rscs(colocated_rscs, child_rsc, orig_rsc);
445  }
446 
447  } else {
448  if (group_data->first_child) {
449  colocated_rscs = find_colocated_rscs(colocated_rscs, group_data->first_child, orig_rsc);
450  }
451  }
452 
453  colocated_rscs = find_colocated_rscs(colocated_rscs, rsc, orig_rsc);
454 
455  return colocated_rscs;
456 }
457 
458 static void
459 group_add_unallocated_utilization(GHashTable * all_utilization, pe_resource_t * rsc,
460  GList *all_rscs)
461 {
462  group_variant_data_t *group_data = NULL;
463 
464  get_group_variant_data(group_data, rsc);
465  if (group_data->colocated || pe_rsc_is_clone(rsc->parent)) {
466  GList *gIter = rsc->children;
467 
468  for (; gIter != NULL; gIter = gIter->next) {
469  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
470 
471  if (pcmk_is_set(child_rsc->flags, pe_rsc_provisional) &&
472  g_list_find(all_rscs, child_rsc) == FALSE) {
473  native_add_unallocated_utilization(all_utilization, child_rsc);
474  }
475  }
476 
477  } else {
478  if (group_data->first_child &&
479  pcmk_is_set(group_data->first_child->flags, pe_rsc_provisional) &&
480  g_list_find(all_rscs, group_data->first_child) == FALSE) {
481  native_add_unallocated_utilization(all_utilization, group_data->first_child);
482  }
483  }
484 }
485 
486 
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:218
void process_utilization(pe_resource_t *rsc, pe_node_t **prefer, pe_working_set_t *data_set)
pe_resource_t * rsc_lh
#define pe_rsc_debug(rsc, fmt, args...)
Definition: internal.h:19
char data[0]
Definition: cpg.c:55
#define INFINITY
Definition: crm.h:99
GList * rsc_cons
Definition: pe_types.h:358
#define pe__show_node_weights(level, rsc, text, nodes, data_set)
Definition: internal.h:353
int pcmk__scan_min_int(const char *text, int *result, int minimum)
Definition: strings.c:127
GList * children
Definition: pe_types.h:378
void calculate_utilization(GHashTable *current_utilization, GHashTable *utilization, gboolean plus)
xmlNode * xml
Definition: pe_types.h:324
pe_resource_t * rsc_rh
void resource_location(pe_resource_t *rsc, pe_node_t *node, int score, const char *tag, pe_working_set_t *data_set)
Definition: utils.c:1594
GList * rsc_cons_lhs
Definition: pe_types.h:357
gboolean can_run_resources(const pe_node_t *node)
#define pe_rsc_provisional
Definition: pe_types.h:258
#define crm_debug(fmt, args...)
Definition: logging.h:355
int compare_capacity(const pe_node_t *node1, const pe_node_t *node2)
#define crm_trace(fmt, args...)
Definition: logging.h:356
char * crm_strdup_printf(char const *format,...) G_GNUC_PRINTF(1
#define pcmk_is_set(g, f)
Convenience alias for pcmk_all_flags_set(), to check single flag.
Definition: util.h:114
struct pe_node_shared_s * details
Definition: pe_types.h:244
unsigned long long flags
Definition: pe_types.h:349
GHashTable * utilization
Definition: pe_types.h:376
enum pe_obj_types variant
Definition: pe_types.h:331
const char * placement_strategy
Definition: pe_types.h:144
enum filter_colocation_res filter_colocation_constraint(pe_resource_t *rsc_lh, pe_resource_t *rsc_rh, pcmk__colocation_t *constraint, gboolean preview)
GHashTable * pcmk__strkey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
Definition: strings.c:610
#define CRM_ASSERT(expr)
Definition: results.h:42
GHashTable * utilization
Definition: pe_types.h:235
#define pe_rsc_trace(rsc, fmt, args...)
Definition: internal.h:20
#define ID(x)
Definition: msg_xml.h:456
pe_resource_t * parent
Definition: pe_types.h:329
char * id
Definition: pe_types.h:322
GHashTable * allowed_nodes
Definition: pe_types.h:369