root/daemons/controld/controld_throttle.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. load2str
  2. throttle_check_thresholds
  3. throttle_handle_load
  4. throttle_mode
  5. throttle_send_command
  6. throttle_timer_cb
  7. throttle_record_free
  8. throttle_set_load_target
  9. throttle_update_job_max
  10. throttle_init
  11. controld_configure_throttle
  12. throttle_fini
  13. throttle_get_total_job_limit
  14. throttle_get_job_limit
  15. throttle_update

   1 /*
   2  * Copyright 2013-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/types.h>
  13 #include <sys/stat.h>
  14 
  15 #include <unistd.h>
  16 #include <ctype.h>
  17 #include <dirent.h>
  18 
  19 #include <crm/crm.h>
  20 #include <crm/common/xml.h>
  21 #include <crm/cluster.h>
  22 
  23 #include <pacemaker-controld.h>
  24 
  25 /* These values don't need to be bits, but these particular values must be kept
  26  * for backward compatibility during rolling upgrades.
  27  */
  28 enum throttle_state_e {
  29     throttle_none       = 0x0000,
  30     throttle_low        = 0x0001,
  31     throttle_med        = 0x0010,
  32     throttle_high       = 0x0100,
  33     throttle_extreme    = 0x1000,
  34 };
  35 
  36 struct throttle_record_s {
  37     int max;
  38     enum throttle_state_e mode;
  39     char *node;
  40 };
  41 
  42 static int throttle_job_max = 0;
  43 static float throttle_load_target = 0.0;
  44 
  45 #define THROTTLE_FACTOR_LOW    1.2
  46 #define THROTTLE_FACTOR_MEDIUM 1.6
  47 #define THROTTLE_FACTOR_HIGH   2.0
  48 
  49 static GHashTable *throttle_records = NULL;
  50 static mainloop_timer_t *throttle_timer = NULL;
  51 
  52 static const char *
  53 load2str(enum throttle_state_e mode)
     /* [previous][next][first][last][top][bottom][index][help] */
  54 {
  55     switch (mode) {
  56         case throttle_extreme:  return "extreme";
  57         case throttle_high:     return "high";
  58         case throttle_med:      return "medium";
  59         case throttle_low:      return "low";
  60         case throttle_none:     return "negligible";
  61         default:                return "undetermined";
  62     }
  63 }
  64 
  65 /*!
  66  * \internal
  67  * \brief Check a load value against throttling thresholds
  68  *
  69  * \param[in] load        Load value to check
  70  * \param[in] desc        Description of metric (for logging)
  71  * \param[in] thresholds  Low/medium/high/extreme thresholds
  72  *
  73  * \return Throttle mode corresponding to load value
  74  */
  75 static enum throttle_state_e
  76 throttle_check_thresholds(float load, const char *desc,
     /* [previous][next][first][last][top][bottom][index][help] */
  77                           const float thresholds[4])
  78 {
  79     if (load > thresholds[3]) {
  80         crm_notice("Extreme %s detected: %f", desc, load);
  81         return throttle_extreme;
  82 
  83     } else if (load > thresholds[2]) {
  84         crm_notice("High %s detected: %f", desc, load);
  85         return throttle_high;
  86 
  87     } else if (load > thresholds[1]) {
  88         crm_info("Moderate %s detected: %f", desc, load);
  89         return throttle_med;
  90 
  91     } else if (load > thresholds[0]) {
  92         crm_debug("Noticeable %s detected: %f", desc, load);
  93         return throttle_low;
  94     }
  95 
  96     crm_trace("Negligible %s detected: %f", desc, load);
  97     return throttle_none;
  98 }
  99 
 100 static enum throttle_state_e
 101 throttle_handle_load(float load, const char *desc, int cores)
     /* [previous][next][first][last][top][bottom][index][help] */
 102 {
 103     float normalize;
 104     float thresholds[4];
 105 
 106     if (cores == 1) {
 107         /* On a single core machine, a load of 1.0 is already too high */
 108         normalize = 0.6;
 109 
 110     } else {
 111         /* Normalize the load to be per-core */
 112         normalize = cores;
 113     }
 114     thresholds[0] = throttle_load_target * normalize * THROTTLE_FACTOR_LOW;
 115     thresholds[1] = throttle_load_target * normalize * THROTTLE_FACTOR_MEDIUM;
 116     thresholds[2] = throttle_load_target * normalize * THROTTLE_FACTOR_HIGH;
 117     thresholds[3] = load + 1.0; /* never extreme */
 118 
 119     return throttle_check_thresholds(load, desc, thresholds);
 120 }
 121 
 122 static enum throttle_state_e
 123 throttle_mode(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 124 {
 125     enum throttle_state_e mode = throttle_none;
 126 
 127     unsigned int cores = pcmk__procfs_num_cores();
 128     float load;
 129     float thresholds[4];
 130 
 131     if (pcmk__throttle_cib_load(PCMK__SERVER_BASED, &load)) {
 132         float cib_max_cpu = 0.95;
 133 
 134         /* The CIB is a single-threaded task and thus cannot consume more
 135          * than 100% of a CPU (and 1/cores of the overall system load).
 136          *
 137          * On a many-cored system, the CIB might therefore be maxed out (causing
 138          * operations to fail or appear to fail) even though the overall system
 139          * load is still reasonable.
 140          *
 141          * Therefore, the 'normal' thresholds can not apply here, and we need a
 142          * special case.
 143          */
 144         if (cores == 1) {
 145             cib_max_cpu = 0.4;
 146         }
 147         if ((throttle_load_target > 0.0) && (throttle_load_target < cib_max_cpu)) {
 148             cib_max_cpu = throttle_load_target;
 149         }
 150 
 151         thresholds[0] = cib_max_cpu * 0.8;
 152         thresholds[1] = cib_max_cpu * 0.9;
 153         thresholds[2] = cib_max_cpu;
 154         /* Can only happen on machines with a low number of cores */
 155         thresholds[3] = cib_max_cpu * 1.5;
 156 
 157         mode = throttle_check_thresholds(load, "CIB load", thresholds);
 158     }
 159 
 160     if (throttle_load_target <= 0) {
 161         /* If we ever make this a valid value, the cluster will at least behave
 162          * as expected
 163          */
 164         return mode;
 165     }
 166 
 167     if (pcmk__throttle_load_avg(&load)) {
 168         enum throttle_state_e cpu_load;
 169 
 170         cpu_load = throttle_handle_load(load, "CPU load", cores);
 171         if (cpu_load > mode) {
 172             mode = cpu_load;
 173         }
 174         crm_debug("Current load is %f across %u core(s)", load, cores);
 175     }
 176 
 177     return mode;
 178 }
 179 
 180 static void
 181 throttle_send_command(enum throttle_state_e mode)
     /* [previous][next][first][last][top][bottom][index][help] */
 182 {
 183     xmlNode *xml = NULL;
 184     static enum throttle_state_e last = -1;
 185 
 186     if(mode != last) {
 187         crm_info("New throttle mode: %s load (was %s)",
 188                  load2str(mode), load2str(last));
 189         last = mode;
 190 
 191         xml = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD, NULL,
 192                                 CRM_SYSTEM_CRMD, CRM_OP_THROTTLE, NULL);
 193         crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MODE, mode);
 194         crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MAX, throttle_job_max);
 195 
 196         pcmk__cluster_send_message(NULL, pcmk_ipc_controld, xml);
 197         pcmk__xml_free(xml);
 198     }
 199 }
 200 
 201 static gboolean
 202 throttle_timer_cb(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 203 {
 204     throttle_send_command(throttle_mode());
 205     return TRUE;
 206 }
 207 
 208 static void
 209 throttle_record_free(gpointer p)
     /* [previous][next][first][last][top][bottom][index][help] */
 210 {
 211     struct throttle_record_s *r = p;
 212     free(r->node);
 213     free(r);
 214 }
 215 
 216 static void
 217 throttle_set_load_target(float target)
     /* [previous][next][first][last][top][bottom][index][help] */
 218 {
 219     throttle_load_target = target;
 220 }
 221 
 222 /*!
 223  * \internal
 224  * \brief Update the maximum number of simultaneous jobs
 225  *
 226  * \param[in] preference  Cluster-wide \c PCMK_OPT_NODE_ACTION_LIMIT from the
 227  *                        CIB
 228  */
 229 static void
 230 throttle_update_job_max(const char *preference)
     /* [previous][next][first][last][top][bottom][index][help] */
 231 {
 232     long long max = 0LL;
 233 
 234     // Per-node override
 235     const char *env_limit = pcmk__env_option(PCMK__ENV_NODE_ACTION_LIMIT);
 236 
 237     if (env_limit != NULL) {
 238         int rc = pcmk__scan_ll(env_limit, &max, 0LL);
 239 
 240         if (rc != pcmk_rc_ok) {
 241             crm_warn("Ignoring local option PCMK_" PCMK__ENV_NODE_ACTION_LIMIT
 242                      " because '%s' is not a valid value: %s",
 243                      env_limit, pcmk_rc_str(rc));
 244             env_limit = NULL;
 245         }
 246     }
 247     if (env_limit == NULL) {
 248         // Option validator should prevent invalid values
 249         CRM_LOG_ASSERT(pcmk__scan_ll(preference, &max, 0LL) == pcmk_rc_ok);
 250     }
 251 
 252     if (max > 0) {
 253         throttle_job_max = (max >= INT_MAX)? INT_MAX : (int) max;
 254     } else {
 255         // Default is based on the number of cores detected
 256         throttle_job_max = 2 * pcmk__procfs_num_cores();
 257     }
 258 }
 259 
 260 void
 261 throttle_init(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 262 {
 263     if(throttle_records == NULL) {
 264         throttle_records = pcmk__strkey_table(NULL, throttle_record_free);
 265         throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL);
 266     }
 267 
 268     throttle_update_job_max(NULL);
 269     mainloop_timer_start(throttle_timer);
 270 }
 271 
 272 /*!
 273  * \internal
 274  * \brief Configure throttle options based on the CIB
 275  *
 276  * \param[in,out] options  Name/value pairs for configured options
 277  */
 278 void
 279 controld_configure_throttle(GHashTable *options)
     /* [previous][next][first][last][top][bottom][index][help] */
 280 {
 281     const char *value = g_hash_table_lookup(options, PCMK_OPT_LOAD_THRESHOLD);
 282 
 283     if (value != NULL) {
 284         throttle_set_load_target(strtof(value, NULL) / 100.0);
 285     }
 286 
 287     value = g_hash_table_lookup(options, PCMK_OPT_NODE_ACTION_LIMIT);
 288     throttle_update_job_max(value);
 289 }
 290 
 291 void
 292 throttle_fini(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 293 {
 294     if (throttle_timer != NULL) {
 295         mainloop_timer_del(throttle_timer);
 296         throttle_timer = NULL;
 297     }
 298     if (throttle_records != NULL) {
 299         g_hash_table_destroy(throttle_records);
 300         throttle_records = NULL;
 301     }
 302 }
 303 
 304 int
 305 throttle_get_total_job_limit(int l)
     /* [previous][next][first][last][top][bottom][index][help] */
 306 {
 307     /* Cluster-wide limit */
 308     GHashTableIter iter;
 309     int limit = l;
 310     int peers = pcmk__cluster_num_active_nodes();
 311     struct throttle_record_s *r = NULL;
 312 
 313     g_hash_table_iter_init(&iter, throttle_records);
 314 
 315     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &r)) {
 316         switch(r->mode) {
 317 
 318             case throttle_extreme:
 319                 if(limit == 0 || limit > peers/4) {
 320                     limit = QB_MAX(1, peers/4);
 321                 }
 322                 break;
 323 
 324             case throttle_high:
 325                 if(limit == 0 || limit > peers/2) {
 326                     limit = QB_MAX(1, peers/2);
 327                 }
 328                 break;
 329             default:
 330                 break;
 331         }
 332     }
 333     if(limit == l) {
 334 
 335     } else if(l == 0) {
 336         crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d", limit);
 337 
 338     } else {
 339         crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d instead of %d", limit, l);
 340     }
 341     return limit;
 342 }
 343 
 344 int
 345 throttle_get_job_limit(const char *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 346 {
 347     int jobs = 1;
 348     struct throttle_record_s *r = NULL;
 349 
 350     r = g_hash_table_lookup(throttle_records, node);
 351     if(r == NULL) {
 352         r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s));
 353         r->node = pcmk__str_copy(node);
 354         r->mode = throttle_low;
 355         r->max = throttle_job_max;
 356         crm_trace("Defaulting to local values for unknown node %s", node);
 357 
 358         g_hash_table_insert(throttle_records, r->node, r);
 359     }
 360 
 361     switch(r->mode) {
 362         case throttle_extreme:
 363         case throttle_high:
 364             jobs = 1; /* At least one job must always be allowed */
 365             break;
 366         case throttle_med:
 367             jobs = QB_MAX(1, r->max / 4);
 368             break;
 369         case throttle_low:
 370             jobs = QB_MAX(1, r->max / 2);
 371             break;
 372         case throttle_none:
 373             jobs = QB_MAX(1, r->max);
 374             break;
 375         default:
 376             crm_err("Unknown throttle mode %.4x on %s", r->mode, node);
 377             break;
 378     }
 379     return jobs;
 380 }
 381 
 382 void
 383 throttle_update(xmlNode *xml)
     /* [previous][next][first][last][top][bottom][index][help] */
 384 {
 385     int max = 0;
 386     int mode = 0;
 387     struct throttle_record_s *r = NULL;
 388     const char *from = crm_element_value(xml, PCMK__XA_SRC);
 389 
 390     crm_element_value_int(xml, PCMK__XA_CRM_LIMIT_MODE, &mode);
 391     crm_element_value_int(xml, PCMK__XA_CRM_LIMIT_MAX, &max);
 392 
 393     r = g_hash_table_lookup(throttle_records, from);
 394 
 395     if(r == NULL) {
 396         r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s));
 397         r->node = pcmk__str_copy(from);
 398         g_hash_table_insert(throttle_records, r->node, r);
 399     }
 400 
 401     r->max = max;
 402     r->mode = (enum throttle_state_e) mode;
 403 
 404     crm_debug("Node %s has %s load and supports at most %d jobs; new job limit %d",
 405               from, load2str((enum throttle_state_e) mode), max,
 406               throttle_get_job_limit(from));
 407 }

/* [previous][next][first][last][top][bottom][index][help] */