This source file includes the following definitions.
- load2str
- find_cib_loadfile
- throttle_cib_load
- throttle_load_avg
- throttle_check_thresholds
- throttle_handle_load
- throttle_mode
- throttle_send_command
- throttle_timer_cb
- throttle_record_free
- throttle_set_load_target
- throttle_update_job_max
- throttle_init
- controld_configure_throttle
- throttle_fini
- throttle_get_total_job_limit
- throttle_get_job_limit
- throttle_update
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/types.h>
13 #include <sys/stat.h>
14
15 #include <unistd.h>
16 #include <ctype.h>
17 #include <dirent.h>
18
19 #include <crm/crm.h>
20 #include <crm/common/xml.h>
21 #include <crm/cluster.h>
22
23 #include <pacemaker-controld.h>
24
25
26
27
/*
 * Load levels that a node can report.
 *
 * The values are distinct and strictly ascending, so two levels can be ranked
 * with a plain integer comparison (throttle_mode() relies on this).
 */
enum throttle_state_e {
    throttle_none    = 0x0000,  /* rendered by load2str() as "negligible" */
    throttle_low     = 0x0001,
    throttle_med     = 0x0010,
    throttle_high    = 0x0100,
    throttle_extreme = 0x1000
};
35
36 struct throttle_record_s {
37 int max;
38 enum throttle_state_e mode;
39 char *node;
40 };
41
/*
 * Multipliers that throttle_handle_load() applies to the (core-normalized)
 * load target to obtain the low, medium, and high load thresholds.
 */
#define THROTTLE_FACTOR_LOW 1.2
#define THROTTLE_FACTOR_MEDIUM 1.6
#define THROTTLE_FACTOR_HIGH 2.0

/* Job maximum of the local node, as computed by throttle_update_job_max() */
static int throttle_job_max = 0;

/* Target load as a fraction (the configured percentage divided by 100);
 * throttle_mode() skips the load average check when this is not positive
 */
static float throttle_load_target = 0.0;
48
49 static GHashTable *throttle_records = NULL;
50 static mainloop_timer_t *throttle_timer = NULL;
51
52 static const char *
53 load2str(enum throttle_state_e mode)
54 {
55 switch (mode) {
56 case throttle_extreme: return "extreme";
57 case throttle_high: return "high";
58 case throttle_med: return "medium";
59 case throttle_low: return "low";
60 case throttle_none: return "negligible";
61 default: return "undetermined";
62 }
63 }
64
65 #if HAVE_LINUX_PROCFS
66
67
68
69
70
71
72
73
74
75
76 static char *
77 find_cib_loadfile(void)
78 {
79 pid_t pid = pcmk__procfs_pid_of(PCMK__SERVER_BASED);
80
81 return pid? crm_strdup_printf("/proc/%lld/stat", (long long) pid) : NULL;
82 }
83
84 static bool
85 throttle_cib_load(float *load)
86 {
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125 static char *loadfile = NULL;
126 static time_t last_call = 0;
127 static long ticks_per_s = 0;
128 static unsigned long last_utime, last_stime;
129
130 char buffer[64*1024];
131 FILE *stream = NULL;
132 time_t now = time(NULL);
133
134 if(load == NULL) {
135 return FALSE;
136 } else {
137 *load = 0.0;
138 }
139
140 if(loadfile == NULL) {
141 last_call = 0;
142 last_utime = 0;
143 last_stime = 0;
144 loadfile = find_cib_loadfile();
145 if (loadfile == NULL) {
146 crm_warn("Couldn't find CIB load file");
147 return FALSE;
148 }
149 ticks_per_s = sysconf(_SC_CLK_TCK);
150 crm_trace("Found %s", loadfile);
151 }
152
153 stream = fopen(loadfile, "r");
154 if(stream == NULL) {
155 int rc = errno;
156
157 crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc);
158 free(loadfile); loadfile = NULL;
159 return FALSE;
160 }
161
162 if(fgets(buffer, sizeof(buffer), stream)) {
163 char *comm = pcmk__assert_alloc(1, 256);
164 char state = 0;
165 int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0, tpgid = 0;
166 unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0, utime = 0, stime = 0;
167
168 rc = sscanf(buffer, "%d %[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
169 &pid, comm, &state,
170 &ppid, &pgrp, &session, &tty_nr, &tpgid,
171 &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime);
172 free(comm);
173
174 if(rc != 15) {
175 crm_err("Only %d of 15 fields found in %s", rc, loadfile);
176 fclose(stream);
177 return FALSE;
178
179 } else if(last_call > 0
180 && last_call < now
181 && last_utime <= utime
182 && last_stime <= stime) {
183
184 time_t elapsed = now - last_call;
185 unsigned long delta_utime = utime - last_utime;
186 unsigned long delta_stime = stime - last_stime;
187
188 *load = (delta_utime + delta_stime);
189 *load /= ticks_per_s;
190 *load /= elapsed;
191 crm_debug("cib load: %f (%lu ticks in %lds)", *load, delta_utime + delta_stime, (long)elapsed);
192
193 } else {
194 crm_debug("Init %lu + %lu ticks at %ld (%lu tps)", utime, stime, (long)now, ticks_per_s);
195 }
196
197 last_call = now;
198 last_utime = utime;
199 last_stime = stime;
200
201 fclose(stream);
202 return TRUE;
203 }
204
205 fclose(stream);
206 return FALSE;
207 }
208
209 static bool
210 throttle_load_avg(float *load)
211 {
212 char buffer[256];
213 FILE *stream = NULL;
214 const char *loadfile = "/proc/loadavg";
215
216 if(load == NULL) {
217 return FALSE;
218 }
219
220 stream = fopen(loadfile, "r");
221 if(stream == NULL) {
222 int rc = errno;
223 crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc);
224 return FALSE;
225 }
226
227 if(fgets(buffer, sizeof(buffer), stream)) {
228 char *nl = strstr(buffer, "\n");
229
230
231 *load = strtof(buffer, NULL);
232 if(nl) { nl[0] = 0; }
233
234 fclose(stream);
235 return TRUE;
236 }
237
238 fclose(stream);
239 return FALSE;
240 }
241
242
243
244
245
246
247
248
249
250
251
252 static enum throttle_state_e
253 throttle_check_thresholds(float load, const char *desc,
254 const float thresholds[4])
255 {
256 if (load > thresholds[3]) {
257 crm_notice("Extreme %s detected: %f", desc, load);
258 return throttle_extreme;
259
260 } else if (load > thresholds[2]) {
261 crm_notice("High %s detected: %f", desc, load);
262 return throttle_high;
263
264 } else if (load > thresholds[1]) {
265 crm_info("Moderate %s detected: %f", desc, load);
266 return throttle_med;
267
268 } else if (load > thresholds[0]) {
269 crm_debug("Noticeable %s detected: %f", desc, load);
270 return throttle_low;
271 }
272
273 crm_trace("Negligible %s detected: %f", desc, load);
274 return throttle_none;
275 }
276
277 static enum throttle_state_e
278 throttle_handle_load(float load, const char *desc, int cores)
279 {
280 float normalize;
281 float thresholds[4];
282
283 if (cores == 1) {
284
285 normalize = 0.6;
286
287 } else {
288
289 normalize = cores;
290 }
291 thresholds[0] = throttle_load_target * normalize * THROTTLE_FACTOR_LOW;
292 thresholds[1] = throttle_load_target * normalize * THROTTLE_FACTOR_MEDIUM;
293 thresholds[2] = throttle_load_target * normalize * THROTTLE_FACTOR_HIGH;
294 thresholds[3] = load + 1.0;
295
296 return throttle_check_thresholds(load, desc, thresholds);
297 }
298 #endif
299
300 static enum throttle_state_e
301 throttle_mode(void)
302 {
303 enum throttle_state_e mode = throttle_none;
304
305 #if HAVE_LINUX_PROCFS
306 unsigned int cores;
307 float load;
308 float thresholds[4];
309
310 cores = pcmk__procfs_num_cores();
311 if(throttle_cib_load(&load)) {
312 float cib_max_cpu = 0.95;
313
314
315
316
317
318
319
320
321
322
323
324
325 if(cores == 1) {
326 cib_max_cpu = 0.4;
327 }
328 if(throttle_load_target > 0.0 && throttle_load_target < cib_max_cpu) {
329 cib_max_cpu = throttle_load_target;
330 }
331
332 thresholds[0] = cib_max_cpu * 0.8;
333 thresholds[1] = cib_max_cpu * 0.9;
334 thresholds[2] = cib_max_cpu;
335
336 thresholds[3] = cib_max_cpu * 1.5;
337
338 mode = throttle_check_thresholds(load, "CIB load", thresholds);
339 }
340
341 if(throttle_load_target <= 0) {
342
343 return mode;
344 }
345
346 if(throttle_load_avg(&load)) {
347 enum throttle_state_e cpu_load;
348
349 cpu_load = throttle_handle_load(load, "CPU load", cores);
350 if (cpu_load > mode) {
351 mode = cpu_load;
352 }
353 crm_debug("Current load is %f across %u core(s)", load, cores);
354 }
355 #endif
356 return mode;
357 }
358
359 static void
360 throttle_send_command(enum throttle_state_e mode)
361 {
362 xmlNode *xml = NULL;
363 static enum throttle_state_e last = -1;
364
365 if(mode != last) {
366 crm_info("New throttle mode: %s load (was %s)",
367 load2str(mode), load2str(last));
368 last = mode;
369
370 xml = pcmk__new_request(pcmk_ipc_controld, CRM_SYSTEM_CRMD, NULL,
371 CRM_SYSTEM_CRMD, CRM_OP_THROTTLE, NULL);
372 crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MODE, mode);
373 crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MAX, throttle_job_max);
374
375 pcmk__cluster_send_message(NULL, pcmk_ipc_controld, xml);
376 pcmk__xml_free(xml);
377 }
378 }
379
380 static gboolean
381 throttle_timer_cb(gpointer data)
382 {
383 throttle_send_command(throttle_mode());
384 return TRUE;
385 }
386
387 static void
388 throttle_record_free(gpointer p)
389 {
390 struct throttle_record_s *r = p;
391 free(r->node);
392 free(r);
393 }
394
395 static void
396 throttle_set_load_target(float target)
397 {
398 throttle_load_target = target;
399 }
400
401
402
403
404
405
406
407
408 static void
409 throttle_update_job_max(const char *preference)
410 {
411 long long max = 0LL;
412
413
414 const char *env_limit = pcmk__env_option(PCMK__ENV_NODE_ACTION_LIMIT);
415
416 if (env_limit != NULL) {
417 int rc = pcmk__scan_ll(env_limit, &max, 0LL);
418
419 if (rc != pcmk_rc_ok) {
420 crm_warn("Ignoring local option PCMK_" PCMK__ENV_NODE_ACTION_LIMIT
421 " because '%s' is not a valid value: %s",
422 env_limit, pcmk_rc_str(rc));
423 env_limit = NULL;
424 }
425 }
426 if (env_limit == NULL) {
427
428 CRM_LOG_ASSERT(pcmk__scan_ll(preference, &max, 0LL) == pcmk_rc_ok);
429 }
430
431 if (max > 0) {
432 throttle_job_max = (max >= INT_MAX)? INT_MAX : (int) max;
433 } else {
434
435 throttle_job_max = 2 * pcmk__procfs_num_cores();
436 }
437 }
438
439 void
440 throttle_init(void)
441 {
442 if(throttle_records == NULL) {
443 throttle_records = pcmk__strkey_table(NULL, throttle_record_free);
444 throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL);
445 }
446
447 throttle_update_job_max(NULL);
448 mainloop_timer_start(throttle_timer);
449 }
450
451
452
453
454
455
456
457 void
458 controld_configure_throttle(GHashTable *options)
459 {
460 const char *value = g_hash_table_lookup(options, PCMK_OPT_LOAD_THRESHOLD);
461
462 if (value != NULL) {
463 throttle_set_load_target(strtof(value, NULL) / 100.0);
464 }
465
466 value = g_hash_table_lookup(options, PCMK_OPT_NODE_ACTION_LIMIT);
467 throttle_update_job_max(value);
468 }
469
470 void
471 throttle_fini(void)
472 {
473 if (throttle_timer != NULL) {
474 mainloop_timer_del(throttle_timer);
475 throttle_timer = NULL;
476 }
477 if (throttle_records != NULL) {
478 g_hash_table_destroy(throttle_records);
479 throttle_records = NULL;
480 }
481 }
482
483 int
484 throttle_get_total_job_limit(int l)
485 {
486
487 GHashTableIter iter;
488 int limit = l;
489 int peers = pcmk__cluster_num_active_nodes();
490 struct throttle_record_s *r = NULL;
491
492 g_hash_table_iter_init(&iter, throttle_records);
493
494 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &r)) {
495 switch(r->mode) {
496
497 case throttle_extreme:
498 if(limit == 0 || limit > peers/4) {
499 limit = QB_MAX(1, peers/4);
500 }
501 break;
502
503 case throttle_high:
504 if(limit == 0 || limit > peers/2) {
505 limit = QB_MAX(1, peers/2);
506 }
507 break;
508 default:
509 break;
510 }
511 }
512 if(limit == l) {
513
514 } else if(l == 0) {
515 crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d", limit);
516
517 } else {
518 crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d instead of %d", limit, l);
519 }
520 return limit;
521 }
522
523 int
524 throttle_get_job_limit(const char *node)
525 {
526 int jobs = 1;
527 struct throttle_record_s *r = NULL;
528
529 r = g_hash_table_lookup(throttle_records, node);
530 if(r == NULL) {
531 r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s));
532 r->node = pcmk__str_copy(node);
533 r->mode = throttle_low;
534 r->max = throttle_job_max;
535 crm_trace("Defaulting to local values for unknown node %s", node);
536
537 g_hash_table_insert(throttle_records, r->node, r);
538 }
539
540 switch(r->mode) {
541 case throttle_extreme:
542 case throttle_high:
543 jobs = 1;
544 break;
545 case throttle_med:
546 jobs = QB_MAX(1, r->max / 4);
547 break;
548 case throttle_low:
549 jobs = QB_MAX(1, r->max / 2);
550 break;
551 case throttle_none:
552 jobs = QB_MAX(1, r->max);
553 break;
554 default:
555 crm_err("Unknown throttle mode %.4x on %s", r->mode, node);
556 break;
557 }
558 return jobs;
559 }
560
561 void
562 throttle_update(xmlNode *xml)
563 {
564 int max = 0;
565 int mode = 0;
566 struct throttle_record_s *r = NULL;
567 const char *from = crm_element_value(xml, PCMK__XA_SRC);
568
569 crm_element_value_int(xml, PCMK__XA_CRM_LIMIT_MODE, &mode);
570 crm_element_value_int(xml, PCMK__XA_CRM_LIMIT_MAX, &max);
571
572 r = g_hash_table_lookup(throttle_records, from);
573
574 if(r == NULL) {
575 r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s));
576 r->node = pcmk__str_copy(from);
577 g_hash_table_insert(throttle_records, r->node, r);
578 }
579
580 r->max = max;
581 r->mode = (enum throttle_state_e) mode;
582
583 crm_debug("Node %s has %s load and supports at most %d jobs; new job limit %d",
584 from, load2str((enum throttle_state_e) mode), max,
585 throttle_get_job_limit(from));
586 }