This source file includes the following definitions.
- load2str
- find_cib_loadfile
- throttle_cib_load
- throttle_load_avg
- throttle_check_thresholds
- throttle_handle_load
- throttle_mode
- throttle_send_command
- throttle_timer_cb
- throttle_record_free
- throttle_set_load_target
- throttle_update_job_max
- throttle_init
- controld_configure_throttle
- throttle_fini
- throttle_get_total_job_limit
- throttle_get_job_limit
- throttle_update
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/types.h>
13 #include <sys/stat.h>
14
15 #include <unistd.h>
16 #include <ctype.h>
17 #include <dirent.h>
18
19 #include <crm/crm.h>
20 #include <crm/common/xml.h>
21 #include <crm/cluster.h>
22
23 #include <pacemaker-controld.h>
24
25
26
27
/*
 * Load levels that a node can report.
 *
 * The values increase with severity, so two levels can be compared
 * numerically (throttle_mode() picks the larger of two levels this way).
 * Each level uses its own hexadecimal digit.
 */
enum throttle_state_e {
    throttle_none    = 0x0000,  /* load2str(): "negligible" */
    throttle_low     = 0x0001,  /* load2str(): "low" */
    throttle_med     = 0x0010,  /* load2str(): "medium" */
    throttle_high    = 0x0100,  /* load2str(): "high" */
    throttle_extreme = 0x1000,  /* load2str(): "extreme" */
};
35
36 struct throttle_record_s {
37 int max;
38 enum throttle_state_e mode;
39 char *node;
40 };
41
/* Local job maximum, (re)computed by throttle_update_job_max() */
static int throttle_job_max = 0;

/* Load target as a fraction (configured percentage / 100); a value <= 0
 * makes throttle_mode() skip the CPU load average check
 */
static float throttle_load_target = 0.0;

/* Multipliers applied to the normalized load target to obtain the low,
 * medium and high CPU load thresholds (see throttle_handle_load())
 */
#define THROTTLE_FACTOR_LOW    1.2
#define THROTTLE_FACTOR_MEDIUM 1.6
#define THROTTLE_FACTOR_HIGH   2.0
48
49 static GHashTable *throttle_records = NULL;
50 static mainloop_timer_t *throttle_timer = NULL;
51
52 static const char *
53 load2str(enum throttle_state_e mode)
54 {
55 switch (mode) {
56 case throttle_extreme: return "extreme";
57 case throttle_high: return "high";
58 case throttle_med: return "medium";
59 case throttle_low: return "low";
60 case throttle_none: return "negligible";
61 default: return "undetermined";
62 }
63 }
64
65 #if HAVE_LINUX_PROCFS
66
67
68
69
70
71
72
73
74
75
76 static char *
77 find_cib_loadfile(void)
78 {
79 pid_t pid = pcmk__procfs_pid_of("pacemaker-based");
80
81 return pid? crm_strdup_printf("/proc/%lld/stat", (long long) pid) : NULL;
82 }
83
84 static bool
85 throttle_cib_load(float *load)
86 {
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125 static char *loadfile = NULL;
126 static time_t last_call = 0;
127 static long ticks_per_s = 0;
128 static unsigned long last_utime, last_stime;
129
130 char buffer[64*1024];
131 FILE *stream = NULL;
132 time_t now = time(NULL);
133
134 if(load == NULL) {
135 return FALSE;
136 } else {
137 *load = 0.0;
138 }
139
140 if(loadfile == NULL) {
141 last_call = 0;
142 last_utime = 0;
143 last_stime = 0;
144 loadfile = find_cib_loadfile();
145 if (loadfile == NULL) {
146 crm_warn("Couldn't find CIB load file");
147 return FALSE;
148 }
149 ticks_per_s = sysconf(_SC_CLK_TCK);
150 crm_trace("Found %s", loadfile);
151 }
152
153 stream = fopen(loadfile, "r");
154 if(stream == NULL) {
155 int rc = errno;
156
157 crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc);
158 free(loadfile); loadfile = NULL;
159 return FALSE;
160 }
161
162 if(fgets(buffer, sizeof(buffer), stream)) {
163 char *comm = pcmk__assert_alloc(1, 256);
164 char state = 0;
165 int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0, tpgid = 0;
166 unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0, utime = 0, stime = 0;
167
168 rc = sscanf(buffer, "%d %[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
169 &pid, comm, &state,
170 &ppid, &pgrp, &session, &tty_nr, &tpgid,
171 &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime);
172 free(comm);
173
174 if(rc != 15) {
175 crm_err("Only %d of 15 fields found in %s", rc, loadfile);
176 fclose(stream);
177 return FALSE;
178
179 } else if(last_call > 0
180 && last_call < now
181 && last_utime <= utime
182 && last_stime <= stime) {
183
184 time_t elapsed = now - last_call;
185 unsigned long delta_utime = utime - last_utime;
186 unsigned long delta_stime = stime - last_stime;
187
188 *load = (delta_utime + delta_stime);
189 *load /= ticks_per_s;
190 *load /= elapsed;
191 crm_debug("cib load: %f (%lu ticks in %lds)", *load, delta_utime + delta_stime, (long)elapsed);
192
193 } else {
194 crm_debug("Init %lu + %lu ticks at %ld (%lu tps)", utime, stime, (long)now, ticks_per_s);
195 }
196
197 last_call = now;
198 last_utime = utime;
199 last_stime = stime;
200
201 fclose(stream);
202 return TRUE;
203 }
204
205 fclose(stream);
206 return FALSE;
207 }
208
209 static bool
210 throttle_load_avg(float *load)
211 {
212 char buffer[256];
213 FILE *stream = NULL;
214 const char *loadfile = "/proc/loadavg";
215
216 if(load == NULL) {
217 return FALSE;
218 }
219
220 stream = fopen(loadfile, "r");
221 if(stream == NULL) {
222 int rc = errno;
223 crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc);
224 return FALSE;
225 }
226
227 if(fgets(buffer, sizeof(buffer), stream)) {
228 char *nl = strstr(buffer, "\n");
229
230
231 *load = strtof(buffer, NULL);
232 if(nl) { nl[0] = 0; }
233
234 fclose(stream);
235 return TRUE;
236 }
237
238 fclose(stream);
239 return FALSE;
240 }
241
242
243
244
245
246
247
248
249
250
251
252 static enum throttle_state_e
253 throttle_check_thresholds(float load, const char *desc,
254 const float thresholds[4])
255 {
256 if (load > thresholds[3]) {
257 crm_notice("Extreme %s detected: %f", desc, load);
258 return throttle_extreme;
259
260 } else if (load > thresholds[2]) {
261 crm_notice("High %s detected: %f", desc, load);
262 return throttle_high;
263
264 } else if (load > thresholds[1]) {
265 crm_info("Moderate %s detected: %f", desc, load);
266 return throttle_med;
267
268 } else if (load > thresholds[0]) {
269 crm_debug("Noticeable %s detected: %f", desc, load);
270 return throttle_low;
271 }
272
273 crm_trace("Negligible %s detected: %f", desc, load);
274 return throttle_none;
275 }
276
277 static enum throttle_state_e
278 throttle_handle_load(float load, const char *desc, int cores)
279 {
280 float normalize;
281 float thresholds[4];
282
283 if (cores == 1) {
284
285 normalize = 0.6;
286
287 } else {
288
289 normalize = cores;
290 }
291 thresholds[0] = throttle_load_target * normalize * THROTTLE_FACTOR_LOW;
292 thresholds[1] = throttle_load_target * normalize * THROTTLE_FACTOR_MEDIUM;
293 thresholds[2] = throttle_load_target * normalize * THROTTLE_FACTOR_HIGH;
294 thresholds[3] = load + 1.0;
295
296 return throttle_check_thresholds(load, desc, thresholds);
297 }
298 #endif
299
300 static enum throttle_state_e
301 throttle_mode(void)
302 {
303 enum throttle_state_e mode = throttle_none;
304
305 #if HAVE_LINUX_PROCFS
306 unsigned int cores;
307 float load;
308 float thresholds[4];
309
310 cores = pcmk__procfs_num_cores();
311 if(throttle_cib_load(&load)) {
312 float cib_max_cpu = 0.95;
313
314
315
316
317
318
319
320
321
322
323
324
325 if(cores == 1) {
326 cib_max_cpu = 0.4;
327 }
328 if(throttle_load_target > 0.0 && throttle_load_target < cib_max_cpu) {
329 cib_max_cpu = throttle_load_target;
330 }
331
332 thresholds[0] = cib_max_cpu * 0.8;
333 thresholds[1] = cib_max_cpu * 0.9;
334 thresholds[2] = cib_max_cpu;
335
336 thresholds[3] = cib_max_cpu * 1.5;
337
338 mode = throttle_check_thresholds(load, "CIB load", thresholds);
339 }
340
341 if(throttle_load_target <= 0) {
342
343 return mode;
344 }
345
346 if(throttle_load_avg(&load)) {
347 enum throttle_state_e cpu_load;
348
349 cpu_load = throttle_handle_load(load, "CPU load", cores);
350 if (cpu_load > mode) {
351 mode = cpu_load;
352 }
353 crm_debug("Current load is %f across %u core(s)", load, cores);
354 }
355 #endif
356 return mode;
357 }
358
359 static void
360 throttle_send_command(enum throttle_state_e mode)
361 {
362 xmlNode *xml = NULL;
363 static enum throttle_state_e last = -1;
364
365 if(mode != last) {
366 crm_info("New throttle mode: %s load (was %s)",
367 load2str(mode), load2str(last));
368 last = mode;
369
370 xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
371 crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MODE, mode);
372 crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MAX, throttle_job_max);
373
374 pcmk__cluster_send_message(NULL, crm_msg_crmd, xml);
375 free_xml(xml);
376 }
377 }
378
379 static gboolean
380 throttle_timer_cb(gpointer data)
381 {
382 throttle_send_command(throttle_mode());
383 return TRUE;
384 }
385
386 static void
387 throttle_record_free(gpointer p)
388 {
389 struct throttle_record_s *r = p;
390 free(r->node);
391 free(r);
392 }
393
394 static void
395 throttle_set_load_target(float target)
396 {
397 throttle_load_target = target;
398 }
399
400
401
402
403
404
405
406
407 static void
408 throttle_update_job_max(const char *preference)
409 {
410 long long max = 0LL;
411
412
413 const char *env_limit = pcmk__env_option(PCMK__ENV_NODE_ACTION_LIMIT);
414
415 if (env_limit != NULL) {
416 int rc = pcmk__scan_ll(env_limit, &max, 0LL);
417
418 if (rc != pcmk_rc_ok) {
419 crm_warn("Ignoring local option PCMK_" PCMK__ENV_NODE_ACTION_LIMIT
420 " because '%s' is not a valid value: %s",
421 env_limit, pcmk_rc_str(rc));
422 env_limit = NULL;
423 }
424 }
425 if (env_limit == NULL) {
426
427 CRM_LOG_ASSERT(pcmk__scan_ll(preference, &max, 0LL) == pcmk_rc_ok);
428 }
429
430 if (max > 0) {
431 throttle_job_max = (max >= INT_MAX)? INT_MAX : (int) max;
432 } else {
433
434 throttle_job_max = 2 * pcmk__procfs_num_cores();
435 }
436 }
437
438 void
439 throttle_init(void)
440 {
441 if(throttle_records == NULL) {
442 throttle_records = pcmk__strkey_table(NULL, throttle_record_free);
443 throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL);
444 }
445
446 throttle_update_job_max(NULL);
447 mainloop_timer_start(throttle_timer);
448 }
449
450
451
452
453
454
455
456 void
457 controld_configure_throttle(GHashTable *options)
458 {
459 const char *value = g_hash_table_lookup(options, PCMK_OPT_LOAD_THRESHOLD);
460
461 if (value != NULL) {
462 throttle_set_load_target(strtof(value, NULL) / 100.0);
463 }
464
465 value = g_hash_table_lookup(options, PCMK_OPT_NODE_ACTION_LIMIT);
466 throttle_update_job_max(value);
467 }
468
469 void
470 throttle_fini(void)
471 {
472 if (throttle_timer != NULL) {
473 mainloop_timer_del(throttle_timer);
474 throttle_timer = NULL;
475 }
476 if (throttle_records != NULL) {
477 g_hash_table_destroy(throttle_records);
478 throttle_records = NULL;
479 }
480 }
481
482 int
483 throttle_get_total_job_limit(int l)
484 {
485
486 GHashTableIter iter;
487 int limit = l;
488 int peers = pcmk__cluster_num_active_nodes();
489 struct throttle_record_s *r = NULL;
490
491 g_hash_table_iter_init(&iter, throttle_records);
492
493 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &r)) {
494 switch(r->mode) {
495
496 case throttle_extreme:
497 if(limit == 0 || limit > peers/4) {
498 limit = QB_MAX(1, peers/4);
499 }
500 break;
501
502 case throttle_high:
503 if(limit == 0 || limit > peers/2) {
504 limit = QB_MAX(1, peers/2);
505 }
506 break;
507 default:
508 break;
509 }
510 }
511 if(limit == l) {
512
513 } else if(l == 0) {
514 crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d", limit);
515
516 } else {
517 crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d instead of %d", limit, l);
518 }
519 return limit;
520 }
521
522 int
523 throttle_get_job_limit(const char *node)
524 {
525 int jobs = 1;
526 struct throttle_record_s *r = NULL;
527
528 r = g_hash_table_lookup(throttle_records, node);
529 if(r == NULL) {
530 r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s));
531 r->node = pcmk__str_copy(node);
532 r->mode = throttle_low;
533 r->max = throttle_job_max;
534 crm_trace("Defaulting to local values for unknown node %s", node);
535
536 g_hash_table_insert(throttle_records, r->node, r);
537 }
538
539 switch(r->mode) {
540 case throttle_extreme:
541 case throttle_high:
542 jobs = 1;
543 break;
544 case throttle_med:
545 jobs = QB_MAX(1, r->max / 4);
546 break;
547 case throttle_low:
548 jobs = QB_MAX(1, r->max / 2);
549 break;
550 case throttle_none:
551 jobs = QB_MAX(1, r->max);
552 break;
553 default:
554 crm_err("Unknown throttle mode %.4x on %s", r->mode, node);
555 break;
556 }
557 return jobs;
558 }
559
560 void
561 throttle_update(xmlNode *xml)
562 {
563 int max = 0;
564 int mode = 0;
565 struct throttle_record_s *r = NULL;
566 const char *from = crm_element_value(xml, PCMK__XA_SRC);
567
568 crm_element_value_int(xml, PCMK__XA_CRM_LIMIT_MODE, &mode);
569 crm_element_value_int(xml, PCMK__XA_CRM_LIMIT_MAX, &max);
570
571 r = g_hash_table_lookup(throttle_records, from);
572
573 if(r == NULL) {
574 r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s));
575 r->node = pcmk__str_copy(from);
576 g_hash_table_insert(throttle_records, r->node, r);
577 }
578
579 r->max = max;
580 r->mode = (enum throttle_state_e) mode;
581
582 crm_debug("Node %s has %s load and supports at most %d jobs; new job limit %d",
583 from, load2str((enum throttle_state_e) mode), max,
584 throttle_get_job_limit(from));
585 }