1 /*
2 * Copyright 2015-2024 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
#include <crm_internal.h>

#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
19
20 #if HAVE_LINUX_PROCFS
21 /*!
22 * \internal
23 * \brief Return name of /proc file containing the CIB daemon's load statistics
24 *
25 * \return Newly allocated memory with file name on success, NULL otherwise
26 *
27 * \note It is the caller's responsibility to free the return value.
28 * This will return NULL if the daemon is being run via valgrind.
29 * This should be called only on Linux systems.
30 */
31 static char *
32 find_cib_loadfile(const char *server)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
33 {
34 pid_t pid = pcmk__procfs_pid_of(server);
35
36 return pid? crm_strdup_printf("/proc/%lld/stat", (long long) pid) : NULL;
37 }
38
/*!
 * \internal
 * \brief Get process ID and name associated with a /proc directory entry
 *
 * \param[in]  entry  Directory entry (must be result of readdir() on /proc)
 * \param[out] name   If not NULL, a char[16] to hold the process name
 * \param[out] pid    If not NULL, will be set to process ID of entry
 *
 * \return Standard Pacemaker return code: 0 on success, otherwise a positive
 *         errno value (EINVAL if the entry name is not a process ID,
 *         ENAMETOOLONG if it does not fit the path buffer, ENOTDIR if the
 *         entry is not a directory, ENODATA if the status file has no
 *         parsable "Name:" line, or the errno set by stat() or fopen())
 *
 * \note \p name and \p pid are written only when the call succeeds.
 * \note This should be called only on Linux systems, as not all systems that
 *       support /proc store process names and IDs in the same way. The kernel
 *       limits the process name to the first 15 characters (plus terminator).
 *       It would be nice if there were a public kernel API constant for that
 *       limit, but there isn't.
 */
static int
pcmk__procfs_process_info(const struct dirent *entry, char *name, pid_t *pid)
{
    char procpath[128] = { 0 };
    char proc_name[16] = { 0 };
    char *end = NULL;
    long parsed_pid = 0;
    int path_len = 0;
    struct stat statbuf;

    if (entry == NULL) {
        return EINVAL;
    }

    /* We're only interested in entries whose entire name is a positive decimal
     * process ID. Requiring a leading digit keeps strtol() from accepting
     * leading whitespace or a sign, and checking the end pointer rejects
     * trailing garbage (such as "12abc").
     */
    if (!isdigit((unsigned char) entry->d_name[0])) {
        return EINVAL;
    }
    errno = 0;
    parsed_pid = strtol(entry->d_name, &end, 10);
    if ((errno != 0) || (*end != '\0')
        || (parsed_pid <= 0) || (parsed_pid > INT_MAX)) {
        return EINVAL;
    }

    /* Build "/proc/<entry>", making sure that "/status" (plus terminator) can
     * still be appended later without overflowing the buffer.
     */
    path_len = snprintf(procpath, sizeof(procpath), "/proc/%s", entry->d_name);
    if ((path_len < 0)
        || (((size_t) path_len + sizeof("/status")) > sizeof(procpath))) {
        return ENAMETOOLONG;
    }

    /* We're only interested in subdirectories */
    if (stat(procpath, &statbuf) < 0) {
        return errno;
    }
    if (!S_ISDIR(statbuf.st_mode)) {
        return ENOTDIR;
    }

    /* Read the first entry ("Name:") from the process's status file.
     * We could handle the valgrind case if we parsed the cmdline file
     * instead, but that's more of a pain than it's worth.
     */
    if (name != NULL) {
        FILE *file = NULL;
        int matched = 0;

        snprintf(procpath + path_len, sizeof(procpath) - (size_t) path_len,
                 "/status");
        file = fopen(procpath, "r");
        if (file == NULL) {
            return errno;
        }
        matched = fscanf(file, "Name:\t%15[^\n]", proc_name);
        fclose(file);
        if (matched != 1) {
            return ENODATA;
        }

        // proc_name is zero-initialized, so the copy is always terminated
        memcpy(name, proc_name, sizeof(proc_name));
    }

    if (pid != NULL) {
        *pid = (pid_t) parsed_pid;
    }
    return 0; // Success (the value the caller compares against pcmk_rc_ok)
}
113 #endif // HAVE_LINUX_PROCFS
114
/*!
 * \internal
 * \brief Look up the process ID of a running process by name
 *
 * \param[in] name  Process name (as used in /proc/.../status)
 *
 * \return Process ID of the first active process found in /proc whose name
 *         matches \p name (compared case-insensitively), or 0 if there is
 *         none, /proc cannot be opened, or procfs support is not compiled in
 *
 * \note This will return 0 if the process is being run via valgrind.
 *       This should be called only on Linux systems.
 */
pid_t
pcmk__procfs_pid_of(const char *name)
{
#if HAVE_LINUX_PROCFS
    char proc_name[64] = { 0 };
    pid_t candidate = 0;
    pid_t found = 0;
    DIR *proc_dir = opendir("/proc");

    if (proc_dir == NULL) {
        crm_notice("Can not read /proc directory to track existing components");
        return 0;
    }

    for (struct dirent *entry = readdir(proc_dir); entry != NULL;
         entry = readdir(proc_dir)) {

        // Skip anything that is not a process directory with a readable name
        if (pcmk__procfs_process_info(entry, proc_name,
                                      &candidate) != pcmk_rc_ok) {
            continue;
        }
        if (!pcmk__str_eq(proc_name, name, pcmk__str_casei)) {
            continue;
        }
        if (pcmk__pid_active(candidate, NULL) != pcmk_rc_ok) {
            continue;
        }

        crm_info("Found %s active as process %lld", name,
                 (long long) candidate);
        found = candidate;
        break;
    }

    closedir(proc_dir);
    return found;
#else
    return 0;
#endif // HAVE_LINUX_PROCFS
}
157
/*!
 * \internal
 * \brief Calculate number of logical CPU cores from procfs
 *
 * Counts the lines of /proc/stat that begin with "cpu" immediately followed by
 * a digit (the aggregate "cpu" line has no digit and is not counted).
 *
 * \return Number of cores (or 1 if unable to determine, including when procfs
 *         support is not compiled in)
 */
unsigned int
pcmk__procfs_num_cores(void)
{
#if HAVE_LINUX_PROCFS
    unsigned int cores = 0;
    char buffer[2048];
    FILE *stream = NULL;

    /* Parse /proc/stat instead of /proc/cpuinfo because it's smaller */
    stream = fopen("/proc/stat", "r");
    if (stream == NULL) {
        crm_perror(LOG_INFO, "Could not open /proc/stat");
        return 1;
    }

    while (fgets(buffer, sizeof(buffer), stream) != NULL) {
        /* Once the "cpu" prefix has matched, buffer[3] is at worst the string
         * terminator. The cast is required because passing a negative char
         * value to isdigit() is undefined behavior.
         */
        if (pcmk__starts_with(buffer, "cpu")
            && isdigit((unsigned char) buffer[3])) {
            ++cores;
        }
    }
    fclose(stream);

    return (cores > 0)? cores : 1;
#else
    return 1;
#endif // HAVE_LINUX_PROCFS
}
190
/*!
 * \internal
 * \brief Get the executable path corresponding to a process ID
 *
 * Resolves the /proc/<pid>/exe symbolic link into \p path.
 *
 * \param[in]  pid        Process ID to check
 * \param[out] path       Where to store executable path
 * \param[in]  path_size  Size of \p path in characters (ideally PATH_MAX)
 *
 * \return Standard Pacemaker error code (as possible errno values from
 *         readlink()); in particular EINVAL if \p path is NULL or
 *         \p path_size is 0, ENAMETOOLONG if the link target may not fit in
 *         \p path, and EOPNOTSUPP if procfs support is not compiled in
 *
 * \note On success, \p path is always null-terminated.
 */
int
pcmk__procfs_pid2path(pid_t pid, char path[], size_t path_size)
{
    /* Reject unusable buffers up front. With a size of 0, the "path_size - 1"
     * passed to readlink() below would wrap around to SIZE_MAX and allow a
     * write past the end of the caller's buffer.
     */
    if ((path == NULL) || (path_size == 0)) {
        return EINVAL;
    }

#if HAVE_LINUX_PROCFS
    {
        char procfs_exe_path[PATH_MAX];
        ssize_t link_rc = 0;
        int len = snprintf(procfs_exe_path, PATH_MAX, "/proc/%lld/exe",
                           (long long) pid);

        if ((len < 0) || (len >= PATH_MAX)) {
            return ENAMETOOLONG; // Truncated (shouldn't be possible in practice)
        }

        // Leave room for the terminator, which readlink() does not add
        link_rc = readlink(procfs_exe_path, path, path_size - 1);
        if (link_rc < 0) {
            return errno;
        }

        // A completely filled buffer means the target may have been truncated
        if ((size_t) link_rc >= (path_size - 1)) {
            return ENAMETOOLONG;
        }

        path[link_rc] = '\0';
        return pcmk_rc_ok;
    }
#else
    return EOPNOTSUPP;
#endif // HAVE_LINUX_PROCFS
}
227
/*!
 * \internal
 * \brief Check whether process ID information is available from procfs
 *
 * The check is done by resolving the executable path of the current process.
 * It is performed on the first call only; later calls return the remembered
 * result.
 *
 * \return true if process ID information is available, otherwise false
 *         (always false if procfs support is not compiled in)
 */
bool
pcmk__procfs_has_pids(void)
{
#if HAVE_LINUX_PROCFS
    static bool probed = false;
    static bool available = false;

    if (probed) {
        return available;
    }

    {
        char exe_path[PATH_MAX];
        int rc = pcmk__procfs_pid2path(getpid(), exe_path, sizeof(exe_path));

        available = (rc == pcmk_rc_ok);
    }
    probed = true;
    return available;
#else
    return false;
#endif // HAVE_LINUX_PROCFS
}
252
/*!
 * \internal
 * \brief Open the directory containing links to the current process's open
 *        file descriptors
 *
 * \return Open directory handle on /proc/self/fd, or NULL if it cannot be
 *         opened or procfs support is not compiled in
 */
DIR *
pcmk__procfs_fd_dir(void)
{
    /* /proc/self/fd (on Linux) or /dev/fd (on most OSes) contains symlinks to
     * all open files for the current process, named as the file descriptor.
     * Use this if available, because it's more efficient than a shotgun
     * approach to closing descriptors.
     */
#if HAVE_LINUX_PROCFS
    return opendir("/proc/self/fd");
#else
    return NULL;
#endif // HAVE_LINUX_PROCFS
}
274
/*!
 * \internal
 * \brief Trigger a sysrq command if supported on current platform
 *
 * Writes the command character followed by a newline to /proc/sysrq-trigger.
 * A warning is logged if that file cannot be opened. Nothing is done if procfs
 * support is not compiled in.
 *
 * \param[in] t  Sysrq command to trigger
 */
void
pcmk__sysrq_trigger(char t)
{
#if HAVE_LINUX_PROCFS
    // Root can always write here, regardless of kernel.sysrq value
    FILE *trigger = fopen("/proc/sysrq-trigger", "a");

    if (trigger == NULL) {
        crm_warn("Could not open sysrq-trigger: %s", strerror(errno));
        return;
    }

    fprintf(trigger, "%c\n", t);
    fclose(trigger);
#endif // HAVE_LINUX_PROCFS
}
296
/*!
 * \internal
 * \brief Measure the CPU load caused by the CIB daemon
 *
 * The daemon's /proc/<pid>/stat file is located on first use and remembered
 * across calls, together with the time and CPU tick counters of the previous
 * successful call. The load is the CPU time (user plus kernel mode) consumed
 * since that previous call, divided by the elapsed wall-clock seconds.
 *
 * \param[in]  server  Process name of the CIB daemon
 * \param[out] load    Where to store the load
 *
 * \return true if the statistics were read and parsed, otherwise false
 *         (always false if \p load is NULL or procfs support is not compiled
 *         in)
 *
 * \note When true is returned but there is no usable previous sample (first
 *       call, no time elapsed according to time(), or counters that went
 *       backwards), \p *load is set to 0.
 * \note If the remembered file can no longer be opened, it is forgotten so
 *       that the next call looks the daemon up again.
 */
bool
pcmk__throttle_cib_load(const char *server, float *load)
{
    /* /proc/[pid]/stat
     *
     * Status information about the process. This is used by ps(1). It is
     * defined in /usr/src/linux/fs/proc/array.c.
     *
     * The fields parsed here, in order, with their scanf(3) conversions:
     *
     *  (1) pid     %d   The process ID.
     *  (2) comm    %s   The filename of the executable, in parentheses. This
     *                   is visible whether or not the executable is swapped
     *                   out.
     *  (3) state   %c   One character from the string "RSDZTW" where R is
     *                   running, S is sleeping in an interruptible wait, D is
     *                   waiting in uninterruptible disk sleep, Z is zombie, T
     *                   is traced or stopped (on a signal), and W is paging.
     *  (4) ppid    %d   The PID of the parent.
     *  (5) pgrp    %d   The process group ID of the process.
     *  (6) session %d   The session ID of the process.
     *  (7) tty_nr  %d   The controlling terminal of the process. (The minor
     *                   device number is contained in the combination of bits
     *                   31 to 20 and 7 to 0; the major device number is in
     *                   bits 15 to 8.)
     *  (8) tpgid   %d   The ID of the foreground process group of the
     *                   controlling terminal of the process.
     *  (9) flags   %u   The kernel flags word of the process. For bit
     *                   meanings, see the PF_* defines in the Linux kernel
     *                   source file include/linux/sched.h. Details depend on
     *                   the kernel version.
     * (10) minflt  %lu  The number of minor faults the process has made which
     *                   have not required loading a memory page from disk.
     * (11) cminflt %lu  The number of minor faults that the process's
     *                   waited-for children have made.
     * (12) majflt  %lu  The number of major faults the process has made which
     *                   have required loading a memory page from disk.
     * (13) cmajflt %lu  The number of major faults that the process's
     *                   waited-for children have made.
     * (14) utime   %lu  Amount of time that this process has been scheduled in
     *                   user mode, measured in clock ticks (divide by
     *                   sysconf(_SC_CLK_TCK)). This includes guest time,
     *                   guest_time (time spent running a virtual CPU), so that
     *                   applications that are not aware of the guest time
     *                   field do not lose that time from their calculations.
     * (15) stime   %lu  Amount of time that this process has been scheduled in
     *                   kernel mode, measured in clock ticks (divide by
     *                   sysconf(_SC_CLK_TCK)).
     */

#if HAVE_LINUX_PROCFS
    static char *loadfile = NULL;
    static time_t last_call = 0;
    static long ticks_per_s = 0;
    static unsigned long last_utime = 0;
    static unsigned long last_stime = 0;

    char buffer[64*1024];
    char comm[256] = { 0 };
    char state = 0;
    int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0;
    int tpgid = 0;
    unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0;
    unsigned long utime = 0, stime = 0;
    FILE *stream = NULL;
    time_t now = time(NULL);

    if (load == NULL) {
        return false;
    }
    *load = 0.0;

    if (loadfile == NULL) {
        // (Re)start sampling from scratch
        last_call = 0;
        last_utime = 0;
        last_stime = 0;

        loadfile = find_cib_loadfile(server);
        if (loadfile == NULL) {
            crm_warn("Couldn't find CIB load file");
            return false;
        }

        ticks_per_s = sysconf(_SC_CLK_TCK);
        crm_trace("Found %s", loadfile);
    }

    stream = fopen(loadfile, "r");
    if (stream == NULL) {
        rc = errno;

        crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc);

        // Forget the file, so the daemon is looked up again on the next call
        free(loadfile);
        loadfile = NULL;
        return false;
    }

    if (fgets(buffer, sizeof(buffer), stream) == NULL) {
        fclose(stream);
        return false;
    }
    fclose(stream);

    // The field width keeps an oversized second field from overflowing comm
    rc = sscanf(buffer,
                "%d %255[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
                &pid, comm, &state, &ppid, &pgrp, &session, &tty_nr, &tpgid,
                &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime);
    if (rc != 15) {
        crm_err("Only %d of 15 fields found in %s", rc, loadfile);
        return false;
    }

    if ((last_call > 0) && (last_call < now) && (last_utime <= utime)
        && (last_stime <= stime)) {

        time_t elapsed = now - last_call;
        unsigned long delta_utime = utime - last_utime;
        unsigned long delta_stime = stime - last_stime;

        *load = delta_utime + delta_stime; /* Cast to a float before division */
        *load /= ticks_per_s;
        *load /= elapsed;
        crm_debug("cib load: %f (%lu ticks in %lds)", *load,
                  delta_utime + delta_stime, (long) elapsed);

    } else {
        crm_debug("Init %lu + %lu ticks at %ld (%ld tps)", utime, stime,
                  (long) now, ticks_per_s);
    }

    last_call = now;
    last_utime = utime;
    last_stime = stime;
    return true;
#else
    return false;
#endif // HAVE_LINUX_PROCFS
}
429
/*!
 * \internal
 * \brief Get the system's 1-minute load average from /proc/loadavg
 *
 * \param[out] load  Where to store the load average
 *
 * \return true if the load average was read and parsed, otherwise false
 *         (always false if \p load is NULL or procfs support is not compiled
 *         in)
 *
 * \note \p *load is left untouched when false is returned.
 */
bool
pcmk__throttle_load_avg(float *load)
{
#if HAVE_LINUX_PROCFS
    char buffer[256];
    char *end = NULL;
    float one_minute = 0.0;
    FILE *stream = NULL;
    const char *loadfile = "/proc/loadavg";

    if (load == NULL) {
        return false;
    }

    stream = fopen(loadfile, "r");
    if (stream == NULL) {
        int rc = errno;

        crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc);
        return false;
    }

    if (fgets(buffer, sizeof(buffer), stream) == NULL) {
        fclose(stream);
        return false;
    }
    fclose(stream);

    /* Grab the 1-minute average, ignore the rest */
    one_minute = strtof(buffer, &end);
    if (end == buffer) {
        // Nothing numeric at the start of the line; don't report a bogus 0
        return false;
    }

    *load = one_minute;
    return true;
#else
    return false;
#endif // HAVE_LINUX_PROCFS
}