1
2
3#define _GNU_SOURCE
4
5#include <errno.h>
6#include <fcntl.h>
7#include <linux/limits.h>
8#include <poll.h>
9#include <signal.h>
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13#include <sys/inotify.h>
14#include <sys/stat.h>
15#include <sys/types.h>
16#include <sys/wait.h>
17#include <unistd.h>
18
19#include "cgroup_util.h"
20#include "../clone3/clone3_selftests.h"
21
/*
 * read_text() - read the start of a file into a NUL-terminated buffer.
 * @path:    file to read
 * @buf:     destination buffer
 * @max_len: capacity of @buf in bytes, terminator included
 *
 * A single read() of at most @max_len - 1 bytes is issued; on success the
 * data is always NUL-terminated.
 *
 * Return: number of bytes read (may be 0), or a negative value when the
 * file cannot be opened or read, or when @max_len is 0.
 */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd;

	/*
	 * The buffer must at least hold the terminator.  Without this check
	 * "max_len - 1" wraps around to SIZE_MAX and both the read() and the
	 * terminator store run past the caller's buffer.
	 */
	if (max_len == 0) {
		errno = EINVAL;
		return -1;
	}

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return fd;

	len = read(fd, buf, max_len - 1);
	if (len >= 0)
		buf[len] = '\0';

	close(fd);
	return len;
}
40
/*
 * write_text() - append @len bytes from @buf to an existing file.
 * @path: file to open (write-only, append mode; it is not created)
 * @buf:  data to write
 * @len:  number of bytes to write
 *
 * Exactly one write() is issued, so a short write is reported as-is.
 *
 * Return: the value returned by write(), or a negative value when the file
 * cannot be opened.
 */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	ssize_t written;
	int fd = open(path, O_WRONLY | O_APPEND);

	if (fd < 0)
		return fd;

	written = write(fd, buf, len);

	/* One shared exit: the descriptor is released whether or not write() worked. */
	close(fd);

	return written;
}
59
/*
 * cg_name() - build the path "<root>/<name>" in a freshly allocated string.
 * @root: parent directory
 * @name: name of the child cgroup
 *
 * Return: a malloc()ed string the caller must free(), or NULL when the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	/* +2: one byte for the '/' separator, one for the terminating NUL. */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	/* Never hand a NULL destination to snprintf(). */
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
69
/*
 * cg_name_indexed() - build the path "<root>/<name>_<index>".
 * @root:  parent directory
 * @name:  base name of the child cgroup
 * @index: number appended after an underscore
 *
 * The required size is measured with snprintf() itself, so any int value
 * (including negative ones and INT_MIN) fits without truncation.
 *
 * Return: a malloc()ed string the caller must free(), or NULL when
 * formatting or allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	char *ret;
	int need;

	/* Dry run: returns the length the formatted string would have. */
	need = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	if (need < 0)
		return NULL;

	ret = malloc((size_t)need + 1);
	if (!ret)
		return NULL;

	snprintf(ret, (size_t)need + 1, "%s/%s_%d", root, name, index);

	return ret;
}
79
/*
 * cg_control() - build the path of a control file inside a cgroup.
 * @cgroup:  cgroup directory
 * @control: control file name, e.g. "cgroup.events"
 *
 * Return: "<cgroup>/<control>" as a malloc()ed string the caller must
 * free(), or NULL when the allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* +2: one byte for the '/' separator, one for the terminating NUL. */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	/* Never hand a NULL destination to snprintf(). */
	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
89
/*
 * cg_read() - read a cgroup control file into a caller-supplied buffer.
 * @cgroup:  cgroup directory
 * @control: control file name inside @cgroup
 * @buf:     destination buffer, NUL-terminated on success
 * @len:     capacity of @buf
 *
 * Return: 0 on success, -1 if the file could not be read.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char file[PATH_MAX];
	ssize_t nread;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	nread = read_text(file, buf, len);

	/* Callers only need success/failure, not the byte count. */
	return nread < 0 ? -1 : 0;
}
101
/*
 * cg_read_strcmp() - compare a control file's contents with a string.
 * @cgroup:   cgroup directory
 * @control:  control file name inside @cgroup
 * @expected: string to compare against; NULL is rejected
 *
 * The read buffer is sized to strlen(@expected) + 1, so at most
 * strlen(@expected) bytes of the file take part in the comparison.
 *
 * Return: the strcmp() result (0 on match), or -1 when @expected is NULL,
 * the allocation fails, or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	size_t capacity;
	char *actual;
	int diff = -1;

	if (expected == NULL)
		return -1;

	capacity = strlen(expected) + 1;
	actual = malloc(capacity);
	if (actual == NULL)
		return -1;

	/* diff keeps its -1 default when the read fails. */
	if (cg_read(cgroup, control, actual, capacity) == 0)
		diff = strcmp(expected, actual);

	free(actual);
	return diff;
}
128
129int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
130{
131 char buf[PAGE_SIZE];
132
133 if (cg_read(cgroup, control, buf, sizeof(buf)))
134 return -1;
135
136 return strstr(buf, needle) ? 0 : -1;
137}
138
/*
 * cg_read_long() - read a control file and parse it as a decimal number.
 * @cgroup:  cgroup directory
 * @control: control file name inside @cgroup
 *
 * Parsing uses atol(), so non-numeric content yields 0 rather than an error.
 *
 * Return: the parsed value, or -1 if the file could not be read.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char text[128];
	int err = cg_read(cgroup, control, text, sizeof(text));

	return err ? -1 : atol(text);
}
148
149long cg_read_key_long(const char *cgroup, const char *control, const char *key)
150{
151 char buf[PAGE_SIZE];
152 char *ptr;
153
154 if (cg_read(cgroup, control, buf, sizeof(buf)))
155 return -1;
156
157 ptr = strstr(buf, key);
158 if (!ptr)
159 return -1;
160
161 return atol(ptr + strlen(key));
162}
163
164long cg_read_lc(const char *cgroup, const char *control)
165{
166 char buf[PAGE_SIZE];
167 const char delim[] = "\n";
168 char *line;
169 long cnt = 0;
170
171 if (cg_read(cgroup, control, buf, sizeof(buf)))
172 return -1;
173
174 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
175 cnt++;
176
177 return cnt;
178}
179
/*
 * cg_write() - write a string to a cgroup control file.
 * @cgroup:  cgroup directory
 * @control: control file name inside @cgroup
 * @buf:     NUL-terminated string to write (the terminator is not written)
 *
 * Return: 0 if the whole string was written, -1 on error or short write.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char file[PATH_MAX];
	ssize_t want = strlen(buf);
	ssize_t done;

	snprintf(file, sizeof(file), "%s/%s", cgroup, control);
	done = write_text(file, buf, want);

	/* Anything but a complete write counts as failure. */
	return done == want ? 0 : -1;
}
192
193int cg_find_unified_root(char *root, size_t len)
194{
195 char buf[10 * PAGE_SIZE];
196 char *fs, *mount, *type;
197 const char delim[] = "\n\t ";
198
199 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
200 return -1;
201
202
203
204
205
206 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
207 mount = strtok(NULL, delim);
208 type = strtok(NULL, delim);
209 strtok(NULL, delim);
210 strtok(NULL, delim);
211 strtok(NULL, delim);
212
213 if (strcmp(type, "cgroup2") == 0) {
214 strncpy(root, mount, len);
215 return 0;
216 }
217 }
218
219 return -1;
220}
221
/*
 * cg_create() - create a cgroup by creating its directory.
 * @cgroup: path of the cgroup directory to create
 *
 * The directory is created with mode 0755.  A directory needs the search
 * (execute) bit to be traversable; with 0644 a non-root user cannot open
 * any file inside the new cgroup.
 *
 * Return: the mkdir() result: 0 on success, -1 with errno set on failure.
 */
int cg_create(const char *cgroup)
{
	return mkdir(cgroup, 0755);
}
226
227int cg_wait_for_proc_count(const char *cgroup, int count)
228{
229 char buf[10 * PAGE_SIZE] = {0};
230 int attempts;
231 char *ptr;
232
233 for (attempts = 10; attempts >= 0; attempts--) {
234 int nr = 0;
235
236 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
237 break;
238
239 for (ptr = buf; *ptr; ptr++)
240 if (*ptr == '\n')
241 nr++;
242
243 if (nr >= count)
244 return 0;
245
246 usleep(100000);
247 }
248
249 return -1;
250}
251
252int cg_killall(const char *cgroup)
253{
254 char buf[PAGE_SIZE];
255 char *ptr = buf;
256
257
258 if (!cg_write(cgroup, "cgroup.kill", "1"))
259 return 0;
260
261 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
262 return -1;
263
264 while (ptr < buf + sizeof(buf)) {
265 int pid = strtol(ptr, &ptr, 10);
266
267 if (pid == 0)
268 break;
269 if (*ptr)
270 ptr++;
271 else
272 break;
273 if (kill(pid, SIGKILL))
274 return -1;
275 }
276
277 return 0;
278}
279
/*
 * cg_destroy() - remove a cgroup directory, killing its processes if needed.
 * @cgroup: cgroup directory to remove
 *
 * While rmdir() fails with EBUSY, the cgroup's processes are killed and the
 * removal is retried after 100us; there is no upper bound on the retries.
 * A cgroup that does not exist is treated as already destroyed.
 *
 * Return: 0 on success (or ENOENT), otherwise the failing rmdir() result.
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	while ((ret = rmdir(cgroup)) != 0 && errno == EBUSY) {
		cg_killall(cgroup);
		usleep(100);
	}

	if (ret != 0 && errno == ENOENT)
		return 0;

	return ret;
}
297
/*
 * cg_enter() - move a process into a cgroup.
 * @cgroup: destination cgroup directory
 * @pid:    process ID written to the cgroup's "cgroup.procs" file
 *
 * Return: 0 on success, -1 if the write failed.
 */
int cg_enter(const char *cgroup, int pid)
{
	char pid_text[64];
	int err;

	snprintf(pid_text, sizeof(pid_text), "%d", pid);
	err = cg_write(cgroup, "cgroup.procs", pid_text);

	return err;
}
305
/*
 * cg_enter_current() - move the calling process into a cgroup.
 * @cgroup: destination cgroup directory
 *
 * Writes "0" to "cgroup.procs"; presumably the kernel treats PID 0 as
 * "the writer itself" (see the cgroup v2 documentation).
 *
 * Return: 0 on success, -1 if the write failed.
 */
int cg_enter_current(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.procs", self);
}
310
/*
 * cg_enter_current_thread() - move the calling thread into a cgroup.
 * @cgroup: destination cgroup directory
 *
 * Writes "0" to "cgroup.threads"; presumably the kernel treats TID 0 as
 * "the writer itself" (see the cgroup v2 documentation).
 *
 * Return: 0 on success, -1 if the write failed.
 */
int cg_enter_current_thread(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.threads", self);
}
315
/*
 * cg_run() - run a function in a child process placed inside a cgroup.
 * @cgroup: cgroup the child joins before calling @fn
 * @fn:     function executed in the child; its return value becomes the
 *          child's exit status
 * @arg:    opaque argument passed through to @fn
 *
 * The child writes its own PID to "cgroup.procs" and exits with
 * EXIT_FAILURE if that fails.  The parent blocks until the child is done.
 *
 * Return: the child's exit status if it exited normally, a negative value
 * if fork() failed, or -1 if waiting failed or the child did not exit
 * normally (e.g. was killed by a signal).
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * retcode is only valid once waitpid() succeeds: retry when a signal
	 * interrupts the wait and report any other failure instead of
	 * decoding an uninitialized status.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(retcode) ? WEXITSTATUS(retcode) : -1;
}
340
/*
 * clone_into_cgroup() - create a child directly inside a cgroup via clone3().
 * @cgroup_fd: descriptor of the target cgroup directory
 *
 * When CLONE_ARGS_SIZE_VER2 is not defined at build time, or when clone3()
 * fails with ENOSYS or E2BIG, the call is reported as unsupported: errno is
 * set to ENOSYS and -ENOSYS is returned.  (Presumably E2BIG is how kernels
 * without CLONE_INTO_CGROUP reject the larger argument struct - confirm
 * against clone3(2).)
 *
 * Return: child PID in the parent, 0 in the child, the failing clone3()
 * result for other errors, or -ENOSYS as described above.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct __clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t pid = sys_clone3(&args, sizeof(args));

	/* Success, or a failure that is not "feature unavailable". */
	if (pid >= 0 || (errno != ENOSYS && errno != E2BIG))
		return pid;
#endif
	/* Collapse every "not available" flavour into plain ENOSYS. */
	errno = ENOSYS;
	return -ENOSYS;
}
368
/*
 * clone_reap() - wait for a state change of a child and decode it.
 * @pid:     child to wait for
 * @options: waitid() options (WEXITED, WSTOPPED, WCONTINUED, ...);
 *           __WALL and __WNOTHREAD are always added
 *
 * waitid() reports the event type in si_code and, depending on it, the
 * exit code or signal number in si_status.  That value is NOT encoded like
 * a wait()/waitpid() status, so the WIFEXITED()/WEXITSTATUS() family must
 * not be applied to it.
 *
 * Return: the exit code for a WEXITED event, the stop signal for a
 * WSTOPPED event, 0 for a WCONTINUED event, and -1 if waitid() fails or
 * the reported event is none of the above (for example the child was
 * killed by a signal, or WNOHANG found nothing to report).
 */
int clone_reap(pid_t pid, int options)
{
	siginfo_t info = {
		.si_signo = 0,
	};
	int ret;

	/* Restart the wait whenever a signal interrupts it. */
	do {
		ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0)
		return -1;

	if ((options & WEXITED) && info.si_code == CLD_EXITED)
		return info.si_status;

	if ((options & WSTOPPED) && info.si_code == CLD_STOPPED)
		return info.si_status;

	if ((options & WCONTINUED) && info.si_code == CLD_CONTINUED)
		return 0;

	return -1;
}
401
/*
 * dirfd_open_opath() - open a directory as an O_PATH descriptor.
 * @dir: directory path; a symlink in the final component is not followed
 *
 * The descriptor is opened with O_PATH, O_DIRECTORY, O_NOFOLLOW and
 * O_CLOEXEC.
 *
 * Return: the new descriptor, or -1 with errno set.
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

	return open(dir, flags);
}
406
/*
 * close_prot_errno() - close a descriptor without disturbing errno.
 * @fd: descriptor to close; negative values are ignored
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe inside unbraced if/else), and @fd is evaluated exactly once.
 */
#define close_prot_errno(fd)				\
	do {						\
		int _fd_ = (fd);			\
		if (_fd_ >= 0) {			\
			int _e_ = errno;		\
			close(_fd_);			\
			errno = _e_;			\
		}					\
	} while (0)
413
/*
 * clone_into_cgroup_run_nowait() - start @fn in a child cloned into @cgroup.
 * @cgroup: cgroup directory the child is created in
 * @fn:     function run by the child; its return value is the exit status
 * @arg:    opaque argument passed through to @fn
 *
 * The parent does not wait for the child.  errno as set by
 * clone_into_cgroup() survives the close of the cgroup descriptor.
 *
 * Return: child PID in the parent, -1 if the cgroup directory cannot be
 * opened, or the negative result of clone_into_cgroup() on failure.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int cg_fd = dirfd_open_opath(cgroup);

	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);

	/* Parent and child both drop their copy of the descriptor. */
	close_prot_errno(cg_fd);

	if (child == 0)
		exit(fn(cgroup, arg));

	return child;
}
432
/*
 * cg_run_nowait() - start @fn in a child inside @cgroup without waiting.
 * @cgroup: cgroup the child runs in
 * @fn:     function run by the child; its return value is the exit status
 * @arg:    opaque argument passed through to @fn
 *
 * Cloning straight into the cgroup is tried first.  Only when that reports
 * ENOSYS does the function fall back to fork() followed by the child
 * writing its PID to "cgroup.procs" (exiting with EXIT_FAILURE if the
 * write fails).
 *
 * Return: child PID on success, -1 on a clone failure other than ENOSYS,
 * or the negative fork() result.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	int child;

	child = clone_into_cgroup_run_nowait(cgroup, fn, arg);
	if (child > 0)
		return child;

	/* A failure that is not "unsupported" is a genuine error. */
	if (child < 0 && errno != ENOSYS)
		return -1;

	/* Fallback path: plain fork(), then migrate from inside the child. */
	child = fork();
	if (child == 0) {
		char self[64];

		snprintf(self, sizeof(self), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", self) != 0)
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	return child;
}
459
/*
 * get_temp_fd() - create an unnamed temporary file in the current directory.
 *
 * Uses O_TMPFILE, so the file never gets a name; O_EXCL additionally
 * prevents it from being linked into the filesystem later.
 *
 * open() requires the mode argument whenever O_TMPFILE (or O_CREAT) is
 * given; without it arbitrary stack contents become the file mode, and
 * fortified builds reject the call at compile time.
 *
 * Return: a read/write descriptor, or -1 with errno set.
 */
int get_temp_fd(void)
{
	return open(".", O_TMPFILE | O_RDWR | O_EXCL, 0600);
}
464
465int alloc_pagecache(int fd, size_t size)
466{
467 char buf[PAGE_SIZE];
468 struct stat st;
469 int i;
470
471 if (fstat(fd, &st))
472 goto cleanup;
473
474 size += st.st_size;
475
476 if (ftruncate(fd, size))
477 goto cleanup;
478
479 for (i = 0; i < size; i += sizeof(buf))
480 read(fd, buf, sizeof(buf));
481
482 return 0;
483
484cleanup:
485 return -1;
486}
487
488int alloc_anon(const char *cgroup, void *arg)
489{
490 size_t size = (unsigned long)arg;
491 char *buf, *ptr;
492
493 buf = malloc(size);
494 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
495 *ptr = 0;
496
497 free(buf);
498 return 0;
499}
500
501int is_swap_enabled(void)
502{
503 char buf[PAGE_SIZE];
504 const char delim[] = "\n";
505 int cnt = 0;
506 char *line;
507
508 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
509 return -1;
510
511 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
512 cnt++;
513
514 return cnt > 1;
515}
516
/*
 * set_oom_adj_score() - write a value to a process's oom_score_adj file.
 * @pid:   target process
 * @score: value written, in decimal, to /proc/<pid>/oom_score_adj
 *
 * Return: 0 on success, or the negative result of the failing open() or
 * dprintf() call.
 */
int set_oom_adj_score(int pid, int score)
{
	char path[PATH_MAX];
	int fd, written;

	snprintf(path, sizeof(path), "/proc/%d/oom_score_adj", pid);

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return fd;

	written = dprintf(fd, "%d", score);

	/* One shared exit: the descriptor is released on both outcomes. */
	close(fd);

	return written < 0 ? written : 0;
}
537
/*
 * proc_read_text() - read a per-process (or per-thread) /proc file.
 * @pid:    target process, or 0 for the caller itself
 * @thread: when @pid is 0, selects /proc/thread-self instead of /proc/self;
 *          ignored for a non-zero @pid
 * @item:   file name below the process directory
 * @buf:    destination buffer, NUL-terminated on success
 * @size:   capacity of @buf
 *
 * Return: number of bytes read, or a negative value on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char path[PATH_MAX];

	if (pid) {
		snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(path, sizeof(path), "/proc/%s/%s", self, item);
	}

	return read_text(path, buf, size);
}
550
551int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
552{
553 char buf[PAGE_SIZE];
554
555 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
556 return -1;
557
558 return strstr(buf, needle) ? 0 : -1;
559}
560
/*
 * clone_into_cgroup_run_wait() - check that a child can be cloned into a
 * cgroup.
 * @cgroup: cgroup directory the child is created in
 *
 * The child exits immediately with EXIT_SUCCESS and the parent reaps it.
 *
 * Return: 0 if the clone succeeded, -1 if the cgroup directory cannot be
 * opened or the clone failed.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
	pid_t child;
	int cg_fd = dirfd_open_opath(cgroup);

	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);
	close_prot_errno(cg_fd);

	if (child < 0)
		return -1;

	if (child == 0)
		exit(EXIT_SUCCESS);

	/*
	 * The outcome of the reap is deliberately ignored: only the success
	 * of the clone itself is reported to the caller.
	 */
	(void)clone_reap(child, WEXITED);
	return 0;
}
585
/*
 * cg_prepare_for_wait() - set up an inotify watch on a cgroup's events file.
 * @cgroup: cgroup directory whose "cgroup.events" file is watched for
 *          modification (IN_MODIFY)
 *
 * Return: an inotify descriptor that can be passed to cg_wait_for(), or -1
 * if the inotify instance, the path string or the watch cannot be created.
 * The caller owns the descriptor and must close() it.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	char *events;
	int fd, ret;

	fd = inotify_init1(0);
	if (fd == -1)
		return fd;

	/* cg_control() returns a heap string that has to be released here. */
	events = cg_control(cgroup, "cgroup.events");
	if (!events) {
		close(fd);
		return -1;
	}

	ret = inotify_add_watch(fd, events, IN_MODIFY);
	if (ret == -1) {
		close(fd);
		fd = -1;
	}

	free(events);
	return fd;
}
603
/*
 * cg_wait_for() - block until a descriptor becomes readable.
 * @fd: descriptor to poll, e.g. the one from cg_prepare_for_wait()
 *
 * poll() is called with a 10 second timeout and restarted after a timeout
 * or an interrupting signal, so the wait itself is unbounded.
 *
 * Return: 0 once @fd reports POLLIN; -1 if @fd is negative, poll() fails,
 * or poll() reports an error/hang-up/invalid descriptor without any
 * readable data.
 */
int cg_wait_for(int fd)
{
	struct pollfd fds = {
		.fd = fd,
		.events = POLLIN,
	};
	int ret;

	/*
	 * poll() silently ignores negative descriptors, which would turn a
	 * failed cg_prepare_for_wait() into an endless wait.
	 */
	if (fd < 0)
		return -1;

	for (;;) {
		ret = poll(&fds, 1, 10000);

		if (ret == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}

		/* Timeout: keep waiting. */
		if (ret == 0)
			continue;

		if (fds.revents & POLLIN)
			return 0;

		/*
		 * Only POLLIN was requested, so anything else reported here is
		 * POLLERR, POLLHUP or POLLNVAL.  These conditions persist;
		 * polling again would spin forever.
		 */
		return -1;
	}
}
630