linux/tools/testing/selftests/cgroup/test_kmem.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2#define _GNU_SOURCE
   3
   4#include <linux/limits.h>
   5#include <fcntl.h>
   6#include <stdio.h>
   7#include <stdlib.h>
   8#include <string.h>
   9#include <sys/stat.h>
  10#include <sys/types.h>
  11#include <unistd.h>
  12#include <sys/wait.h>
  13#include <errno.h>
  14#include <sys/sysinfo.h>
  15#include <pthread.h>
  16
  17#include "../kselftest.h"
  18#include "cgroup_util.h"
  19
  20
  21/*
  22 * Memory cgroup charging and vmstat data aggregation is performed using
  23 * percpu batches 32 pages big (look at MEMCG_CHARGE_BATCH). So the maximum
  24 * discrepancy between charge and vmstat entries is number of cpus multiplied
  25 * by 32 pages multiplied by 2.
  26 */
  27#define MAX_VMSTAT_ERROR (4096 * 32 * 2 * get_nprocs())
  28
  29
  30static int alloc_dcache(const char *cgroup, void *arg)
  31{
  32        unsigned long i;
  33        struct stat st;
  34        char buf[128];
  35
  36        for (i = 0; i < (unsigned long)arg; i++) {
  37                snprintf(buf, sizeof(buf),
  38                        "/something-non-existent-with-a-long-name-%64lu-%d",
  39                         i, getpid());
  40                stat(buf, &st);
  41        }
  42
  43        return 0;
  44}
  45
  46/*
  47 * This test allocates 100000 of negative dentries with long names.
  48 * Then it checks that "slab" in memory.stat is larger than 1M.
  49 * Then it sets memory.high to 1M and checks that at least 1/2
  50 * of slab memory has been reclaimed.
  51 */
  52static int test_kmem_basic(const char *root)
  53{
  54        int ret = KSFT_FAIL;
  55        char *cg = NULL;
  56        long slab0, slab1, current;
  57
  58        cg = cg_name(root, "kmem_basic_test");
  59        if (!cg)
  60                goto cleanup;
  61
  62        if (cg_create(cg))
  63                goto cleanup;
  64
  65        if (cg_run(cg, alloc_dcache, (void *)100000))
  66                goto cleanup;
  67
  68        slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
  69        if (slab0 < (1 << 20))
  70                goto cleanup;
  71
  72        cg_write(cg, "memory.high", "1M");
  73        slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
  74        if (slab1 <= 0)
  75                goto cleanup;
  76
  77        current = cg_read_long(cg, "memory.current");
  78        if (current <= 0)
  79                goto cleanup;
  80
  81        if (slab1 < slab0 / 2 && current < slab0 / 2)
  82                ret = KSFT_PASS;
  83cleanup:
  84        cg_destroy(cg);
  85        free(cg);
  86
  87        return ret;
  88}
  89
  90static void *alloc_kmem_fn(void *arg)
  91{
  92        alloc_dcache(NULL, (void *)100);
  93        return NULL;
  94}
  95
  96static int alloc_kmem_smp(const char *cgroup, void *arg)
  97{
  98        int nr_threads = 2 * get_nprocs();
  99        pthread_t *tinfo;
 100        unsigned long i;
 101        int ret = -1;
 102
 103        tinfo = calloc(nr_threads, sizeof(pthread_t));
 104        if (tinfo == NULL)
 105                return -1;
 106
 107        for (i = 0; i < nr_threads; i++) {
 108                if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
 109                                   (void *)i)) {
 110                        free(tinfo);
 111                        return -1;
 112                }
 113        }
 114
 115        for (i = 0; i < nr_threads; i++) {
 116                ret = pthread_join(tinfo[i], NULL);
 117                if (ret)
 118                        break;
 119        }
 120
 121        free(tinfo);
 122        return ret;
 123}
 124
 125static int cg_run_in_subcgroups(const char *parent,
 126                                int (*fn)(const char *cgroup, void *arg),
 127                                void *arg, int times)
 128{
 129        char *child;
 130        int i;
 131
 132        for (i = 0; i < times; i++) {
 133                child = cg_name_indexed(parent, "child", i);
 134                if (!child)
 135                        return -1;
 136
 137                if (cg_create(child)) {
 138                        cg_destroy(child);
 139                        free(child);
 140                        return -1;
 141                }
 142
 143                if (cg_run(child, fn, NULL)) {
 144                        cg_destroy(child);
 145                        free(child);
 146                        return -1;
 147                }
 148
 149                cg_destroy(child);
 150                free(child);
 151        }
 152
 153        return 0;
 154}
 155
 156/*
 157 * The test creates and destroys a large number of cgroups. In each cgroup it
 158 * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
 159 * threads. Then it checks the sanity of numbers on the parent level:
 160 * the total size of the cgroups should be roughly equal to
 161 * anon + file + slab + kernel_stack.
 162 */
 163static int test_kmem_memcg_deletion(const char *root)
 164{
 165        long current, slab, anon, file, kernel_stack, sum;
 166        int ret = KSFT_FAIL;
 167        char *parent;
 168
 169        parent = cg_name(root, "kmem_memcg_deletion_test");
 170        if (!parent)
 171                goto cleanup;
 172
 173        if (cg_create(parent))
 174                goto cleanup;
 175
 176        if (cg_write(parent, "cgroup.subtree_control", "+memory"))
 177                goto cleanup;
 178
 179        if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
 180                goto cleanup;
 181
 182        current = cg_read_long(parent, "memory.current");
 183        slab = cg_read_key_long(parent, "memory.stat", "slab ");
 184        anon = cg_read_key_long(parent, "memory.stat", "anon ");
 185        file = cg_read_key_long(parent, "memory.stat", "file ");
 186        kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
 187        if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
 188            kernel_stack < 0)
 189                goto cleanup;
 190
 191        sum = slab + anon + file + kernel_stack;
 192        if (abs(sum - current) < MAX_VMSTAT_ERROR) {
 193                ret = KSFT_PASS;
 194        } else {
 195                printf("memory.current = %ld\n", current);
 196                printf("slab + anon + file + kernel_stack = %ld\n", sum);
 197                printf("slab = %ld\n", slab);
 198                printf("anon = %ld\n", anon);
 199                printf("file = %ld\n", file);
 200                printf("kernel_stack = %ld\n", kernel_stack);
 201        }
 202
 203cleanup:
 204        cg_destroy(parent);
 205        free(parent);
 206
 207        return ret;
 208}
 209
 210/*
 211 * The test reads the entire /proc/kpagecgroup. If the operation went
 212 * successfully (and the kernel didn't panic), the test is treated as passed.
 213 */
 214static int test_kmem_proc_kpagecgroup(const char *root)
 215{
 216        unsigned long buf[128];
 217        int ret = KSFT_FAIL;
 218        ssize_t len;
 219        int fd;
 220
 221        fd = open("/proc/kpagecgroup", O_RDONLY);
 222        if (fd < 0)
 223                return ret;
 224
 225        do {
 226                len = read(fd, buf, sizeof(buf));
 227        } while (len > 0);
 228
 229        if (len == 0)
 230                ret = KSFT_PASS;
 231
 232        close(fd);
 233        return ret;
 234}
 235
 236static void *pthread_wait_fn(void *arg)
 237{
 238        sleep(100);
 239        return NULL;
 240}
 241
 242static int spawn_1000_threads(const char *cgroup, void *arg)
 243{
 244        int nr_threads = 1000;
 245        pthread_t *tinfo;
 246        unsigned long i;
 247        long stack;
 248        int ret = -1;
 249
 250        tinfo = calloc(nr_threads, sizeof(pthread_t));
 251        if (tinfo == NULL)
 252                return -1;
 253
 254        for (i = 0; i < nr_threads; i++) {
 255                if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,
 256                                   (void *)i)) {
 257                        free(tinfo);
 258                        return(-1);
 259                }
 260        }
 261
 262        stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
 263        if (stack >= 4096 * 1000)
 264                ret = 0;
 265
 266        free(tinfo);
 267        return ret;
 268}
 269
 270/*
 271 * The test spawns a process, which spawns 1000 threads. Then it checks
 272 * that memory.stat's kernel_stack is at least 1000 pages large.
 273 */
 274static int test_kmem_kernel_stacks(const char *root)
 275{
 276        int ret = KSFT_FAIL;
 277        char *cg = NULL;
 278
 279        cg = cg_name(root, "kmem_kernel_stacks_test");
 280        if (!cg)
 281                goto cleanup;
 282
 283        if (cg_create(cg))
 284                goto cleanup;
 285
 286        if (cg_run(cg, spawn_1000_threads, NULL))
 287                goto cleanup;
 288
 289        ret = KSFT_PASS;
 290cleanup:
 291        cg_destroy(cg);
 292        free(cg);
 293
 294        return ret;
 295}
 296
 297/*
 298 * This test sequentionally creates 30 child cgroups, allocates some
 299 * kernel memory in each of them, and deletes them. Then it checks
 300 * that the number of dying cgroups on the parent level is 0.
 301 */
 302static int test_kmem_dead_cgroups(const char *root)
 303{
 304        int ret = KSFT_FAIL;
 305        char *parent;
 306        long dead;
 307        int i;
 308
 309        parent = cg_name(root, "kmem_dead_cgroups_test");
 310        if (!parent)
 311                goto cleanup;
 312
 313        if (cg_create(parent))
 314                goto cleanup;
 315
 316        if (cg_write(parent, "cgroup.subtree_control", "+memory"))
 317                goto cleanup;
 318
 319        if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
 320                goto cleanup;
 321
 322        for (i = 0; i < 5; i++) {
 323                dead = cg_read_key_long(parent, "cgroup.stat",
 324                                        "nr_dying_descendants ");
 325                if (dead == 0) {
 326                        ret = KSFT_PASS;
 327                        break;
 328                }
 329                /*
 330                 * Reclaiming cgroups might take some time,
 331                 * let's wait a bit and repeat.
 332                 */
 333                sleep(1);
 334        }
 335
 336cleanup:
 337        cg_destroy(parent);
 338        free(parent);
 339
 340        return ret;
 341}
 342
 343/*
 344 * This test creates a sub-tree with 1000 memory cgroups.
 345 * Then it checks that the memory.current on the parent level
 346 * is greater than 0 and approximates matches the percpu value
 347 * from memory.stat.
 348 */
 349static int test_percpu_basic(const char *root)
 350{
 351        int ret = KSFT_FAIL;
 352        char *parent, *child;
 353        long current, percpu;
 354        int i;
 355
 356        parent = cg_name(root, "percpu_basic_test");
 357        if (!parent)
 358                goto cleanup;
 359
 360        if (cg_create(parent))
 361                goto cleanup;
 362
 363        if (cg_write(parent, "cgroup.subtree_control", "+memory"))
 364                goto cleanup;
 365
 366        for (i = 0; i < 1000; i++) {
 367                child = cg_name_indexed(parent, "child", i);
 368                if (!child)
 369                        return -1;
 370
 371                if (cg_create(child))
 372                        goto cleanup_children;
 373
 374                free(child);
 375        }
 376
 377        current = cg_read_long(parent, "memory.current");
 378        percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
 379
 380        if (current > 0 && percpu > 0 && abs(current - percpu) <
 381            MAX_VMSTAT_ERROR)
 382                ret = KSFT_PASS;
 383        else
 384                printf("memory.current %ld\npercpu %ld\n",
 385                       current, percpu);
 386
 387cleanup_children:
 388        for (i = 0; i < 1000; i++) {
 389                child = cg_name_indexed(parent, "child", i);
 390                cg_destroy(child);
 391                free(child);
 392        }
 393
 394cleanup:
 395        cg_destroy(parent);
 396        free(parent);
 397
 398        return ret;
 399}
 400
 401#define T(x) { x, #x }
 402struct kmem_test {
 403        int (*fn)(const char *root);
 404        const char *name;
 405} tests[] = {
 406        T(test_kmem_basic),
 407        T(test_kmem_memcg_deletion),
 408        T(test_kmem_proc_kpagecgroup),
 409        T(test_kmem_kernel_stacks),
 410        T(test_kmem_dead_cgroups),
 411        T(test_percpu_basic),
 412};
 413#undef T
 414
 415int main(int argc, char **argv)
 416{
 417        char root[PATH_MAX];
 418        int i, ret = EXIT_SUCCESS;
 419
 420        if (cg_find_unified_root(root, sizeof(root)))
 421                ksft_exit_skip("cgroup v2 isn't mounted\n");
 422
 423        /*
 424         * Check that memory controller is available:
 425         * memory is listed in cgroup.controllers
 426         */
 427        if (cg_read_strstr(root, "cgroup.controllers", "memory"))
 428                ksft_exit_skip("memory controller isn't available\n");
 429
 430        if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
 431                if (cg_write(root, "cgroup.subtree_control", "+memory"))
 432                        ksft_exit_skip("Failed to set memory controller\n");
 433
 434        for (i = 0; i < ARRAY_SIZE(tests); i++) {
 435                switch (tests[i].fn(root)) {
 436                case KSFT_PASS:
 437                        ksft_test_result_pass("%s\n", tests[i].name);
 438                        break;
 439                case KSFT_SKIP:
 440                        ksft_test_result_skip("%s\n", tests[i].name);
 441                        break;
 442                default:
 443                        ret = EXIT_FAILURE;
 444                        ksft_test_result_fail("%s\n", tests[i].name);
 445                        break;
 446                }
 447        }
 448
 449        return ret;
 450}
 451