linux/tools/testing/selftests/cgroup/test_memcontrol.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#define _GNU_SOURCE
   3
   4#include <linux/limits.h>
   5#include <linux/oom.h>
   6#include <fcntl.h>
   7#include <stdio.h>
   8#include <stdlib.h>
   9#include <string.h>
  10#include <sys/stat.h>
  11#include <sys/types.h>
  12#include <unistd.h>
  13#include <sys/socket.h>
  14#include <sys/wait.h>
  15#include <arpa/inet.h>
  16#include <netinet/in.h>
  17#include <netdb.h>
  18#include <errno.h>
  19
  20#include "../kselftest.h"
  21#include "cgroup_util.h"
  22
  23/*
  24 * This test creates two nested cgroups with and without enabling
  25 * the memory controller.
  26 */
  27static int test_memcg_subtree_control(const char *root)
  28{
  29        char *parent, *child, *parent2, *child2;
  30        int ret = KSFT_FAIL;
  31        char buf[PAGE_SIZE];
  32
  33        /* Create two nested cgroups with the memory controller enabled */
  34        parent = cg_name(root, "memcg_test_0");
  35        child = cg_name(root, "memcg_test_0/memcg_test_1");
  36        if (!parent || !child)
  37                goto cleanup;
  38
  39        if (cg_create(parent))
  40                goto cleanup;
  41
  42        if (cg_write(parent, "cgroup.subtree_control", "+memory"))
  43                goto cleanup;
  44
  45        if (cg_create(child))
  46                goto cleanup;
  47
  48        if (cg_read_strstr(child, "cgroup.controllers", "memory"))
  49                goto cleanup;
  50
  51        /* Create two nested cgroups without enabling memory controller */
  52        parent2 = cg_name(root, "memcg_test_1");
  53        child2 = cg_name(root, "memcg_test_1/memcg_test_1");
  54        if (!parent2 || !child2)
  55                goto cleanup;
  56
  57        if (cg_create(parent2))
  58                goto cleanup;
  59
  60        if (cg_create(child2))
  61                goto cleanup;
  62
  63        if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
  64                goto cleanup;
  65
  66        if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
  67                goto cleanup;
  68
  69        ret = KSFT_PASS;
  70
  71cleanup:
  72        cg_destroy(child);
  73        cg_destroy(parent);
  74        free(parent);
  75        free(child);
  76
  77        cg_destroy(child2);
  78        cg_destroy(parent2);
  79        free(parent2);
  80        free(child2);
  81
  82        return ret;
  83}
  84
  85static int alloc_anon_50M_check(const char *cgroup, void *arg)
  86{
  87        size_t size = MB(50);
  88        char *buf, *ptr;
  89        long anon, current;
  90        int ret = -1;
  91
  92        buf = malloc(size);
  93        for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
  94                *ptr = 0;
  95
  96        current = cg_read_long(cgroup, "memory.current");
  97        if (current < size)
  98                goto cleanup;
  99
 100        if (!values_close(size, current, 3))
 101                goto cleanup;
 102
 103        anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
 104        if (anon < 0)
 105                goto cleanup;
 106
 107        if (!values_close(anon, current, 3))
 108                goto cleanup;
 109
 110        ret = 0;
 111cleanup:
 112        free(buf);
 113        return ret;
 114}
 115
 116static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
 117{
 118        size_t size = MB(50);
 119        int ret = -1;
 120        long current, file;
 121        int fd;
 122
 123        fd = get_temp_fd();
 124        if (fd < 0)
 125                return -1;
 126
 127        if (alloc_pagecache(fd, size))
 128                goto cleanup;
 129
 130        current = cg_read_long(cgroup, "memory.current");
 131        if (current < size)
 132                goto cleanup;
 133
 134        file = cg_read_key_long(cgroup, "memory.stat", "file ");
 135        if (file < 0)
 136                goto cleanup;
 137
 138        if (!values_close(file, current, 10))
 139                goto cleanup;
 140
 141        ret = 0;
 142
 143cleanup:
 144        close(fd);
 145        return ret;
 146}
 147
 148/*
 149 * This test create a memory cgroup, allocates
 150 * some anonymous memory and some pagecache
 151 * and check memory.current and some memory.stat values.
 152 */
 153static int test_memcg_current(const char *root)
 154{
 155        int ret = KSFT_FAIL;
 156        long current;
 157        char *memcg;
 158
 159        memcg = cg_name(root, "memcg_test");
 160        if (!memcg)
 161                goto cleanup;
 162
 163        if (cg_create(memcg))
 164                goto cleanup;
 165
 166        current = cg_read_long(memcg, "memory.current");
 167        if (current != 0)
 168                goto cleanup;
 169
 170        if (cg_run(memcg, alloc_anon_50M_check, NULL))
 171                goto cleanup;
 172
 173        if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
 174                goto cleanup;
 175
 176        ret = KSFT_PASS;
 177
 178cleanup:
 179        cg_destroy(memcg);
 180        free(memcg);
 181
 182        return ret;
 183}
 184
 185static int alloc_pagecache_50M(const char *cgroup, void *arg)
 186{
 187        int fd = (long)arg;
 188
 189        return alloc_pagecache(fd, MB(50));
 190}
 191
 192static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
 193{
 194        int fd = (long)arg;
 195        int ppid = getppid();
 196
 197        if (alloc_pagecache(fd, MB(50)))
 198                return -1;
 199
 200        while (getppid() == ppid)
 201                sleep(1);
 202
 203        return 0;
 204}
 205
 206static int alloc_anon_noexit(const char *cgroup, void *arg)
 207{
 208        int ppid = getppid();
 209
 210        if (alloc_anon(cgroup, arg))
 211                return -1;
 212
 213        while (getppid() == ppid)
 214                sleep(1);
 215
 216        return 0;
 217}
 218
 219/*
 220 * Wait until processes are killed asynchronously by the OOM killer
 221 * If we exceed a timeout, fail.
 222 */
 223static int cg_test_proc_killed(const char *cgroup)
 224{
 225        int limit;
 226
 227        for (limit = 10; limit > 0; limit--) {
 228                if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
 229                        return 0;
 230
 231                usleep(100000);
 232        }
 233        return -1;
 234}
 235
 236/*
 237 * First, this test creates the following hierarchy:
 238 * A       memory.min = 50M,  memory.max = 200M
 239 * A/B     memory.min = 50M,  memory.current = 50M
 240 * A/B/C   memory.min = 75M,  memory.current = 50M
 241 * A/B/D   memory.min = 25M,  memory.current = 50M
 242 * A/B/E   memory.min = 500M, memory.current = 0
 243 * A/B/F   memory.min = 0,    memory.current = 50M
 244 *
 245 * Usages are pagecache, but the test keeps a running
 246 * process in every leaf cgroup.
 247 * Then it creates A/G and creates a significant
 248 * memory pressure in it.
 249 *
 250 * A/B    memory.current ~= 50M
 251 * A/B/C  memory.current ~= 33M
 252 * A/B/D  memory.current ~= 17M
 253 * A/B/E  memory.current ~= 0
 254 *
 255 * After that it tries to allocate more than there is
 256 * unprotected memory in A available, and checks
 257 * checks that memory.min protects pagecache even
 258 * in this case.
 259 */
 260static int test_memcg_min(const char *root)
 261{
 262        int ret = KSFT_FAIL;
 263        char *parent[3] = {NULL};
 264        char *children[4] = {NULL};
 265        long c[4];
 266        int i, attempts;
 267        int fd;
 268
 269        fd = get_temp_fd();
 270        if (fd < 0)
 271                goto cleanup;
 272
 273        parent[0] = cg_name(root, "memcg_test_0");
 274        if (!parent[0])
 275                goto cleanup;
 276
 277        parent[1] = cg_name(parent[0], "memcg_test_1");
 278        if (!parent[1])
 279                goto cleanup;
 280
 281        parent[2] = cg_name(parent[0], "memcg_test_2");
 282        if (!parent[2])
 283                goto cleanup;
 284
 285        if (cg_create(parent[0]))
 286                goto cleanup;
 287
 288        if (cg_read_long(parent[0], "memory.min")) {
 289                ret = KSFT_SKIP;
 290                goto cleanup;
 291        }
 292
 293        if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
 294                goto cleanup;
 295
 296        if (cg_write(parent[0], "memory.max", "200M"))
 297                goto cleanup;
 298
 299        if (cg_write(parent[0], "memory.swap.max", "0"))
 300                goto cleanup;
 301
 302        if (cg_create(parent[1]))
 303                goto cleanup;
 304
 305        if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
 306                goto cleanup;
 307
 308        if (cg_create(parent[2]))
 309                goto cleanup;
 310
 311        for (i = 0; i < ARRAY_SIZE(children); i++) {
 312                children[i] = cg_name_indexed(parent[1], "child_memcg", i);
 313                if (!children[i])
 314                        goto cleanup;
 315
 316                if (cg_create(children[i]))
 317                        goto cleanup;
 318
 319                if (i == 2)
 320                        continue;
 321
 322                cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
 323                              (void *)(long)fd);
 324        }
 325
 326        if (cg_write(parent[0], "memory.min", "50M"))
 327                goto cleanup;
 328        if (cg_write(parent[1], "memory.min", "50M"))
 329                goto cleanup;
 330        if (cg_write(children[0], "memory.min", "75M"))
 331                goto cleanup;
 332        if (cg_write(children[1], "memory.min", "25M"))
 333                goto cleanup;
 334        if (cg_write(children[2], "memory.min", "500M"))
 335                goto cleanup;
 336        if (cg_write(children[3], "memory.min", "0"))
 337                goto cleanup;
 338
 339        attempts = 0;
 340        while (!values_close(cg_read_long(parent[1], "memory.current"),
 341                             MB(150), 3)) {
 342                if (attempts++ > 5)
 343                        break;
 344                sleep(1);
 345        }
 346
 347        if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
 348                goto cleanup;
 349
 350        if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
 351                goto cleanup;
 352
 353        for (i = 0; i < ARRAY_SIZE(children); i++)
 354                c[i] = cg_read_long(children[i], "memory.current");
 355
 356        if (!values_close(c[0], MB(33), 10))
 357                goto cleanup;
 358
 359        if (!values_close(c[1], MB(17), 10))
 360                goto cleanup;
 361
 362        if (!values_close(c[2], 0, 1))
 363                goto cleanup;
 364
 365        if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
 366                goto cleanup;
 367
 368        if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
 369                goto cleanup;
 370
 371        ret = KSFT_PASS;
 372
 373cleanup:
 374        for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
 375                if (!children[i])
 376                        continue;
 377
 378                cg_destroy(children[i]);
 379                free(children[i]);
 380        }
 381
 382        for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
 383                if (!parent[i])
 384                        continue;
 385
 386                cg_destroy(parent[i]);
 387                free(parent[i]);
 388        }
 389        close(fd);
 390        return ret;
 391}
 392
 393/*
 394 * First, this test creates the following hierarchy:
 395 * A       memory.low = 50M,  memory.max = 200M
 396 * A/B     memory.low = 50M,  memory.current = 50M
 397 * A/B/C   memory.low = 75M,  memory.current = 50M
 398 * A/B/D   memory.low = 25M,  memory.current = 50M
 399 * A/B/E   memory.low = 500M, memory.current = 0
 400 * A/B/F   memory.low = 0,    memory.current = 50M
 401 *
 402 * Usages are pagecache.
 403 * Then it creates A/G an creates a significant
 404 * memory pressure in it.
 405 *
 406 * Then it checks actual memory usages and expects that:
 407 * A/B    memory.current ~= 50M
 408 * A/B/   memory.current ~= 33M
 409 * A/B/D  memory.current ~= 17M
 410 * A/B/E  memory.current ~= 0
 411 *
 412 * After that it tries to allocate more than there is
 413 * unprotected memory in A available,
 414 * and checks low and oom events in memory.events.
 415 */
 416static int test_memcg_low(const char *root)
 417{
 418        int ret = KSFT_FAIL;
 419        char *parent[3] = {NULL};
 420        char *children[4] = {NULL};
 421        long low, oom;
 422        long c[4];
 423        int i;
 424        int fd;
 425
 426        fd = get_temp_fd();
 427        if (fd < 0)
 428                goto cleanup;
 429
 430        parent[0] = cg_name(root, "memcg_test_0");
 431        if (!parent[0])
 432                goto cleanup;
 433
 434        parent[1] = cg_name(parent[0], "memcg_test_1");
 435        if (!parent[1])
 436                goto cleanup;
 437
 438        parent[2] = cg_name(parent[0], "memcg_test_2");
 439        if (!parent[2])
 440                goto cleanup;
 441
 442        if (cg_create(parent[0]))
 443                goto cleanup;
 444
 445        if (cg_read_long(parent[0], "memory.low"))
 446                goto cleanup;
 447
 448        if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
 449                goto cleanup;
 450
 451        if (cg_write(parent[0], "memory.max", "200M"))
 452                goto cleanup;
 453
 454        if (cg_write(parent[0], "memory.swap.max", "0"))
 455                goto cleanup;
 456
 457        if (cg_create(parent[1]))
 458                goto cleanup;
 459
 460        if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
 461                goto cleanup;
 462
 463        if (cg_create(parent[2]))
 464                goto cleanup;
 465
 466        for (i = 0; i < ARRAY_SIZE(children); i++) {
 467                children[i] = cg_name_indexed(parent[1], "child_memcg", i);
 468                if (!children[i])
 469                        goto cleanup;
 470
 471                if (cg_create(children[i]))
 472                        goto cleanup;
 473
 474                if (i == 2)
 475                        continue;
 476
 477                if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
 478                        goto cleanup;
 479        }
 480
 481        if (cg_write(parent[0], "memory.low", "50M"))
 482                goto cleanup;
 483        if (cg_write(parent[1], "memory.low", "50M"))
 484                goto cleanup;
 485        if (cg_write(children[0], "memory.low", "75M"))
 486                goto cleanup;
 487        if (cg_write(children[1], "memory.low", "25M"))
 488                goto cleanup;
 489        if (cg_write(children[2], "memory.low", "500M"))
 490                goto cleanup;
 491        if (cg_write(children[3], "memory.low", "0"))
 492                goto cleanup;
 493
 494        if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
 495                goto cleanup;
 496
 497        if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
 498                goto cleanup;
 499
 500        for (i = 0; i < ARRAY_SIZE(children); i++)
 501                c[i] = cg_read_long(children[i], "memory.current");
 502
 503        if (!values_close(c[0], MB(33), 10))
 504                goto cleanup;
 505
 506        if (!values_close(c[1], MB(17), 10))
 507                goto cleanup;
 508
 509        if (!values_close(c[2], 0, 1))
 510                goto cleanup;
 511
 512        if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
 513                fprintf(stderr,
 514                        "memory.low prevents from allocating anon memory\n");
 515                goto cleanup;
 516        }
 517
 518        for (i = 0; i < ARRAY_SIZE(children); i++) {
 519                oom = cg_read_key_long(children[i], "memory.events", "oom ");
 520                low = cg_read_key_long(children[i], "memory.events", "low ");
 521
 522                if (oom)
 523                        goto cleanup;
 524                if (i < 2 && low <= 0)
 525                        goto cleanup;
 526                if (i >= 2 && low)
 527                        goto cleanup;
 528        }
 529
 530        ret = KSFT_PASS;
 531
 532cleanup:
 533        for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
 534                if (!children[i])
 535                        continue;
 536
 537                cg_destroy(children[i]);
 538                free(children[i]);
 539        }
 540
 541        for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
 542                if (!parent[i])
 543                        continue;
 544
 545                cg_destroy(parent[i]);
 546                free(parent[i]);
 547        }
 548        close(fd);
 549        return ret;
 550}
 551
 552static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
 553{
 554        size_t size = MB(50);
 555        int ret = -1;
 556        long current;
 557        int fd;
 558
 559        fd = get_temp_fd();
 560        if (fd < 0)
 561                return -1;
 562
 563        if (alloc_pagecache(fd, size))
 564                goto cleanup;
 565
 566        current = cg_read_long(cgroup, "memory.current");
 567        if (current <= MB(29) || current > MB(30))
 568                goto cleanup;
 569
 570        ret = 0;
 571
 572cleanup:
 573        close(fd);
 574        return ret;
 575
 576}
 577
 578/*
 579 * This test checks that memory.high limits the amount of
 580 * memory which can be consumed by either anonymous memory
 581 * or pagecache.
 582 */
 583static int test_memcg_high(const char *root)
 584{
 585        int ret = KSFT_FAIL;
 586        char *memcg;
 587        long high;
 588
 589        memcg = cg_name(root, "memcg_test");
 590        if (!memcg)
 591                goto cleanup;
 592
 593        if (cg_create(memcg))
 594                goto cleanup;
 595
 596        if (cg_read_strcmp(memcg, "memory.high", "max\n"))
 597                goto cleanup;
 598
 599        if (cg_write(memcg, "memory.swap.max", "0"))
 600                goto cleanup;
 601
 602        if (cg_write(memcg, "memory.high", "30M"))
 603                goto cleanup;
 604
 605        if (cg_run(memcg, alloc_anon, (void *)MB(100)))
 606                goto cleanup;
 607
 608        if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
 609                goto cleanup;
 610
 611        if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
 612                goto cleanup;
 613
 614        high = cg_read_key_long(memcg, "memory.events", "high ");
 615        if (high <= 0)
 616                goto cleanup;
 617
 618        ret = KSFT_PASS;
 619
 620cleanup:
 621        cg_destroy(memcg);
 622        free(memcg);
 623
 624        return ret;
 625}
 626
 627/*
 628 * This test checks that memory.max limits the amount of
 629 * memory which can be consumed by either anonymous memory
 630 * or pagecache.
 631 */
 632static int test_memcg_max(const char *root)
 633{
 634        int ret = KSFT_FAIL;
 635        char *memcg;
 636        long current, max;
 637
 638        memcg = cg_name(root, "memcg_test");
 639        if (!memcg)
 640                goto cleanup;
 641
 642        if (cg_create(memcg))
 643                goto cleanup;
 644
 645        if (cg_read_strcmp(memcg, "memory.max", "max\n"))
 646                goto cleanup;
 647
 648        if (cg_write(memcg, "memory.swap.max", "0"))
 649                goto cleanup;
 650
 651        if (cg_write(memcg, "memory.max", "30M"))
 652                goto cleanup;
 653
 654        /* Should be killed by OOM killer */
 655        if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
 656                goto cleanup;
 657
 658        if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
 659                goto cleanup;
 660
 661        current = cg_read_long(memcg, "memory.current");
 662        if (current > MB(30) || !current)
 663                goto cleanup;
 664
 665        max = cg_read_key_long(memcg, "memory.events", "max ");
 666        if (max <= 0)
 667                goto cleanup;
 668
 669        ret = KSFT_PASS;
 670
 671cleanup:
 672        cg_destroy(memcg);
 673        free(memcg);
 674
 675        return ret;
 676}
 677
 678static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
 679{
 680        long mem_max = (long)arg;
 681        size_t size = MB(50);
 682        char *buf, *ptr;
 683        long mem_current, swap_current;
 684        int ret = -1;
 685
 686        buf = malloc(size);
 687        for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
 688                *ptr = 0;
 689
 690        mem_current = cg_read_long(cgroup, "memory.current");
 691        if (!mem_current || !values_close(mem_current, mem_max, 3))
 692                goto cleanup;
 693
 694        swap_current = cg_read_long(cgroup, "memory.swap.current");
 695        if (!swap_current ||
 696            !values_close(mem_current + swap_current, size, 3))
 697                goto cleanup;
 698
 699        ret = 0;
 700cleanup:
 701        free(buf);
 702        return ret;
 703}
 704
 705/*
 706 * This test checks that memory.swap.max limits the amount of
 707 * anonymous memory which can be swapped out.
 708 */
 709static int test_memcg_swap_max(const char *root)
 710{
 711        int ret = KSFT_FAIL;
 712        char *memcg;
 713        long max;
 714
 715        if (!is_swap_enabled())
 716                return KSFT_SKIP;
 717
 718        memcg = cg_name(root, "memcg_test");
 719        if (!memcg)
 720                goto cleanup;
 721
 722        if (cg_create(memcg))
 723                goto cleanup;
 724
 725        if (cg_read_long(memcg, "memory.swap.current")) {
 726                ret = KSFT_SKIP;
 727                goto cleanup;
 728        }
 729
 730        if (cg_read_strcmp(memcg, "memory.max", "max\n"))
 731                goto cleanup;
 732
 733        if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
 734                goto cleanup;
 735
 736        if (cg_write(memcg, "memory.swap.max", "30M"))
 737                goto cleanup;
 738
 739        if (cg_write(memcg, "memory.max", "30M"))
 740                goto cleanup;
 741
 742        /* Should be killed by OOM killer */
 743        if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
 744                goto cleanup;
 745
 746        if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
 747                goto cleanup;
 748
 749        if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
 750                goto cleanup;
 751
 752        if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
 753                goto cleanup;
 754
 755        max = cg_read_key_long(memcg, "memory.events", "max ");
 756        if (max <= 0)
 757                goto cleanup;
 758
 759        ret = KSFT_PASS;
 760
 761cleanup:
 762        cg_destroy(memcg);
 763        free(memcg);
 764
 765        return ret;
 766}
 767
 768/*
 769 * This test disables swapping and tries to allocate anonymous memory
 770 * up to OOM. Then it checks for oom and oom_kill events in
 771 * memory.events.
 772 */
 773static int test_memcg_oom_events(const char *root)
 774{
 775        int ret = KSFT_FAIL;
 776        char *memcg;
 777
 778        memcg = cg_name(root, "memcg_test");
 779        if (!memcg)
 780                goto cleanup;
 781
 782        if (cg_create(memcg))
 783                goto cleanup;
 784
 785        if (cg_write(memcg, "memory.max", "30M"))
 786                goto cleanup;
 787
 788        if (cg_write(memcg, "memory.swap.max", "0"))
 789                goto cleanup;
 790
 791        if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
 792                goto cleanup;
 793
 794        if (cg_read_strcmp(memcg, "cgroup.procs", ""))
 795                goto cleanup;
 796
 797        if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
 798                goto cleanup;
 799
 800        if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
 801                goto cleanup;
 802
 803        ret = KSFT_PASS;
 804
 805cleanup:
 806        cg_destroy(memcg);
 807        free(memcg);
 808
 809        return ret;
 810}
 811
 812struct tcp_server_args {
 813        unsigned short port;
 814        int ctl[2];
 815};
 816
 817static int tcp_server(const char *cgroup, void *arg)
 818{
 819        struct tcp_server_args *srv_args = arg;
 820        struct sockaddr_in6 saddr = { 0 };
 821        socklen_t slen = sizeof(saddr);
 822        int sk, client_sk, ctl_fd, yes = 1, ret = -1;
 823
 824        close(srv_args->ctl[0]);
 825        ctl_fd = srv_args->ctl[1];
 826
 827        saddr.sin6_family = AF_INET6;
 828        saddr.sin6_addr = in6addr_any;
 829        saddr.sin6_port = htons(srv_args->port);
 830
 831        sk = socket(AF_INET6, SOCK_STREAM, 0);
 832        if (sk < 0)
 833                return ret;
 834
 835        if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
 836                goto cleanup;
 837
 838        if (bind(sk, (struct sockaddr *)&saddr, slen)) {
 839                write(ctl_fd, &errno, sizeof(errno));
 840                goto cleanup;
 841        }
 842
 843        if (listen(sk, 1))
 844                goto cleanup;
 845
 846        ret = 0;
 847        if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
 848                ret = -1;
 849                goto cleanup;
 850        }
 851
 852        client_sk = accept(sk, NULL, NULL);
 853        if (client_sk < 0)
 854                goto cleanup;
 855
 856        ret = -1;
 857        for (;;) {
 858                uint8_t buf[0x100000];
 859
 860                if (write(client_sk, buf, sizeof(buf)) <= 0) {
 861                        if (errno == ECONNRESET)
 862                                ret = 0;
 863                        break;
 864                }
 865        }
 866
 867        close(client_sk);
 868
 869cleanup:
 870        close(sk);
 871        return ret;
 872}
 873
 874static int tcp_client(const char *cgroup, unsigned short port)
 875{
 876        const char server[] = "localhost";
 877        struct addrinfo *ai;
 878        char servport[6];
 879        int retries = 0x10; /* nice round number */
 880        int sk, ret;
 881
 882        snprintf(servport, sizeof(servport), "%hd", port);
 883        ret = getaddrinfo(server, servport, NULL, &ai);
 884        if (ret)
 885                return ret;
 886
 887        sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
 888        if (sk < 0)
 889                goto free_ainfo;
 890
 891        ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
 892        if (ret < 0)
 893                goto close_sk;
 894
 895        ret = KSFT_FAIL;
 896        while (retries--) {
 897                uint8_t buf[0x100000];
 898                long current, sock;
 899
 900                if (read(sk, buf, sizeof(buf)) <= 0)
 901                        goto close_sk;
 902
 903                current = cg_read_long(cgroup, "memory.current");
 904                sock = cg_read_key_long(cgroup, "memory.stat", "sock ");
 905
 906                if (current < 0 || sock < 0)
 907                        goto close_sk;
 908
 909                if (current < sock)
 910                        goto close_sk;
 911
 912                if (values_close(current, sock, 10)) {
 913                        ret = KSFT_PASS;
 914                        break;
 915                }
 916        }
 917
 918close_sk:
 919        close(sk);
 920free_ainfo:
 921        freeaddrinfo(ai);
 922        return ret;
 923}
 924
 925/*
 926 * This test checks socket memory accounting.
 927 * The test forks a TCP server listens on a random port between 1000
 928 * and 61000. Once it gets a client connection, it starts writing to
 929 * its socket.
 930 * The TCP client interleaves reads from the socket with check whether
 931 * memory.current and memory.stat.sock are similar.
 932 */
 933static int test_memcg_sock(const char *root)
 934{
 935        int bind_retries = 5, ret = KSFT_FAIL, pid, err;
 936        unsigned short port;
 937        char *memcg;
 938
 939        memcg = cg_name(root, "memcg_test");
 940        if (!memcg)
 941                goto cleanup;
 942
 943        if (cg_create(memcg))
 944                goto cleanup;
 945
 946        while (bind_retries--) {
 947                struct tcp_server_args args;
 948
 949                if (pipe(args.ctl))
 950                        goto cleanup;
 951
 952                port = args.port = 1000 + rand() % 60000;
 953
 954                pid = cg_run_nowait(memcg, tcp_server, &args);
 955                if (pid < 0)
 956                        goto cleanup;
 957
 958                close(args.ctl[1]);
 959                if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
 960                        goto cleanup;
 961                close(args.ctl[0]);
 962
 963                if (!err)
 964                        break;
 965                if (err != EADDRINUSE)
 966                        goto cleanup;
 967
 968                waitpid(pid, NULL, 0);
 969        }
 970
 971        if (err == EADDRINUSE) {
 972                ret = KSFT_SKIP;
 973                goto cleanup;
 974        }
 975
 976        if (tcp_client(memcg, port) != KSFT_PASS)
 977                goto cleanup;
 978
 979        waitpid(pid, &err, 0);
 980        if (WEXITSTATUS(err))
 981                goto cleanup;
 982
 983        if (cg_read_long(memcg, "memory.current") < 0)
 984                goto cleanup;
 985
 986        if (cg_read_key_long(memcg, "memory.stat", "sock "))
 987                goto cleanup;
 988
 989        ret = KSFT_PASS;
 990
 991cleanup:
 992        cg_destroy(memcg);
 993        free(memcg);
 994
 995        return ret;
 996}
 997
 998/*
 999 * This test disables swapping and tries to allocate anonymous memory
1000 * up to OOM with memory.group.oom set. Then it checks that all
1001 * processes in the leaf (but not the parent) were killed.
1002 */
1003static int test_memcg_oom_group_leaf_events(const char *root)
1004{
1005        int ret = KSFT_FAIL;
1006        char *parent, *child;
1007
1008        parent = cg_name(root, "memcg_test_0");
1009        child = cg_name(root, "memcg_test_0/memcg_test_1");
1010
1011        if (!parent || !child)
1012                goto cleanup;
1013
1014        if (cg_create(parent))
1015                goto cleanup;
1016
1017        if (cg_create(child))
1018                goto cleanup;
1019
1020        if (cg_write(parent, "cgroup.subtree_control", "+memory"))
1021                goto cleanup;
1022
1023        if (cg_write(child, "memory.max", "50M"))
1024                goto cleanup;
1025
1026        if (cg_write(child, "memory.swap.max", "0"))
1027                goto cleanup;
1028
1029        if (cg_write(child, "memory.oom.group", "1"))
1030                goto cleanup;
1031
1032        cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1033        cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1034        cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1035        if (!cg_run(child, alloc_anon, (void *)MB(100)))
1036                goto cleanup;
1037
1038        if (cg_test_proc_killed(child))
1039                goto cleanup;
1040
1041        if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
1042                goto cleanup;
1043
1044        if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
1045                goto cleanup;
1046
1047        ret = KSFT_PASS;
1048
1049cleanup:
1050        if (child)
1051                cg_destroy(child);
1052        if (parent)
1053                cg_destroy(parent);
1054        free(child);
1055        free(parent);
1056
1057        return ret;
1058}
1059
1060/*
1061 * This test disables swapping and tries to allocate anonymous memory
1062 * up to OOM with memory.group.oom set. Then it checks that all
1063 * processes in the parent and leaf were killed.
1064 */
1065static int test_memcg_oom_group_parent_events(const char *root)
1066{
1067        int ret = KSFT_FAIL;
1068        char *parent, *child;
1069
1070        parent = cg_name(root, "memcg_test_0");
1071        child = cg_name(root, "memcg_test_0/memcg_test_1");
1072
1073        if (!parent || !child)
1074                goto cleanup;
1075
1076        if (cg_create(parent))
1077                goto cleanup;
1078
1079        if (cg_create(child))
1080                goto cleanup;
1081
1082        if (cg_write(parent, "memory.max", "80M"))
1083                goto cleanup;
1084
1085        if (cg_write(parent, "memory.swap.max", "0"))
1086                goto cleanup;
1087
1088        if (cg_write(parent, "memory.oom.group", "1"))
1089                goto cleanup;
1090
1091        cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
1092        cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1093        cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
1094
1095        if (!cg_run(child, alloc_anon, (void *)MB(100)))
1096                goto cleanup;
1097
1098        if (cg_test_proc_killed(child))
1099                goto cleanup;
1100        if (cg_test_proc_killed(parent))
1101                goto cleanup;
1102
1103        ret = KSFT_PASS;
1104
1105cleanup:
1106        if (child)
1107                cg_destroy(child);
1108        if (parent)
1109                cg_destroy(parent);
1110        free(child);
1111        free(parent);
1112
1113        return ret;
1114}
1115
1116/*
1117 * This test disables swapping and tries to allocate anonymous memory
1118 * up to OOM with memory.group.oom set. Then it checks that all
1119 * processes were killed except those set with OOM_SCORE_ADJ_MIN
1120 */
1121static int test_memcg_oom_group_score_events(const char *root)
1122{
1123        int ret = KSFT_FAIL;
1124        char *memcg;
1125        int safe_pid;
1126
1127        memcg = cg_name(root, "memcg_test_0");
1128
1129        if (!memcg)
1130                goto cleanup;
1131
1132        if (cg_create(memcg))
1133                goto cleanup;
1134
1135        if (cg_write(memcg, "memory.max", "50M"))
1136                goto cleanup;
1137
1138        if (cg_write(memcg, "memory.swap.max", "0"))
1139                goto cleanup;
1140
1141        if (cg_write(memcg, "memory.oom.group", "1"))
1142                goto cleanup;
1143
1144        safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1145        if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
1146                goto cleanup;
1147
1148        cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
1149        if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
1150                goto cleanup;
1151
1152        if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
1153                goto cleanup;
1154
1155        if (kill(safe_pid, SIGKILL))
1156                goto cleanup;
1157
1158        ret = KSFT_PASS;
1159
1160cleanup:
1161        if (memcg)
1162                cg_destroy(memcg);
1163        free(memcg);
1164
1165        return ret;
1166}
1167
1168
1169#define T(x) { x, #x }
1170struct memcg_test {
1171        int (*fn)(const char *root);
1172        const char *name;
1173} tests[] = {
1174        T(test_memcg_subtree_control),
1175        T(test_memcg_current),
1176        T(test_memcg_min),
1177        T(test_memcg_low),
1178        T(test_memcg_high),
1179        T(test_memcg_max),
1180        T(test_memcg_oom_events),
1181        T(test_memcg_swap_max),
1182        T(test_memcg_sock),
1183        T(test_memcg_oom_group_leaf_events),
1184        T(test_memcg_oom_group_parent_events),
1185        T(test_memcg_oom_group_score_events),
1186};
1187#undef T
1188
1189int main(int argc, char **argv)
1190{
1191        char root[PATH_MAX];
1192        int i, ret = EXIT_SUCCESS;
1193
1194        if (cg_find_unified_root(root, sizeof(root)))
1195                ksft_exit_skip("cgroup v2 isn't mounted\n");
1196
1197        /*
1198         * Check that memory controller is available:
1199         * memory is listed in cgroup.controllers
1200         */
1201        if (cg_read_strstr(root, "cgroup.controllers", "memory"))
1202                ksft_exit_skip("memory controller isn't available\n");
1203
1204        for (i = 0; i < ARRAY_SIZE(tests); i++) {
1205                switch (tests[i].fn(root)) {
1206                case KSFT_PASS:
1207                        ksft_test_result_pass("%s\n", tests[i].name);
1208                        break;
1209                case KSFT_SKIP:
1210                        ksft_test_result_skip("%s\n", tests[i].name);
1211                        break;
1212                default:
1213                        ret = EXIT_FAILURE;
1214                        ksft_test_result_fail("%s\n", tests[i].name);
1215                        break;
1216                }
1217        }
1218
1219        return ret;
1220}
1221