linux/lib/test_vmalloc.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2
   3/*
 * Test module to stress and analyze the performance of the vmalloc allocator.
   5 * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
   6 */
   7#include <linux/init.h>
   8#include <linux/kernel.h>
   9#include <linux/module.h>
  10#include <linux/vmalloc.h>
  11#include <linux/random.h>
  12#include <linux/kthread.h>
  13#include <linux/moduleparam.h>
  14#include <linux/completion.h>
  15#include <linux/delay.h>
  16#include <linux/rwsem.h>
  17#include <linux/mm.h>
  18#include <linux/rcupdate.h>
  19#include <linux/slab.h>
  20
  21#define __param(type, name, init, msg)          \
  22        static type name = init;                                \
  23        module_param(name, type, 0444);                 \
  24        MODULE_PARM_DESC(name, msg)                             \
  25
  26__param(int, nr_threads, 0,
  27        "Number of workers to perform tests(min: 1 max: USHRT_MAX)");
  28
  29__param(bool, sequential_test_order, false,
  30        "Use sequential stress tests order");
  31
  32__param(int, test_repeat_count, 1,
  33        "Set test repeat counter");
  34
  35__param(int, test_loop_count, 1000000,
  36        "Set test loop counter");
  37
  38__param(int, run_test_mask, INT_MAX,
  39        "Set tests specified in the mask.\n\n"
  40                "\t\tid: 1,    name: fix_size_alloc_test\n"
  41                "\t\tid: 2,    name: full_fit_alloc_test\n"
  42                "\t\tid: 4,    name: long_busy_list_alloc_test\n"
  43                "\t\tid: 8,    name: random_size_alloc_test\n"
  44                "\t\tid: 16,   name: fix_align_alloc_test\n"
  45                "\t\tid: 32,   name: random_size_align_alloc_test\n"
  46                "\t\tid: 64,   name: align_shift_alloc_test\n"
  47                "\t\tid: 128,  name: pcpu_alloc_test\n"
  48                "\t\tid: 256,  name: kvfree_rcu_1_arg_vmalloc_test\n"
  49                "\t\tid: 512,  name: kvfree_rcu_2_arg_vmalloc_test\n"
  50                /* Add a new test case description here. */
  51);
  52
  53/*
  54 * Read write semaphore for synchronization of setup
  55 * phase that is done in main thread and workers.
  56 */
  57static DECLARE_RWSEM(prepare_for_test_rwsem);
  58
  59/*
  60 * Completion tracking for worker threads.
  61 */
  62static DECLARE_COMPLETION(test_all_done_comp);
  63static atomic_t test_n_undone = ATOMIC_INIT(0);
  64
  65static inline void
  66test_report_one_done(void)
  67{
  68        if (atomic_dec_and_test(&test_n_undone))
  69                complete(&test_all_done_comp);
  70}
  71
  72static int random_size_align_alloc_test(void)
  73{
  74        unsigned long size, align, rnd;
  75        void *ptr;
  76        int i;
  77
  78        for (i = 0; i < test_loop_count; i++) {
  79                get_random_bytes(&rnd, sizeof(rnd));
  80
  81                /*
  82                 * Maximum 1024 pages, if PAGE_SIZE is 4096.
  83                 */
  84                align = 1 << (rnd % 23);
  85
  86                /*
  87                 * Maximum 10 pages.
  88                 */
  89                size = ((rnd % 10) + 1) * PAGE_SIZE;
  90
  91                ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0,
  92                                __builtin_return_address(0));
  93                if (!ptr)
  94                        return -1;
  95
  96                vfree(ptr);
  97        }
  98
  99        return 0;
 100}
 101
 102/*
 103 * This test case is supposed to be failed.
 104 */
 105static int align_shift_alloc_test(void)
 106{
 107        unsigned long align;
 108        void *ptr;
 109        int i;
 110
 111        for (i = 0; i < BITS_PER_LONG; i++) {
 112                align = ((unsigned long) 1) << i;
 113
 114                ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0,
 115                                __builtin_return_address(0));
 116                if (!ptr)
 117                        return -1;
 118
 119                vfree(ptr);
 120        }
 121
 122        return 0;
 123}
 124
 125static int fix_align_alloc_test(void)
 126{
 127        void *ptr;
 128        int i;
 129
 130        for (i = 0; i < test_loop_count; i++) {
 131                ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1,
 132                                GFP_KERNEL | __GFP_ZERO, 0,
 133                                __builtin_return_address(0));
 134                if (!ptr)
 135                        return -1;
 136
 137                vfree(ptr);
 138        }
 139
 140        return 0;
 141}
 142
 143static int random_size_alloc_test(void)
 144{
 145        unsigned int n;
 146        void *p;
 147        int i;
 148
 149        for (i = 0; i < test_loop_count; i++) {
 150                get_random_bytes(&n, sizeof(i));
 151                n = (n % 100) + 1;
 152
 153                p = vmalloc(n * PAGE_SIZE);
 154
 155                if (!p)
 156                        return -1;
 157
 158                *((__u8 *)p) = 1;
 159                vfree(p);
 160        }
 161
 162        return 0;
 163}
 164
 165static int long_busy_list_alloc_test(void)
 166{
 167        void *ptr_1, *ptr_2;
 168        void **ptr;
 169        int rv = -1;
 170        int i;
 171
 172        ptr = vmalloc(sizeof(void *) * 15000);
 173        if (!ptr)
 174                return rv;
 175
 176        for (i = 0; i < 15000; i++)
 177                ptr[i] = vmalloc(1 * PAGE_SIZE);
 178
 179        for (i = 0; i < test_loop_count; i++) {
 180                ptr_1 = vmalloc(100 * PAGE_SIZE);
 181                if (!ptr_1)
 182                        goto leave;
 183
 184                ptr_2 = vmalloc(1 * PAGE_SIZE);
 185                if (!ptr_2) {
 186                        vfree(ptr_1);
 187                        goto leave;
 188                }
 189
 190                *((__u8 *)ptr_1) = 0;
 191                *((__u8 *)ptr_2) = 1;
 192
 193                vfree(ptr_1);
 194                vfree(ptr_2);
 195        }
 196
 197        /*  Success */
 198        rv = 0;
 199
 200leave:
 201        for (i = 0; i < 15000; i++)
 202                vfree(ptr[i]);
 203
 204        vfree(ptr);
 205        return rv;
 206}
 207
 208static int full_fit_alloc_test(void)
 209{
 210        void **ptr, **junk_ptr, *tmp;
 211        int junk_length;
 212        int rv = -1;
 213        int i;
 214
 215        junk_length = fls(num_online_cpus());
 216        junk_length *= (32 * 1024 * 1024 / PAGE_SIZE);
 217
 218        ptr = vmalloc(sizeof(void *) * junk_length);
 219        if (!ptr)
 220                return rv;
 221
 222        junk_ptr = vmalloc(sizeof(void *) * junk_length);
 223        if (!junk_ptr) {
 224                vfree(ptr);
 225                return rv;
 226        }
 227
 228        for (i = 0; i < junk_length; i++) {
 229                ptr[i] = vmalloc(1 * PAGE_SIZE);
 230                junk_ptr[i] = vmalloc(1 * PAGE_SIZE);
 231        }
 232
 233        for (i = 0; i < junk_length; i++)
 234                vfree(junk_ptr[i]);
 235
 236        for (i = 0; i < test_loop_count; i++) {
 237                tmp = vmalloc(1 * PAGE_SIZE);
 238
 239                if (!tmp)
 240                        goto error;
 241
 242                *((__u8 *)tmp) = 1;
 243                vfree(tmp);
 244        }
 245
 246        /* Success */
 247        rv = 0;
 248
 249error:
 250        for (i = 0; i < junk_length; i++)
 251                vfree(ptr[i]);
 252
 253        vfree(ptr);
 254        vfree(junk_ptr);
 255
 256        return rv;
 257}
 258
 259static int fix_size_alloc_test(void)
 260{
 261        void *ptr;
 262        int i;
 263
 264        for (i = 0; i < test_loop_count; i++) {
 265                ptr = vmalloc(3 * PAGE_SIZE);
 266
 267                if (!ptr)
 268                        return -1;
 269
 270                *((__u8 *)ptr) = 0;
 271
 272                vfree(ptr);
 273        }
 274
 275        return 0;
 276}
 277
 278static int
 279pcpu_alloc_test(void)
 280{
 281        int rv = 0;
 282#ifndef CONFIG_NEED_PER_CPU_KM
 283        void __percpu **pcpu;
 284        size_t size, align;
 285        int i;
 286
 287        pcpu = vmalloc(sizeof(void __percpu *) * 35000);
 288        if (!pcpu)
 289                return -1;
 290
 291        for (i = 0; i < 35000; i++) {
 292                unsigned int r;
 293
 294                get_random_bytes(&r, sizeof(i));
 295                size = (r % (PAGE_SIZE / 4)) + 1;
 296
 297                /*
 298                 * Maximum PAGE_SIZE
 299                 */
 300                get_random_bytes(&r, sizeof(i));
 301                align = 1 << ((i % 11) + 1);
 302
 303                pcpu[i] = __alloc_percpu(size, align);
 304                if (!pcpu[i])
 305                        rv = -1;
 306        }
 307
 308        for (i = 0; i < 35000; i++)
 309                free_percpu(pcpu[i]);
 310
 311        vfree(pcpu);
 312#endif
 313        return rv;
 314}
 315
 316struct test_kvfree_rcu {
 317        struct rcu_head rcu;
 318        unsigned char array[20];
 319};
 320
 321static int
 322kvfree_rcu_1_arg_vmalloc_test(void)
 323{
 324        struct test_kvfree_rcu *p;
 325        int i;
 326
 327        for (i = 0; i < test_loop_count; i++) {
 328                p = vmalloc(1 * PAGE_SIZE);
 329                if (!p)
 330                        return -1;
 331
 332                p->array[0] = 'a';
 333                kvfree_rcu(p);
 334        }
 335
 336        return 0;
 337}
 338
 339static int
 340kvfree_rcu_2_arg_vmalloc_test(void)
 341{
 342        struct test_kvfree_rcu *p;
 343        int i;
 344
 345        for (i = 0; i < test_loop_count; i++) {
 346                p = vmalloc(1 * PAGE_SIZE);
 347                if (!p)
 348                        return -1;
 349
 350                p->array[0] = 'a';
 351                kvfree_rcu(p, rcu);
 352        }
 353
 354        return 0;
 355}
 356
 357struct test_case_desc {
 358        const char *test_name;
 359        int (*test_func)(void);
 360};
 361
 362static struct test_case_desc test_case_array[] = {
 363        { "fix_size_alloc_test", fix_size_alloc_test },
 364        { "full_fit_alloc_test", full_fit_alloc_test },
 365        { "long_busy_list_alloc_test", long_busy_list_alloc_test },
 366        { "random_size_alloc_test", random_size_alloc_test },
 367        { "fix_align_alloc_test", fix_align_alloc_test },
 368        { "random_size_align_alloc_test", random_size_align_alloc_test },
 369        { "align_shift_alloc_test", align_shift_alloc_test },
 370        { "pcpu_alloc_test", pcpu_alloc_test },
 371        { "kvfree_rcu_1_arg_vmalloc_test", kvfree_rcu_1_arg_vmalloc_test },
 372        { "kvfree_rcu_2_arg_vmalloc_test", kvfree_rcu_2_arg_vmalloc_test },
 373        /* Add a new test case here. */
 374};
 375
 376struct test_case_data {
 377        int test_failed;
 378        int test_passed;
 379        u64 time;
 380};
 381
 382static struct test_driver {
 383        struct task_struct *task;
 384        struct test_case_data data[ARRAY_SIZE(test_case_array)];
 385
 386        unsigned long start;
 387        unsigned long stop;
 388} *tdriver;
 389
 390static void shuffle_array(int *arr, int n)
 391{
 392        unsigned int rnd;
 393        int i, j, x;
 394
 395        for (i = n - 1; i > 0; i--)  {
 396                get_random_bytes(&rnd, sizeof(rnd));
 397
 398                /* Cut the range. */
 399                j = rnd % i;
 400
 401                /* Swap indexes. */
 402                x = arr[i];
 403                arr[i] = arr[j];
 404                arr[j] = x;
 405        }
 406}
 407
 408static int test_func(void *private)
 409{
 410        struct test_driver *t = private;
 411        int random_array[ARRAY_SIZE(test_case_array)];
 412        int index, i, j;
 413        ktime_t kt;
 414        u64 delta;
 415
 416        for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
 417                random_array[i] = i;
 418
 419        if (!sequential_test_order)
 420                shuffle_array(random_array, ARRAY_SIZE(test_case_array));
 421
 422        /*
 423         * Block until initialization is done.
 424         */
 425        down_read(&prepare_for_test_rwsem);
 426
 427        t->start = get_cycles();
 428        for (i = 0; i < ARRAY_SIZE(test_case_array); i++) {
 429                index = random_array[i];
 430
 431                /*
 432                 * Skip tests if run_test_mask has been specified.
 433                 */
 434                if (!((run_test_mask & (1 << index)) >> index))
 435                        continue;
 436
 437                kt = ktime_get();
 438                for (j = 0; j < test_repeat_count; j++) {
 439                        if (!test_case_array[index].test_func())
 440                                t->data[index].test_passed++;
 441                        else
 442                                t->data[index].test_failed++;
 443                }
 444
 445                /*
 446                 * Take an average time that test took.
 447                 */
 448                delta = (u64) ktime_us_delta(ktime_get(), kt);
 449                do_div(delta, (u32) test_repeat_count);
 450
 451                t->data[index].time = delta;
 452        }
 453        t->stop = get_cycles();
 454
 455        up_read(&prepare_for_test_rwsem);
 456        test_report_one_done();
 457
 458        /*
 459         * Wait for the kthread_stop() call.
 460         */
 461        while (!kthread_should_stop())
 462                msleep(10);
 463
 464        return 0;
 465}
 466
 467static int
 468init_test_configurtion(void)
 469{
 470        /*
 471         * A maximum number of workers is defined as hard-coded
 472         * value and set to USHRT_MAX. We add such gap just in
 473         * case and for potential heavy stressing.
 474         */
 475        nr_threads = clamp(nr_threads, 1, (int) USHRT_MAX);
 476
 477        /* Allocate the space for test instances. */
 478        tdriver = kvcalloc(nr_threads, sizeof(*tdriver), GFP_KERNEL);
 479        if (tdriver == NULL)
 480                return -1;
 481
 482        if (test_repeat_count <= 0)
 483                test_repeat_count = 1;
 484
 485        if (test_loop_count <= 0)
 486                test_loop_count = 1;
 487
 488        return 0;
 489}
 490
 491static void do_concurrent_test(void)
 492{
 493        int i, ret;
 494
 495        /*
 496         * Set some basic configurations plus sanity check.
 497         */
 498        ret = init_test_configurtion();
 499        if (ret < 0)
 500                return;
 501
 502        /*
 503         * Put on hold all workers.
 504         */
 505        down_write(&prepare_for_test_rwsem);
 506
 507        for (i = 0; i < nr_threads; i++) {
 508                struct test_driver *t = &tdriver[i];
 509
 510                t->task = kthread_run(test_func, t, "vmalloc_test/%d", i);
 511
 512                if (!IS_ERR(t->task))
 513                        /* Success. */
 514                        atomic_inc(&test_n_undone);
 515                else
 516                        pr_err("Failed to start %d kthread\n", i);
 517        }
 518
 519        /*
 520         * Now let the workers do their job.
 521         */
 522        up_write(&prepare_for_test_rwsem);
 523
 524        /*
 525         * Sleep quiet until all workers are done with 1 second
 526         * interval. Since the test can take a lot of time we
 527         * can run into a stack trace of the hung task. That is
 528         * why we go with completion_timeout and HZ value.
 529         */
 530        do {
 531                ret = wait_for_completion_timeout(&test_all_done_comp, HZ);
 532        } while (!ret);
 533
 534        for (i = 0; i < nr_threads; i++) {
 535                struct test_driver *t = &tdriver[i];
 536                int j;
 537
 538                if (!IS_ERR(t->task))
 539                        kthread_stop(t->task);
 540
 541                for (j = 0; j < ARRAY_SIZE(test_case_array); j++) {
 542                        if (!((run_test_mask & (1 << j)) >> j))
 543                                continue;
 544
 545                        pr_info(
 546                                "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n",
 547                                test_case_array[j].test_name,
 548                                t->data[j].test_passed,
 549                                t->data[j].test_failed,
 550                                test_repeat_count, test_loop_count,
 551                                t->data[j].time);
 552                }
 553
 554                pr_info("All test took worker%d=%lu cycles\n",
 555                        i, t->stop - t->start);
 556        }
 557
 558        kvfree(tdriver);
 559}
 560
 561static int vmalloc_test_init(void)
 562{
 563        do_concurrent_test();
 564        return -EAGAIN; /* Fail will directly unload the module */
 565}
 566
 567static void vmalloc_test_exit(void)
 568{
 569}
 570
 571module_init(vmalloc_test_init)
 572module_exit(vmalloc_test_exit)
 573
 574MODULE_LICENSE("GPL");
 575MODULE_AUTHOR("Uladzislau Rezki");
 576MODULE_DESCRIPTION("vmalloc test module");
 577