linux/tools/testing/selftests/rseq/param_test.c
// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>
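
/*
 * glibc only gained a gettid() wrapper in version 2.30; invoke the
 * system call directly so the test builds with older toolchains.
 */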
static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];
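
/*
 * Mirror loop_cnt[1..6] into globals with fixed assembler names so the
 * RSEQ_INJECT_ASM() snippets below can reference them by symbol on
 * targets that address them directly (x86), without asm operands.
 */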
static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;

#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)

#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"
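
/*
 * For reference, RSEQ_INJECT_ASM(1) expands here to a busy-wait loop
 * along these lines (illustrative):
 *
 *	lea asm_loop_cnt_1(%rip), %rax
 *	mov (%rax), %eax
 *	test %eax, %eax
 *	jz 333f
 * 222:
 *	dec %eax
 *	jnz 222b
 * 333:
 */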

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n"	\
	"	cbz	" INJECT_ASM_REG ", 333f\n"			\
	"222:\n"							\
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n"	\
	"	cbnz	" INJECT_ASM_REG ", 222b\n"			\
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}
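
/*
 * The RSEQ_INJECT_* hooks above are consumed by rseq.h (included
 * below): its critical-section fast paths expand RSEQ_INJECT_ASM(n)
 * at successive points, so a non-zero loop_cnt[n] widens the matching
 * race window, while loop_cnt[n] == -1 enables the yield/sleep/signal
 * perturbations of RSEQ_INJECT_C().
 */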

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
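
/*
 * Illustrative sketch, not part of the test (the function name is
 * hypothetical): the intended usage pattern for the per-cpu lock,
 * protecting one slot of a per-cpu counter array. The calling thread
 * must be registered with rseq.
 */
static inline __attribute__((unused))
void example_this_cpu_locked_inc(struct percpu_lock *lock,
				 struct test_data_entry *c)
{
	int cpu = rseq_this_cpu_lock(lock);

	c[cpu].count++;
	rseq_percpu_unlock(lock, cpu);
}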

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);

		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock.  Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}
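
/*
 * rseq_addv() increments a per-cpu counter inside a single rseq
 * critical section: no lock and no atomic instruction on the fast
 * path, at the cost of a retry whenever the section aborts (e.g. on
 * migration or signal delivery).
 */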
void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = rseq_cpu_start();
			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concern for
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		off_t offset;
		int ret;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						   offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
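		/*
		 * A positive return value means the expect-not-equal
		 * comparison failed: the head was NULL, i.e. the list
		 * is empty, so return NULL instead of retrying.
		 */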
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}
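
/*
 * Push onto the current CPU's buffer: speculatively store the node
 * pointer into the next free array slot, then commit by storing the
 * incremented offset as the final step of the rseq critical section.
 */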
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trystorev_storev_release(
				targetptr_final, offset, targetptr_spec,
				newval_spec, newval_final, cpu);
		else
			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
				offset, targetptr_spec, newval_spec,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}
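
/*
 * The double comparison above re-validates, inside the critical
 * section, that the top slot still holds the node loaded beforehand,
 * so a stale head read (e.g. taken just before a migration) cannot be
 * committed.
 */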

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item on the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}
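
/*
 * The memcpy variants move whole nodes (wider than a machine word)
 * between the caller and the buffer: the copy runs inside the rseq
 * critical section and is committed by the final store to the offset.
 */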
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trymemcpy_storev_release(
				targetptr_final, offset,
				destptr, srcptr, copylen,
				newval_final, cpu);
		else
			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
				offset, destptr, srcptr, copylen,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
			offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}

void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item on the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

/* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
struct test_membarrier_thread_args {
	int stop;
	intptr_t percpu_list_ptr;
};

/* Worker threads modify data in their "active" percpu lists. */
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const int iters = opt_reps;
	int i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization. */
	while (!atomic_load(&args->percpu_list_ptr)) {}

	for (i = 0; i < iters; ++i) {
		int ret;

		do {
			int cpu = rseq_cpu_start();

			ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier_init_percpu_list(struct percpu_list *list)
{
	int i;

	memset(list, 0, sizeof(*list));
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		node = malloc(sizeof(*node));
		assert(node);
		node->data = 0;
		node->next = NULL;
		list->c[i].head = node;
	}
}

void test_membarrier_free_percpu_list(struct percpu_list *list)
{
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		free(list->c[i].head);
}

static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
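
/*
 * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ with MEMBARRIER_CMD_FLAG_CPU
 * aborts and restarts any rseq critical section running on the target
 * CPU, which is what lets the manager thread below switch the active
 * list pointer without racing against in-flight
 * rseq_offset_deref_addv() sections.
 */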

/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);

	while (!atomic_load(&args->stop)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
					MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = atomic_load(&list_a.c[cpu_a].head->data);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
					MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = atomic_load(&list_b.c[cpu_b].head->data);
	}

	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	thread_args.stop = 0;
	thread_args.percpu_list_ptr = 0;
	ret = pthread_create(&manager_thread, NULL,
			test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	atomic_store(&thread_args.stop, 1);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
void test_membarrier(void)
{
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
			"Skipping membarrier test.\n");
}
#endif

static void show_usage(int argc, char **argv)
{
	printf("Usage : %s <OPTIONS>\n",
		argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}

int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mb = 1;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}