#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>

static inline pid_t rseq_gettid(void)
{
        return syscall(__NR_gettid);
}
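
/*
 * Delay-injection configuration: loop_cnt[1..NR_INJECT] holds the number
 * of busy loops to run at each injection point. Slots 1-6 are mirrored
 * into the asm-visible globals below and consumed by RSEQ_INJECT_ASM();
 * slots 7-9 are read directly from loop_cnt[] by RSEQ_INJECT_C() (see
 * below), where the special value -1 combined with -m enables
 * yield/sleep/signal injection instead of busy looping.
 */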
#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
        opt_disable_rseq, opt_threads = 200,
        opt_disable_mod = 0, opt_test = 's', opt_mb = 0;

#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...) \
        do { \
                if (verbose) \
                        printf(fmt, ## __VA_ARGS__); \
        } while (0)
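
/*
 * Architecture-specific delay-injection loops. Each RSEQ_INJECT_ASM(n)
 * expansion loads loop_cnt[n] into a scratch register and spins it down
 * to zero, lengthening the window during which an rseq critical section
 * can be aborted by preemption, migration, or signal delivery.
 */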
#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
        "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
        "jz 333f\n\t" \
        "222:\n\t" \
        "dec %%" INJECT_ASM_REG "\n\t" \
        "jnz 222b\n\t" \
        "333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG_P \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
        "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
        "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
        "jz 333f\n\t" \
        "222:\n\t" \
        "dec %%" INJECT_ASM_REG "\n\t" \
        "jnz 222b\n\t" \
        "333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1]"m"(loop_cnt[1]) \
        , [loop_cnt_2]"m"(loop_cnt[2]) \
        , [loop_cnt_3]"m"(loop_cnt[3]) \
        , [loop_cnt_4]"m"(loop_cnt[4]) \
        , [loop_cnt_5]"m"(loop_cnt[5]) \
        , [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
        "je 333f\n\t" \
        "222:\n\t" \
        "ahi %%" INJECT_ASM_REG ", -1\n\t" \
        "jnz 222b\n\t" \
        "333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1]"m"(loop_cnt[1]) \
        , [loop_cnt_2]"m"(loop_cnt[2]) \
        , [loop_cnt_3]"m"(loop_cnt[3]) \
        , [loop_cnt_4]"m"(loop_cnt[4]) \
        , [loop_cnt_5]"m"(loop_cnt[5]) \
        , [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "cmp " INJECT_ASM_REG ", #0\n\t" \
        "beq 333f\n\t" \
        "222:\n\t" \
        "subs " INJECT_ASM_REG ", #1\n\t" \
        "bne 222b\n\t" \
        "333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1] "Qo" (loop_cnt[1]) \
        , [loop_cnt_2] "Qo" (loop_cnt[2]) \
        , [loop_cnt_3] "Qo" (loop_cnt[3]) \
        , [loop_cnt_4] "Qo" (loop_cnt[4]) \
        , [loop_cnt_5] "Qo" (loop_cnt[5]) \
        , [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
        "	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
        "	cbz	" INJECT_ASM_REG ", 333f\n" \
        "222:\n" \
        "	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
        "	cbnz	" INJECT_ASM_REG ", 222b\n" \
        "333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1]"m"(loop_cnt[1]) \
        , [loop_cnt_2]"m"(loop_cnt[2]) \
        , [loop_cnt_3]"m"(loop_cnt[3]) \
        , [loop_cnt_4]"m"(loop_cnt[4]) \
        , [loop_cnt_5]"m"(loop_cnt[5]) \
        , [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
        "beq 333f\n\t" \
        "222:\n\t" \
        "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
        "bne 222b\n\t" \
        "333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1]"m"(loop_cnt[1]) \
        , [loop_cnt_2]"m"(loop_cnt[2]) \
        , [loop_cnt_3]"m"(loop_cnt[3]) \
        , [loop_cnt_4]"m"(loop_cnt[4]) \
        , [loop_cnt_5]"m"(loop_cnt[5]) \
        , [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "beqz " INJECT_ASM_REG ", 333f\n\t" \
        "222:\n\t" \
        "addiu " INJECT_ASM_REG ", -1\n\t" \
        "bnez " INJECT_ASM_REG ", 222b\n\t" \
        "333:\n\t"

#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
        nr_abort++;

/* C-level injection point: busy-loop, or yield/sleep/signal when -1. */
#define RSEQ_INJECT_C(n) \
{ \
        int loc_i, loc_nr_loops = loop_cnt[n]; \
 \
        for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
                rseq_barrier(); \
        } \
        if (loc_nr_loops == -1 && opt_modulo) { \
                if (yield_mod_cnt == opt_modulo - 1) { \
                        if (opt_sleep > 0) \
                                poll(NULL, 0, opt_sleep); \
                        if (opt_yield) \
                                sched_yield(); \
                        if (opt_signal) \
                                raise(SIGUSR1); \
                        yield_mod_cnt = 0; \
                } else { \
                        yield_mod_cnt++; \
                } \
        } \
}

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"
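
/*
 * Per-cpu entries are aligned to 128 bytes so that each entry sits on
 * its own cache line(s), avoiding false sharing between CPUs.
 */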
struct percpu_lock_entry {
        intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
        struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
        intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
        struct percpu_lock lock;
        struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
        struct spinlock_test_data *data;
        long long reps;
        int reg;
};

struct inc_test_data {
        struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
        struct inc_test_data *data;
        long long reps;
        int reg;
};

struct percpu_list_node {
        intptr_t data;
        struct percpu_list_node *next;
};

struct percpu_list_entry {
        struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
        struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
        intptr_t data;
};

struct percpu_buffer_entry {
        intptr_t offset;
        intptr_t buflen;
        struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
        struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
        intptr_t data1;
        uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
        intptr_t offset;
        intptr_t buflen;
        struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
        struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
        int cpu;

        for (;;) {
                int ret;

                cpu = rseq_cpu_start();
                ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
                                         0, 1, cpu);
                if (rseq_likely(!ret))
                        break;
                /* Retry if comparison fails or rseq aborts. */
        }
        /*
         * Acquire semantic when taking lock after control dependency.
         * Matches rseq_smp_store_release().
         */
        rseq_smp_acquire__after_ctrl_dep();
        return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
        assert(lock->c[cpu].v == 1);
        /*
         * Release lock, with release semantic. Matches
         * rseq_smp_acquire__after_ctrl_dep().
         */
        rseq_smp_store_release(&lock->c[cpu].v, 0);
}
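
/*
 * Typical usage of the per-cpu lock pair (a sketch mirroring
 * test_percpu_spinlock_thread() below):
 *
 *	int cpu = rseq_this_cpu_lock(&data->lock);
 *	data->c[cpu].count++;	// critical section on this cpu's data
 *	rseq_percpu_unlock(&data->lock, cpu);
 */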

void *test_percpu_spinlock_thread(void *arg)
{
        struct spinlock_thread_test_data *thread_data = arg;
        struct spinlock_test_data *data = thread_data->data;
        long long i, reps;

        if (!opt_disable_rseq && thread_data->reg &&
            rseq_register_current_thread())
                abort();
        reps = thread_data->reps;
        for (i = 0; i < reps; i++) {
                int cpu = rseq_this_cpu_lock(&data->lock);

                data->c[cpu].count++;
                rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
                if (i != 0 && !(i % (reps / 10)))
                        printf_verbose("tid %d: count %lld\n",
                                       (int) rseq_gettid(), i);
#endif
        }
        printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
                       (int) rseq_gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && thread_data->reg &&
            rseq_unregister_current_thread())
                abort();
        return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
        const int num_threads = opt_threads;
        int i, ret;
        uint64_t sum;
        pthread_t test_threads[num_threads];
        struct spinlock_test_data data;
        struct spinlock_thread_test_data thread_data[num_threads];

        memset(&data, 0, sizeof(data));
        for (i = 0; i < num_threads; i++) {
                thread_data[i].reps = opt_reps;
                if (opt_disable_mod <= 0 || (i % opt_disable_mod))
                        thread_data[i].reg = 1;
                else
                        thread_data[i].reg = 0;
                thread_data[i].data = &data;
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_spinlock_thread,
                                     &thread_data[i]);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        sum = 0;
        for (i = 0; i < CPU_SETSIZE; i++)
                sum += data.c[i].count;

        assert(sum == (uint64_t)opt_reps * num_threads);
}

void *test_percpu_inc_thread(void *arg)
{
        struct inc_thread_test_data *thread_data = arg;
        struct inc_test_data *data = thread_data->data;
        long long i, reps;

        if (!opt_disable_rseq && thread_data->reg &&
            rseq_register_current_thread())
                abort();
        reps = thread_data->reps;
        for (i = 0; i < reps; i++) {
                int ret;

                do {
                        int cpu;

                        cpu = rseq_cpu_start();
                        ret = rseq_addv(&data->c[cpu].count, 1, cpu);
                } while (rseq_unlikely(ret));
#ifndef BENCHMARK
                if (i != 0 && !(i % (reps / 10)))
                        printf_verbose("tid %d: count %lld\n",
                                       (int) rseq_gettid(), i);
#endif
        }
        printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
                       (int) rseq_gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && thread_data->reg &&
            rseq_unregister_current_thread())
                abort();
        return NULL;
}

void test_percpu_inc(void)
{
        const int num_threads = opt_threads;
        int i, ret;
        uint64_t sum;
        pthread_t test_threads[num_threads];
        struct inc_test_data data;
        struct inc_thread_test_data thread_data[num_threads];

        memset(&data, 0, sizeof(data));
        for (i = 0; i < num_threads; i++) {
                thread_data[i].reps = opt_reps;
                if (opt_disable_mod <= 0 || (i % opt_disable_mod))
                        thread_data[i].reg = 1;
                else
                        thread_data[i].reg = 0;
                thread_data[i].data = &data;
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_inc_thread,
                                     &thread_data[i]);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        sum = 0;
        for (i = 0; i < CPU_SETSIZE; i++)
                sum += data.c[i].count;

        assert(sum == (uint64_t)opt_reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
                        struct percpu_list_node *node,
                        int *_cpu)
{
        int cpu;

        for (;;) {
                intptr_t *targetptr, newval, expect;
                int ret;

                cpu = rseq_cpu_start();
                /* Load list->c[cpu].head with single-copy atomicity. */
                expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
                newval = (intptr_t)node;
                targetptr = (intptr_t *)&list->c[cpu].head;
                node->next = (struct percpu_list_node *)expect;
                ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
                if (rseq_likely(!ret))
                        break;
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
                                           int *_cpu)
{
        struct percpu_list_node *node = NULL;
        int cpu;

        for (;;) {
                struct percpu_list_node *head;
                intptr_t *targetptr, expectnot, *load;
                off_t offset;
                int ret;

                cpu = rseq_cpu_start();
                targetptr = (intptr_t *)&list->c[cpu].head;
                expectnot = (intptr_t)NULL;
                offset = offsetof(struct percpu_list_node, next);
                load = (intptr_t *)&head;
                ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
                                                 offset, load, cpu);
                if (rseq_likely(!ret)) {
                        node = head;
                        break;
                }
                if (ret > 0)
                        break;	/* List is empty. */
                /* Retry if rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return node;
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
        struct percpu_list_node *node;

        node = list->c[cpu].head;
        if (!node)
                return NULL;
        list->c[cpu].head = node->next;
        return node;
}

void *test_percpu_list_thread(void *arg)
{
        long long i, reps;
        struct percpu_list *list = (struct percpu_list *)arg;

        if (!opt_disable_rseq && rseq_register_current_thread())
                abort();

        reps = opt_reps;
        for (i = 0; i < reps; i++) {
                struct percpu_list_node *node;

                node = this_cpu_list_pop(list, NULL);
                if (opt_yield)
                        sched_yield();	/* encourage shuffling */
                if (node)
                        this_cpu_list_push(list, node, NULL);
        }

        printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
                       (int) rseq_gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && rseq_unregister_current_thread())
                abort();

        return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
        const int num_threads = opt_threads;
        int i, j, ret;
        uint64_t sum = 0, expected_sum = 0;
        struct percpu_list list;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;

        memset(&list, 0, sizeof(list));

        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < CPU_SETSIZE; i++) {
                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;
                for (j = 1; j <= 100; j++) {
                        struct percpu_list_node *node;

                        expected_sum += j;

                        node = malloc(sizeof(*node));
                        assert(node);
                        node->data = j;
                        node->next = list.c[i].head;
                        list.c[i].head = node;
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_list_thread, &list);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < CPU_SETSIZE; i++) {
                struct percpu_list_node *node;

                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;

                while ((node = __percpu_list_pop(&list, i))) {
                        sum += node->data;
                        free(node);
                }
        }

        /*
         * All entries should now be accounted for (unless some external
         * actor is interfering with our allowed affinity while this
         * test is running).
         */
        assert(sum == expected_sum);
}

bool this_cpu_buffer_push(struct percpu_buffer *buffer,
                          struct percpu_buffer_node *node,
                          int *_cpu)
{
        bool result = false;
        int cpu;

        for (;;) {
                intptr_t *targetptr_spec, newval_spec;
                intptr_t *targetptr_final, newval_final;
                intptr_t offset;
                int ret;

                cpu = rseq_cpu_start();
                offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
                if (offset == buffer->c[cpu].buflen)
                        break;	/* Buffer is full. */
                newval_spec = (intptr_t)node;
                targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
                newval_final = offset + 1;
                targetptr_final = &buffer->c[cpu].offset;
                if (opt_mb)
                        ret = rseq_cmpeqv_trystorev_storev_release(
                                targetptr_final, offset, targetptr_spec,
                                newval_spec, newval_final, cpu);
                else
                        ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
                                offset, targetptr_spec, newval_spec,
                                newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
                        break;
                }
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return result;
}

struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
                                               int *_cpu)
{
        struct percpu_buffer_node *head;
        int cpu;

        for (;;) {
                intptr_t *targetptr, newval;
                intptr_t offset;
                int ret;

                cpu = rseq_cpu_start();

                /* Load offset with single-copy atomicity. */
                offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
                if (offset == 0) {
                        head = NULL;
                        break;
                }
                head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
                newval = offset - 1;
                targetptr = (intptr_t *)&buffer->c[cpu].offset;
                ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
                        (intptr_t *)&buffer->c[cpu].array[offset - 1],
                        (intptr_t)head, newval, cpu);
                if (rseq_likely(!ret))
                        break;
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
                                               int cpu)
{
        struct percpu_buffer_node *head;
        intptr_t offset;

        offset = buffer->c[cpu].offset;
        if (offset == 0)
                return NULL;
        head = buffer->c[cpu].array[offset - 1];
        buffer->c[cpu].offset = offset - 1;
        return head;
}

void *test_percpu_buffer_thread(void *arg)
{
        long long i, reps;
        struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

        if (!opt_disable_rseq && rseq_register_current_thread())
                abort();

        reps = opt_reps;
        for (i = 0; i < reps; i++) {
                struct percpu_buffer_node *node;

                node = this_cpu_buffer_pop(buffer, NULL);
                if (opt_yield)
                        sched_yield();	/* encourage shuffling */
                if (node) {
                        if (!this_cpu_buffer_push(buffer, node, NULL)) {
                                /* Should increase buffer size. */
                                abort();
                        }
                }
        }

        printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
                       (int) rseq_gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && rseq_unregister_current_thread())
                abort();

        return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
        const int num_threads = opt_threads;
        int i, j, ret;
        uint64_t sum = 0, expected_sum = 0;
        struct percpu_buffer buffer;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;

        memset(&buffer, 0, sizeof(buffer));

        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < CPU_SETSIZE; i++) {
                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;
                /* Worst case is every item landing on the same CPU. */
                buffer.c[i].array =
                        malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
                               BUFFER_ITEM_PER_CPU);
                assert(buffer.c[i].array);
                buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
                for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
                        struct percpu_buffer_node *node;

                        expected_sum += j;

                        /*
                         * We could theoretically put the word-sized
                         * "data" directly in the buffer. However, we
                         * want to model objects that would not fit
                         * within a single word, so allocate an object
                         * for each node.
                         */
                        node = malloc(sizeof(*node));
                        assert(node);
                        node->data = j;
                        buffer.c[i].array[j - 1] = node;
                        buffer.c[i].offset++;
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_buffer_thread, &buffer);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < CPU_SETSIZE; i++) {
                struct percpu_buffer_node *node;

                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;

                while ((node = __percpu_buffer_pop(&buffer, i))) {
                        sum += node->data;
                        free(node);
                }
                free(buffer.c[i].array);
        }

        /*
         * All entries should now be accounted for (unless some external
         * actor is interfering with our allowed affinity while this
         * test is running).
         */
        assert(sum == expected_sum);
}

bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
                                 struct percpu_memcpy_buffer_node item,
                                 int *_cpu)
{
        bool result = false;
        int cpu;

        for (;;) {
                intptr_t *targetptr_final, newval_final, offset;
                char *destptr, *srcptr;
                size_t copylen;
                int ret;

                cpu = rseq_cpu_start();

                /* Load offset with single-copy atomicity. */
                offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
                if (offset == buffer->c[cpu].buflen)
                        break;	/* Buffer is full. */
                destptr = (char *)&buffer->c[cpu].array[offset];
                srcptr = (char *)&item;
                copylen = sizeof(item);
                newval_final = offset + 1;
                targetptr_final = &buffer->c[cpu].offset;
                if (opt_mb)
                        ret = rseq_cmpeqv_trymemcpy_storev_release(
                                targetptr_final, offset,
                                destptr, srcptr, copylen,
                                newval_final, cpu);
                else
                        ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
                                offset, destptr, srcptr, copylen,
                                newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
                        break;
                }
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return result;
}

bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
                                struct percpu_memcpy_buffer_node *item,
                                int *_cpu)
{
        bool result = false;
        int cpu;

        for (;;) {
                intptr_t *targetptr_final, newval_final, offset;
                char *destptr, *srcptr;
                size_t copylen;
                int ret;

                cpu = rseq_cpu_start();

                /* Load offset with single-copy atomicity. */
                offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
                if (offset == 0)
                        break;	/* Buffer is empty. */
                destptr = (char *)item;
                srcptr = (char *)&buffer->c[cpu].array[offset - 1];
                copylen = sizeof(*item);
                newval_final = offset - 1;
                targetptr_final = &buffer->c[cpu].offset;
                ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
                        offset, destptr, srcptr, copylen,
                        newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
                        break;
                }
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses.
 * Should only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
                                struct percpu_memcpy_buffer_node *item,
                                int cpu)
{
        intptr_t offset;

        offset = buffer->c[cpu].offset;
        if (offset == 0)
                return false;
        memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
        buffer->c[cpu].offset = offset - 1;
        return true;
}

void *test_percpu_memcpy_buffer_thread(void *arg)
{
        long long i, reps;
        struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

        if (!opt_disable_rseq && rseq_register_current_thread())
                abort();

        reps = opt_reps;
        for (i = 0; i < reps; i++) {
                struct percpu_memcpy_buffer_node item;
                bool result;

                result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
                if (opt_yield)
                        sched_yield();	/* encourage shuffling */
                if (result) {
                        if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
                                /* Should increase buffer size. */
                                abort();
                        }
                }
        }

        printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
                       (int) rseq_gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && rseq_unregister_current_thread())
                abort();

        return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
        const int num_threads = opt_threads;
        int i, j, ret;
        uint64_t sum = 0, expected_sum = 0;
        struct percpu_memcpy_buffer buffer;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;

        memset(&buffer, 0, sizeof(buffer));

        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < CPU_SETSIZE; i++) {
                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;
                /* Worst case is every item landing on the same CPU. */
                buffer.c[i].array =
                        malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
                               MEMCPY_BUFFER_ITEM_PER_CPU);
                assert(buffer.c[i].array);
                buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
                for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
                        /* Each item contributes data1 + data2 = 2 * j + 1. */
                        expected_sum += 2 * j + 1;

                        buffer.c[i].array[j - 1].data1 = j;
                        buffer.c[i].array[j - 1].data2 = j + 1;
                        buffer.c[i].offset++;
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_memcpy_buffer_thread,
                                     &buffer);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < CPU_SETSIZE; i++) {
                struct percpu_memcpy_buffer_node item;

                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;

                while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
                        sum += item.data1;
                        sum += item.data2;
                }
                free(buffer.c[i].array);
        }

        /*
         * All entries should now be accounted for (unless some external
         * actor is interfering with our allowed affinity while this
         * test is running).
         */
        assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(int signo)
{
        signals_delivered++;
}

static int set_signal_handler(void)
{
        int ret = 0;
        struct sigaction sa;
        sigset_t sigset;

        ret = sigemptyset(&sigset);
        if (ret < 0) {
                perror("sigemptyset");
                return ret;
        }

        sa.sa_handler = test_signal_interrupt_handler;
        sa.sa_mask = sigset;
        sa.sa_flags = 0;
        ret = sigaction(SIGUSR1, &sa, NULL);
        if (ret < 0) {
                perror("sigaction");
                return ret;
        }

        printf_verbose("Signal handler set for SIGUSR1\n");

        return ret;
}

/* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
struct test_membarrier_thread_args {
        int stop;
        intptr_t percpu_list_ptr;
};

/* Worker threads modify data in their "active" percpu lists. */
void *test_membarrier_worker_thread(void *arg)
{
        struct test_membarrier_thread_args *args =
                (struct test_membarrier_thread_args *)arg;
        const int iters = opt_reps;
        int i;

        if (rseq_register_current_thread()) {
                fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                abort();
        }

        /* Wait for initialization. */
        while (!atomic_load(&args->percpu_list_ptr)) {}

        for (i = 0; i < iters; ++i) {
                int ret;

                do {
                        int cpu = rseq_cpu_start();

                        ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
                                sizeof(struct percpu_list_entry) * cpu, 1, cpu);
                } while (rseq_unlikely(ret));
        }

        if (rseq_unregister_current_thread()) {
                fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                abort();
        }
        return NULL;
}

void test_membarrier_init_percpu_list(struct percpu_list *list)
{
        int i;

        memset(list, 0, sizeof(*list));
        for (i = 0; i < CPU_SETSIZE; i++) {
                struct percpu_list_node *node;

                node = malloc(sizeof(*node));
                assert(node);
                node->data = 0;
                node->next = NULL;
                list->c[i].head = node;
        }
}

void test_membarrier_free_percpu_list(struct percpu_list *list)
{
        int i;

        for (i = 0; i < CPU_SETSIZE; i++)
                free(list->c[i].head);
}

static int sys_membarrier(int cmd, int flags, int cpu_id)
{
        return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
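
/*
 * Background on the command used below: MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ
 * (with MEMBARRIER_CMD_FLAG_CPU targeting a single CPU) restarts any rseq
 * critical section running on the targeted CPU, so a worker that has already
 * loaded the old percpu_list_ptr inside its critical section is forced to
 * abort and retry with the new pointer. ENXIO is tolerated by the callers,
 * as it indicates the targeted CPU is not available.
 */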

/*
 * The manager thread swaps the "active" list seen by the worker threads
 * between list_a and list_b, using sys_membarrier to kick rseq critical
 * sections on the CPU it is about to validate, and checks that the
 * inactive list is no longer being modified.
 */
void *test_membarrier_manager_thread(void *arg)
{
        struct test_membarrier_thread_args *args =
                (struct test_membarrier_thread_args *)arg;
        struct percpu_list list_a, list_b;
        intptr_t expect_a = 0, expect_b = 0;
        int cpu_a = 0, cpu_b = 0;

        if (rseq_register_current_thread()) {
                fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                abort();
        }

        /* Init lists. */
        test_membarrier_init_percpu_list(&list_a);
        test_membarrier_init_percpu_list(&list_b);

        atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);

        while (!atomic_load(&args->stop)) {
                /* list_a is "active". */
                cpu_a = rand() % CPU_SETSIZE;
                /*
                 * As list_b is "inactive", we should never see changes
                 * to list_b.
                 */
                if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
                        fprintf(stderr, "Membarrier test failed\n");
                        abort();
                }

                /* Make list_b "active". */
                atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
                if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
                                   MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
                    errno != ENXIO /* missing CPU */) {
                        perror("sys_membarrier");
                        abort();
                }
                /*
                 * Cpu A should now only modify list_b, so the values
                 * in list_a should be stable.
                 */
                expect_a = atomic_load(&list_a.c[cpu_a].head->data);

                cpu_b = rand() % CPU_SETSIZE;
                /*
                 * As list_a is "inactive", we should never see changes
                 * to list_a.
                 */
                if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
                        fprintf(stderr, "Membarrier test failed\n");
                        abort();
                }

                /* Make list_a "active". */
                atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
                if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
                                   MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
                    errno != ENXIO /* missing CPU */) {
                        perror("sys_membarrier");
                        abort();
                }
                /*
                 * Cpu B should now only modify list_a, so the values
                 * in list_b should be stable.
                 */
                expect_b = atomic_load(&list_b.c[cpu_b].head->data);
        }

        test_membarrier_free_percpu_list(&list_a);
        test_membarrier_free_percpu_list(&list_b);

        if (rseq_unregister_current_thread()) {
                fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                abort();
        }
        return NULL;
}

void test_membarrier(void)
{
        const int num_threads = opt_threads;
        struct test_membarrier_thread_args thread_args;
        pthread_t worker_threads[num_threads];
        pthread_t manager_thread;
        int i, ret;

        if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
                perror("sys_membarrier");
                abort();
        }

        thread_args.stop = 0;
        thread_args.percpu_list_ptr = 0;
        ret = pthread_create(&manager_thread, NULL,
                             test_membarrier_manager_thread, &thread_args);
        if (ret) {
                errno = ret;
                perror("pthread_create");
                abort();
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&worker_threads[i], NULL,
                                     test_membarrier_worker_thread, &thread_args);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(worker_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        atomic_store(&thread_args.stop, 1);
        ret = pthread_join(manager_thread, NULL);
        if (ret) {
                errno = ret;
                perror("pthread_join");
                abort();
        }
}
#else /* !RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
void test_membarrier(void)
{
        fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
                        "Skipping membarrier test.\n");
}
#endif

static void show_usage(int argc, char **argv)
{
        printf("Usage : %s <OPTIONS>\n",
                argv[0]);
        printf("OPTIONS:\n");
        printf("\t[-1 loops] Number of loops for delay injection 1\n");
        printf("\t[-2 loops] Number of loops for delay injection 2\n");
        printf("\t[-3 loops] Number of loops for delay injection 3\n");
        printf("\t[-4 loops] Number of loops for delay injection 4\n");
        printf("\t[-5 loops] Number of loops for delay injection 5\n");
        printf("\t[-6 loops] Number of loops for delay injection 6\n");
        printf("\t[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
        printf("\t[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
        printf("\t[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
        printf("\t[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
        printf("\t[-y] Yield\n");
        printf("\t[-k] Kill thread with signal\n");
        printf("\t[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
        printf("\t[-t N] Number of threads (default 200)\n");
        printf("\t[-r N] Number of repetitions per thread (default 5000)\n");
        printf("\t[-d] Disable rseq system call (no initialization)\n");
        printf("\t[-D M] Disable rseq for each M threads\n");
        printf("\t[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
        printf("\t[-M] Push into buffer and memcpy buffer with memory barriers.\n");
        printf("\t[-v] Verbose output.\n");
        printf("\t[-h] Show this help.\n");
        printf("\n");
}
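
/*
 * Example invocations (a sketch; the binary name depends on how the
 * selftest is built):
 *
 *	./param_test -T s -t 16 -r 10000	spinlock test, 16 threads
 *	./param_test -T l -y -v			list test with yield injection
 *	./param_test -T r -t 4			membarrier test
 *
 * Passing "-7 -1 -m 10 -y -k" sets loop_cnt[7] to -1, so the C-level
 * injection point yields and raises SIGUSR1 on every 10th pass through
 * that point, exercising the abort and signal paths.
 */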

int main(int argc, char **argv)
{
        int i;

        for (i = 1; i < argc; i++) {
                if (argv[i][0] != '-')
                        continue;
                switch (argv[i][1]) {
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
                        i++;
                        break;
                case 'm':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_modulo = atol(argv[i + 1]);
                        if (opt_modulo < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 's':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_sleep = atol(argv[i + 1]);
                        if (opt_sleep < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'y':
                        opt_yield = 1;
                        break;
                case 'k':
                        opt_signal = 1;
                        break;
                case 'd':
                        opt_disable_rseq = 1;
                        break;
                case 'D':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_disable_mod = atol(argv[i + 1]);
                        if (opt_disable_mod < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 't':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_threads = atol(argv[i + 1]);
                        if (opt_threads < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'r':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_reps = atoll(argv[i + 1]);
                        if (opt_reps < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'h':
                        show_usage(argc, argv);
                        goto end;
                case 'T':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_test = *argv[i + 1];
                        switch (opt_test) {
                        case 's':
                        case 'l':
                        case 'i':
                        case 'b':
                        case 'm':
                        case 'r':
                                break;
                        default:
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'v':
                        verbose = 1;
                        break;
                case 'M':
                        opt_mb = 1;
                        break;
                default:
                        show_usage(argc, argv);
                        goto error;
                }
        }

        loop_cnt_1 = loop_cnt[1];
        loop_cnt_2 = loop_cnt[2];
        loop_cnt_3 = loop_cnt[3];
        loop_cnt_4 = loop_cnt[4];
        loop_cnt_5 = loop_cnt[5];
        loop_cnt_6 = loop_cnt[6];

        if (set_signal_handler())
                goto error;

        if (!opt_disable_rseq && rseq_register_current_thread())
                goto error;
        switch (opt_test) {
        case 's':
                printf_verbose("spinlock\n");
                test_percpu_spinlock();
                break;
        case 'l':
                printf_verbose("linked list\n");
                test_percpu_list();
                break;
        case 'b':
                printf_verbose("buffer\n");
                test_percpu_buffer();
                break;
        case 'm':
                printf_verbose("memcpy buffer\n");
                test_percpu_memcpy_buffer();
                break;
        case 'i':
                printf_verbose("counter increment\n");
                test_percpu_inc();
                break;
        case 'r':
                printf_verbose("membarrier\n");
                test_membarrier();
                break;
        }
        if (!opt_disable_rseq && rseq_unregister_current_thread())
                abort();
end:
        return 0;

error:
        return -1;
}