linux/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2020, Gustavo Luiz Duarte, IBM Corp.
 *
 * This test starts a transaction and triggers a signal, forcing a pagefault to
 * happen when the kernel signal handling code touches the user signal stack.
 *
 * In order to avoid pre-faulting the signal stack memory and to force the
 * pagefault to happen precisely in the kernel signal handling code, the
 * pagefault handling is done in userspace using the userfaultfd facility.
 *
 * Further pagefaults are triggered by crafting the signal handler's ucontext
 * to point to additional memory regions managed by the userfaultfd, thus using
 * the same mechanism used to avoid pre-faulting the signal stack memory.
 *
 * On failure (i.e. the bug is present) the kernel crashes or never returns
 * control back to userspace. If the bug is not present, the test completes
 * almost immediately.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <pthread.h>
#include <signal.h>
#include <errno.h>

#include "tm.h"


#define UF_MEM_SIZE 655360	/* 10 x 64k pages */

/* Memory handled by userfaultfd */
static char *uf_mem;
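/* Next free offset into uf_mem; bumped (and page-aligned) by get_uf_mem() */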
static size_t uf_mem_offset = 0;

/*
 * Data that will be copied into the faulting pages (instead of zero-filled
 * pages). This is used to make the test more reliable and avoid segfaulting
 * when we return from the signal handler. Since we are making the signal
 * handler's ucontext point to newly allocated memory, when that memory is
 * paged-in it will contain the expected content.
 */
static char backing_mem[UF_MEM_SIZE];

static size_t pagesize;

/*
 * Return a chunk of at least 'size' bytes of memory that will be handled by
 * userfaultfd. If 'backing_data' is not NULL, its content will be saved to
 * 'backing_mem' and then copied into the faulting pages when the page fault
 * is handled.
 */
void *get_uf_mem(size_t size, void *backing_data)
{
	void *ret;

	if (uf_mem_offset + size > UF_MEM_SIZE) {
		fprintf(stderr, "Requesting more uf_mem than expected!\n");
		exit(EXIT_FAILURE);
	}

	ret = &uf_mem[uf_mem_offset];

	/* Save the data that will be copied into the faulting page */
	if (backing_data != NULL)
		memcpy(&backing_mem[uf_mem_offset], backing_data, size);

	/* Reserve the requested amount of uf_mem */
	uf_mem_offset += size;
	/* Keep uf_mem_offset aligned to the page size (round up) */
	uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1);

	return ret;
}

void *fault_handler_thread(void *arg)
{
	struct uffd_msg msg;	/* Data read from userfaultfd */
	long uffd;		/* userfaultfd file descriptor */
	struct uffdio_copy uffdio_copy;
	struct pollfd pollfd;
	ssize_t nread, offset;

	uffd = (long) arg;

	for (;;) {
		pollfd.fd = uffd;
		pollfd.events = POLLIN;
		if (poll(&pollfd, 1, -1) == -1) {
			perror("poll() failed");
			exit(EXIT_FAILURE);
		}

		nread = read(uffd, &msg, sizeof(msg));
		if (nread == 0) {
			fprintf(stderr, "read(): EOF on userfaultfd\n");
			exit(EXIT_FAILURE);
		}

		if (nread == -1) {
			perror("read() failed");
			exit(EXIT_FAILURE);
		}

		/* We expect only one kind of event */
		if (msg.event != UFFD_EVENT_PAGEFAULT) {
			fprintf(stderr, "Unexpected event on userfaultfd\n");
			exit(EXIT_FAILURE);
		}

		/*
		 * We need to handle page faults in units of pages(!).
		 * So, round faulting address down to page boundary.
		 */
		uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1);

		offset = (char *) uffdio_copy.dst - uf_mem;
		uffdio_copy.src = (unsigned long) &backing_mem[offset];

		uffdio_copy.len = pagesize;
		uffdio_copy.mode = 0;
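		/*
		 * 'copy' is an output field: the kernel fills it with the
		 * number of bytes copied, or a negated error code.
		 */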
		uffdio_copy.copy = 0;
		if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) {
			perror("ioctl-UFFDIO_COPY failed");
			exit(EXIT_FAILURE);
		}
	}
}

void setup_uf_mem(void)
{
	long uffd;		/* userfaultfd file descriptor */
	pthread_t thr;
	struct uffdio_api uffdio_api;
	struct uffdio_register uffdio_register;
	int ret;

	pagesize = sysconf(_SC_PAGE_SIZE);

	/* Create and enable userfaultfd object */
	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd == -1) {
		perror("userfaultfd() failed");
		exit(EXIT_FAILURE);
	}
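	/*
	 * Negotiate the userfaultfd API. No optional features are requested
	 * (features = 0); plain missing-page events are all the test needs.
	 */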
	uffdio_api.api = UFFD_API;
	uffdio_api.features = 0;
	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
		perror("ioctl-UFFDIO_API failed");
		exit(EXIT_FAILURE);
	}

	/*
	 * Create a private anonymous mapping. The memory will be demand-zero
	 * paged, that is, not yet allocated. When we actually touch the memory
	 * the related page will be allocated via the userfaultfd mechanism.
	 */
	uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE,
		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (uf_mem == MAP_FAILED) {
		perror("mmap() failed");
		exit(EXIT_FAILURE);
	}

	/*
	 * Register the memory range of the mapping we've just created to be
	 * handled by the userfaultfd object. In 'mode' we request to track
	 * missing pages (i.e. pages that have not yet been faulted-in).
	 */
	uffdio_register.range.start = (unsigned long) uf_mem;
	uffdio_register.range.len = UF_MEM_SIZE;
	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
		perror("ioctl-UFFDIO_REGISTER failed");
		exit(EXIT_FAILURE);
	}

	/* Create a thread that will process the userfaultfd events */
	ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
	if (ret != 0) {
		fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret);
		exit(EXIT_FAILURE);
	}
}

/*
 * Assumption: the signal was delivered while userspace was in transactional or
 * suspended state, i.e. uc->uc_link != NULL.
 */
void signal_handler(int signo, siginfo_t *si, void *uc)
{
	ucontext_t *ucp = uc;

	/* Skip 'trap' after returning, otherwise we get a SIGTRAP again */
	ucp->uc_link->uc_mcontext.regs->nip += 4;

 204        ucp->uc_mcontext.v_regs =
 205                get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs);
 206
 207        ucp->uc_link->uc_mcontext.v_regs =
 208                get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs);
 209
 210        ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link);
 211}

bool have_userfaultfd(void)
{
	long rc;

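	/*
	 * Probe the syscall with an invalid argument: if userfaultfd is
	 * implemented, the call fails with something other than ENOSYS
	 * (typically EINVAL); only ENOSYS means the kernel lacks support.
	 */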
	errno = 0;
	rc = syscall(__NR_userfaultfd, -1);

	return rc == 0 || errno != ENOSYS;
}

int tm_signal_pagefault(void)
{
	struct sigaction sa;
	stack_t ss;

	SKIP_IF(!have_htm());
	SKIP_IF(!have_userfaultfd());

	setup_uf_mem();

	/*
	 * Set an alternative stack that will generate a page fault when the
	 * signal is raised. The page fault will be handled via userfaultfd,
	 * i.e. via fault_handler_thread.
	 */
	ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL);
	ss.ss_size = SIGSTKSZ;
	ss.ss_flags = 0;
	if (sigaltstack(&ss, NULL) == -1) {
		perror("sigaltstack() failed");
		exit(EXIT_FAILURE);
	}

	sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
	sa.sa_sigaction = signal_handler;
	/* Don't pass an uninitialized signal mask to sigaction() */
	sigemptyset(&sa.sa_mask);
	if (sigaction(SIGTRAP, &sa, NULL) == -1) {
		perror("sigaction() failed");
		exit(EXIT_FAILURE);
	}

	/* Trigger a SIGTRAP in transactional state */
	asm __volatile__(
			"tbegin.;"
			"beq    1f;"
			"trap;"
			"1: ;"
			: : : "memory");

	/* Trigger a SIGTRAP in suspended state */
	asm __volatile__(
			"tbegin.;"
			"beq    1f;"
			"tsuspend.;"
			"trap;"
			"tresume.;"
			"1: ;"
			: : : "memory");

	return EXIT_SUCCESS;
}

int main(int argc, char **argv)
{
	/*
	 * Depending on kernel config, the TM Bad Thing might not result in a
	 * crash; instead, the kernel never returns control back to userspace,
	 * so set a tight timeout. If the test passes it completes almost
	 * immediately.
	 */
	test_harness_set_timeout(2);
	return test_harness(tm_signal_pagefault, "tm_signal_pagefault");
}