linux/tools/testing/selftests/x86/protection_keys.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
   4 *
   5 * There are examples in here of:
   6 *  * how to set protection keys on memory
   7 *  * how to set/clear bits in PKRU (the rights register)
   8 *  * how to handle SEGV_PKRU signals and extract pkey-relevant
   9 *    information from the siginfo
  10 *
  11 * Things to add:
  12 *      make sure KSM and KSM COW breaking works
  13 *      prefault pages in at malloc, or not
  14 *      protect MPX bounds tables with protection keys?
  15 *      make sure VMA splitting/merging is working correctly
  16 *      OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
  17 *      look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
  18 *      do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
  19 *
  20 * Compile like this:
  21 *      gcc      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
  22 *      gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
  23 */
  24#define _GNU_SOURCE
  25#include <errno.h>
  26#include <linux/futex.h>
  27#include <sys/time.h>
  28#include <sys/syscall.h>
  29#include <string.h>
  30#include <stdio.h>
  31#include <stdint.h>
  32#include <stdbool.h>
  33#include <signal.h>
  34#include <assert.h>
  35#include <stdlib.h>
  36#include <ucontext.h>
  37#include <sys/mman.h>
  38#include <sys/types.h>
  39#include <sys/wait.h>
  40#include <sys/stat.h>
  41#include <fcntl.h>
  42#include <unistd.h>
  43#include <sys/ptrace.h>
  44#include <setjmp.h>
  45
  46#include "pkey-helpers.h"
  47
  48int iteration_nr = 1;
  49int test_nr;
  50
  51unsigned int shadow_pkru;
  52
  53#define HPAGE_SIZE      (1UL<<21)
  54#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
  55#define ALIGN_UP(x, align_to)   (((x) + ((align_to)-1)) & ~((align_to)-1))
  56#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
  57#define ALIGN_PTR_UP(p, ptr_align_to)   ((typeof(p))ALIGN_UP((unsigned long)(p),        ptr_align_to))
  58#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p),      ptr_align_to))
  59#define __stringify_1(x...)     #x
  60#define __stringify(x...)       __stringify_1(x)
  61
  62#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
  63
  64int dprint_in_signal;
  65char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
  66
  67extern void abort_hooks(void);
  68#define pkey_assert(condition) do {             \
  69        if (!(condition)) {                     \
  70                dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
  71                                __FILE__, __LINE__,     \
  72                                test_nr, iteration_nr); \
  73                dprintf0("errno at assert: %d", errno); \
  74                abort_hooks();                  \
  75                assert(condition);              \
  76        }                                       \
  77} while (0)
  78#define raw_assert(cond) assert(cond)
  79
  80void cat_into_file(char *str, char *file)
  81{
  82        int fd = open(file, O_RDWR);
  83        int ret;
  84
  85        dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
  86        /*
  87         * these need to be raw because they are called under
  88         * pkey_assert()
  89         */
  90        raw_assert(fd >= 0);
  91        ret = write(fd, str, strlen(str));
  92        if (ret != strlen(str)) {
  93                perror("write to file failed");
  94                fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
  95                raw_assert(0);
  96        }
  97        close(fd);
  98}
  99
 100#if CONTROL_TRACING > 0
 101static int warned_tracing;
 102int tracing_root_ok(void)
 103{
 104        if (geteuid() != 0) {
 105                if (!warned_tracing)
 106                        fprintf(stderr, "WARNING: not run as root, "
 107                                        "can not do tracing control\n");
 108                warned_tracing = 1;
 109                return 0;
 110        }
 111        return 1;
 112}
 113#endif
 114
 115void tracing_on(void)
 116{
 117#if CONTROL_TRACING > 0
 118#define TRACEDIR "/sys/kernel/debug/tracing"
 119        char pidstr[32];
 120
 121        if (!tracing_root_ok())
 122                return;
 123
 124        sprintf(pidstr, "%d", getpid());
 125        cat_into_file("0", TRACEDIR "/tracing_on");
 126        cat_into_file("\n", TRACEDIR "/trace");
 127        if (1) {
 128                cat_into_file("function_graph", TRACEDIR "/current_tracer");
 129                cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
 130        } else {
 131                cat_into_file("nop", TRACEDIR "/current_tracer");
 132        }
 133        cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
 134        cat_into_file("1", TRACEDIR "/tracing_on");
 135        dprintf1("enabled tracing\n");
 136#endif
 137}
 138
 139void tracing_off(void)
 140{
 141#if CONTROL_TRACING > 0
 142        if (!tracing_root_ok())
 143                return;
 144        cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
 145#endif
 146}
 147
 148void abort_hooks(void)
 149{
 150        fprintf(stderr, "running %s()...\n", __func__);
 151        tracing_off();
 152#ifdef SLEEP_ON_ABORT
 153        sleep(SLEEP_ON_ABORT);
 154#endif
 155}
 156
 157static inline void __page_o_noops(void)
 158{
 159        /* 8-bytes of instruction * 512 bytes = 1 page */
 160        asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
 161}
 162
 163/*
 164 * This attempts to have roughly a page of instructions followed by a few
 165 * instructions that do a write, and another page of instructions.  That
 166 * way, we are pretty sure that the write is in the second page of
 167 * instructions and has at least a page of padding behind it.
 168 *
 169 * *That* lets us be sure to madvise() away the write instruction, which
 170 * will then fault, which makes sure that the fault code handles
 171 * execute-only memory properly.
 172 */
 173__attribute__((__aligned__(PAGE_SIZE)))
 174void lots_o_noops_around_write(int *write_to_me)
 175{
 176        dprintf3("running %s()\n", __func__);
 177        __page_o_noops();
 178        /* Assume this happens in the second page of instructions: */
 179        *write_to_me = __LINE__;
 180        /* pad out by another page: */
 181        __page_o_noops();
 182        dprintf3("%s() done\n", __func__);
 183}
 184
 185/* Define some kernel-like types */
 186#define  u8 uint8_t
 187#define u16 uint16_t
 188#define u32 uint32_t
 189#define u64 uint64_t
 190
 191#ifdef __i386__
 192
 193#ifndef SYS_mprotect_key
 194# define SYS_mprotect_key 380
 195#endif
 196#ifndef SYS_pkey_alloc
 197# define SYS_pkey_alloc  381
 198# define SYS_pkey_free   382
 199#endif
 200#define REG_IP_IDX REG_EIP
 201#define si_pkey_offset 0x14
 202
 203#else
 204
 205#ifndef SYS_mprotect_key
 206# define SYS_mprotect_key 329
 207#endif
 208#ifndef SYS_pkey_alloc
 209# define SYS_pkey_alloc  330
 210# define SYS_pkey_free   331
 211#endif
 212#define REG_IP_IDX REG_RIP
 213#define si_pkey_offset 0x20
 214
 215#endif
 216
 217void dump_mem(void *dumpme, int len_bytes)
 218{
 219        char *c = (void *)dumpme;
 220        int i;
 221
 222        for (i = 0; i < len_bytes; i += sizeof(u64)) {
 223                u64 *ptr = (u64 *)(c + i);
 224                dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
 225        }
 226}
 227
 228#define SEGV_BNDERR     3  /* failed address bound checks */
 229#define SEGV_PKUERR     4
 230
 231static char *si_code_str(int si_code)
 232{
 233        if (si_code == SEGV_MAPERR)
 234                return "SEGV_MAPERR";
 235        if (si_code == SEGV_ACCERR)
 236                return "SEGV_ACCERR";
 237        if (si_code == SEGV_BNDERR)
 238                return "SEGV_BNDERR";
 239        if (si_code == SEGV_PKUERR)
 240                return "SEGV_PKUERR";
 241        return "UNKNOWN";
 242}
 243
 244int pkru_faults;
 245int last_si_pkey = -1;
 246void signal_handler(int signum, siginfo_t *si, void *vucontext)
 247{
 248        ucontext_t *uctxt = vucontext;
 249        int trapno;
 250        unsigned long ip;
 251        char *fpregs;
 252        u32 *pkru_ptr;
 253        u64 siginfo_pkey;
 254        u32 *si_pkey_ptr;
 255        int pkru_offset;
 256        fpregset_t fpregset;
 257
 258        dprint_in_signal = 1;
 259        dprintf1(">>>>===============SIGSEGV============================\n");
 260        dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
 261                        __rdpkru(), shadow_pkru);
 262
 263        trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
 264        ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
 265        fpregset = uctxt->uc_mcontext.fpregs;
 266        fpregs = (void *)fpregset;
 267
 268        dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
 269                        trapno, ip, si_code_str(si->si_code), si->si_code);
 270#ifdef __i386__
 271        /*
 272         * 32-bit has some extra padding so that userspace can tell whether
 273         * the XSTATE header is present in addition to the "legacy" FPU
 274         * state.  We just assume that it is here.
 275         */
 276        fpregs += 0x70;
 277#endif
 278        pkru_offset = pkru_xstate_offset();
 279        pkru_ptr = (void *)(&fpregs[pkru_offset]);
 280
 281        dprintf1("siginfo: %p\n", si);
 282        dprintf1(" fpregs: %p\n", fpregs);
 283        /*
 284         * If we got a PKRU fault, we *HAVE* to have at least one bit set in
 285         * here.
 286         */
 287        dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
 288        if (DEBUG_LEVEL > 4)
 289                dump_mem(pkru_ptr - 128, 256);
 290        pkey_assert(*pkru_ptr);
 291
 292        si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
 293        dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
 294        dump_mem(si_pkey_ptr - 8, 24);
 295        siginfo_pkey = *si_pkey_ptr;
 296        pkey_assert(siginfo_pkey < NR_PKEYS);
 297        last_si_pkey = siginfo_pkey;
 298
 299        if ((si->si_code == SEGV_MAPERR) ||
 300            (si->si_code == SEGV_ACCERR) ||
 301            (si->si_code == SEGV_BNDERR)) {
 302                printf("non-PK si_code, exiting...\n");
 303                exit(4);
 304        }
 305
 306        dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
 307        /* need __rdpkru() version so we do not do shadow_pkru checking */
 308        dprintf1("signal pkru from  pkru: %08x\n", __rdpkru());
 309        dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
 310        *(u64 *)pkru_ptr = 0x00000000;
 311        dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
 312        pkru_faults++;
 313        dprintf1("<<<<==================================================\n");
 314        return;
 315        if (trapno == 14) {
 316                fprintf(stderr,
 317                        "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
 318                        trapno, ip);
 319                fprintf(stderr, "si_addr %p\n", si->si_addr);
 320                fprintf(stderr, "REG_ERR: %lx\n",
 321                                (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
 322                exit(1);
 323        } else {
 324                fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip);
 325                fprintf(stderr, "si_addr %p\n", si->si_addr);
 326                fprintf(stderr, "REG_ERR: %lx\n",
 327                                (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
 328                exit(2);
 329        }
 330        dprint_in_signal = 0;
 331}
 332
 333int wait_all_children(void)
 334{
 335        int status;
 336        return waitpid(-1, &status, 0);
 337}
 338
 339void sig_chld(int x)
 340{
 341        dprint_in_signal = 1;
 342        dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
 343        dprint_in_signal = 0;
 344}
 345
 346void setup_sigsegv_handler(void)
 347{
 348        int r, rs;
 349        struct sigaction newact;
 350        struct sigaction oldact;
 351
 352        /* #PF is mapped to sigsegv */
 353        int signum  = SIGSEGV;
 354
 355        newact.sa_handler = 0;
 356        newact.sa_sigaction = signal_handler;
 357
 358        /*sigset_t - signals to block while in the handler */
 359        /* get the old signal mask. */
 360        rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
 361        pkey_assert(rs == 0);
 362
 363        /* call sa_sigaction, not sa_handler*/
 364        newact.sa_flags = SA_SIGINFO;
 365
 366        newact.sa_restorer = 0;  /* void(*)(), obsolete */
 367        r = sigaction(signum, &newact, &oldact);
 368        r = sigaction(SIGALRM, &newact, &oldact);
 369        pkey_assert(r == 0);
 370}
 371
 372void setup_handlers(void)
 373{
 374        signal(SIGCHLD, &sig_chld);
 375        setup_sigsegv_handler();
 376}
 377
 378pid_t fork_lazy_child(void)
 379{
 380        pid_t forkret;
 381
 382        forkret = fork();
 383        pkey_assert(forkret >= 0);
 384        dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
 385
 386        if (!forkret) {
 387                /* in the child */
 388                while (1) {
 389                        dprintf1("child sleeping...\n");
 390                        sleep(30);
 391                }
 392        }
 393        return forkret;
 394}
 395
 396#define PKEY_DISABLE_ACCESS    0x1
 397#define PKEY_DISABLE_WRITE     0x2
 398
 399u32 pkey_get(int pkey, unsigned long flags)
 400{
 401        u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
 402        u32 pkru = __rdpkru();
 403        u32 shifted_pkru;
 404        u32 masked_pkru;
 405
 406        dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
 407                        __func__, pkey, flags, 0, 0);
 408        dprintf2("%s() raw pkru: %x\n", __func__, pkru);
 409
 410        shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
 411        dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
 412        masked_pkru = shifted_pkru & mask;
 413        dprintf2("%s() masked  pkru: %x\n", __func__, masked_pkru);
 414        /*
 415         * shift down the relevant bits to the lowest two, then
 416         * mask off all the other high bits.
 417         */
 418        return masked_pkru;
 419}
 420
 421int pkey_set(int pkey, unsigned long rights, unsigned long flags)
 422{
 423        u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
 424        u32 old_pkru = __rdpkru();
 425        u32 new_pkru;
 426
 427        /* make sure that 'rights' only contains the bits we expect: */
 428        assert(!(rights & ~mask));
 429
 430        /* copy old pkru */
 431        new_pkru = old_pkru;
 432        /* mask out bits from pkey in old value: */
 433        new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
 434        /* OR in new bits for pkey: */
 435        new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
 436
 437        __wrpkru(new_pkru);
 438
 439        dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
 440                        __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
 441        return 0;
 442}
 443
 444void pkey_disable_set(int pkey, int flags)
 445{
 446        unsigned long syscall_flags = 0;
 447        int ret;
 448        int pkey_rights;
 449        u32 orig_pkru = rdpkru();
 450
 451        dprintf1("START->%s(%d, 0x%x)\n", __func__,
 452                pkey, flags);
 453        pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
 454
 455        pkey_rights = pkey_get(pkey, syscall_flags);
 456
 457        dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
 458                        pkey, pkey, pkey_rights);
 459        pkey_assert(pkey_rights >= 0);
 460
 461        pkey_rights |= flags;
 462
 463        ret = pkey_set(pkey, pkey_rights, syscall_flags);
 464        assert(!ret);
 465        /*pkru and flags have the same format */
 466        shadow_pkru |= flags << (pkey * 2);
 467        dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
 468
 469        pkey_assert(ret >= 0);
 470
 471        pkey_rights = pkey_get(pkey, syscall_flags);
 472        dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
 473                        pkey, pkey, pkey_rights);
 474
 475        dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
 476        if (flags)
 477                pkey_assert(rdpkru() > orig_pkru);
 478        dprintf1("END<---%s(%d, 0x%x)\n", __func__,
 479                pkey, flags);
 480}
 481
 482void pkey_disable_clear(int pkey, int flags)
 483{
 484        unsigned long syscall_flags = 0;
 485        int ret;
 486        int pkey_rights = pkey_get(pkey, syscall_flags);
 487        u32 orig_pkru = rdpkru();
 488
 489        pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
 490
 491        dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
 492                        pkey, pkey, pkey_rights);
 493        pkey_assert(pkey_rights >= 0);
 494
 495        pkey_rights |= flags;
 496
 497        ret = pkey_set(pkey, pkey_rights, 0);
 498        /* pkru and flags have the same format */
 499        shadow_pkru &= ~(flags << (pkey * 2));
 500        pkey_assert(ret >= 0);
 501
 502        pkey_rights = pkey_get(pkey, syscall_flags);
 503        dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
 504                        pkey, pkey, pkey_rights);
 505
 506        dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
 507        if (flags)
 508                assert(rdpkru() > orig_pkru);
 509}
 510
 511void pkey_write_allow(int pkey)
 512{
 513        pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
 514}
 515void pkey_write_deny(int pkey)
 516{
 517        pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
 518}
 519void pkey_access_allow(int pkey)
 520{
 521        pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
 522}
 523void pkey_access_deny(int pkey)
 524{
 525        pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
 526}
 527
 528int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
 529                unsigned long pkey)
 530{
 531        int sret;
 532
 533        dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
 534                        ptr, size, orig_prot, pkey);
 535
 536        errno = 0;
 537        sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
 538        if (errno) {
 539                dprintf2("SYS_mprotect_key sret: %d\n", sret);
 540                dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
 541                dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
 542                if (DEBUG_LEVEL >= 2)
 543                        perror("SYS_mprotect_pkey");
 544        }
 545        return sret;
 546}
 547
 548int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
 549{
 550        int ret = syscall(SYS_pkey_alloc, flags, init_val);
 551        dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
 552                        __func__, flags, init_val, ret, errno);
 553        return ret;
 554}
 555
 556int alloc_pkey(void)
 557{
 558        int ret;
 559        unsigned long init_val = 0x0;
 560
 561        dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
 562                        __LINE__, __rdpkru(), shadow_pkru);
 563        ret = sys_pkey_alloc(0, init_val);
 564        /*
 565         * pkey_alloc() sets PKRU, so we need to reflect it in
 566         * shadow_pkru:
 567         */
 568        dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
 569                        __LINE__, ret, __rdpkru(), shadow_pkru);
 570        if (ret) {
 571                /* clear both the bits: */
 572                shadow_pkru &= ~(0x3      << (ret * 2));
 573                dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
 574                                __LINE__, ret, __rdpkru(), shadow_pkru);
 575                /*
 576                 * move the new state in from init_val
 577                 * (remember, we cheated and init_val == pkru format)
 578                 */
 579                shadow_pkru |=  (init_val << (ret * 2));
 580        }
 581        dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
 582                        __LINE__, ret, __rdpkru(), shadow_pkru);
 583        dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
 584        /* for shadow checking: */
 585        rdpkru();
 586        dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
 587                        __LINE__, ret, __rdpkru(), shadow_pkru);
 588        return ret;
 589}
 590
 591int sys_pkey_free(unsigned long pkey)
 592{
 593        int ret = syscall(SYS_pkey_free, pkey);
 594        dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
 595        return ret;
 596}
 597
 598/*
 599 * I had a bug where pkey bits could be set by mprotect() but
 600 * not cleared.  This ensures we get lots of random bit sets
 601 * and clears on the vma and pte pkey bits.
 602 */
 603int alloc_random_pkey(void)
 604{
 605        int max_nr_pkey_allocs;
 606        int ret;
 607        int i;
 608        int alloced_pkeys[NR_PKEYS];
 609        int nr_alloced = 0;
 610        int random_index;
 611        memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
 612
 613        /* allocate every possible key and make a note of which ones we got */
 614        max_nr_pkey_allocs = NR_PKEYS;
 615        max_nr_pkey_allocs = 1;
 616        for (i = 0; i < max_nr_pkey_allocs; i++) {
 617                int new_pkey = alloc_pkey();
 618                if (new_pkey < 0)
 619                        break;
 620                alloced_pkeys[nr_alloced++] = new_pkey;
 621        }
 622
 623        pkey_assert(nr_alloced > 0);
 624        /* select a random one out of the allocated ones */
 625        random_index = rand() % nr_alloced;
 626        ret = alloced_pkeys[random_index];
 627        /* now zero it out so we don't free it next */
 628        alloced_pkeys[random_index] = 0;
 629
 630        /* go through the allocated ones that we did not want and free them */
 631        for (i = 0; i < nr_alloced; i++) {
 632                int free_ret;
 633                if (!alloced_pkeys[i])
 634                        continue;
 635                free_ret = sys_pkey_free(alloced_pkeys[i]);
 636                pkey_assert(!free_ret);
 637        }
 638        dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
 639                        __LINE__, ret, __rdpkru(), shadow_pkru);
 640        return ret;
 641}
 642
 643int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
 644                unsigned long pkey)
 645{
 646        int nr_iterations = random() % 100;
 647        int ret;
 648
 649        while (0) {
 650                int rpkey = alloc_random_pkey();
 651                ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
 652                dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
 653                                ptr, size, orig_prot, pkey, ret);
 654                if (nr_iterations-- < 0)
 655                        break;
 656
 657                dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
 658                        __LINE__, ret, __rdpkru(), shadow_pkru);
 659                sys_pkey_free(rpkey);
 660                dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
 661                        __LINE__, ret, __rdpkru(), shadow_pkru);
 662        }
 663        pkey_assert(pkey < NR_PKEYS);
 664
 665        ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
 666        dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
 667                        ptr, size, orig_prot, pkey, ret);
 668        pkey_assert(!ret);
 669        dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
 670                        __LINE__, ret, __rdpkru(), shadow_pkru);
 671        return ret;
 672}
 673
 674struct pkey_malloc_record {
 675        void *ptr;
 676        long size;
 677};
 678struct pkey_malloc_record *pkey_malloc_records;
 679long nr_pkey_malloc_records;
 680void record_pkey_malloc(void *ptr, long size)
 681{
 682        long i;
 683        struct pkey_malloc_record *rec = NULL;
 684
 685        for (i = 0; i < nr_pkey_malloc_records; i++) {
 686                rec = &pkey_malloc_records[i];
 687                /* find a free record */
 688                if (rec)
 689                        break;
 690        }
 691        if (!rec) {
 692                /* every record is full */
 693                size_t old_nr_records = nr_pkey_malloc_records;
 694                size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
 695                size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
 696                dprintf2("new_nr_records: %zd\n", new_nr_records);
 697                dprintf2("new_size: %zd\n", new_size);
 698                pkey_malloc_records = realloc(pkey_malloc_records, new_size);
 699                pkey_assert(pkey_malloc_records != NULL);
 700                rec = &pkey_malloc_records[nr_pkey_malloc_records];
 701                /*
 702                 * realloc() does not initialize memory, so zero it from
 703                 * the first new record all the way to the end.
 704                 */
 705                for (i = 0; i < new_nr_records - old_nr_records; i++)
 706                        memset(rec + i, 0, sizeof(*rec));
 707        }
 708        dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
 709                (int)(rec - pkey_malloc_records), rec, ptr, size);
 710        rec->ptr = ptr;
 711        rec->size = size;
 712        nr_pkey_malloc_records++;
 713}
 714
 715void free_pkey_malloc(void *ptr)
 716{
 717        long i;
 718        int ret;
 719        dprintf3("%s(%p)\n", __func__, ptr);
 720        for (i = 0; i < nr_pkey_malloc_records; i++) {
 721                struct pkey_malloc_record *rec = &pkey_malloc_records[i];
 722                dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
 723                                ptr, i, rec, rec->ptr, rec->size);
 724                if ((ptr <  rec->ptr) ||
 725                    (ptr >= rec->ptr + rec->size))
 726                        continue;
 727
 728                dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
 729                                ptr, i, rec, rec->ptr, rec->size);
 730                nr_pkey_malloc_records--;
 731                ret = munmap(rec->ptr, rec->size);
 732                dprintf3("munmap ret: %d\n", ret);
 733                pkey_assert(!ret);
 734                dprintf3("clearing rec->ptr, rec: %p\n", rec);
 735                rec->ptr = NULL;
 736                dprintf3("done clearing rec->ptr, rec: %p\n", rec);
 737                return;
 738        }
 739        pkey_assert(false);
 740}
 741
 742
 743void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
 744{
 745        void *ptr;
 746        int ret;
 747
 748        rdpkru();
 749        dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
 750                        size, prot, pkey);
 751        pkey_assert(pkey < NR_PKEYS);
 752        ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
 753        pkey_assert(ptr != (void *)-1);
 754        ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
 755        pkey_assert(!ret);
 756        record_pkey_malloc(ptr, size);
 757        rdpkru();
 758
 759        dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
 760        return ptr;
 761}
 762
 763void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
 764{
 765        int ret;
 766        void *ptr;
 767
 768        dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
 769                        size, prot, pkey);
 770        /*
 771         * Guarantee we can fit at least one huge page in the resulting
 772         * allocation by allocating space for 2:
 773         */
 774        size = ALIGN_UP(size, HPAGE_SIZE * 2);
 775        ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
 776        pkey_assert(ptr != (void *)-1);
 777        record_pkey_malloc(ptr, size);
 778        mprotect_pkey(ptr, size, prot, pkey);
 779
 780        dprintf1("unaligned ptr: %p\n", ptr);
 781        ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
 782        dprintf1("  aligned ptr: %p\n", ptr);
 783        ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
 784        dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
 785        ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
 786        dprintf1("MADV_WILLNEED ret: %d\n", ret);
 787        memset(ptr, 0, HPAGE_SIZE);
 788
 789        dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
 790        return ptr;
 791}
 792
 793int hugetlb_setup_ok;
 794#define GET_NR_HUGE_PAGES 10
 795void setup_hugetlbfs(void)
 796{
 797        int err;
 798        int fd;
 799        char buf[] = "123";
 800
 801        if (geteuid() != 0) {
 802                fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
 803                return;
 804        }
 805
 806        cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
 807
 808        /*
 809         * Now go make sure that we got the pages and that they
 810         * are 2M pages.  Someone might have made 1G the default.
 811         */
 812        fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
 813        if (fd < 0) {
 814                perror("opening sysfs 2M hugetlb config");
 815                return;
 816        }
 817
 818        /* -1 to guarantee leaving the trailing \0 */
 819        err = read(fd, buf, sizeof(buf)-1);
 820        close(fd);
 821        if (err <= 0) {
 822                perror("reading sysfs 2M hugetlb config");
 823                return;
 824        }
 825
 826        if (atoi(buf) != GET_NR_HUGE_PAGES) {
 827                fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
 828                        buf, GET_NR_HUGE_PAGES);
 829                return;
 830        }
 831
 832        hugetlb_setup_ok = 1;
 833}
 834
 835void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
 836{
 837        void *ptr;
 838        int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
 839
 840        if (!hugetlb_setup_ok)
 841                return PTR_ERR_ENOTSUP;
 842
 843        dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
 844        size = ALIGN_UP(size, HPAGE_SIZE * 2);
 845        pkey_assert(pkey < NR_PKEYS);
 846        ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
 847        pkey_assert(ptr != (void *)-1);
 848        mprotect_pkey(ptr, size, prot, pkey);
 849
 850        record_pkey_malloc(ptr, size);
 851
 852        dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
 853        return ptr;
 854}
 855
 856void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
 857{
 858        void *ptr;
 859        int fd;
 860
 861        dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
 862                        size, prot, pkey);
 863        pkey_assert(pkey < NR_PKEYS);
 864        fd = open("/dax/foo", O_RDWR);
 865        pkey_assert(fd >= 0);
 866
 867        ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
 868        pkey_assert(ptr != (void *)-1);
 869
 870        mprotect_pkey(ptr, size, prot, pkey);
 871
 872        record_pkey_malloc(ptr, size);
 873
 874        dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
 875        close(fd);
 876        return ptr;
 877}
 878
 879void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
 880
 881        malloc_pkey_with_mprotect,
 882        malloc_pkey_anon_huge,
 883        malloc_pkey_hugetlb
 884/* can not do direct with the pkey_mprotect() API:
 885        malloc_pkey_mmap_direct,
 886        malloc_pkey_mmap_dax,
 887*/
 888};
 889
 890void *malloc_pkey(long size, int prot, u16 pkey)
 891{
 892        void *ret;
 893        static int malloc_type;
 894        int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
 895
 896        pkey_assert(pkey < NR_PKEYS);
 897
 898        while (1) {
 899                pkey_assert(malloc_type < nr_malloc_types);
 900
 901                ret = pkey_malloc[malloc_type](size, prot, pkey);
 902                pkey_assert(ret != (void *)-1);
 903
 904                malloc_type++;
 905                if (malloc_type >= nr_malloc_types)
 906                        malloc_type = (random()%nr_malloc_types);
 907
 908                /* try again if the malloc_type we tried is unsupported */
 909                if (ret == PTR_ERR_ENOTSUP)
 910                        continue;
 911
 912                break;
 913        }
 914
 915        dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
 916                        size, prot, pkey, ret);
 917        return ret;
 918}
 919
 920int last_pkru_faults;
 921void expected_pk_fault(int pkey)
 922{
 923        dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
 924                        __func__, last_pkru_faults, pkru_faults);
 925        dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
 926        pkey_assert(last_pkru_faults + 1 == pkru_faults);
 927        pkey_assert(last_si_pkey == pkey);
 928        /*
 929         * The signal handler shold have cleared out PKRU to let the
 930         * test program continue.  We now have to restore it.
 931         */
 932        if (__rdpkru() != 0)
 933                pkey_assert(0);
 934
 935        __wrpkru(shadow_pkru);
 936        dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
 937                        __func__, shadow_pkru);
 938        last_pkru_faults = pkru_faults;
 939        last_si_pkey = -1;
 940}
 941
 942void do_not_expect_pk_fault(void)
 943{
 944        pkey_assert(last_pkru_faults == pkru_faults);
 945}
 946
 947int test_fds[10] = { -1 };
 948int nr_test_fds;
 949void __save_test_fd(int fd)
 950{
 951        pkey_assert(fd >= 0);
 952        pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
 953        test_fds[nr_test_fds] = fd;
 954        nr_test_fds++;
 955}
 956
 957int get_test_read_fd(void)
 958{
 959        int test_fd = open("/etc/passwd", O_RDONLY);
 960        __save_test_fd(test_fd);
 961        return test_fd;
 962}
 963
 964void close_test_fds(void)
 965{
 966        int i;
 967
 968        for (i = 0; i < nr_test_fds; i++) {
 969                if (test_fds[i] < 0)
 970                        continue;
 971                close(test_fds[i]);
 972                test_fds[i] = -1;
 973        }
 974        nr_test_fds = 0;
 975}
 976
 977#define barrier() __asm__ __volatile__("": : :"memory")
 978__attribute__((noinline)) int read_ptr(int *ptr)
 979{
 980        /*
 981         * Keep GCC from optimizing this away somehow
 982         */
 983        barrier();
 984        return *ptr;
 985}
 986
 987void test_read_of_write_disabled_region(int *ptr, u16 pkey)
 988{
 989        int ptr_contents;
 990
 991        dprintf1("disabling write access to PKEY[1], doing read\n");
 992        pkey_write_deny(pkey);
 993        ptr_contents = read_ptr(ptr);
 994        dprintf1("*ptr: %d\n", ptr_contents);
 995        dprintf1("\n");
 996}
 997void test_read_of_access_disabled_region(int *ptr, u16 pkey)
 998{
 999        int ptr_contents;
1000
1001        dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
1002        rdpkru();
1003        pkey_access_deny(pkey);
1004        ptr_contents = read_ptr(ptr);
1005        dprintf1("*ptr: %d\n", ptr_contents);
1006        expected_pk_fault(pkey);
1007}
1008void test_write_of_write_disabled_region(int *ptr, u16 pkey)
1009{
1010        dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
1011        pkey_write_deny(pkey);
1012        *ptr = __LINE__;
1013        expected_pk_fault(pkey);
1014}
1015void test_write_of_access_disabled_region(int *ptr, u16 pkey)
1016{
1017        dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
1018        pkey_access_deny(pkey);
1019        *ptr = __LINE__;
1020        expected_pk_fault(pkey);
1021}
1022void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
1023{
1024        int ret;
1025        int test_fd = get_test_read_fd();
1026
1027        dprintf1("disabling access to PKEY[%02d], "
1028                 "having kernel read() to buffer\n", pkey);
1029        pkey_access_deny(pkey);
1030        ret = read(test_fd, ptr, 1);
1031        dprintf1("read ret: %d\n", ret);
1032        pkey_assert(ret);
1033}
1034void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
1035{
1036        int ret;
1037        int test_fd = get_test_read_fd();
1038
1039        pkey_write_deny(pkey);
1040        ret = read(test_fd, ptr, 100);
1041        dprintf1("read ret: %d\n", ret);
1042        if (ret < 0 && (DEBUG_LEVEL > 0))
1043                perror("verbose read result (OK for this to be bad)");
1044        pkey_assert(ret);
1045}
1046
1047void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
1048{
1049        int pipe_ret, vmsplice_ret;
1050        struct iovec iov;
1051        int pipe_fds[2];
1052
1053        pipe_ret = pipe(pipe_fds);
1054
1055        pkey_assert(pipe_ret == 0);
1056        dprintf1("disabling access to PKEY[%02d], "
1057                 "having kernel vmsplice from buffer\n", pkey);
1058        pkey_access_deny(pkey);
1059        iov.iov_base = ptr;
1060        iov.iov_len = PAGE_SIZE;
1061        vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
1062        dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
1063        pkey_assert(vmsplice_ret == -1);
1064
1065        close(pipe_fds[0]);
1066        close(pipe_fds[1]);
1067}
1068
1069void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
1070{
1071        int ignored = 0xdada;
1072        int futex_ret;
1073        int some_int = __LINE__;
1074
1075        dprintf1("disabling write to PKEY[%02d], "
1076                 "doing futex gunk in buffer\n", pkey);
1077        *ptr = some_int;
1078        pkey_write_deny(pkey);
1079        futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
1080                        &ignored, ignored);
1081        if (DEBUG_LEVEL > 0)
1082                perror("futex");
1083        dprintf1("futex() ret: %d\n", futex_ret);
1084}
1085
1086/* Assumes that all pkeys other than 'pkey' are unallocated */
1087void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
1088{
1089        int err;
1090        int i;
1091
1092        /* Note: 0 is the default pkey, so don't mess with it */
1093        for (i = 1; i < NR_PKEYS; i++) {
1094                if (pkey == i)
1095                        continue;
1096
1097                dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
1098                err = sys_pkey_free(i);
1099                pkey_assert(err);
1100
1101                err = sys_pkey_free(i);
1102                pkey_assert(err);
1103
1104                err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
1105                pkey_assert(err);
1106        }
1107}
1108
1109/* Assumes that all pkeys other than 'pkey' are unallocated */
1110void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
1111{
1112        int err;
1113        int bad_pkey = NR_PKEYS+99;
1114
1115        /* pass a known-invalid pkey in: */
1116        err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
1117        pkey_assert(err);
1118}
1119
1120/* Assumes that all pkeys other than 'pkey' are unallocated */
1121void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
1122{
1123        int err;
1124        int allocated_pkeys[NR_PKEYS] = {0};
1125        int nr_allocated_pkeys = 0;
1126        int i;
1127
1128        for (i = 0; i < NR_PKEYS*2; i++) {
1129                int new_pkey;
1130                dprintf1("%s() alloc loop: %d\n", __func__, i);
1131                new_pkey = alloc_pkey();
1132                dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
1133                                __LINE__, err, __rdpkru(), shadow_pkru);
1134                rdpkru(); /* for shadow checking */
1135                dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
1136                if ((new_pkey == -1) && (errno == ENOSPC)) {
1137                        dprintf2("%s() failed to allocate pkey after %d tries\n",
1138                                __func__, nr_allocated_pkeys);
1139                        break;
1140                }
1141                pkey_assert(nr_allocated_pkeys < NR_PKEYS);
1142                allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
1143        }
1144
1145        dprintf3("%s()::%d\n", __func__, __LINE__);
1146
1147        /*
1148         * ensure it did not reach the end of the loop without
1149         * failure:
1150         */
1151        pkey_assert(i < NR_PKEYS*2);
1152
1153        /*
1154         * There are 16 pkeys supported in hardware.  One is taken
1155         * up for the default (0) and another can be taken up by
1156         * an execute-only mapping.  Ensure that we can allocate
1157         * at least 14 (16-2).
1158         */
1159        pkey_assert(i >= NR_PKEYS-2);
1160
1161        for (i = 0; i < nr_allocated_pkeys; i++) {
1162                err = sys_pkey_free(allocated_pkeys[i]);
1163                pkey_assert(!err);
1164                rdpkru(); /* for shadow checking */
1165        }
1166}
1167
1168void test_ptrace_of_child(int *ptr, u16 pkey)
1169{
1170        __attribute__((__unused__)) int peek_result;
1171        pid_t child_pid;
1172        void *ignored = 0;
1173        long ret;
1174        int status;
1175        /*
1176         * This is the "control" for our little expermient.  Make sure
1177         * we can always access it when ptracing.
1178         */
1179        int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
1180        int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
1181
1182        /*
1183         * Fork a child which is an exact copy of this process, of course.
1184         * That means we can do all of our tests via ptrace() and then plain
1185         * memory access and ensure they work differently.
1186         */
1187        child_pid = fork_lazy_child();
1188        dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
1189
1190        ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
1191        if (ret)
1192                perror("attach");
1193        dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
1194        pkey_assert(ret != -1);
1195        ret = waitpid(child_pid, &status, WUNTRACED);
1196        if ((ret != child_pid) || !(WIFSTOPPED(status))) {
1197                fprintf(stderr, "weird waitpid result %ld stat %x\n",
1198                                ret, status);
1199                pkey_assert(0);
1200        }
1201        dprintf2("waitpid ret: %ld\n", ret);
1202        dprintf2("waitpid status: %d\n", status);
1203
1204        pkey_access_deny(pkey);
1205        pkey_write_deny(pkey);
1206
1207        /* Write access, untested for now:
1208        ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
1209        pkey_assert(ret != -1);
1210        dprintf1("poke at %p: %ld\n", peek_at, ret);
1211        */
1212
1213        /*
1214         * Try to access the pkey-protected "ptr" via ptrace:
1215         */
1216        ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
1217        /* expect it to work, without an error: */
1218        pkey_assert(ret != -1);
1219        /* Now access from the current task, and expect an exception: */
1220        peek_result = read_ptr(ptr);
1221        expected_pk_fault(pkey);
1222
1223        /*
1224         * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
1225         */
1226        ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
1227        /* expect it to work, without an error: */
1228        pkey_assert(ret != -1);
1229        /* Now access from the current task, and expect NO exception: */
1230        peek_result = read_ptr(plain_ptr);
1231        do_not_expect_pk_fault();
1232
1233        ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
1234        pkey_assert(ret != -1);
1235
1236        ret = kill(child_pid, SIGKILL);
1237        pkey_assert(ret != -1);
1238
1239        wait(&status);
1240
1241        free(plain_ptr_unaligned);
1242}
1243
1244void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
1245{
1246        void *p1;
1247        int scratch;
1248        int ptr_contents;
1249        int ret;
1250
1251        p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
1252        dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
1253        /* lots_o_noops_around_write should be page-aligned already */
1254        assert(p1 == &lots_o_noops_around_write);
1255
1256        /* Point 'p1' at the *second* page of the function: */
1257        p1 += PAGE_SIZE;
1258
1259        madvise(p1, PAGE_SIZE, MADV_DONTNEED);
1260        lots_o_noops_around_write(&scratch);
1261        ptr_contents = read_ptr(p1);
1262        dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
1263
1264        ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
1265        pkey_assert(!ret);
1266        pkey_access_deny(pkey);
1267
1268        dprintf2("pkru: %x\n", rdpkru());
1269
1270        /*
1271         * Make sure this is an *instruction* fault
1272         */
1273        madvise(p1, PAGE_SIZE, MADV_DONTNEED);
1274        lots_o_noops_around_write(&scratch);
1275        do_not_expect_pk_fault();
1276        ptr_contents = read_ptr(p1);
1277        dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
1278        expected_pk_fault(pkey);
1279}
1280
1281void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
1282{
1283        int size = PAGE_SIZE;
1284        int sret;
1285
1286        if (cpu_has_pku()) {
1287                dprintf1("SKIP: %s: no CPU support\n", __func__);
1288                return;
1289        }
1290
1291        sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
1292        pkey_assert(sret < 0);
1293}
1294
/*
 * Tests executed by run_tests_once(), in this order.  The index into
 * this table is the test number (test_nr) shown in the output.
 */
void (*pkey_tests[])(int *ptr, u16 pkey) = {
	test_read_of_write_disabled_region,
	test_read_of_access_disabled_region,
	test_write_of_write_disabled_region,
	test_write_of_access_disabled_region,
	test_kernel_write_of_access_disabled_region,
	test_kernel_write_of_write_disabled_region,
	test_kernel_gup_of_access_disabled_region,
	test_kernel_gup_write_to_write_disabled_region,
	test_executing_on_unreadable_memory,
	test_ptrace_of_child,
	test_pkey_syscalls_on_non_allocated_pkey,
	test_pkey_syscalls_bad_args,
	test_pkey_alloc_exhaust,
};
1310
1311void run_tests_once(void)
1312{
1313        int *ptr;
1314        int prot = PROT_READ|PROT_WRITE;
1315
1316        for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
1317                int pkey;
1318                int orig_pkru_faults = pkru_faults;
1319
1320                dprintf1("======================\n");
1321                dprintf1("test %d preparing...\n", test_nr);
1322
1323                tracing_on();
1324                pkey = alloc_random_pkey();
1325                dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
1326                ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
1327                dprintf1("test %d starting...\n", test_nr);
1328                pkey_tests[test_nr](ptr, pkey);
1329                dprintf1("freeing test memory: %p\n", ptr);
1330                free_pkey_malloc(ptr);
1331                sys_pkey_free(pkey);
1332
1333                dprintf1("pkru_faults: %d\n", pkru_faults);
1334                dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
1335
1336                tracing_off();
1337                close_test_fds();
1338
1339                printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
1340                dprintf1("======================\n\n");
1341        }
1342        iteration_nr++;
1343}
1344
1345void pkey_setup_shadow(void)
1346{
1347        shadow_pkru = __rdpkru();
1348}
1349
1350int main(void)
1351{
1352        int nr_iterations = 22;
1353
1354        setup_handlers();
1355
1356        printf("has pku: %d\n", cpu_has_pku());
1357
1358        if (!cpu_has_pku()) {
1359                int size = PAGE_SIZE;
1360                int *ptr;
1361
1362                printf("running PKEY tests for unsupported CPU/OS\n");
1363
1364                ptr  = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
1365                assert(ptr != (void *)-1);
1366                test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
1367                exit(0);
1368        }
1369
1370        pkey_setup_shadow();
1371        printf("startup pkru: %x\n", rdpkru());
1372        setup_hugetlbfs();
1373
1374        while (nr_iterations-- > 0)
1375                run_tests_once();
1376
1377        printf("done (all tests OK)\n");
1378        return 0;
1379}
1380