linux/tools/testing/selftests/x86/syscall_numbering.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2/*
   3 * syscall_numbering.c - test calling the x86-64 kernel with various
   4 * valid and invalid system call numbers.
   5 *
   6 * Copyright (c) 2018 Andrew Lutomirski
   7 */
   8
   9#define _GNU_SOURCE
  10
  11#include <stdlib.h>
  12#include <stdio.h>
  13#include <stdbool.h>
  14#include <errno.h>
  15#include <unistd.h>
  16#include <string.h>
  17#include <fcntl.h>
  18#include <limits.h>
  19#include <signal.h>
  20#include <sysexits.h>
  21
  22#include <sys/ptrace.h>
  23#include <sys/user.h>
  24#include <sys/wait.h>
  25#include <sys/mman.h>
  26
  27#include <linux/ptrace.h>
  28
  29/* Common system call numbers */
  30#define SYS_READ          0
  31#define SYS_WRITE         1
  32#define SYS_GETPID       39
  33/* x64-only system call numbers */
  34#define X64_IOCTL        16
  35#define X64_READV        19
  36#define X64_WRITEV       20
  37/* x32-only system call numbers (without X32_BIT) */
  38#define X32_IOCTL       514
  39#define X32_READV       515
  40#define X32_WRITEV      516
  41
  42#define X32_BIT 0x40000000
  43
  44static int nullfd = -1;         /* File descriptor for /dev/null */
  45static bool with_x32;           /* x32 supported on this kernel? */
  46
  47enum ptrace_pass {
  48        PTP_NOTHING,
  49        PTP_GETREGS,
  50        PTP_WRITEBACK,
  51        PTP_FUZZRET,
  52        PTP_FUZZHIGH,
  53        PTP_INTNUM,
  54        PTP_DONE
  55};
  56
  57static const char * const ptrace_pass_name[] =
  58{
  59        [PTP_NOTHING]   = "just stop, no data read",
  60        [PTP_GETREGS]   = "only getregs",
  61        [PTP_WRITEBACK] = "getregs, unmodified setregs",
  62        [PTP_FUZZRET]   = "modifying the default return",
  63        [PTP_FUZZHIGH]  = "clobbering the top 32 bits",
  64        [PTP_INTNUM]    = "sign-extending the syscall number",
  65};
  66
  67/*
  68 * Shared memory block between tracer and test
  69 */
  70struct shared {
  71        unsigned int nerr;      /* Total error count */
  72        unsigned int indent;    /* Message indentation level */
  73        enum ptrace_pass ptrace_pass;
  74        bool probing_syscall;   /* In probe_syscall() */
  75};
  76static volatile struct shared *sh;
  77
  78static inline unsigned int offset(void)
  79{
  80        unsigned int level = sh ? sh->indent : 0;
  81
  82        return 8 + level * 4;
  83}
  84
  85#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
  86                                  ## __VA_ARGS__)
  87
  88#define run(fmt, ...)  msg(RUN,  fmt, ## __VA_ARGS__)
  89#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__)
  90#define ok(fmt, ...)   msg(OK,   fmt, ## __VA_ARGS__)
  91
  92#define fail(fmt, ...)                                  \
  93        do {                                            \
  94                msg(FAIL, fmt, ## __VA_ARGS__);         \
  95                sh->nerr++;                             \
  96       } while (0)
  97
  98#define crit(fmt, ...)                                  \
  99        do {                                            \
 100                sh->indent = 0;                         \
 101                msg(FAIL, fmt, ## __VA_ARGS__);         \
 102                msg(SKIP, "Unable to run test\n");      \
 103                exit(EX_OSERR);                         \
 104       } while (0)
 105
 106/* Sentinel for ptrace-modified return value */
 107#define MODIFIED_BY_PTRACE      -9999
 108
 109/*
 110 * Directly invokes the given syscall with nullfd as the first argument
 111 * and the rest zero. Avoids involving glibc wrappers in case they ever
 112 * end up intercepting some system calls for some reason, or modify
 113 * the system call number itself.
 114 */
 115static long long probe_syscall(int msb, int lsb)
 116{
 117        register long long arg1 asm("rdi") = nullfd;
 118        register long long arg2 asm("rsi") = 0;
 119        register long long arg3 asm("rdx") = 0;
 120        register long long arg4 asm("r10") = 0;
 121        register long long arg5 asm("r8")  = 0;
 122        register long long arg6 asm("r9")  = 0;
 123        long long nr = ((long long)msb << 32) | (unsigned int)lsb;
 124        long long ret;
 125
 126        /*
 127         * We pass in an extra copy of the extended system call number
 128         * in %rbx, so we can examine it from the ptrace handler without
 129         * worrying about it being possibly modified. This is to test
 130         * the validity of struct user regs.orig_rax a.k.a.
 131         * struct pt_regs.orig_ax.
 132         */
 133        sh->probing_syscall = true;
 134        asm volatile("syscall"
 135                     : "=a" (ret)
 136                     : "a" (nr), "b" (nr),
 137                       "r" (arg1), "r" (arg2), "r" (arg3),
 138                       "r" (arg4), "r" (arg5), "r" (arg6)
 139                     : "rcx", "r11", "memory", "cc");
 140        sh->probing_syscall = false;
 141
 142        return ret;
 143}
 144
 145static const char *syscall_str(int msb, int start, int end)
 146{
 147        static char buf[64];
 148        const char * const type = (start & X32_BIT) ? "x32" : "x64";
 149        int lsb = start;
 150
 151        /*
 152         * Improve readability by stripping the x32 bit, but round
 153         * toward zero so we don't display -1 as -1073741825.
 154         */
 155        if (lsb < 0)
 156                lsb |= X32_BIT;
 157        else
 158                lsb &= ~X32_BIT;
 159
 160        if (start == end)
 161                snprintf(buf, sizeof buf, "%s syscall %d:%d",
 162                         type, msb, lsb);
 163        else
 164                snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
 165                         type, msb, lsb, lsb + (end-start));
 166
 167        return buf;
 168}
 169
 170static unsigned int _check_for(int msb, int start, int end, long long expect,
 171                               const char *expect_str)
 172{
 173        unsigned int err = 0;
 174
 175        sh->indent++;
 176        if (start != end)
 177                sh->indent++;
 178
 179        for (int nr = start; nr <= end; nr++) {
 180                long long ret = probe_syscall(msb, nr);
 181
 182                if (ret != expect) {
 183                        fail("%s returned %lld, but it should have returned %s\n",
 184                               syscall_str(msb, nr, nr),
 185                               ret, expect_str);
 186                        err++;
 187                }
 188        }
 189
 190        if (start != end)
 191                sh->indent--;
 192
 193        if (err) {
 194                if (start != end)
 195                        fail("%s had %u failure%s\n",
 196                             syscall_str(msb, start, end),
 197                             err, err == 1 ? "s" : "");
 198        } else {
 199                ok("%s returned %s as expected\n",
 200                   syscall_str(msb, start, end), expect_str);
 201        }
 202
 203        sh->indent--;
 204
 205        return err;
 206}
 207
 208#define check_for(msb,start,end,expect) \
 209        _check_for(msb,start,end,expect,#expect)
 210
 211static bool check_zero(int msb, int nr)
 212{
 213        return check_for(msb, nr, nr, 0);
 214}
 215
 216static bool check_enosys(int msb, int nr)
 217{
 218        return check_for(msb, nr, nr, -ENOSYS);
 219}
 220
 221/*
 222 * Anyone diagnosing a failure will want to know whether the kernel
 223 * supports x32. Tell them. This can also be used to conditionalize
 224 * tests based on existence or nonexistence of x32.
 225 */
 226static bool test_x32(void)
 227{
 228        long long ret;
 229        pid_t mypid = getpid();
 230
 231        run("Checking for x32 by calling x32 getpid()\n");
 232        ret = probe_syscall(0, SYS_GETPID | X32_BIT);
 233
 234        sh->indent++;
 235        if (ret == mypid) {
 236                info("x32 is supported\n");
 237                with_x32 = true;
 238        } else if (ret == -ENOSYS) {
 239                info("x32 is not supported\n");
 240                with_x32 = false;
 241        } else {
 242                fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid);
 243                with_x32 = false;
 244        }
 245        sh->indent--;
 246        return with_x32;
 247}
 248
 249static void test_syscalls_common(int msb)
 250{
 251        enum ptrace_pass pass = sh->ptrace_pass;
 252
 253        run("Checking some common syscalls as 64 bit\n");
 254        check_zero(msb, SYS_READ);
 255        check_zero(msb, SYS_WRITE);
 256
 257        run("Checking some 64-bit only syscalls as 64 bit\n");
 258        check_zero(msb, X64_READV);
 259        check_zero(msb, X64_WRITEV);
 260
 261        run("Checking out of range system calls\n");
 262        check_for(msb, -64, -2, -ENOSYS);
 263        if (pass >= PTP_FUZZRET)
 264                check_for(msb, -1, -1, MODIFIED_BY_PTRACE);
 265        else
 266                check_for(msb, -1, -1, -ENOSYS);
 267        check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
 268        check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
 269        check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
 270}
 271
 272static void test_syscalls_with_x32(int msb)
 273{
 274        /*
 275         * Syscalls 512-547 are "x32" syscalls.  They are
 276         * intended to be called with the x32 (0x40000000) bit
 277         * set.  Calling them without the x32 bit set is
 278         * nonsense and should not work.
 279         */
 280        run("Checking x32 syscalls as 64 bit\n");
 281        check_for(msb, 512, 547, -ENOSYS);
 282
 283        run("Checking some common syscalls as x32\n");
 284        check_zero(msb, SYS_READ   | X32_BIT);
 285        check_zero(msb, SYS_WRITE  | X32_BIT);
 286
 287        run("Checking some x32 syscalls as x32\n");
 288        check_zero(msb, X32_READV  | X32_BIT);
 289        check_zero(msb, X32_WRITEV | X32_BIT);
 290
 291        run("Checking some 64-bit syscalls as x32\n");
 292        check_enosys(msb, X64_IOCTL  | X32_BIT);
 293        check_enosys(msb, X64_READV  | X32_BIT);
 294        check_enosys(msb, X64_WRITEV | X32_BIT);
 295}
 296
 297static void test_syscalls_without_x32(int msb)
 298{
 299        run("Checking for absence of x32 system calls\n");
 300        check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
 301}
 302
 303static void test_syscall_numbering(void)
 304{
 305        static const int msbs[] = {
 306                0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
 307                INT_MIN, INT_MIN+1
 308        };
 309
 310        sh->indent++;
 311
 312        /*
 313         * The MSB is supposed to be ignored, so we loop over a few
 314         * to test that out.
 315         */
 316        for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
 317                int msb = msbs[i];
 318                run("Checking system calls with msb = %d (0x%x)\n",
 319                    msb, msb);
 320
 321                sh->indent++;
 322
 323                test_syscalls_common(msb);
 324                if (with_x32)
 325                        test_syscalls_with_x32(msb);
 326                else
 327                        test_syscalls_without_x32(msb);
 328
 329                sh->indent--;
 330        }
 331
 332        sh->indent--;
 333}
 334
 335static void syscall_numbering_tracee(void)
 336{
 337        enum ptrace_pass pass;
 338
 339        if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
 340                crit("Failed to request tracing\n");
 341                return;
 342        }
 343        raise(SIGSTOP);
 344
 345        for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE;
 346             sh->ptrace_pass = ++pass) {
 347                run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
 348                test_syscall_numbering();
 349        }
 350}
 351
 352static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
 353{
 354        struct user_regs_struct regs;
 355
 356        sh->probing_syscall = false; /* Do this on entry only */
 357
 358        /* For these, don't even getregs */
 359        if (pass == PTP_NOTHING || pass == PTP_DONE)
 360                return;
 361
 362        ptrace(PTRACE_GETREGS, testpid, NULL, &regs);
 363
 364        if (regs.orig_rax != regs.rbx) {
 365                fail("orig_rax %#llx doesn't match syscall number %#llx\n",
 366                     (unsigned long long)regs.orig_rax,
 367                     (unsigned long long)regs.rbx);
 368        }
 369
 370        switch (pass) {
 371        case PTP_GETREGS:
 372                /* Just read, no writeback */
 373                return;
 374        case PTP_WRITEBACK:
 375                /* Write back the same register state verbatim */
 376                break;
 377        case PTP_FUZZRET:
 378                regs.rax = MODIFIED_BY_PTRACE;
 379                break;
 380        case PTP_FUZZHIGH:
 381                regs.rax = MODIFIED_BY_PTRACE;
 382                regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL;
 383                break;
 384        case PTP_INTNUM:
 385                regs.rax = MODIFIED_BY_PTRACE;
 386                regs.orig_rax = (int)regs.orig_rax;
 387                break;
 388        default:
 389                crit("invalid ptrace_pass\n");
 390                break;
 391        }
 392
 393        ptrace(PTRACE_SETREGS, testpid, NULL, &regs);
 394}
 395
 396static void syscall_numbering_tracer(pid_t testpid)
 397{
 398        int wstatus;
 399
 400        do {
 401                pid_t wpid = waitpid(testpid, &wstatus, 0);
 402                if (wpid < 0 && errno != EINTR)
 403                        break;
 404                if (wpid != testpid)
 405                        continue;
 406                if (!WIFSTOPPED(wstatus))
 407                        break;  /* Thread exited? */
 408
 409                if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
 410                        mess_with_syscall(testpid, sh->ptrace_pass);
 411        } while (sh->ptrace_pass != PTP_DONE &&
 412                 !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));
 413
 414        ptrace(PTRACE_DETACH, testpid, NULL, NULL);
 415
 416        /* Wait for the child process to terminate */
 417        while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus))
 418                /* wait some more */;
 419}
 420
 421static void test_traced_syscall_numbering(void)
 422{
 423        pid_t testpid;
 424
 425        /* Launch the test thread; this thread continues as the tracer thread */
 426        testpid = fork();
 427
 428        if (testpid < 0) {
 429                crit("Unable to launch tracer process\n");
 430        } else if (testpid == 0) {
 431                syscall_numbering_tracee();
 432                _exit(0);
 433        } else {
 434                syscall_numbering_tracer(testpid);
 435        }
 436}
 437
 438int main(void)
 439{
 440        unsigned int nerr;
 441
 442        /*
 443         * It is quite likely to get a segfault on a failure, so make
 444         * sure the message gets out by setting stdout to nonbuffered.
 445         */
 446        setvbuf(stdout, NULL, _IONBF, 0);
 447
 448        /*
 449         * Harmless file descriptor to work on...
 450         */
 451        nullfd = open("/dev/null", O_RDWR);
 452        if (nullfd < 0) {
 453                crit("Unable to open /dev/null: %s\n", strerror(errno));
 454        }
 455
 456        /*
 457         * Set up a block of shared memory...
 458         */
 459        sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
 460                  MAP_ANONYMOUS|MAP_SHARED, 0, 0);
 461        if (sh == MAP_FAILED) {
 462                crit("Unable to allocated shared memory block: %s\n",
 463                     strerror(errno));
 464        }
 465
 466        with_x32 = test_x32();
 467
 468        run("Running tests without ptrace...\n");
 469        test_syscall_numbering();
 470
 471        test_traced_syscall_numbering();
 472
 473        nerr = sh->nerr;
 474        if (!nerr) {
 475                ok("All system calls succeeded or failed as expected\n");
 476                return 0;
 477        } else {
 478                fail("A total of %u system call%s had incorrect behavior\n",
 479                     nerr, nerr != 1 ? "s" : "");
 480                return 1;
 481        }
 482}
 483