linux/tools/testing/selftests/x86/test_syscall_vdso.c
<<
>>
Prefs
   1/*
   2 * 32-bit syscall ABI conformance test.
   3 *
   4 * Copyright (c) 2015 Denys Vlasenko
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms and conditions of the GNU General Public License,
   8 * version 2, as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope it will be useful, but
  11 * WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 * General Public License for more details.
  14 */
  15/*
  16 * Can be built statically:
  17 * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
  18 */
  19#undef _GNU_SOURCE
  20#define _GNU_SOURCE 1
  21#undef __USE_GNU
  22#define __USE_GNU 1
  23#include <unistd.h>
  24#include <stdlib.h>
  25#include <string.h>
  26#include <stdio.h>
  27#include <signal.h>
  28#include <sys/types.h>
  29#include <sys/select.h>
  30#include <sys/time.h>
  31#include <elf.h>
  32#include <sys/ptrace.h>
  33#include <sys/wait.h>
  34
  35#if !defined(__i386__)
  36int main(int argc, char **argv, char **envp)
  37{
  38        printf("[SKIP]\tNot a 32-bit x86 userspace\n");
  39        return 0;
  40}
  41#else
  42
  43long syscall_addr;
  44long get_syscall(char **envp)
  45{
  46        Elf32_auxv_t *auxv;
  47        while (*envp++ != NULL)
  48                continue;
  49        for (auxv = (void *)envp; auxv->a_type != AT_NULL; auxv++)
  50                if (auxv->a_type == AT_SYSINFO)
  51                        return auxv->a_un.a_val;
  52        printf("[WARN]\tAT_SYSINFO not supplied\n");
  53        return 0;
  54}
  55
  56asm (
  57        "       .pushsection .text\n"
  58        "       .global int80\n"
  59        "int80:\n"
  60        "       int     $0x80\n"
  61        "       ret\n"
  62        "       .popsection\n"
  63);
  64extern char int80;
  65
  66struct regs64 {
  67        uint64_t rax, rbx, rcx, rdx;
  68        uint64_t rsi, rdi, rbp, rsp;
  69        uint64_t r8,  r9,  r10, r11;
  70        uint64_t r12, r13, r14, r15;
  71};
  72struct regs64 regs64;
  73int kernel_is_64bit;
  74
  75asm (
  76        "       .pushsection .text\n"
  77        "       .code64\n"
  78        "get_regs64:\n"
  79        "       push    %rax\n"
  80        "       mov     $regs64, %eax\n"
  81        "       pop     0*8(%rax)\n"
  82        "       movq    %rbx, 1*8(%rax)\n"
  83        "       movq    %rcx, 2*8(%rax)\n"
  84        "       movq    %rdx, 3*8(%rax)\n"
  85        "       movq    %rsi, 4*8(%rax)\n"
  86        "       movq    %rdi, 5*8(%rax)\n"
  87        "       movq    %rbp, 6*8(%rax)\n"
  88        "       movq    %rsp, 7*8(%rax)\n"
  89        "       movq    %r8,  8*8(%rax)\n"
  90        "       movq    %r9,  9*8(%rax)\n"
  91        "       movq    %r10, 10*8(%rax)\n"
  92        "       movq    %r11, 11*8(%rax)\n"
  93        "       movq    %r12, 12*8(%rax)\n"
  94        "       movq    %r13, 13*8(%rax)\n"
  95        "       movq    %r14, 14*8(%rax)\n"
  96        "       movq    %r15, 15*8(%rax)\n"
  97        "       ret\n"
  98        "poison_regs64:\n"
  99        "       movq    $0x7f7f7f7f, %r8\n"
 100        "       shl     $32, %r8\n"
 101        "       orq     $0x7f7f7f7f, %r8\n"
 102        "       movq    %r8, %r9\n"
 103        "       incq    %r9\n"
 104        "       movq    %r9, %r10\n"
 105        "       incq    %r10\n"
 106        "       movq    %r10, %r11\n"
 107        "       incq    %r11\n"
 108        "       movq    %r11, %r12\n"
 109        "       incq    %r12\n"
 110        "       movq    %r12, %r13\n"
 111        "       incq    %r13\n"
 112        "       movq    %r13, %r14\n"
 113        "       incq    %r14\n"
 114        "       movq    %r14, %r15\n"
 115        "       incq    %r15\n"
 116        "       ret\n"
 117        "       .code32\n"
 118        "       .popsection\n"
 119);
 120extern void get_regs64(void);
 121extern void poison_regs64(void);
 122extern unsigned long call64_from_32(void (*function)(void));
 123void print_regs64(void)
 124{
 125        if (!kernel_is_64bit)
 126                return;
 127        printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax,  regs64.rbx,  regs64.rcx,  regs64.rdx);
 128        printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi,  regs64.rdi,  regs64.rbp,  regs64.rsp);
 129        printf(" 8:%016llx  9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 ,  regs64.r9 ,  regs64.r10,  regs64.r11);
 130        printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12,  regs64.r13,  regs64.r14,  regs64.r15);
 131}
 132
 133int check_regs64(void)
 134{
 135        int err = 0;
 136        int num = 8;
 137        uint64_t *r64 = &regs64.r8;
 138        uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;
 139
 140        if (!kernel_is_64bit)
 141                return 0;
 142
 143        do {
 144                if (*r64 == expected++)
 145                        continue; /* register did not change */
 146                if (syscall_addr != (long)&int80) {
 147                        /*
 148                         * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs:
 149                         * either clear them to 0, or for R11, load EFLAGS.
 150                         */
 151                        if (*r64 == 0)
 152                                continue;
 153                        if (num == 11) {
 154                                printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64);
 155                                continue;
 156                        }
 157                } else {
 158                        /*
 159                         * INT80 syscall entrypoint can be used by
 160                         * 64-bit programs too, unlike SYSCALL/SYSENTER.
 161                         * Therefore it must preserve R12+
 162                         * (they are callee-saved registers in 64-bit C ABI).
 163                         *
 164                         * Starting in Linux 4.17 (and any kernel that
 165                         * backports the change), R8..11 are preserved.
 166                         * Historically (and probably unintentionally), they
 167                         * were clobbered or zeroed.
 168                         */
 169                }
 170                printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
 171                err++;
 172        } while (r64++, ++num < 16);
 173
 174        if (!err)
 175                printf("[OK]\tR8..R15 did not leak kernel data\n");
 176        return err;
 177}
 178
 179int nfds;
 180fd_set rfds;
 181fd_set wfds;
 182fd_set efds;
 183struct timespec timeout;
 184sigset_t sigmask;
 185struct {
 186        sigset_t *sp;
 187        int sz;
 188} sigmask_desc;
 189
 190void prep_args()
 191{
 192        nfds = 42;
 193        FD_ZERO(&rfds);
 194        FD_ZERO(&wfds);
 195        FD_ZERO(&efds);
 196        FD_SET(0, &rfds);
 197        FD_SET(1, &wfds);
 198        FD_SET(2, &efds);
 199        timeout.tv_sec = 0;
 200        timeout.tv_nsec = 123;
 201        sigemptyset(&sigmask);
 202        sigaddset(&sigmask, SIGINT);
 203        sigaddset(&sigmask, SIGUSR2);
 204        sigaddset(&sigmask, SIGRTMAX);
 205        sigmask_desc.sp = &sigmask;
 206        sigmask_desc.sz = 8; /* bytes */
 207}
 208
 209static void print_flags(const char *name, unsigned long r)
 210{
 211        static const char *bitarray[] = {
 212        "\n" ,"c\n" ,/* Carry Flag */
 213        "0 " ,"1 "  ,/* Bit 1 - always on */
 214        ""   ,"p "  ,/* Parity Flag */
 215        "0 " ,"3? " ,
 216        ""   ,"a "  ,/* Auxiliary carry Flag */
 217        "0 " ,"5? " ,
 218        ""   ,"z "  ,/* Zero Flag */
 219        ""   ,"s "  ,/* Sign Flag */
 220        ""   ,"t "  ,/* Trap Flag */
 221        ""   ,"i "  ,/* Interrupt Flag */
 222        ""   ,"d "  ,/* Direction Flag */
 223        ""   ,"o "  ,/* Overflow Flag */
 224        "0 " ,"1 "  ,/* I/O Privilege Level (2 bits) */
 225        "0"  ,"1"   ,/* I/O Privilege Level (2 bits) */
 226        ""   ,"n "  ,/* Nested Task */
 227        "0 " ,"15? ",
 228        ""   ,"r "  ,/* Resume Flag */
 229        ""   ,"v "  ,/* Virtual Mode */
 230        ""   ,"ac " ,/* Alignment Check/Access Control */
 231        ""   ,"vif ",/* Virtual Interrupt Flag */
 232        ""   ,"vip ",/* Virtual Interrupt Pending */
 233        ""   ,"id " ,/* CPUID detection */
 234        NULL
 235        };
 236        const char **bitstr;
 237        int bit;
 238
 239        printf("%s=%016lx ", name, r);
 240        bitstr = bitarray + 42;
 241        bit = 21;
 242        if ((r >> 22) != 0)
 243                printf("(extra bits are set) ");
 244        do {
 245                if (bitstr[(r >> bit) & 1][0])
 246                        fputs(bitstr[(r >> bit) & 1], stdout);
 247                bitstr -= 2;
 248                bit--;
 249        } while (bit >= 0);
 250}
 251
 252int run_syscall(void)
 253{
 254        long flags, bad_arg;
 255
 256        prep_args();
 257
 258        if (kernel_is_64bit)
 259                call64_from_32(poison_regs64);
 260        /*print_regs64();*/
 261
 262        asm("\n"
 263        /* Try 6-arg syscall: pselect. It should return quickly */
 264        "       push    %%ebp\n"
 265        "       mov     $308, %%eax\n"     /* PSELECT */
 266        "       mov     nfds, %%ebx\n"     /* ebx  arg1 */
 267        "       mov     $rfds, %%ecx\n"    /* ecx  arg2 */
 268        "       mov     $wfds, %%edx\n"    /* edx  arg3 */
 269        "       mov     $efds, %%esi\n"    /* esi  arg4 */
 270        "       mov     $timeout, %%edi\n" /* edi  arg5 */
 271        "       mov     $sigmask_desc, %%ebp\n" /* %ebp arg6 */
 272        "       push    $0x200ed7\n"      /* set almost all flags */
 273        "       popf\n"         /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
 274        "       call    *syscall_addr\n"
 275        /* Check that registers are not clobbered */
 276        "       pushf\n"
 277        "       pop     %%eax\n"
 278        "       cld\n"
 279        "       cmp     nfds, %%ebx\n"     /* ebx  arg1 */
 280        "       mov     $1, %%ebx\n"
 281        "       jne     1f\n"
 282        "       cmp     $rfds, %%ecx\n"    /* ecx  arg2 */
 283        "       mov     $2, %%ebx\n"
 284        "       jne     1f\n"
 285        "       cmp     $wfds, %%edx\n"    /* edx  arg3 */
 286        "       mov     $3, %%ebx\n"
 287        "       jne     1f\n"
 288        "       cmp     $efds, %%esi\n"    /* esi  arg4 */
 289        "       mov     $4, %%ebx\n"
 290        "       jne     1f\n"
 291        "       cmp     $timeout, %%edi\n" /* edi  arg5 */
 292        "       mov     $5, %%ebx\n"
 293        "       jne     1f\n"
 294        "       cmpl    $sigmask_desc, %%ebp\n" /* %ebp arg6 */
 295        "       mov     $6, %%ebx\n"
 296        "       jne     1f\n"
 297        "       mov     $0, %%ebx\n"
 298        "1:\n"
 299        "       pop     %%ebp\n"
 300        : "=a" (flags), "=b" (bad_arg)
 301        :
 302        : "cx", "dx", "si", "di"
 303        );
 304
 305        if (kernel_is_64bit) {
 306                memset(&regs64, 0x77, sizeof(regs64));
 307                call64_from_32(get_regs64);
 308                /*print_regs64();*/
 309        }
 310
 311        /*
 312         * On paravirt kernels, flags are not preserved across syscalls.
 313         * Thus, we do not consider it a bug if some are changed.
 314         * We just show ones which do.
 315         */
 316        if ((0x200ed7 ^ flags) != 0) {
 317                print_flags("[WARN]\tFlags before", 0x200ed7);
 318                print_flags("[WARN]\tFlags  after", flags);
 319                print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
 320        }
 321
 322        if (bad_arg) {
 323                printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
 324                return 1;
 325        }
 326        printf("[OK]\tArguments are preserved across syscall\n");
 327
 328        return check_regs64();
 329}
 330
 331int run_syscall_twice()
 332{
 333        int exitcode = 0;
 334        long sv;
 335
 336        if (syscall_addr) {
 337                printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
 338                exitcode = run_syscall();
 339        }
 340        sv = syscall_addr;
 341        syscall_addr = (long)&int80;
 342        printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
 343        exitcode += run_syscall();
 344        syscall_addr = sv;
 345        return exitcode;
 346}
 347
 348void ptrace_me()
 349{
 350        pid_t pid;
 351
 352        fflush(NULL);
 353        pid = fork();
 354        if (pid < 0)
 355                exit(1);
 356        if (pid == 0) {
 357                /* child */
 358                if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
 359                        exit(0);
 360                raise(SIGSTOP);
 361                return;
 362        }
 363        /* parent */
 364        printf("[RUN]\tRunning tests under ptrace\n");
 365        while (1) {
 366                int status;
 367                pid = waitpid(-1, &status, __WALL);
 368                if (WIFEXITED(status))
 369                        exit(WEXITSTATUS(status));
 370                if (WIFSIGNALED(status))
 371                        exit(WTERMSIG(status));
 372                if (pid <= 0 || !WIFSTOPPED(status)) /* paranoia */
 373                        exit(255);
 374                /*
 375                 * Note: we do not inject sig = WSTOPSIG(status).
 376                 * We probably should, but careful: do not inject SIGTRAP
 377                 * generated by syscall entry/exit stops.
 378                 * That kills the child.
 379                 */
 380                ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
 381        }
 382}
 383
 384int main(int argc, char **argv, char **envp)
 385{
 386        int exitcode = 0;
 387        int cs;
 388
 389        asm("\n"
 390        "       movl    %%cs, %%eax\n"
 391        : "=a" (cs)
 392        );
 393        kernel_is_64bit = (cs == 0x23);
 394        if (!kernel_is_64bit)
 395                printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");
 396
 397        /* This only works for non-static builds:
 398         * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
 399         */
 400        syscall_addr = get_syscall(envp);
 401
 402        exitcode += run_syscall_twice();
 403        ptrace_me();
 404        exitcode += run_syscall_twice();
 405
 406        return exitcode;
 407}
 408#endif
 409