/*
 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
 * Copyright (c) 2014-2015 Andrew Lutomirski
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * This is a series of tests that exercises the sigreturn(2) syscall and
 * the IRET / SYSRET paths in the kernel.
 *
 * For now, this focuses on the effects of unusual CS and SS values,
 * and it has a bunch of tests to make sure that ESP/RSP is restored
 * properly.
 *
 * The basic idea behind these tests is to raise(SIGUSR1) to create a
 * sigcontext frame, plug in the values to be tested, and then return,
 * which implicitly invokes sigreturn(2) and programs the user context
 * as desired.
 *
 * For tests for which we expect sigreturn and the subsequent return to
 * user mode to succeed, we return to a short trampoline that generates
 * SIGTRAP so that the meat of the tests can be ordinary C code in a
 * SIGTRAP handler.
 *
 * The inner workings of each test are documented below.
 *
 * Do not run this on outdated, unpatched kernels: some of these tests
 * can crash them.
 */

#define _GNU_SOURCE

#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <sys/signal.h>
#include <sys/ucontext.h>
#include <asm/ldt.h>
#include <err.h>
#include <setjmp.h>
#include <stddef.h>
#include <stdbool.h>
#include <sys/ptrace.h>
#include <sys/user.h>

/* Pull in AR_xyz defines. */
typedef unsigned int u32;
typedef unsigned short u16;
#include "../../../../arch/x86/include/asm/desc_defs.h"

/*
 * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
 * headers.
 */
#ifdef __x86_64__
/*
 * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
 * kernels that save SS in the sigcontext.  All kernels that set
 * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
 * regardless of SS (i.e. they implement espfix).
 *
 * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
 * when delivering a signal that came from 64-bit code.
 *
 * Sigreturn restores SS as follows:
 *
 * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
 *     saved CS is not 64-bit)
 *         new SS = saved SS  (will fail IRET and signal if invalid)
 * else
 *         new SS = a flat 32-bit data segment
 */
#define UC_SIGCONTEXT_SS       0x2
#define UC_STRICT_RESTORE_SS   0x4
#endif
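
/*
 * A concrete illustration of the rule above (assuming the flat 32-bit
 * data segment substituted by the kernel is __USER_DS): if a handler
 * zeroes the saved SS and clears UC_STRICT_RESTORE_SS while the saved
 * CS is 64-bit, sigreturn silently substitutes the flat data segment
 * and the return succeeds; if UC_STRICT_RESTORE_SS is left set, the
 * bogus SS is used as-is, IRET faults, and the kernel delivers a
 * signal.  test_nonstrict_ss() below exercises the first case.
 */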

/*
 * In principle, this test can run on Linux emulation layers (e.g.
 * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
 * entries 0-5 for their own internal purposes, so start our LDT
 * allocations above that reservation.  (The tests don't pass on LX
 * branded zones, but at least this lets them run.)
 */
#define LDT_OFFSET 6

/* An aligned stack accessible through some of our segments. */
static unsigned char stack16[65536] __attribute__((aligned(4096)));

/*
 * An aligned int3 instruction used as a trampoline.  Some of the tests
 * want to fish out their ss values, so this trampoline copies ss to ecx
 * before the int3.
 */
asm (".pushsection .text\n\t"
     ".type int3, @function\n\t"
     ".align 4096\n\t"
     "int3:\n\t"
     "mov %ss,%ecx\n\t"
     "int3\n\t"
     ".size int3, . - int3\n\t"
     ".align 4096, 0xcc\n\t"
     ".popsection");
extern char int3[4096];
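
/*
 * Note: the trampoline is padded out to exactly one 4096-byte page (the
 * 0xcc fill used by .align is itself an int3 opcode), so the 16-bit and
 * not-present code segments built below can use base == int3 with
 * limit == 4095 and still cover the whole symbol; jumping to offset 0
 * in those segments lands on the trampoline.
 */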

/*
 * At startup, we prepare:
 *
 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
 *   descriptor or out of bounds).
 * - code16_sel: A 16-bit LDT code segment pointing to int3.
 * - data16_sel: A 16-bit LDT data segment pointing to stack16.
 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
 *   stack16.
 *
 * For no particularly good reason, xyz_sel is a selector value with the
 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
 * descriptor table.  These variables will be zero if their respective
 * segments could not be allocated.
 */
static unsigned short ldt_nonexistent_sel;
static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;

static unsigned short gdt_data16_idx, gdt_npdata32_idx;

static unsigned short GDT3(int idx)
{
        return (idx << 3) | 3;
}

static unsigned short LDT3(int idx)
{
        return (idx << 3) | 7;
}
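
/*
 * For example: GDT3(3) == 0x1b (index 3, TI=0 for the GDT, RPL=3) and
 * LDT3(6) == 0x37 (index 6, TI=1 for the LDT, RPL=3); the low three
 * selector bits are the table indicator and the requested privilege
 * level.
 */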

/* Our sigaltstack scratch space. */
static char altstack_data[SIGSTKSZ];

static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
                       int flags)
{
        struct sigaction sa;
        memset(&sa, 0, sizeof(sa));
        sa.sa_sigaction = handler;
        sa.sa_flags = SA_SIGINFO | flags;
        sigemptyset(&sa.sa_mask);
        if (sigaction(sig, &sa, 0))
                err(1, "sigaction");
}

static void clearhandler(int sig)
{
        struct sigaction sa;
        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = SIG_DFL;
        sigemptyset(&sa.sa_mask);
        if (sigaction(sig, &sa, 0))
                err(1, "sigaction");
}

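/*
 * Helper for installing test descriptors: modify_ldt(2) with func == 1
 * writes *desc into LDT slot desc->entry_number.  On success we record
 * the matching RPL-3 LDT selector; on failure we leave the variable 0
 * so the tests that need the segment can be skipped.
 */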
static void add_ldt(const struct user_desc *desc, unsigned short *var,
                    const char *name)
{
        if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
                *var = LDT3(desc->entry_number);
        } else {
                printf("[NOTE]\tFailed to create %s segment\n", name);
                *var = 0;
        }
}

static void setup_ldt(void)
{
        if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
                errx(1, "stack16 is too high\n");
        if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
                errx(1, "int3 is too high\n");

        ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);

        const struct user_desc code16_desc = {
                .entry_number    = LDT_OFFSET + 0,
                .base_addr       = (unsigned long)int3,
                .limit           = 4095,
                .seg_32bit       = 0,
                .contents        = 2, /* Code, not conforming */
                .read_exec_only  = 0,
                .limit_in_pages  = 0,
                .seg_not_present = 0,
                .useable         = 0
        };
        add_ldt(&code16_desc, &code16_sel, "code16");

        const struct user_desc data16_desc = {
                .entry_number    = LDT_OFFSET + 1,
                .base_addr       = (unsigned long)stack16,
                .limit           = 0xffff,
                .seg_32bit       = 0,
                .contents        = 0, /* Data, grow-up */
                .read_exec_only  = 0,
                .limit_in_pages  = 0,
                .seg_not_present = 0,
                .useable         = 0
        };
        add_ldt(&data16_desc, &data16_sel, "data16");

        const struct user_desc npcode32_desc = {
                .entry_number    = LDT_OFFSET + 3,
                .base_addr       = (unsigned long)int3,
                .limit           = 4095,
                .seg_32bit       = 1,
                .contents        = 2, /* Code, not conforming */
                .read_exec_only  = 0,
                .limit_in_pages  = 0,
                .seg_not_present = 1,
                .useable         = 0
        };
        add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");

        const struct user_desc npdata32_desc = {
                .entry_number    = LDT_OFFSET + 4,
                .base_addr       = (unsigned long)stack16,
                .limit           = 0xffff,
                .seg_32bit       = 1,
                .contents        = 0, /* Data, grow-up */
                .read_exec_only  = 0,
                .limit_in_pages  = 0,
                .seg_not_present = 1,
                .useable         = 0
        };
        add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");

        struct user_desc gdt_data16_desc = {
                .entry_number    = -1,
                .base_addr       = (unsigned long)stack16,
                .limit           = 0xffff,
                .seg_32bit       = 0,
                .contents        = 0, /* Data, grow-up */
                .read_exec_only  = 0,
                .limit_in_pages  = 0,
                .seg_not_present = 0,
                .useable         = 0
        };

        if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
                /*
                 * This probably indicates vulnerability to CVE-2014-8133.
                 * Merely getting here isn't definitive, though, and we'll
                 * diagnose the problem for real later on.
                 */
                printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
                       gdt_data16_desc.entry_number);
                gdt_data16_idx = gdt_data16_desc.entry_number;
        } else {
                printf("[OK]\tset_thread_area refused 16-bit data\n");
        }

        struct user_desc gdt_npdata32_desc = {
                .entry_number    = -1,
                .base_addr       = (unsigned long)stack16,
                .limit           = 0xffff,
                .seg_32bit       = 1,
                .contents        = 0, /* Data, grow-up */
                .read_exec_only  = 0,
                .limit_in_pages  = 0,
                .seg_not_present = 1,
                .useable         = 0
        };

        if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
                /*
                 * As a hardening measure, newer kernels don't allow this.
                 */
                printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
                       gdt_npdata32_desc.entry_number);
                gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
        } else {
                printf("[OK]\tset_thread_area refused not-present data\n");
        }
}

/* State used by our signal handlers. */
static gregset_t initial_regs, requested_regs, resulting_regs;

/* Instructions for the SIGUSR1 handler. */
static volatile unsigned short sig_cs, sig_ss;
static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
#ifdef __x86_64__
static volatile sig_atomic_t sig_corrupt_final_ss;
#endif

/* Abstractions for some 32-bit vs 64-bit differences. */
#ifdef __x86_64__
# define REG_IP REG_RIP
# define REG_SP REG_RSP
# define REG_CX REG_RCX

struct selectors {
        unsigned short cs, gs, fs, ss;
};

static unsigned short *ssptr(ucontext_t *ctx)
{
        struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
        return &sels->ss;
}

static unsigned short *csptr(ucontext_t *ctx)
{
        struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
        return &sels->cs;
}
#else
# define REG_IP REG_EIP
# define REG_SP REG_ESP
# define REG_CX REG_ECX

static greg_t *ssptr(ucontext_t *ctx)
{
        return &ctx->uc_mcontext.gregs[REG_SS];
}

static greg_t *csptr(ucontext_t *ctx)
{
        return &ctx->uc_mcontext.gregs[REG_CS];
}
#endif

/*
 * Checks a given selector for its code bitness or returns -1 if it's not
 * a usable code segment selector.
 */
int cs_bitness(unsigned short cs)
{
        uint32_t valid = 0, ar;
        asm ("lar %[cs], %[ar]\n\t"
             "jnz 1f\n\t"
             "mov $1, %[valid]\n\t"
             "1:"
             : [ar] "=r" (ar), [valid] "+rm" (valid)
             : [cs] "r" (cs));

        if (!valid)
                return -1;

        bool db = (ar & (1 << 22));
        bool l = (ar & (1 << 21));

        if (!(ar & (1 << 11)))
                return -1;      /* Not code. */

        if (l && !db)
                return 64;
        else if (!l && db)
                return 32;
        else if (!l && !db)
                return 16;
        else
                return -1;      /* Unknown bitness. */
}
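
/*
 * Note on the LAR-based checks above and below: on success, LAR sets ZF
 * and returns the descriptor's access-rights bytes in place, so bit 11
 * is the code/data bit of the type field, bit 21 is the L (long mode)
 * bit, and bit 22 is the D/B bit; on failure (e.g. a null or
 * nonexistent selector) ZF is clear and the jnz skips setting 'valid'.
 */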

/*
 * Checks whether a given selector would be usable as SS, i.e. whether it
 * refers to a present, read/write data segment.
 */
bool is_valid_ss(unsigned short cs)
{
        uint32_t valid = 0, ar;
        asm ("lar %[cs], %[ar]\n\t"
             "jnz 1f\n\t"
             "mov $1, %[valid]\n\t"
             "1:"
             : [ar] "=r" (ar), [valid] "+rm" (valid)
             : [cs] "r" (cs));

        if (!valid)
                return false;

        if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
            (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
                return false;

        return (ar & AR_P);
}

/* Number of errors in the current test case. */
static volatile sig_atomic_t nerrs;

static void validate_signal_ss(int sig, ucontext_t *ctx)
{
#ifdef __x86_64__
        bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);

        if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
                printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
                nerrs++;

                /*
                 * This happens on Linux 4.1.  The rest will fail, too, so
                 * return now to reduce the noise.
                 */
                return;
        }

        /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
        if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
                printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
                       sig);
                nerrs++;
        }

        if (is_valid_ss(*ssptr(ctx))) {
                /*
                 * DOSEMU was written before 64-bit sigcontext had SS, and
                 * it tries to figure out the signal source SS by looking at
                 * the physical register.  Make sure that keeps working.
                 */
                unsigned short hw_ss;
                asm ("mov %%ss, %0" : "=rm" (hw_ss));
                if (hw_ss != *ssptr(ctx)) {
                        printf("[FAIL]\tHW SS didn't match saved SS\n");
                        nerrs++;
                }
        }
#endif
}

/*
 * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
 * int3 trampoline.  Sets SP to a large known value so that we can see
 * whether the value round-trips back to user mode correctly.
 */
static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
{
        ucontext_t *ctx = (ucontext_t*)ctx_void;

        validate_signal_ss(sig, ctx);

        memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));

        *csptr(ctx) = sig_cs;
        *ssptr(ctx) = sig_ss;

        ctx->uc_mcontext.gregs[REG_IP] =
                sig_cs == code16_sel ? 0 : (unsigned long)&int3;
        ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
        ctx->uc_mcontext.gregs[REG_CX] = 0;

        memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
        requested_regs[REG_CX] = *ssptr(ctx);   /* The asm code does this. */

        return;
}

/*
 * Called after a successful sigreturn (via int3) or from a failed
 * sigreturn (directly by kernel).  Restores our state so that the
 * original raise(SIGUSR1) returns.
 */
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
        ucontext_t *ctx = (ucontext_t*)ctx_void;

        validate_signal_ss(sig, ctx);

        sig_err = ctx->uc_mcontext.gregs[REG_ERR];
        sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];

        unsigned short ss;
        asm ("mov %%ss,%0" : "=r" (ss));

        greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
        if (asm_ss != sig_ss && sig == SIGTRAP) {
                /* Sanity check failure. */
                printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, cx = %llx\n",
                       ss, *ssptr(ctx), (unsigned long long)asm_ss);
                nerrs++;
        }

        memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
        memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));

#ifdef __x86_64__
        if (sig_corrupt_final_ss) {
                if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
                        printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
                        nerrs++;
                } else {
                        /*
                         * DOSEMU transitions from 32-bit to 64-bit mode by
                         * adjusting sigcontext, and it requires that this work
                         * even if the saved SS is bogus.
                         */
                        printf("\tCorrupting SS on return to 64-bit mode\n");
                        *ssptr(ctx) = 0;
                }
        }
#endif

        sig_trapped = sig;
}

#ifdef __x86_64__
/* Tests recovery if !UC_STRICT_RESTORE_SS */
static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
{
        ucontext_t *ctx = (ucontext_t*)ctx_void;

        if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
                printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
                nerrs++;
                return;  /* We can't do the rest. */
        }

        ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
        *ssptr(ctx) = 0;

        /* Return.  The kernel should recover without sending another signal. */
}

static int test_nonstrict_ss(void)
{
        clearhandler(SIGUSR1);
        clearhandler(SIGTRAP);
        clearhandler(SIGSEGV);
        clearhandler(SIGILL);
        sethandler(SIGUSR2, sigusr2, 0);

        nerrs = 0;

        printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
        raise(SIGUSR2);
        if (!nerrs)
                printf("[OK]\tIt worked\n");

        return nerrs;
}
#endif

/* Finds a usable code segment of the requested bitness. */
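/*
 * The +/- (2 << 3) probes below lean on the common Linux GDT layout, in
 * which the 64-bit and 32-bit user code segments sit two descriptor
 * slots apart (__USER_CS vs __USER32_CS), so nudging the current CS
 * selector by two entries will often find a code segment of the other
 * bitness; this is only a heuristic, and cs_bitness() verifies the
 * result before it is used.
 */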
int find_cs(int bitness)
{
        unsigned short my_cs;

        asm ("mov %%cs,%0" : "=r" (my_cs));

        if (cs_bitness(my_cs) == bitness)
                return my_cs;
        if (cs_bitness(my_cs + (2 << 3)) == bitness)
                return my_cs + (2 << 3);
        if (my_cs > (2 << 3) && cs_bitness(my_cs - (2 << 3)) == bitness)
                return my_cs - (2 << 3);
        if (cs_bitness(code16_sel) == bitness)
                return code16_sel;

        printf("[WARN]\tCould not find %d-bit CS\n", bitness);
        return -1;
}

static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
{
        int cs = find_cs(cs_bits);
        if (cs == -1) {
                printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
                       cs_bits, use_16bit_ss ? 16 : 32);
                return 0;
        }

        if (force_ss != -1) {
                sig_ss = force_ss;
        } else {
                if (use_16bit_ss) {
                        if (!data16_sel) {
                                printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
                                       cs_bits);
                                return 0;
                        }
                        sig_ss = data16_sel;
                } else {
                        asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
                }
        }

        sig_cs = cs;

        printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
               cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
               (sig_ss & 4) ? "" : ", GDT");

        raise(SIGUSR1);

        nerrs = 0;

        /*
         * Check that each register had an acceptable value when the
         * int3 trampoline was invoked.
         */
        for (int i = 0; i < NGREG; i++) {
                greg_t req = requested_regs[i], res = resulting_regs[i];

                if (i == REG_TRAPNO || i == REG_IP)
                        continue;       /* don't care */

                if (i == REG_SP) {
                        /*
                         * If we were using a 16-bit stack segment, then
                         * the kernel is a bit stuck: IRET only restores
                         * the low 16 bits of ESP/RSP if SS is 16-bit.
                         * The kernel uses a hack to restore bits 31:16,
                         * but that hack doesn't help with bits 63:32.
                         * On Intel CPUs, bits 63:32 end up zeroed, and, on
                         * AMD CPUs, they leak the high bits of the kernel
                         * espfix64 stack pointer.  There's very little that
                         * the kernel can do about it.
                         *
                         * Similarly, if we are returning to a 32-bit context,
                         * the CPU will often lose the high 32 bits of RSP.
                         */

                        if (res == req)
                                continue;

                        if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
                                printf("[NOTE]\tSP: %llx -> %llx\n",
                                       (unsigned long long)req,
                                       (unsigned long long)res);
                                continue;
                        }

                        printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
                               (unsigned long long)requested_regs[i],
                               (unsigned long long)resulting_regs[i]);
                        nerrs++;
                        continue;
                }

                bool ignore_reg = false;
#if __i386__
                if (i == REG_UESP)
                        ignore_reg = true;
#else
                if (i == REG_CSGSFS) {
                        struct selectors *req_sels =
                                (void *)&requested_regs[REG_CSGSFS];
                        struct selectors *res_sels =
                                (void *)&resulting_regs[REG_CSGSFS];
                        if (req_sels->cs != res_sels->cs) {
                                printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
                                       req_sels->cs, res_sels->cs);
                                nerrs++;
                        }

                        if (req_sels->ss != res_sels->ss) {
                                printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
                                       req_sels->ss, res_sels->ss);
                                nerrs++;
                        }

                        continue;
                }
#endif

                /* Sanity check on the kernel */
                if (i == REG_CX && req != res) {
                        printf("[FAIL]\tCX (saved SS) mismatch: requested 0x%llx; got 0x%llx\n",
                               (unsigned long long)req,
                               (unsigned long long)res);
                        nerrs++;
                        continue;
                }

                if (req != res && !ignore_reg) {
                        printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
                               i, (unsigned long long)req,
                               (unsigned long long)res);
                        nerrs++;
                }
        }

        if (nerrs == 0)
                printf("[OK]\tall registers okay\n");

        return nerrs;
}

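/*
 * For reference when reading the decode below: a selector error code
 * packs EXT in bit 0, the table indicator in bits 2:1, and the selector
 * index in bits 15:3, which is what the sig_err unpacking in
 * test_bad_iret() prints.
 */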
static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
{
        int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
        if (cs == -1)
                return 0;

        sig_cs = cs;
        sig_ss = ss;

        printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
               cs_bits, sig_cs, sig_ss);

        sig_trapped = 0;
        raise(SIGUSR1);
        if (sig_trapped) {
                char errdesc[32] = "";
                if (sig_err) {
                        const char *src = (sig_err & 1) ? " EXT" : "";
                        const char *table;
                        if ((sig_err & 0x6) == 0x0)
                                table = "GDT";
                        else if ((sig_err & 0x6) == 0x4)
                                table = "LDT";
                        else if ((sig_err & 0x6) == 0x2)
                                table = "IDT";
                        else
                                table = "???";

                        sprintf(errdesc, "%s%s index %d, ",
                                table, src, sig_err >> 3);
                }

                char trapname[32];
                if (sig_trapno == 13)
                        strcpy(trapname, "GP");
                else if (sig_trapno == 11)
                        strcpy(trapname, "NP");
                else if (sig_trapno == 12)
                        strcpy(trapname, "SS");
                else if (sig_trapno == 32)
                        strcpy(trapname, "IRET");  /* X86_TRAP_IRET */
                else
                        sprintf(trapname, "%d", sig_trapno);

                printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
                       trapname, (unsigned long)sig_err,
                       errdesc, strsignal(sig_trapped));
                return 0;
        } else {
                /*
                 * This also implicitly tests UC_STRICT_RESTORE_SS:
                 * We check that these signals set UC_STRICT_RESTORE_SS and,
                 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
                 * then we won't get SIGSEGV.
                 */
                printf("[FAIL]\tDid not get SIGSEGV\n");
                return 1;
        }
}

int main()
{
        int total_nerrs = 0;
        unsigned short my_cs, my_ss;

        asm volatile ("mov %%cs,%0" : "=r" (my_cs));
        asm volatile ("mov %%ss,%0" : "=r" (my_ss));
        setup_ldt();

        stack_t stack = {
                .ss_sp = altstack_data,
                .ss_size = SIGSTKSZ,
        };
        if (sigaltstack(&stack, NULL) != 0)
                err(1, "sigaltstack");

        sethandler(SIGUSR1, sigusr1, 0);
        sethandler(SIGTRAP, sigtrap, SA_ONSTACK);

        /* Easy cases: return to a 32-bit SS in each possible CS bitness. */
        total_nerrs += test_valid_sigreturn(64, false, -1);
        total_nerrs += test_valid_sigreturn(32, false, -1);
        total_nerrs += test_valid_sigreturn(16, false, -1);

        /*
         * Test easy espfix cases: return to a 16-bit LDT SS in each possible
         * CS bitness.  NB: with a long mode CS, the SS bitness is irrelevant.
         *
         * This catches the original missing-espfix-on-64-bit-kernels issue
         * as well as CVE-2014-8134.
         */
        total_nerrs += test_valid_sigreturn(64, true, -1);
        total_nerrs += test_valid_sigreturn(32, true, -1);
        total_nerrs += test_valid_sigreturn(16, true, -1);

        if (gdt_data16_idx) {
                /*
                 * For performance reasons, Linux skips espfix if SS points
                 * to the GDT.  If we were able to allocate a 16-bit SS in
                 * the GDT, see if it leaks parts of the kernel stack pointer.
                 *
                 * This tests for CVE-2014-8133.
                 */
                total_nerrs += test_valid_sigreturn(64, true,
                                                    GDT3(gdt_data16_idx));
                total_nerrs += test_valid_sigreturn(32, true,
                                                    GDT3(gdt_data16_idx));
                total_nerrs += test_valid_sigreturn(16, true,
                                                    GDT3(gdt_data16_idx));
        }

#ifdef __x86_64__
        /* Nasty ABI case: check SS corruption handling. */
        sig_corrupt_final_ss = 1;
        total_nerrs += test_valid_sigreturn(32, false, -1);
        total_nerrs += test_valid_sigreturn(32, true, -1);
        sig_corrupt_final_ss = 0;
#endif

        /*
         * We're done testing valid sigreturn cases.  Now we test states
         * for which sigreturn itself will succeed but the subsequent
         * entry to user mode will fail.
         *
         * Depending on the failure mode and the kernel bitness, these
         * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
         */
        clearhandler(SIGTRAP);
        sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
        sethandler(SIGBUS, sigtrap, SA_ONSTACK);
        sethandler(SIGILL, sigtrap, SA_ONSTACK);  /* 32-bit kernels do this */

        /* Easy failures: invalid SS, resulting in #GP(0) */
        test_bad_iret(64, ldt_nonexistent_sel, -1);
        test_bad_iret(32, ldt_nonexistent_sel, -1);
        test_bad_iret(16, ldt_nonexistent_sel, -1);

        /* These fail because SS isn't a data segment, resulting in #GP(SS) */
        test_bad_iret(64, my_cs, -1);
        test_bad_iret(32, my_cs, -1);
        test_bad_iret(16, my_cs, -1);

        /* Try to return to a not-present code segment, triggering #NP(CS). */
        test_bad_iret(32, my_ss, npcode32_sel);

        /*
         * Try to return to a not-present but otherwise valid data segment.
         * This will cause IRET to fail with #SS on the espfix stack.  This
         * exercises CVE-2014-9322.
         *
         * Note that, if espfix is enabled, 64-bit Linux will lose track
         * of the actual cause of failure and report #GP(0) instead.
         * This would be very difficult for Linux to avoid, because
         * espfix64 causes IRET failures to be promoted to #DF, so the
         * original exception frame is never pushed onto the stack.
         */
        test_bad_iret(32, npdata32_sel, -1);

        /*
         * Try to return to a not-present but otherwise valid data
         * segment without invoking espfix.  Newer kernels don't allow
         * this to happen in the first place.  On older kernels, though,
         * this can trigger CVE-2014-9322.
         */
        if (gdt_npdata32_idx)
                test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);

#ifdef __x86_64__
        total_nerrs += test_nonstrict_ss();
#endif

        return total_nerrs ? 1 : 0;
}