linux/tools/testing/selftests/x86/sigreturn.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
   4 * Copyright (c) 2014-2015 Andrew Lutomirski
   5 *
   6 * This is a series of tests that exercises the sigreturn(2) syscall and
   7 * the IRET / SYSRET paths in the kernel.
   8 *
   9 * For now, this focuses on the effects of unusual CS and SS values,
  10 * and it has a bunch of tests to make sure that ESP/RSP is restored
  11 * properly.
  12 *
  13 * The basic idea behind these tests is to raise(SIGUSR1) to create a
  14 * sigcontext frame, plug in the values to be tested, and then return,
  15 * which implicitly invokes sigreturn(2) and programs the user context
  16 * as desired.
  17 *
  18 * For tests for which we expect sigreturn and the subsequent return to
  19 * user mode to succeed, we return to a short trampoline that generates
  20 * SIGTRAP so that the meat of the tests can be ordinary C code in a
  21 * SIGTRAP handler.
  22 *
  23 * The inner workings of each test are documented below.
  24 *
  25 * Do not run on outdated, unpatched kernels at risk of nasty crashes.
  26 */
  27
  28#define _GNU_SOURCE
  29
  30#include <sys/time.h>
  31#include <time.h>
  32#include <stdlib.h>
  33#include <sys/syscall.h>
  34#include <unistd.h>
  35#include <stdio.h>
  36#include <string.h>
  37#include <inttypes.h>
  38#include <sys/mman.h>
  39#include <sys/signal.h>
  40#include <sys/ucontext.h>
  41#include <asm/ldt.h>
  42#include <err.h>
  43#include <setjmp.h>
  44#include <stddef.h>
  45#include <stdbool.h>
  46#include <sys/ptrace.h>
  47#include <sys/user.h>
  48
  49/* Pull in AR_xyz defines. */
  50typedef unsigned int u32;
  51typedef unsigned short u16;
  52#include "../../../../arch/x86/include/asm/desc_defs.h"
  53
  54/*
  55 * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
  56 * headers.
  57 */
  58#ifdef __x86_64__
  59/*
  60 * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
  61 * kernels that save SS in the sigcontext.  All kernels that set
  62 * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
  63 * regardless of SS (i.e. they implement espfix).
  64 *
  65 * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
  66 * when delivering a signal that came from 64-bit code.
  67 *
  68 * Sigreturn restores SS as follows:
  69 *
  70 * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
  71 *     saved CS is not 64-bit)
  72 *         new SS = saved SS  (will fail IRET and signal if invalid)
  73 * else
  74 *         new SS = a flat 32-bit data segment
  75 */
  76#define UC_SIGCONTEXT_SS       0x2
  77#define UC_STRICT_RESTORE_SS   0x4
  78#endif
  79
  80/*
  81 * In principle, this test can run on Linux emulation layers (e.g.
  82 * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
  83 * entries 0-5 for their own internal purposes, so start our LDT
  84 * allocations above that reservation.  (The tests don't pass on LX
  85 * branded zones, but at least this lets them run.)
  86 */
  87#define LDT_OFFSET 6
  88
  89/* An aligned stack accessible through some of our segments. */
  90static unsigned char stack16[65536] __attribute__((aligned(4096)));
  91
/*
 * An aligned int3 instruction used as a trampoline.  Some of the tests
 * want to fish out their ss values, so this trampoline copies ss to ecx
 * before the int3.
 *
 * The code is emitted from a file-scope asm statement so that it can be
 * placed on its own 4096-byte-aligned region of .text; the C side only
 * sees it through the "int3" array declaration below.
 */
asm (".pushsection .text\n\t"
     ".type int3, @function\n\t"
     ".align 4096\n\t"                  /* start on a 4096-byte boundary */
     "int3:\n\t"
     "mov %ss,%ecx\n\t"                 /* expose the live SS in ecx */
     "int3\n\t"                         /* trap back into the test */
     ".size int3, . - int3\n\t"
     ".align 4096, 0xcc\n\t"            /* pad up to the next boundary with 0xcc */
     ".popsection");
extern char int3[4096];
 107
/*
 * At startup, we prepare:
 *
 * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
 *   descriptor or out of bounds).
 * - code16_sel: A 16-bit LDT code segment pointing to int3.
 * - data16_sel: A 16-bit LDT data segment pointing to stack16.
 * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
 * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
 * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
 * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
 *   stack16.
 *
 * For no particularly good reason, xyz_sel is a selector value with the
 * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
 * descriptor table.  These variables will be zero if their respective
 * segments could not be allocated.
 */
static unsigned short ldt_nonexistent_sel;
static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;

static unsigned short gdt_data16_idx, gdt_npdata32_idx;
 130
 131static unsigned short GDT3(int idx)
 132{
 133        return (idx << 3) | 3;
 134}
 135
 136static unsigned short LDT3(int idx)
 137{
 138        return (idx << 3) | 7;
 139}
 140
 141static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
 142                       int flags)
 143{
 144        struct sigaction sa;
 145        memset(&sa, 0, sizeof(sa));
 146        sa.sa_sigaction = handler;
 147        sa.sa_flags = SA_SIGINFO | flags;
 148        sigemptyset(&sa.sa_mask);
 149        if (sigaction(sig, &sa, 0))
 150                err(1, "sigaction");
 151}
 152
 153static void clearhandler(int sig)
 154{
 155        struct sigaction sa;
 156        memset(&sa, 0, sizeof(sa));
 157        sa.sa_handler = SIG_DFL;
 158        sigemptyset(&sa.sa_mask);
 159        if (sigaction(sig, &sa, 0))
 160                err(1, "sigaction");
 161}
 162
 163static void add_ldt(const struct user_desc *desc, unsigned short *var,
 164                    const char *name)
 165{
 166        if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
 167                *var = LDT3(desc->entry_number);
 168        } else {
 169                printf("[NOTE]\tFailed to create %s segment\n", name);
 170                *var = 0;
 171        }
 172}
 173
 174static void setup_ldt(void)
 175{
 176        if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
 177                errx(1, "stack16 is too high\n");
 178        if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
 179                errx(1, "int3 is too high\n");
 180
 181        ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
 182
 183        const struct user_desc code16_desc = {
 184                .entry_number    = LDT_OFFSET + 0,
 185                .base_addr       = (unsigned long)int3,
 186                .limit           = 4095,
 187                .seg_32bit       = 0,
 188                .contents        = 2, /* Code, not conforming */
 189                .read_exec_only  = 0,
 190                .limit_in_pages  = 0,
 191                .seg_not_present = 0,
 192                .useable         = 0
 193        };
 194        add_ldt(&code16_desc, &code16_sel, "code16");
 195
 196        const struct user_desc data16_desc = {
 197                .entry_number    = LDT_OFFSET + 1,
 198                .base_addr       = (unsigned long)stack16,
 199                .limit           = 0xffff,
 200                .seg_32bit       = 0,
 201                .contents        = 0, /* Data, grow-up */
 202                .read_exec_only  = 0,
 203                .limit_in_pages  = 0,
 204                .seg_not_present = 0,
 205                .useable         = 0
 206        };
 207        add_ldt(&data16_desc, &data16_sel, "data16");
 208
 209        const struct user_desc npcode32_desc = {
 210                .entry_number    = LDT_OFFSET + 3,
 211                .base_addr       = (unsigned long)int3,
 212                .limit           = 4095,
 213                .seg_32bit       = 1,
 214                .contents        = 2, /* Code, not conforming */
 215                .read_exec_only  = 0,
 216                .limit_in_pages  = 0,
 217                .seg_not_present = 1,
 218                .useable         = 0
 219        };
 220        add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
 221
 222        const struct user_desc npdata32_desc = {
 223                .entry_number    = LDT_OFFSET + 4,
 224                .base_addr       = (unsigned long)stack16,
 225                .limit           = 0xffff,
 226                .seg_32bit       = 1,
 227                .contents        = 0, /* Data, grow-up */
 228                .read_exec_only  = 0,
 229                .limit_in_pages  = 0,
 230                .seg_not_present = 1,
 231                .useable         = 0
 232        };
 233        add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
 234
 235        struct user_desc gdt_data16_desc = {
 236                .entry_number    = -1,
 237                .base_addr       = (unsigned long)stack16,
 238                .limit           = 0xffff,
 239                .seg_32bit       = 0,
 240                .contents        = 0, /* Data, grow-up */
 241                .read_exec_only  = 0,
 242                .limit_in_pages  = 0,
 243                .seg_not_present = 0,
 244                .useable         = 0
 245        };
 246
 247        if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
 248                /*
 249                 * This probably indicates vulnerability to CVE-2014-8133.
 250                 * Merely getting here isn't definitive, though, and we'll
 251                 * diagnose the problem for real later on.
 252                 */
 253                printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
 254                       gdt_data16_desc.entry_number);
 255                gdt_data16_idx = gdt_data16_desc.entry_number;
 256        } else {
 257                printf("[OK]\tset_thread_area refused 16-bit data\n");
 258        }
 259
 260        struct user_desc gdt_npdata32_desc = {
 261                .entry_number    = -1,
 262                .base_addr       = (unsigned long)stack16,
 263                .limit           = 0xffff,
 264                .seg_32bit       = 1,
 265                .contents        = 0, /* Data, grow-up */
 266                .read_exec_only  = 0,
 267                .limit_in_pages  = 0,
 268                .seg_not_present = 1,
 269                .useable         = 0
 270        };
 271
 272        if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
 273                /*
 274                 * As a hardening measure, newer kernels don't allow this.
 275                 */
 276                printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
 277                       gdt_npdata32_desc.entry_number);
 278                gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
 279        } else {
 280                printf("[OK]\tset_thread_area refused 16-bit data\n");
 281        }
 282}
 283
/*
 * State used by our signal handlers.
 *
 * - initial_regs: the registers saved by sigusr1 on entry, before it
 *   edits the frame; sigtrap copies them back so that the original
 *   raise(SIGUSR1) can return.
 * - requested_regs: the register values sigusr1 asked sigreturn to load.
 * - resulting_regs: the register values sigtrap found in its own frame.
 */
static gregset_t initial_regs, requested_regs, resulting_regs;

/*
 * sig_cs and sig_ss are instructions for the SIGUSR1 handler: the
 * selectors it should plug into the frame.  sig_trapped, sig_err and
 * sig_trapno are results written by sigtrap (the signal number and the
 * REG_ERR / REG_TRAPNO values from its frame).
 */
static volatile unsigned short sig_cs, sig_ss;
static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
#ifdef __x86_64__
/* When nonzero, sigtrap zeroes the saved SS before returning. */
static volatile sig_atomic_t sig_corrupt_final_ss;
#endif
 293
 294/* Abstractions for some 32-bit vs 64-bit differences. */
 295#ifdef __x86_64__
 296# define REG_IP REG_RIP
 297# define REG_SP REG_RSP
 298# define REG_CX REG_RCX
 299
 300struct selectors {
 301        unsigned short cs, gs, fs, ss;
 302};
 303
 304static unsigned short *ssptr(ucontext_t *ctx)
 305{
 306        struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
 307        return &sels->ss;
 308}
 309
 310static unsigned short *csptr(ucontext_t *ctx)
 311{
 312        struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
 313        return &sels->cs;
 314}
 315#else
 316# define REG_IP REG_EIP
 317# define REG_SP REG_ESP
 318# define REG_CX REG_ECX
 319
 320static greg_t *ssptr(ucontext_t *ctx)
 321{
 322        return &ctx->uc_mcontext.gregs[REG_SS];
 323}
 324
 325static greg_t *csptr(ucontext_t *ctx)
 326{
 327        return &ctx->uc_mcontext.gregs[REG_CS];
 328}
 329#endif
 330
 331/*
 332 * Checks a given selector for its code bitness or returns -1 if it's not
 333 * a usable code segment selector.
 334 */
 335int cs_bitness(unsigned short cs)
 336{
 337        uint32_t valid = 0, ar;
 338        asm ("lar %[cs], %[ar]\n\t"
 339             "jnz 1f\n\t"
 340             "mov $1, %[valid]\n\t"
 341             "1:"
 342             : [ar] "=r" (ar), [valid] "+rm" (valid)
 343             : [cs] "r" (cs));
 344
 345        if (!valid)
 346                return -1;
 347
 348        bool db = (ar & (1 << 22));
 349        bool l = (ar & (1 << 21));
 350
 351        if (!(ar & (1<<11)))
 352            return -1;  /* Not code. */
 353
 354        if (l && !db)
 355                return 64;
 356        else if (!l && db)
 357                return 32;
 358        else if (!l && !db)
 359                return 16;
 360        else
 361                return -1;      /* Unknown bitness. */
 362}
 363
 364/*
 365 * Checks a given selector for its code bitness or returns -1 if it's not
 366 * a usable code segment selector.
 367 */
 368bool is_valid_ss(unsigned short cs)
 369{
 370        uint32_t valid = 0, ar;
 371        asm ("lar %[cs], %[ar]\n\t"
 372             "jnz 1f\n\t"
 373             "mov $1, %[valid]\n\t"
 374             "1:"
 375             : [ar] "=r" (ar), [valid] "+rm" (valid)
 376             : [cs] "r" (cs));
 377
 378        if (!valid)
 379                return false;
 380
 381        if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
 382            (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
 383                return false;
 384
 385        return (ar & AR_P);
 386}
 387
 388/* Number of errors in the current test case. */
 389static volatile sig_atomic_t nerrs;
 390
 391static void validate_signal_ss(int sig, ucontext_t *ctx)
 392{
 393#ifdef __x86_64__
 394        bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);
 395
 396        if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
 397                printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
 398                nerrs++;
 399
 400                /*
 401                 * This happens on Linux 4.1.  The rest will fail, too, so
 402                 * return now to reduce the noise.
 403                 */
 404                return;
 405        }
 406
 407        /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
 408        if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
 409                printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
 410                       sig);
 411                nerrs++;
 412        }
 413
 414        if (is_valid_ss(*ssptr(ctx))) {
 415                /*
 416                 * DOSEMU was written before 64-bit sigcontext had SS, and
 417                 * it tries to figure out the signal source SS by looking at
 418                 * the physical register.  Make sure that keeps working.
 419                 */
 420                unsigned short hw_ss;
 421                asm ("mov %%ss, %0" : "=rm" (hw_ss));
 422                if (hw_ss != *ssptr(ctx)) {
 423                        printf("[FAIL]\tHW SS didn't match saved SS\n");
 424                        nerrs++;
 425                }
 426        }
 427#endif
 428}
 429
 430/*
 431 * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
 432 * int3 trampoline.  Sets SP to a large known value so that we can see
 433 * whether the value round-trips back to user mode correctly.
 434 */
 435static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
 436{
 437        ucontext_t *ctx = (ucontext_t*)ctx_void;
 438
 439        validate_signal_ss(sig, ctx);
 440
 441        memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
 442
 443        *csptr(ctx) = sig_cs;
 444        *ssptr(ctx) = sig_ss;
 445
 446        ctx->uc_mcontext.gregs[REG_IP] =
 447                sig_cs == code16_sel ? 0 : (unsigned long)&int3;
 448        ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
 449        ctx->uc_mcontext.gregs[REG_CX] = 0;
 450
 451#ifdef __i386__
 452        /*
 453         * Make sure the kernel doesn't inadvertently use DS or ES-relative
 454         * accesses in a region where user DS or ES is loaded.
 455         *
 456         * Skip this for 64-bit builds because long mode doesn't care about
 457         * DS and ES and skipping it increases test coverage a little bit,
 458         * since 64-bit kernels can still run the 32-bit build.
 459         */
 460        ctx->uc_mcontext.gregs[REG_DS] = 0;
 461        ctx->uc_mcontext.gregs[REG_ES] = 0;
 462#endif
 463
 464        memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
 465        requested_regs[REG_CX] = *ssptr(ctx);   /* The asm code does this. */
 466
 467        return;
 468}
 469
 470/*
 471 * Called after a successful sigreturn (via int3) or from a failed
 472 * sigreturn (directly by kernel).  Restores our state so that the
 473 * original raise(SIGUSR1) returns.
 474 */
 475static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
 476{
 477        ucontext_t *ctx = (ucontext_t*)ctx_void;
 478
 479        validate_signal_ss(sig, ctx);
 480
 481        sig_err = ctx->uc_mcontext.gregs[REG_ERR];
 482        sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
 483
 484        unsigned short ss;
 485        asm ("mov %%ss,%0" : "=r" (ss));
 486
 487        greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
 488        if (asm_ss != sig_ss && sig == SIGTRAP) {
 489                /* Sanity check failure. */
 490                printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
 491                       ss, *ssptr(ctx), (unsigned long long)asm_ss);
 492                nerrs++;
 493        }
 494
 495        memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
 496        memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
 497
 498#ifdef __x86_64__
 499        if (sig_corrupt_final_ss) {
 500                if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
 501                        printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
 502                        nerrs++;
 503                } else {
 504                        /*
 505                         * DOSEMU transitions from 32-bit to 64-bit mode by
 506                         * adjusting sigcontext, and it requires that this work
 507                         * even if the saved SS is bogus.
 508                         */
 509                        printf("\tCorrupting SS on return to 64-bit mode\n");
 510                        *ssptr(ctx) = 0;
 511                }
 512        }
 513#endif
 514
 515        sig_trapped = sig;
 516}
 517
 518#ifdef __x86_64__
 519/* Tests recovery if !UC_STRICT_RESTORE_SS */
 520static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
 521{
 522        ucontext_t *ctx = (ucontext_t*)ctx_void;
 523
 524        if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
 525                printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
 526                nerrs++;
 527                return;  /* We can't do the rest. */
 528        }
 529
 530        ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
 531        *ssptr(ctx) = 0;
 532
 533        /* Return.  The kernel should recover without sending another signal. */
 534}
 535
 536static int test_nonstrict_ss(void)
 537{
 538        clearhandler(SIGUSR1);
 539        clearhandler(SIGTRAP);
 540        clearhandler(SIGSEGV);
 541        clearhandler(SIGILL);
 542        sethandler(SIGUSR2, sigusr2, 0);
 543
 544        nerrs = 0;
 545
 546        printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
 547        raise(SIGUSR2);
 548        if (!nerrs)
 549                printf("[OK]\tIt worked\n");
 550
 551        return nerrs;
 552}
 553#endif
 554
 555/* Finds a usable code segment of the requested bitness. */
 556int find_cs(int bitness)
 557{
 558        unsigned short my_cs;
 559
 560        asm ("mov %%cs,%0" :  "=r" (my_cs));
 561
 562        if (cs_bitness(my_cs) == bitness)
 563                return my_cs;
 564        if (cs_bitness(my_cs + (2 << 3)) == bitness)
 565                return my_cs + (2 << 3);
 566        if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
 567            return my_cs - (2 << 3);
 568        if (cs_bitness(code16_sel) == bitness)
 569                return code16_sel;
 570
 571        printf("[WARN]\tCould not find %d-bit CS\n", bitness);
 572        return -1;
 573}
 574
 575static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
 576{
 577        int cs = find_cs(cs_bits);
 578        if (cs == -1) {
 579                printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
 580                       cs_bits, use_16bit_ss ? 16 : 32);
 581                return 0;
 582        }
 583
 584        if (force_ss != -1) {
 585                sig_ss = force_ss;
 586        } else {
 587                if (use_16bit_ss) {
 588                        if (!data16_sel) {
 589                                printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
 590                                       cs_bits);
 591                                return 0;
 592                        }
 593                        sig_ss = data16_sel;
 594                } else {
 595                        asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
 596                }
 597        }
 598
 599        sig_cs = cs;
 600
 601        printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
 602               cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
 603               (sig_ss & 4) ? "" : ", GDT");
 604
 605        raise(SIGUSR1);
 606
 607        nerrs = 0;
 608
 609        /*
 610         * Check that each register had an acceptable value when the
 611         * int3 trampoline was invoked.
 612         */
 613        for (int i = 0; i < NGREG; i++) {
 614                greg_t req = requested_regs[i], res = resulting_regs[i];
 615
 616                if (i == REG_TRAPNO || i == REG_IP)
 617                        continue;       /* don't care */
 618
 619                if (i == REG_SP) {
 620                        /*
 621                         * If we were using a 16-bit stack segment, then
 622                         * the kernel is a bit stuck: IRET only restores
 623                         * the low 16 bits of ESP/RSP if SS is 16-bit.
 624                         * The kernel uses a hack to restore bits 31:16,
 625                         * but that hack doesn't help with bits 63:32.
 626                         * On Intel CPUs, bits 63:32 end up zeroed, and, on
 627                         * AMD CPUs, they leak the high bits of the kernel
 628                         * espfix64 stack pointer.  There's very little that
 629                         * the kernel can do about it.
 630                         *
 631                         * Similarly, if we are returning to a 32-bit context,
 632                         * the CPU will often lose the high 32 bits of RSP.
 633                         */
 634
 635                        if (res == req)
 636                                continue;
 637
 638                        if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
 639                                printf("[NOTE]\tSP: %llx -> %llx\n",
 640                                       (unsigned long long)req,
 641                                       (unsigned long long)res);
 642                                continue;
 643                        }
 644
 645                        printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
 646                               (unsigned long long)requested_regs[i],
 647                               (unsigned long long)resulting_regs[i]);
 648                        nerrs++;
 649                        continue;
 650                }
 651
 652                bool ignore_reg = false;
 653#if __i386__
 654                if (i == REG_UESP)
 655                        ignore_reg = true;
 656#else
 657                if (i == REG_CSGSFS) {
 658                        struct selectors *req_sels =
 659                                (void *)&requested_regs[REG_CSGSFS];
 660                        struct selectors *res_sels =
 661                                (void *)&resulting_regs[REG_CSGSFS];
 662                        if (req_sels->cs != res_sels->cs) {
 663                                printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
 664                                       req_sels->cs, res_sels->cs);
 665                                nerrs++;
 666                        }
 667
 668                        if (req_sels->ss != res_sels->ss) {
 669                                printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
 670                                       req_sels->ss, res_sels->ss);
 671                                nerrs++;
 672                        }
 673
 674                        continue;
 675                }
 676#endif
 677
 678                /* Sanity check on the kernel */
 679                if (i == REG_CX && req != res) {
 680                        printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
 681                               (unsigned long long)req,
 682                               (unsigned long long)res);
 683                        nerrs++;
 684                        continue;
 685                }
 686
 687                if (req != res && !ignore_reg) {
 688                        printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
 689                               i, (unsigned long long)req,
 690                               (unsigned long long)res);
 691                        nerrs++;
 692                }
 693        }
 694
 695        if (nerrs == 0)
 696                printf("[OK]\tall registers okay\n");
 697
 698        return nerrs;
 699}
 700
 701static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
 702{
 703        int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
 704        if (cs == -1)
 705                return 0;
 706
 707        sig_cs = cs;
 708        sig_ss = ss;
 709
 710        printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
 711               cs_bits, sig_cs, sig_ss);
 712
 713        sig_trapped = 0;
 714        raise(SIGUSR1);
 715        if (sig_trapped) {
 716                char errdesc[32] = "";
 717                if (sig_err) {
 718                        const char *src = (sig_err & 1) ? " EXT" : "";
 719                        const char *table;
 720                        if ((sig_err & 0x6) == 0x0)
 721                                table = "GDT";
 722                        else if ((sig_err & 0x6) == 0x4)
 723                                table = "LDT";
 724                        else if ((sig_err & 0x6) == 0x2)
 725                                table = "IDT";
 726                        else
 727                                table = "???";
 728
 729                        sprintf(errdesc, "%s%s index %d, ",
 730                                table, src, sig_err >> 3);
 731                }
 732
 733                char trapname[32];
 734                if (sig_trapno == 13)
 735                        strcpy(trapname, "GP");
 736                else if (sig_trapno == 11)
 737                        strcpy(trapname, "NP");
 738                else if (sig_trapno == 12)
 739                        strcpy(trapname, "SS");
 740                else if (sig_trapno == 32)
 741                        strcpy(trapname, "IRET");  /* X86_TRAP_IRET */
 742                else
 743                        sprintf(trapname, "%d", sig_trapno);
 744
 745                printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
 746                       trapname, (unsigned long)sig_err,
 747                       errdesc, strsignal(sig_trapped));
 748                return 0;
 749        } else {
 750                /*
 751                 * This also implicitly tests UC_STRICT_RESTORE_SS:
 752                 * We check that these signals set UC_STRICT_RESTORE_SS and,
 753                 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
 754                 * then we won't get SIGSEGV.
 755                 */
 756                printf("[FAIL]\tDid not get SIGSEGV\n");
 757                return 1;
 758        }
 759}
 760
 761int main()
 762{
 763        int total_nerrs = 0;
 764        unsigned short my_cs, my_ss;
 765
 766        asm volatile ("mov %%cs,%0" : "=r" (my_cs));
 767        asm volatile ("mov %%ss,%0" : "=r" (my_ss));
 768        setup_ldt();
 769
 770        stack_t stack = {
 771                /* Our sigaltstack scratch space. */
 772                .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
 773                .ss_size = SIGSTKSZ,
 774        };
 775        if (sigaltstack(&stack, NULL) != 0)
 776                err(1, "sigaltstack");
 777
 778        sethandler(SIGUSR1, sigusr1, 0);
 779        sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
 780
 781        /* Easy cases: return to a 32-bit SS in each possible CS bitness. */
 782        total_nerrs += test_valid_sigreturn(64, false, -1);
 783        total_nerrs += test_valid_sigreturn(32, false, -1);
 784        total_nerrs += test_valid_sigreturn(16, false, -1);
 785
 786        /*
 787         * Test easy espfix cases: return to a 16-bit LDT SS in each possible
 788         * CS bitness.  NB: with a long mode CS, the SS bitness is irrelevant.
 789         *
 790         * This catches the original missing-espfix-on-64-bit-kernels issue
 791         * as well as CVE-2014-8134.
 792         */
 793        total_nerrs += test_valid_sigreturn(64, true, -1);
 794        total_nerrs += test_valid_sigreturn(32, true, -1);
 795        total_nerrs += test_valid_sigreturn(16, true, -1);
 796
 797        if (gdt_data16_idx) {
 798                /*
 799                 * For performance reasons, Linux skips espfix if SS points
 800                 * to the GDT.  If we were able to allocate a 16-bit SS in
 801                 * the GDT, see if it leaks parts of the kernel stack pointer.
 802                 *
 803                 * This tests for CVE-2014-8133.
 804                 */
 805                total_nerrs += test_valid_sigreturn(64, true,
 806                                                    GDT3(gdt_data16_idx));
 807                total_nerrs += test_valid_sigreturn(32, true,
 808                                                    GDT3(gdt_data16_idx));
 809                total_nerrs += test_valid_sigreturn(16, true,
 810                                                    GDT3(gdt_data16_idx));
 811        }
 812
 813#ifdef __x86_64__
 814        /* Nasty ABI case: check SS corruption handling. */
 815        sig_corrupt_final_ss = 1;
 816        total_nerrs += test_valid_sigreturn(32, false, -1);
 817        total_nerrs += test_valid_sigreturn(32, true, -1);
 818        sig_corrupt_final_ss = 0;
 819#endif
 820
 821        /*
 822         * We're done testing valid sigreturn cases.  Now we test states
 823         * for which sigreturn itself will succeed but the subsequent
 824         * entry to user mode will fail.
 825         *
 826         * Depending on the failure mode and the kernel bitness, these
 827         * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
 828         */
 829        clearhandler(SIGTRAP);
 830        sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
 831        sethandler(SIGBUS, sigtrap, SA_ONSTACK);
 832        sethandler(SIGILL, sigtrap, SA_ONSTACK);  /* 32-bit kernels do this */
 833
 834        /* Easy failures: invalid SS, resulting in #GP(0) */
 835        test_bad_iret(64, ldt_nonexistent_sel, -1);
 836        test_bad_iret(32, ldt_nonexistent_sel, -1);
 837        test_bad_iret(16, ldt_nonexistent_sel, -1);
 838
 839        /* These fail because SS isn't a data segment, resulting in #GP(SS) */
 840        test_bad_iret(64, my_cs, -1);
 841        test_bad_iret(32, my_cs, -1);
 842        test_bad_iret(16, my_cs, -1);
 843
 844        /* Try to return to a not-present code segment, triggering #NP(SS). */
 845        test_bad_iret(32, my_ss, npcode32_sel);
 846
 847        /*
 848         * Try to return to a not-present but otherwise valid data segment.
 849         * This will cause IRET to fail with #SS on the espfix stack.  This
 850         * exercises CVE-2014-9322.
 851         *
 852         * Note that, if espfix is enabled, 64-bit Linux will lose track
 853         * of the actual cause of failure and report #GP(0) instead.
 854         * This would be very difficult for Linux to avoid, because
 855         * espfix64 causes IRET failures to be promoted to #DF, so the
 856         * original exception frame is never pushed onto the stack.
 857         */
 858        test_bad_iret(32, npdata32_sel, -1);
 859
 860        /*
 861         * Try to return to a not-present but otherwise valid data
 862         * segment without invoking espfix.  Newer kernels don't allow
 863         * this to happen in the first place.  On older kernels, though,
 864         * this can trigger CVE-2014-9322.
 865         */
 866        if (gdt_npdata32_idx)
 867                test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
 868
 869#ifdef __x86_64__
 870        total_nerrs += test_nonstrict_ss();
 871#endif
 872
 873        free(stack.ss_sp);
 874        return total_nerrs ? 1 : 0;
 875}
 876