linux/arch/x86/kernel/umip.c
<<
>>
Prefs
   1/*
   2 * umip.c Emulation for instruction protected by the User-Mode Instruction
   3 * Prevention feature
   4 *
   5 * Copyright (c) 2017, Intel Corporation.
   6 * Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
   7 */
   8
   9#include <linux/uaccess.h>
  10#include <asm/umip.h>
  11#include <asm/traps.h>
  12#include <asm/insn.h>
  13#include <asm/insn-eval.h>
  14#include <linux/ratelimit.h>
  15
  16#undef pr_fmt
  17#define pr_fmt(fmt) "umip: " fmt
  18
  19/** DOC: Emulation for User-Mode Instruction Prevention (UMIP)
  20 *
  21 * User-Mode Instruction Prevention is a security feature present in recent
  22 * x86 processors that, when enabled, prevents a group of instructions (SGDT,
  23 * SIDT, SLDT, SMSW and STR) from being run in user mode by issuing a general
  24 * protection fault if the instruction is executed with CPL > 0.
  25 *
 * Rather than relaying to the user space the general protection fault caused by
 * the UMIP-protected instructions (in the form of a SIGSEGV signal), the fault
 * can be trapped and the result of such instructions emulated to provide dummy
 * values. This makes it possible to both conserve the current kernel behavior
 * and avoid revealing the system resources that UMIP intends to protect (i.e.,
 * the locations of the global descriptor and interrupt descriptor tables, the
 * segment selectors of the local descriptor table, the value of the task state
 * register and the contents of the CR0 register).
  34 *
  35 * This emulation is needed because certain applications (e.g., WineHQ and
  36 * DOSEMU2) rely on this subset of instructions to function.
  37 *
  38 * The instructions protected by UMIP can be split in two groups. Those which
  39 * return a kernel memory address (SGDT and SIDT) and those which return a
  40 * value (SLDT, STR and SMSW).
  41 *
  42 * For the instructions that return a kernel memory address, applications
  43 * such as WineHQ rely on the result being located in the kernel memory space,
  44 * not the actual location of the table. The result is emulated as a hard-coded
 * value that lies close to the top of the kernel memory. The limit for the GDT
  46 * and the IDT are set to zero.
  47 *
  48 * The instruction SMSW is emulated to return the value that the register CR0
  49 * has at boot time as set in the head_32.
  50 * SLDT and STR are emulated to return the values that the kernel programmatically
  51 * assigns:
  52 * - SLDT returns (GDT_ENTRY_LDT * 8) if an LDT has been set, 0 if not.
  53 * - STR returns (GDT_ENTRY_TSS * 8).
  54 *
  55 * Emulation is provided for both 32-bit and 64-bit processes.
  56 *
  57 * Care is taken to appropriately emulate the results when segmentation is
  58 * used. That is, rather than relying on USER_DS and USER_CS, the function
  59 * insn_get_addr_ref() inspects the segment descriptor pointed by the
  60 * registers in pt_regs. This ensures that we correctly obtain the segment
  61 * base address and the address and operand sizes even if the user space
  62 * application uses a local descriptor table.
  63 */
  64
  65#define UMIP_DUMMY_GDT_BASE 0xfffffffffffe0000ULL
  66#define UMIP_DUMMY_IDT_BASE 0xffffffffffff0000ULL
  67
  68/*
  69 * The SGDT and SIDT instructions store the contents of the global descriptor
  70 * table and interrupt table registers, respectively. The destination is a
  71 * memory operand of X+2 bytes. X bytes are used to store the base address of
  72 * the table and 2 bytes are used to store the limit. In 32-bit processes X
  73 * has a value of 4, in 64-bit processes X has a value of 8.
  74 */
  75#define UMIP_GDT_IDT_BASE_SIZE_64BIT 8
  76#define UMIP_GDT_IDT_BASE_SIZE_32BIT 4
  77#define UMIP_GDT_IDT_LIMIT_SIZE 2
  78
  79#define UMIP_INST_SGDT  0       /* 0F 01 /0 */
  80#define UMIP_INST_SIDT  1       /* 0F 01 /1 */
  81#define UMIP_INST_SMSW  2       /* 0F 01 /4 */
  82#define UMIP_INST_SLDT  3       /* 0F 00 /0 */
  83#define UMIP_INST_STR   4       /* 0F 00 /1 */
  84
  85static const char * const umip_insns[5] = {
  86        [UMIP_INST_SGDT] = "SGDT",
  87        [UMIP_INST_SIDT] = "SIDT",
  88        [UMIP_INST_SMSW] = "SMSW",
  89        [UMIP_INST_SLDT] = "SLDT",
  90        [UMIP_INST_STR] = "STR",
  91};
  92
  93#define umip_pr_err(regs, fmt, ...) \
  94        umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
  95#define umip_pr_warn(regs, fmt, ...) \
  96        umip_printk(regs, KERN_WARNING, fmt,  ##__VA_ARGS__)
  97
  98/**
  99 * umip_printk() - Print a rate-limited message
 100 * @regs:       Register set with the context in which the warning is printed
 101 * @log_level:  Kernel log level to print the message
 102 * @fmt:        The text string to print
 103 *
 104 * Print the text contained in @fmt. The print rate is limited to bursts of 5
 105 * messages every two minutes. The purpose of this customized version of
 106 * printk() is to print messages when user space processes use any of the
 107 * UMIP-protected instructions. Thus, the printed text is prepended with the
 108 * task name and process ID number of the current task as well as the
 109 * instruction and stack pointers in @regs as seen when entering kernel mode.
 110 *
 111 * Returns:
 112 *
 113 * None.
 114 */
 115static __printf(3, 4)
 116void umip_printk(const struct pt_regs *regs, const char *log_level,
 117                 const char *fmt, ...)
 118{
 119        /* Bursts of 5 messages every two minutes */
 120        static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5);
 121        struct task_struct *tsk = current;
 122        struct va_format vaf;
 123        va_list args;
 124
 125        if (!__ratelimit(&ratelimit))
 126                return;
 127
 128        va_start(args, fmt);
 129        vaf.fmt = fmt;
 130        vaf.va = &args;
 131        printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm,
 132               task_pid_nr(tsk), regs->ip, regs->sp, &vaf);
 133        va_end(args);
 134}
 135
 136/**
 137 * identify_insn() - Identify a UMIP-protected instruction
 138 * @insn:       Instruction structure with opcode and ModRM byte.
 139 *
 140 * From the opcode and ModRM.reg in @insn identify, if any, a UMIP-protected
 141 * instruction that can be emulated.
 142 *
 143 * Returns:
 144 *
 145 * On success, a constant identifying a specific UMIP-protected instruction that
 146 * can be emulated.
 147 *
 148 * -EINVAL on error or when not an UMIP-protected instruction that can be
 149 * emulated.
 150 */
 151static int identify_insn(struct insn *insn)
 152{
 153        /* By getting modrm we also get the opcode. */
 154        insn_get_modrm(insn);
 155
 156        if (!insn->modrm.nbytes)
 157                return -EINVAL;
 158
 159        /* All the instructions of interest start with 0x0f. */
 160        if (insn->opcode.bytes[0] != 0xf)
 161                return -EINVAL;
 162
 163        if (insn->opcode.bytes[1] == 0x1) {
 164                switch (X86_MODRM_REG(insn->modrm.value)) {
 165                case 0:
 166                        return UMIP_INST_SGDT;
 167                case 1:
 168                        return UMIP_INST_SIDT;
 169                case 4:
 170                        return UMIP_INST_SMSW;
 171                default:
 172                        return -EINVAL;
 173                }
 174        } else if (insn->opcode.bytes[1] == 0x0) {
 175                if (X86_MODRM_REG(insn->modrm.value) == 0)
 176                        return UMIP_INST_SLDT;
 177                else if (X86_MODRM_REG(insn->modrm.value) == 1)
 178                        return UMIP_INST_STR;
 179                else
 180                        return -EINVAL;
 181        } else {
 182                return -EINVAL;
 183        }
 184}
 185
 186/**
 187 * emulate_umip_insn() - Emulate UMIP instructions and return dummy values
 188 * @insn:       Instruction structure with operands
 189 * @umip_inst:  A constant indicating the instruction to emulate
 190 * @data:       Buffer into which the dummy result is stored
 191 * @data_size:  Size of the emulated result
 192 * @x86_64:     true if process is 64-bit, false otherwise
 193 *
 194 * Emulate an instruction protected by UMIP and provide a dummy result. The
 195 * result of the emulation is saved in @data. The size of the results depends
 196 * on both the instruction and type of operand (register vs memory address).
 197 * The size of the result is updated in @data_size. Caller is responsible
 198 * of providing a @data buffer of at least UMIP_GDT_IDT_BASE_SIZE +
 199 * UMIP_GDT_IDT_LIMIT_SIZE bytes.
 200 *
 201 * Returns:
 202 *
 203 * 0 on success, -EINVAL on error while emulating.
 204 */
 205static int emulate_umip_insn(struct insn *insn, int umip_inst,
 206                             unsigned char *data, int *data_size, bool x86_64)
 207{
 208        if (!data || !data_size || !insn)
 209                return -EINVAL;
 210        /*
 211         * These two instructions return the base address and limit of the
 212         * global and interrupt descriptor table, respectively. According to the
 213         * Intel Software Development manual, the base address can be 24-bit,
 214         * 32-bit or 64-bit. Limit is always 16-bit. If the operand size is
 215         * 16-bit, the returned value of the base address is supposed to be a
 216         * zero-extended 24-byte number. However, it seems that a 32-byte number
 217         * is always returned irrespective of the operand size.
 218         */
 219        if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) {
 220                u64 dummy_base_addr;
 221                u16 dummy_limit = 0;
 222
 223                /* SGDT and SIDT do not use registers operands. */
 224                if (X86_MODRM_MOD(insn->modrm.value) == 3)
 225                        return -EINVAL;
 226
 227                if (umip_inst == UMIP_INST_SGDT)
 228                        dummy_base_addr = UMIP_DUMMY_GDT_BASE;
 229                else
 230                        dummy_base_addr = UMIP_DUMMY_IDT_BASE;
 231
 232                /*
 233                 * 64-bit processes use the entire dummy base address.
 234                 * 32-bit processes use the lower 32 bits of the base address.
 235                 * dummy_base_addr is always 64 bits, but we memcpy the correct
 236                 * number of bytes from it to the destination.
 237                 */
 238                if (x86_64)
 239                        *data_size = UMIP_GDT_IDT_BASE_SIZE_64BIT;
 240                else
 241                        *data_size = UMIP_GDT_IDT_BASE_SIZE_32BIT;
 242
 243                memcpy(data + 2, &dummy_base_addr, *data_size);
 244
 245                *data_size += UMIP_GDT_IDT_LIMIT_SIZE;
 246                memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
 247
 248        } else if (umip_inst == UMIP_INST_SMSW || umip_inst == UMIP_INST_SLDT ||
 249                   umip_inst == UMIP_INST_STR) {
 250                unsigned long dummy_value;
 251
 252                if (umip_inst == UMIP_INST_SMSW) {
 253                        dummy_value = CR0_STATE;
 254                } else if (umip_inst == UMIP_INST_STR) {
 255                        dummy_value = GDT_ENTRY_TSS * 8;
 256                } else if (umip_inst == UMIP_INST_SLDT) {
 257#ifdef CONFIG_MODIFY_LDT_SYSCALL
 258                        down_read(&current->mm->context.ldt_usr_sem);
 259                        if (current->mm->context.ldt)
 260                                dummy_value = GDT_ENTRY_LDT * 8;
 261                        else
 262                                dummy_value = 0;
 263                        up_read(&current->mm->context.ldt_usr_sem);
 264#else
 265                        dummy_value = 0;
 266#endif
 267                }
 268
 269                /*
 270                 * For these 3 instructions, the number
 271                 * of bytes to be copied in the result buffer is determined
 272                 * by whether the operand is a register or a memory location.
 273                 * If operand is a register, return as many bytes as the operand
 274                 * size. If operand is memory, return only the two least
 275                 * significant bytes.
 276                 */
 277                if (X86_MODRM_MOD(insn->modrm.value) == 3)
 278                        *data_size = insn->opnd_bytes;
 279                else
 280                        *data_size = 2;
 281
 282                memcpy(data, &dummy_value, *data_size);
 283        } else {
 284                return -EINVAL;
 285        }
 286
 287        return 0;
 288}
 289
 290/**
 291 * force_sig_info_umip_fault() - Force a SIGSEGV with SEGV_MAPERR
 292 * @addr:       Address that caused the signal
 293 * @regs:       Register set containing the instruction pointer
 294 *
 295 * Force a SIGSEGV signal with SEGV_MAPERR as the error code. This function is
 296 * intended to be used to provide a segmentation fault when the result of the
 297 * UMIP emulation could not be copied to the user space memory.
 298 *
 299 * Returns: none
 300 */
 301static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
 302{
 303        struct task_struct *tsk = current;
 304
 305        tsk->thread.cr2         = (unsigned long)addr;
 306        tsk->thread.error_code  = X86_PF_USER | X86_PF_WRITE;
 307        tsk->thread.trap_nr     = X86_TRAP_PF;
 308
 309        force_sig_fault(SIGSEGV, SEGV_MAPERR, addr);
 310
 311        if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
 312                return;
 313
 314        umip_pr_err(regs, "segfault in emulation. error%x\n",
 315                    X86_PF_USER | X86_PF_WRITE);
 316}
 317
 318/**
 319 * fixup_umip_exception() - Fixup a general protection fault caused by UMIP
 320 * @regs:       Registers as saved when entering the #GP handler
 321 *
 322 * The instructions SGDT, SIDT, STR, SMSW and SLDT cause a general protection
 323 * fault if executed with CPL > 0 (i.e., from user space). This function fixes
 324 * the exception up and provides dummy results for SGDT, SIDT and SMSW; STR
 325 * and SLDT are not fixed up.
 326 *
 327 * If operands are memory addresses, results are copied to user-space memory as
 328 * indicated by the instruction pointed by eIP using the registers indicated in
 329 * the instruction operands. If operands are registers, results are copied into
 330 * the context that was saved when entering kernel mode.
 331 *
 332 * Returns:
 333 *
 334 * True if emulation was successful; false if not.
 335 */
 336bool fixup_umip_exception(struct pt_regs *regs)
 337{
 338        int nr_copied, reg_offset, dummy_data_size, umip_inst;
 339        /* 10 bytes is the maximum size of the result of UMIP instructions */
 340        unsigned char dummy_data[10] = { 0 };
 341        unsigned char buf[MAX_INSN_SIZE];
 342        unsigned long *reg_addr;
 343        void __user *uaddr;
 344        struct insn insn;
 345
 346        if (!regs)
 347                return false;
 348
 349        /*
 350         * Give up on emulation if fetching the instruction failed. Should a
 351         * page fault or a #GP be issued?
 352         */
 353        nr_copied = insn_fetch_from_user(regs, buf);
 354        if (nr_copied <= 0)
 355                return false;
 356
 357        if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
 358                return false;
 359
 360        umip_inst = identify_insn(&insn);
 361        if (umip_inst < 0)
 362                return false;
 363
 364        umip_pr_warn(regs, "%s instruction cannot be used by applications.\n",
 365                        umip_insns[umip_inst]);
 366
 367        umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n");
 368
 369        if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size,
 370                              user_64bit_mode(regs)))
 371                return false;
 372
 373        /*
 374         * If operand is a register, write result to the copy of the register
 375         * value that was pushed to the stack when entering into kernel mode.
 376         * Upon exit, the value we write will be restored to the actual hardware
 377         * register.
 378         */
 379        if (X86_MODRM_MOD(insn.modrm.value) == 3) {
 380                reg_offset = insn_get_modrm_rm_off(&insn, regs);
 381
 382                /*
 383                 * Negative values are usually errors. In memory addressing,
 384                 * the exception is -EDOM. Since we expect a register operand,
 385                 * all negative values are errors.
 386                 */
 387                if (reg_offset < 0)
 388                        return false;
 389
 390                reg_addr = (unsigned long *)((unsigned long)regs + reg_offset);
 391                memcpy(reg_addr, dummy_data, dummy_data_size);
 392        } else {
 393                uaddr = insn_get_addr_ref(&insn, regs);
 394                if ((unsigned long)uaddr == -1L)
 395                        return false;
 396
 397                nr_copied = copy_to_user(uaddr, dummy_data, dummy_data_size);
 398                if (nr_copied  > 0) {
 399                        /*
 400                         * If copy fails, send a signal and tell caller that
 401                         * fault was fixed up.
 402                         */
 403                        force_sig_info_umip_fault(uaddr, regs);
 404                        return true;
 405                }
 406        }
 407
 408        /* increase IP to let the program keep going */
 409        regs->ip += insn.length;
 410        return true;
 411}
 412