linux/arch/x86/kernel/umip.c
<<
>>
Prefs
   1/*
   2 * umip.c Emulation for instruction protected by the Intel User-Mode
   3 * Instruction Prevention feature
   4 *
   5 * Copyright (c) 2017, Intel Corporation.
   6 * Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
   7 */
   8
   9#include <linux/uaccess.h>
  10#include <asm/umip.h>
  11#include <asm/traps.h>
  12#include <asm/insn.h>
  13#include <asm/insn-eval.h>
  14#include <linux/ratelimit.h>
  15
  16#undef pr_fmt
  17#define pr_fmt(fmt) "umip: " fmt
  18
  19/** DOC: Emulation for User-Mode Instruction Prevention (UMIP)
  20 *
  21 * The User-Mode Instruction Prevention feature present in recent Intel
  22 * processors prevents a group of instructions (sgdt, sidt, sldt, smsw, and str)
  23 * from being executed with CPL > 0. If one of them is attempted at CPL > 0, a
  24 * general protection fault is issued.
  25 *
  26 * Rather than relaying to the user space the general protection fault caused by
  27 * the UMIP-protected instructions (in the form of a SIGSEGV signal), the fault
  28 * can be trapped and the result of such instructions emulated to provide dummy
  29 * values. This both preserves the current kernel behavior and does not reveal the
  30 * system resources that UMIP intends to protect (i.e., the locations of the
  31 * global descriptor and interrupt descriptor tables, the segment selectors of
  32 * the local descriptor table, the value of the task state register and the
  33 * contents of the CR0 register).
  34 *
  35 * This emulation is needed because certain applications (e.g., WineHQ and
  36 * DOSEMU2) rely on this subset of instructions to function.
  37 *
  38 * The instructions protected by UMIP can be split in two groups. Those which
  39 * return a kernel memory address (sgdt and sidt) and those which return a
  40 * value (sldt, str and smsw).
  41 *
  42 * For the instructions that return a kernel memory address, applications
  43 * such as WineHQ rely on the result being located in the kernel memory space,
  44 * not the actual location of the table. The result is emulated as a hard-coded
  45 * value that lies close to the top of the kernel memory. The limit for the GDT
  46 * and the IDT are set to zero.
  47 *
  48 * Given that sldt and str are not commonly used in programs that run on WineHQ
  49 * or DOSEMU2, they are not emulated.
  50 *
  51 * The instruction smsw is emulated to return the value that the register CR0
  52 * has at boot time as set in the head_32.
  53 *
  54 * Also, emulation is provided only for 32-bit processes; 64-bit processes
  55 * that attempt to use the instructions that UMIP protects will receive the
  56 * SIGSEGV signal issued as a consequence of the general protection fault.
  57 *
  58 * Care is taken to appropriately emulate the results when segmentation is
  59 * used. That is, rather than relying on USER_DS and USER_CS, the function
  60 * insn_get_addr_ref() inspects the segment descriptor pointed by the
  61 * registers in pt_regs. This ensures that we correctly obtain the segment
  62 * base address and the address and operand sizes even if the user space
  63 * application uses a local descriptor table.
  64 */
  65
  66#define UMIP_DUMMY_GDT_BASE 0xfffe0000
  67#define UMIP_DUMMY_IDT_BASE 0xffff0000
  68
  69/*
  70 * The SGDT and SIDT instructions store the contents of the global descriptor
  71 * table and interrupt table registers, respectively. The destination is a
  72 * memory operand of X+2 bytes. X bytes are used to store the base address of
  73 * the table and 2 bytes are used to store the limit. In 32-bit processes, the
  74 * only processes for which emulation is provided, X has a value of 4.
  75 */
  76#define UMIP_GDT_IDT_BASE_SIZE 4
  77#define UMIP_GDT_IDT_LIMIT_SIZE 2
  78
  79#define UMIP_INST_SGDT  0       /* 0F 01 /0 */
  80#define UMIP_INST_SIDT  1       /* 0F 01 /1 */
  81#define UMIP_INST_SMSW  2       /* 0F 01 /4 */
  82#define UMIP_INST_SLDT  3       /* 0F 00 /0 */
  83#define UMIP_INST_STR   4       /* 0F 00 /1 */
  84
  85const char * const umip_insns[5] = {
  86        [UMIP_INST_SGDT] = "SGDT",
  87        [UMIP_INST_SIDT] = "SIDT",
  88        [UMIP_INST_SMSW] = "SMSW",
  89        [UMIP_INST_SLDT] = "SLDT",
  90        [UMIP_INST_STR] = "STR",
  91};
  92
  93#define umip_pr_err(regs, fmt, ...) \
  94        umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
  95#define umip_pr_warning(regs, fmt, ...) \
  96        umip_printk(regs, KERN_WARNING, fmt,  ##__VA_ARGS__)
  97
  98/**
  99 * umip_printk() - Print a rate-limited message
 100 * @regs:       Register set with the context in which the warning is printed
 101 * @log_level:  Kernel log level to print the message
 102 * @fmt:        The text string to print
 103 *
 104 * Print the text contained in @fmt. The print rate is limited to bursts of 5
 105 * messages every two minutes. The purpose of this customized version of
 106 * printk() is to print messages when user space processes use any of the
 107 * UMIP-protected instructions. Thus, the printed text is prepended with the
 108 * task name and process ID number of the current task as well as the
 109 * instruction and stack pointers in @regs as seen when entering kernel mode.
 110 *
 111 * Returns:
 112 *
 113 * None.
 114 */
 115static __printf(3, 4)
 116void umip_printk(const struct pt_regs *regs, const char *log_level,
 117                 const char *fmt, ...)
 118{
 119        /* Bursts of 5 messages every two minutes */
 120        static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5);
 121        struct task_struct *tsk = current;
 122        struct va_format vaf;
 123        va_list args;
 124
 125        if (!__ratelimit(&ratelimit))
 126                return;
 127
 128        va_start(args, fmt);
 129        vaf.fmt = fmt;
 130        vaf.va = &args;
 131        printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm,
 132               task_pid_nr(tsk), regs->ip, regs->sp, &vaf);
 133        va_end(args);
 134}
 135
 136/**
 137 * identify_insn() - Identify a UMIP-protected instruction
 138 * @insn:       Instruction structure with opcode and ModRM byte.
 139 *
 140 * From the opcode and ModRM.reg in @insn identify, if any, a UMIP-protected
 141 * instruction that can be emulated.
 142 *
 143 * Returns:
 144 *
 145 * On success, a constant identifying a specific UMIP-protected instruction that
 146 * can be emulated.
 147 *
 148 * -EINVAL on error or when not an UMIP-protected instruction that can be
 149 * emulated.
 150 */
 151static int identify_insn(struct insn *insn)
 152{
 153        /* By getting modrm we also get the opcode. */
 154        insn_get_modrm(insn);
 155
 156        if (!insn->modrm.nbytes)
 157                return -EINVAL;
 158
 159        /* All the instructions of interest start with 0x0f. */
 160        if (insn->opcode.bytes[0] != 0xf)
 161                return -EINVAL;
 162
 163        if (insn->opcode.bytes[1] == 0x1) {
 164                switch (X86_MODRM_REG(insn->modrm.value)) {
 165                case 0:
 166                        return UMIP_INST_SGDT;
 167                case 1:
 168                        return UMIP_INST_SIDT;
 169                case 4:
 170                        return UMIP_INST_SMSW;
 171                default:
 172                        return -EINVAL;
 173                }
 174        } else if (insn->opcode.bytes[1] == 0x0) {
 175                if (X86_MODRM_REG(insn->modrm.value) == 0)
 176                        return UMIP_INST_SLDT;
 177                else if (X86_MODRM_REG(insn->modrm.value) == 1)
 178                        return UMIP_INST_STR;
 179                else
 180                        return -EINVAL;
 181        } else {
 182                return -EINVAL;
 183        }
 184}
 185
 186/**
 187 * emulate_umip_insn() - Emulate UMIP instructions and return dummy values
 188 * @insn:       Instruction structure with operands
 189 * @umip_inst:  A constant indicating the instruction to emulate
 190 * @data:       Buffer into which the dummy result is stored
 191 * @data_size:  Size of the emulated result
 192 *
 193 * Emulate an instruction protected by UMIP and provide a dummy result. The
 194 * result of the emulation is saved in @data. The size of the results depends
 195 * on both the instruction and type of operand (register vs memory address).
 196 * The size of the result is updated in @data_size. Caller is responsible
 197 * of providing a @data buffer of at least UMIP_GDT_IDT_BASE_SIZE +
 198 * UMIP_GDT_IDT_LIMIT_SIZE bytes.
 199 *
 200 * Returns:
 201 *
 202 * 0 on success, -EINVAL on error while emulating.
 203 */
 204static int emulate_umip_insn(struct insn *insn, int umip_inst,
 205                             unsigned char *data, int *data_size)
 206{
 207        unsigned long dummy_base_addr, dummy_value;
 208        unsigned short dummy_limit = 0;
 209
 210        if (!data || !data_size || !insn)
 211                return -EINVAL;
 212        /*
 213         * These two instructions return the base address and limit of the
 214         * global and interrupt descriptor table, respectively. According to the
 215         * Intel Software Development manual, the base address can be 24-bit,
 216         * 32-bit or 64-bit. Limit is always 16-bit. If the operand size is
 217         * 16-bit, the returned value of the base address is supposed to be a
 218         * zero-extended 24-byte number. However, it seems that a 32-byte number
 219         * is always returned irrespective of the operand size.
 220         */
 221        if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) {
 222                /* SGDT and SIDT do not use registers operands. */
 223                if (X86_MODRM_MOD(insn->modrm.value) == 3)
 224                        return -EINVAL;
 225
 226                if (umip_inst == UMIP_INST_SGDT)
 227                        dummy_base_addr = UMIP_DUMMY_GDT_BASE;
 228                else
 229                        dummy_base_addr = UMIP_DUMMY_IDT_BASE;
 230
 231                *data_size = UMIP_GDT_IDT_LIMIT_SIZE + UMIP_GDT_IDT_BASE_SIZE;
 232
 233                memcpy(data + 2, &dummy_base_addr, UMIP_GDT_IDT_BASE_SIZE);
 234                memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
 235
 236        } else if (umip_inst == UMIP_INST_SMSW) {
 237                dummy_value = CR0_STATE;
 238
 239                /*
 240                 * Even though the CR0 register has 4 bytes, the number
 241                 * of bytes to be copied in the result buffer is determined
 242                 * by whether the operand is a register or a memory location.
 243                 * If operand is a register, return as many bytes as the operand
 244                 * size. If operand is memory, return only the two least
 245                 * siginificant bytes of CR0.
 246                 */
 247                if (X86_MODRM_MOD(insn->modrm.value) == 3)
 248                        *data_size = insn->opnd_bytes;
 249                else
 250                        *data_size = 2;
 251
 252                memcpy(data, &dummy_value, *data_size);
 253        /* STR and SLDT  are not emulated */
 254        } else {
 255                return -EINVAL;
 256        }
 257
 258        return 0;
 259}
 260
 261/**
 262 * force_sig_info_umip_fault() - Force a SIGSEGV with SEGV_MAPERR
 263 * @addr:       Address that caused the signal
 264 * @regs:       Register set containing the instruction pointer
 265 *
 266 * Force a SIGSEGV signal with SEGV_MAPERR as the error code. This function is
 267 * intended to be used to provide a segmentation fault when the result of the
 268 * UMIP emulation could not be copied to the user space memory.
 269 *
 270 * Returns: none
 271 */
 272static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
 273{
 274        siginfo_t info;
 275        struct task_struct *tsk = current;
 276
 277        tsk->thread.cr2         = (unsigned long)addr;
 278        tsk->thread.error_code  = X86_PF_USER | X86_PF_WRITE;
 279        tsk->thread.trap_nr     = X86_TRAP_PF;
 280
 281        info.si_signo   = SIGSEGV;
 282        info.si_errno   = 0;
 283        info.si_code    = SEGV_MAPERR;
 284        info.si_addr    = addr;
 285        force_sig_info(SIGSEGV, &info, tsk);
 286
 287        if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
 288                return;
 289
 290        umip_pr_err(regs, "segfault in emulation. error%x\n",
 291                    X86_PF_USER | X86_PF_WRITE);
 292}
 293
 294/**
 295 * fixup_umip_exception() - Fixup a general protection fault caused by UMIP
 296 * @regs:       Registers as saved when entering the #GP handler
 297 *
 298 * The instructions sgdt, sidt, str, smsw, sldt cause a general protection
 299 * fault if executed with CPL > 0 (i.e., from user space). If the offending
 300 * user-space process is not in long mode, this function fixes the exception
 301 * up and provides dummy results for sgdt, sidt and smsw; str and sldt are not
 302 * fixed up. Also long mode user-space processes are not fixed up.
 303 *
 304 * If operands are memory addresses, results are copied to user-space memory as
 305 * indicated by the instruction pointed by eIP using the registers indicated in
 306 * the instruction operands. If operands are registers, results are copied into
 307 * the context that was saved when entering kernel mode.
 308 *
 309 * Returns:
 310 *
 311 * True if emulation was successful; false if not.
 312 */
 313bool fixup_umip_exception(struct pt_regs *regs)
 314{
 315        int not_copied, nr_copied, reg_offset, dummy_data_size, umip_inst;
 316        unsigned long seg_base = 0, *reg_addr;
 317        /* 10 bytes is the maximum size of the result of UMIP instructions */
 318        unsigned char dummy_data[10] = { 0 };
 319        unsigned char buf[MAX_INSN_SIZE];
 320        void __user *uaddr;
 321        struct insn insn;
 322        char seg_defs;
 323
 324        if (!regs)
 325                return false;
 326
 327        /*
 328         * If not in user-space long mode, a custom code segment could be in
 329         * use. This is true in protected mode (if the process defined a local
 330         * descriptor table), or virtual-8086 mode. In most of the cases
 331         * seg_base will be zero as in USER_CS.
 332         */
 333        if (!user_64bit_mode(regs))
 334                seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
 335
 336        if (seg_base == -1L)
 337                return false;
 338
 339        not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip),
 340                                    sizeof(buf));
 341        nr_copied = sizeof(buf) - not_copied;
 342
 343        /*
 344         * The copy_from_user above could have failed if user code is protected
 345         * by a memory protection key. Give up on emulation in such a case.
 346         * Should we issue a page fault?
 347         */
 348        if (!nr_copied)
 349                return false;
 350
 351        insn_init(&insn, buf, nr_copied, user_64bit_mode(regs));
 352
 353        /*
 354         * Override the default operand and address sizes with what is specified
 355         * in the code segment descriptor. The instruction decoder only sets
 356         * the address size it to either 4 or 8 address bytes and does nothing
 357         * for the operand bytes. This OK for most of the cases, but we could
 358         * have special cases where, for instance, a 16-bit code segment
 359         * descriptor is used.
 360         * If there is an address override prefix, the instruction decoder
 361         * correctly updates these values, even for 16-bit defaults.
 362         */
 363        seg_defs = insn_get_code_seg_params(regs);
 364        if (seg_defs == -EINVAL)
 365                return false;
 366
 367        insn.addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
 368        insn.opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
 369
 370        insn_get_length(&insn);
 371        if (nr_copied < insn.length)
 372                return false;
 373
 374        umip_inst = identify_insn(&insn);
 375        if (umip_inst < 0)
 376                return false;
 377
 378        umip_pr_warning(regs, "%s instruction cannot be used by applications.\n",
 379                        umip_insns[umip_inst]);
 380
 381        /* Do not emulate SLDT, STR or user long mode processes. */
 382        if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs))
 383                return false;
 384
 385        umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n");
 386
 387        if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
 388                return false;
 389
 390        /*
 391         * If operand is a register, write result to the copy of the register
 392         * value that was pushed to the stack when entering into kernel mode.
 393         * Upon exit, the value we write will be restored to the actual hardware
 394         * register.
 395         */
 396        if (X86_MODRM_MOD(insn.modrm.value) == 3) {
 397                reg_offset = insn_get_modrm_rm_off(&insn, regs);
 398
 399                /*
 400                 * Negative values are usually errors. In memory addressing,
 401                 * the exception is -EDOM. Since we expect a register operand,
 402                 * all negative values are errors.
 403                 */
 404                if (reg_offset < 0)
 405                        return false;
 406
 407                reg_addr = (unsigned long *)((unsigned long)regs + reg_offset);
 408                memcpy(reg_addr, dummy_data, dummy_data_size);
 409        } else {
 410                uaddr = insn_get_addr_ref(&insn, regs);
 411                if ((unsigned long)uaddr == -1L)
 412                        return false;
 413
 414                nr_copied = copy_to_user(uaddr, dummy_data, dummy_data_size);
 415                if (nr_copied  > 0) {
 416                        /*
 417                         * If copy fails, send a signal and tell caller that
 418                         * fault was fixed up.
 419                         */
 420                        force_sig_info_umip_fault(uaddr, regs);
 421                        return true;
 422                }
 423        }
 424
 425        /* increase IP to let the program keep going */
 426        regs->ip += insn.length;
 427        return true;
 428}
 429