linux/arch/x86/kernel/vm86_32.c
// SPDX-License-Identifier: GPL-2.0
/*
 *  Copyright (C) 1994  Linus Torvalds
 *
 *  29 dec 2001 - Fixed oopses caused by unchecked access to the vm86
 *                stack - Manfred Spraul <manfred@colorfullife.com>
 *
 *  22 mar 2002 - Manfred detected the stackfaults, but didn't handle
 *                them correctly. Now the emulation will be in a
 *                consistent state after stackfaults - Kasper Dupont
 *                <kasperd@daimi.au.dk>
 *
 *  22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont
 *                <kasperd@daimi.au.dk>
 *
 *  ?? ??? 2002 - Fixed premature returns from handle_vm86_fault
 *                caused by Kasper Dupont's changes - Stas Sergeev
 *
 *   4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes.
 *                Kasper Dupont <kasperd@daimi.au.dk>
 *
 *   9 apr 2002 - Changed syntax of macros in handle_vm86_fault.
 *                Kasper Dupont <kasperd@daimi.au.dk>
 *
 *   9 apr 2002 - Changed stack access macros to jump to a label
 *                instead of returning to userspace. This simplifies
 *                do_int, and is needed by handle_vm86_fault. Kasper
 *                Dupont <kasperd@daimi.au.dk>
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
#include <linux/audit.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/security.h>

#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/tlbflush.h>
#include <asm/irq.h>
#include <asm/traps.h>
#include <asm/vm86.h>
#include <asm/switch_to.h>

/*
 * Known problems:
 *
 * Interrupt handling is not guaranteed:
 * - a real x86 will disable all interrupts for one instruction
 *   after a "mov ss,xx" to make stack handling atomic even without
 *   the 'lss' instruction. We can't guarantee this in v86 mode,
 *   as the next instruction might result in a page fault or similar.
 * - a real x86 will have interrupts disabled for one instruction
 *   past the 'sti' that enables them. We don't bother with all the
 *   details yet.
 *
 * Let's hope these problems do not actually matter for anything.
 */


/*
 * 8- and 16-bit register defines..
 */
#define AL(regs)        (((unsigned char *)&((regs)->pt.ax))[0])
#define AH(regs)        (((unsigned char *)&((regs)->pt.ax))[1])
#define IP(regs)        (*(unsigned short *)&((regs)->pt.ip))
#define SP(regs)        (*(unsigned short *)&((regs)->pt.sp))
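
/*
 * On little-endian x86, byte 0 of the saved 32-bit ax slot is AL and
 * byte 1 is AH, and the low word of ip/sp holds the 16-bit IP/SP, so
 * these defines alias the real-mode views of the saved registers.
 */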

/*
 * virtual flags (16 and 32-bit versions)
 */
#define VFLAGS  (*(unsigned short *)&(current->thread.vm86->veflags))
#define VEFLAGS (current->thread.vm86->veflags)

#define set_flags(X, new, mask) \
((X) = ((X) & ~(mask)) | ((new) & (mask)))

#define SAFE_MASK       (0xDD5)
#define RETURN_MASK     (0xDFF)
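
/*
 * SAFE_MASK (0xDD5) covers CF|PF|AF|ZF|SF|TF|DF|OF: the flags that
 * vm86 userspace may change directly.  IF, IOPL, NT etc. stay under
 * kernel control.  RETURN_MASK (0xDFF) additionally keeps the
 * reserved bits 1, 3 and 5 but still filters IF, which get_vflags()
 * synthesizes from the virtual interrupt flag instead.
 */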

void save_v86_state(struct kernel_vm86_regs *regs, int retval)
{
        struct task_struct *tsk = current;
        struct vm86plus_struct __user *user;
        struct vm86 *vm86 = current->thread.vm86;

        /*
         * This gets called from entry.S with interrupts disabled, but
         * from process context. Enable interrupts here, before trying
         * to access user space.
         */
        local_irq_enable();

        if (!vm86 || !vm86->user_vm86) {
                pr_alert("no user_vm86: BAD\n");
                do_exit(SIGSEGV);
        }
        set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
        user = vm86->user_vm86;

        if (!user_access_begin(user, vm86->vm86plus.is_vm86pus ?
                       sizeof(struct vm86plus_struct) :
                       sizeof(struct vm86_struct)))
                goto Efault;

        unsafe_put_user(regs->pt.bx, &user->regs.ebx, Efault_end);
        unsafe_put_user(regs->pt.cx, &user->regs.ecx, Efault_end);
        unsafe_put_user(regs->pt.dx, &user->regs.edx, Efault_end);
        unsafe_put_user(regs->pt.si, &user->regs.esi, Efault_end);
        unsafe_put_user(regs->pt.di, &user->regs.edi, Efault_end);
        unsafe_put_user(regs->pt.bp, &user->regs.ebp, Efault_end);
        unsafe_put_user(regs->pt.ax, &user->regs.eax, Efault_end);
        unsafe_put_user(regs->pt.ip, &user->regs.eip, Efault_end);
        unsafe_put_user(regs->pt.cs, &user->regs.cs, Efault_end);
        unsafe_put_user(regs->pt.flags, &user->regs.eflags, Efault_end);
        unsafe_put_user(regs->pt.sp, &user->regs.esp, Efault_end);
        unsafe_put_user(regs->pt.ss, &user->regs.ss, Efault_end);
        unsafe_put_user(regs->es, &user->regs.es, Efault_end);
        unsafe_put_user(regs->ds, &user->regs.ds, Efault_end);
        unsafe_put_user(regs->fs, &user->regs.fs, Efault_end);
        unsafe_put_user(regs->gs, &user->regs.gs, Efault_end);

        /*
         * Don't write screen_bitmap in case some user had a value there
         * and expected it to remain unchanged.
         */

        user_access_end();

        preempt_disable();
        tsk->thread.sp0 = vm86->saved_sp0;
        tsk->thread.sysenter_cs = __KERNEL_CS;
        update_task_stack(tsk);
        refresh_sysenter_cs(&tsk->thread);
        vm86->saved_sp0 = 0;
        preempt_enable();

        memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));

        lazy_load_gs(vm86->regs32.gs);

        regs->pt.ax = retval;
        return;

Efault_end:
        user_access_end();
Efault:
        pr_alert("could not access userspace vm86 info\n");
        do_exit(SIGSEGV);
}

static int do_vm86_irq_handling(int subfunction, int irqnumber);
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus);

SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, user_vm86)
{
        return do_sys_vm86((struct vm86plus_struct __user *) user_vm86, false);
}


SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
{
        switch (cmd) {
        case VM86_REQUEST_IRQ:
        case VM86_FREE_IRQ:
        case VM86_GET_IRQ_BITS:
        case VM86_GET_AND_RESET_IRQ:
                return do_vm86_irq_handling(cmd, (int)arg);
        case VM86_PLUS_INSTALL_CHECK:
                /*
                 * NOTE: on old vm86 stuff this will return the error
                 *  from access_ok(), because the subfunction is
                 *  interpreted as an (invalid) address of a vm86_struct.
                 *  So the installation check works.
                 */
                return 0;
        }

        /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
        return do_sys_vm86((struct vm86plus_struct __user *) arg, true);
}


static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
{
        struct task_struct *tsk = current;
        struct vm86 *vm86 = tsk->thread.vm86;
        struct kernel_vm86_regs vm86regs;
        struct pt_regs *regs = current_pt_regs();
        unsigned long err = 0;
        struct vm86_struct v;

        err = security_mmap_addr(0);
        if (err) {
                /*
                 * vm86 cannot virtualize the address space, so vm86 users
                 * need to manage the low 1MB themselves using mmap.  Given
                 * that BIOS places important data in the first page, vm86
                 * is essentially useless if mmap_min_addr != 0.  DOSEMU,
                 * for example, won't even bother trying to use vm86 if it
                 * can't map a page at virtual address 0.
                 *
                 * To reduce the available kernel attack surface, simply
                 * disallow vm86(old) for users who cannot mmap at va 0.
                 *
                 * The implementation of security_mmap_addr will allow
                 * suitably privileged users to map va 0 even if
                 * vm.mmap_min_addr is set above 0, and we want this
                 * behavior for vm86 as well, as it ensures that legacy
                 * tools like vbetool will not fail just because of
                 * vm.mmap_min_addr.
                 */
                pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d).  Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n",
                             current->comm, task_pid_nr(current),
                             from_kuid_munged(&init_user_ns, current_uid()));
                return -EPERM;
        }

        if (!vm86) {
                if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL)))
                        return -ENOMEM;
                tsk->thread.vm86 = vm86;
        }
        if (vm86->saved_sp0)
                return -EPERM;

        if (copy_from_user(&v, user_vm86,
                        offsetof(struct vm86_struct, int_revectored)))
                return -EFAULT;

        /* VM86_SCREEN_BITMAP had numerous bugs and appears to have no users. */
        if (v.flags & VM86_SCREEN_BITMAP) {
                char comm[TASK_COMM_LEN];

                pr_info_once("vm86: '%s' uses VM86_SCREEN_BITMAP, which is no longer supported\n", get_task_comm(comm, current));
                return -EINVAL;
        }

        memset(&vm86regs, 0, sizeof(vm86regs));

        vm86regs.pt.bx = v.regs.ebx;
        vm86regs.pt.cx = v.regs.ecx;
        vm86regs.pt.dx = v.regs.edx;
        vm86regs.pt.si = v.regs.esi;
        vm86regs.pt.di = v.regs.edi;
        vm86regs.pt.bp = v.regs.ebp;
        vm86regs.pt.ax = v.regs.eax;
        vm86regs.pt.ip = v.regs.eip;
        vm86regs.pt.cs = v.regs.cs;
        vm86regs.pt.flags = v.regs.eflags;
        vm86regs.pt.sp = v.regs.esp;
        vm86regs.pt.ss = v.regs.ss;
        vm86regs.es = v.regs.es;
        vm86regs.ds = v.regs.ds;
        vm86regs.fs = v.regs.fs;
        vm86regs.gs = v.regs.gs;

        vm86->flags = v.flags;
        vm86->cpu_type = v.cpu_type;

        if (copy_from_user(&vm86->int_revectored,
                           &user_vm86->int_revectored,
                           sizeof(struct revectored_struct)))
                return -EFAULT;
        if (copy_from_user(&vm86->int21_revectored,
                           &user_vm86->int21_revectored,
                           sizeof(struct revectored_struct)))
                return -EFAULT;
        if (plus) {
                if (copy_from_user(&vm86->vm86plus, &user_vm86->vm86plus,
                                   sizeof(struct vm86plus_info_struct)))
                        return -EFAULT;
                vm86->vm86plus.is_vm86pus = 1;
        } else
                memset(&vm86->vm86plus, 0,
                       sizeof(struct vm86plus_info_struct));

        memcpy(&vm86->regs32, regs, sizeof(struct pt_regs));
        vm86->user_vm86 = user_vm86;

/*
 * The flags register is also special: we cannot trust that the user
 * has set it up safely, so this makes sure interrupt etc flags are
 * inherited from protected mode.
 */
        VEFLAGS = vm86regs.pt.flags;
        vm86regs.pt.flags &= SAFE_MASK;
        vm86regs.pt.flags |= regs->flags & ~SAFE_MASK;
        vm86regs.pt.flags |= X86_VM_MASK;
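        /*
         * Note: X86_VM_MASK sets the EFLAGS VM bit, so the eventual
         * IRET back to user space switches the CPU into virtual-8086
         * mode rather than ordinary protected-mode user code.
         */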

        vm86regs.pt.orig_ax = regs->orig_ax;

        switch (vm86->cpu_type) {
        case CPU_286:
                vm86->veflags_mask = 0;
                break;
        case CPU_386:
                vm86->veflags_mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                break;
        case CPU_486:
                vm86->veflags_mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                break;
        default:
                vm86->veflags_mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                break;
        }

/*
 * Save old state
 */
        vm86->saved_sp0 = tsk->thread.sp0;
        lazy_save_gs(vm86->regs32.gs);

        /* make room for real-mode segments */
        preempt_disable();
        tsk->thread.sp0 += 16;
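        /*
         * An exception taken from vm86 mode pushes the four extra
         * segment registers (ES, DS, FS, GS) on top of the usual IRET
         * frame; bumping sp0 by 16 bytes (into the padding reserved at
         * the top of the kernel stack) keeps pt_regs at its normal
         * offset.
         */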

        if (boot_cpu_has(X86_FEATURE_SEP)) {
                tsk->thread.sysenter_cs = 0;
                refresh_sysenter_cs(&tsk->thread);
        }

        update_task_stack(tsk);
        preempt_enable();

        memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
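        /*
         * regs now holds the vm86 register image; when this syscall
         * returns, the exit path restores it and IRETs into vm86 mode.
         * The real return to the caller happens much later, through
         * save_v86_state(), so the ax returned here is simply vm86's
         * eax preserved across the transition.
         */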
        return regs->ax;
}

static inline void set_IF(struct kernel_vm86_regs *regs)
{
        VEFLAGS |= X86_EFLAGS_VIF;
}

static inline void clear_IF(struct kernel_vm86_regs *regs)
{
        VEFLAGS &= ~X86_EFLAGS_VIF;
}

static inline void clear_TF(struct kernel_vm86_regs *regs)
{
        regs->pt.flags &= ~X86_EFLAGS_TF;
}

static inline void clear_AC(struct kernel_vm86_regs *regs)
{
        regs->pt.flags &= ~X86_EFLAGS_AC;
}

/*
 * It is correct to call set_IF(regs) from the set_vflags_*
 * functions. However someone forgot to call clear_IF(regs)
 * in the opposite case.
 * After the command sequence CLI PUSHF STI POPF you should
 * end up with interrupts disabled, but you would end up with
 * interrupts enabled.
 *  ( I was testing my own changes, but the only bug I
 *    could find was in a function I had not changed. )
 * [KD]
 */

static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs)
{
        set_flags(VEFLAGS, flags, current->thread.vm86->veflags_mask);
        set_flags(regs->pt.flags, flags, SAFE_MASK);
        if (flags & X86_EFLAGS_IF)
                set_IF(regs);
        else
                clear_IF(regs);
}

static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs)
{
        set_flags(VFLAGS, flags, current->thread.vm86->veflags_mask);
        set_flags(regs->pt.flags, flags, SAFE_MASK);
        if (flags & X86_EFLAGS_IF)
                set_IF(regs);
        else
                clear_IF(regs);
}

static inline unsigned long get_vflags(struct kernel_vm86_regs *regs)
{
        unsigned long flags = regs->pt.flags & RETURN_MASK;

        if (VEFLAGS & X86_EFLAGS_VIF)
                flags |= X86_EFLAGS_IF;
        flags |= X86_EFLAGS_IOPL;
        return flags | (VEFLAGS & current->thread.vm86->veflags_mask);
}

static inline int is_revectored(int nr, struct revectored_struct *bitmap)
{
        return test_bit(nr, bitmap->__map);
}

#define val_byte(val, n) (((__u8 *)&val)[n])

#define pushb(base, ptr, val, err_label) \
        do { \
                __u8 __val = val; \
                ptr--; \
                if (put_user(__val, base + ptr) < 0) \
                        goto err_label; \
        } while (0)

#define pushw(base, ptr, val, err_label) \
        do { \
                __u16 __val = val; \
                ptr--; \
                if (put_user(val_byte(__val, 1), base + ptr) < 0) \
                        goto err_label; \
                ptr--; \
                if (put_user(val_byte(__val, 0), base + ptr) < 0) \
                        goto err_label; \
        } while (0)

#define pushl(base, ptr, val, err_label) \
        do { \
                __u32 __val = val; \
                ptr--; \
                if (put_user(val_byte(__val, 3), base + ptr) < 0) \
                        goto err_label; \
                ptr--; \
                if (put_user(val_byte(__val, 2), base + ptr) < 0) \
                        goto err_label; \
                ptr--; \
                if (put_user(val_byte(__val, 1), base + ptr) < 0) \
                        goto err_label; \
                ptr--; \
                if (put_user(val_byte(__val, 0), base + ptr) < 0) \
                        goto err_label; \
        } while (0)

#define popb(base, ptr, err_label) \
        ({ \
                __u8 __res; \
                if (get_user(__res, base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                __res; \
        })

#define popw(base, ptr, err_label) \
        ({ \
                __u16 __res; \
                if (get_user(val_byte(__res, 0), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                if (get_user(val_byte(__res, 1), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                __res; \
        })

#define popl(base, ptr, err_label) \
        ({ \
                __u32 __res; \
                if (get_user(val_byte(__res, 0), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                if (get_user(val_byte(__res, 1), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                if (get_user(val_byte(__res, 2), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                if (get_user(val_byte(__res, 3), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                __res; \
        })

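/*
 * The push/pop helpers above access the vm86 stack one byte at a
 * time: 'ptr' is a 16-bit quantity in the callers, so each increment
 * or decrement wraps at 64K and every byte address is recomputed as
 * base + ptr, which models real-mode stack wraparound within the
 * segment.
 */
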
/* There are so many possible reasons for this function to return
 * VM86_INTx that adding another doesn't bother me. We can expect
 * userspace programs to be able to handle it. (Getting a problem
 * in userspace is always better than an Oops anyway.) [KD]
 */
static void do_int(struct kernel_vm86_regs *regs, int i,
    unsigned char __user *ssp, unsigned short sp)
{
        unsigned long __user *intr_ptr;
        unsigned long segoffs;
        struct vm86 *vm86 = current->thread.vm86;

        if (regs->pt.cs == BIOSSEG)
                goto cannot_handle;
        if (is_revectored(i, &vm86->int_revectored))
                goto cannot_handle;
        if (i == 0x21 && is_revectored(AH(regs), &vm86->int21_revectored))
                goto cannot_handle;
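
        /*
         * The real-mode IVT lives at linear address 0 and holds one
         * 4-byte segment:offset vector per interrupt, so entry i sits
         * at i * 4 within the vm86 process's address space.
         */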
        intr_ptr = (unsigned long __user *) (i << 2);
        if (get_user(segoffs, intr_ptr))
                goto cannot_handle;
        if ((segoffs >> 16) == BIOSSEG)
                goto cannot_handle;
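
        /*
         * Emulate the INT: push FLAGS, CS and IP (three 16-bit words)
         * on the vm86 stack.  pushw() only advances the local copy of
         * sp, so the 6-byte adjustment is committed to SP(regs) below.
         */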
        pushw(ssp, sp, get_vflags(regs), cannot_handle);
        pushw(ssp, sp, regs->pt.cs, cannot_handle);
        pushw(ssp, sp, IP(regs), cannot_handle);
        regs->pt.cs = segoffs >> 16;
        SP(regs) -= 6;
        IP(regs) = segoffs & 0xffff;
        clear_TF(regs);
        clear_IF(regs);
        clear_AC(regs);
        return;

cannot_handle:
        save_v86_state(regs, VM86_INTx + (i << 8));
}

int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
{
        struct vm86 *vm86 = current->thread.vm86;

        if (vm86->vm86plus.is_vm86pus) {
                if ((trapno == 3) || (trapno == 1)) {
                        save_v86_state(regs, VM86_TRAP + (trapno << 8));
                        return 0;
                }
                do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
                return 0;
        }
        if (trapno != 1)
                return 1; /* we let this be handled by the calling routine */
        current->thread.trap_nr = trapno;
        current->thread.error_code = error_code;
        force_sig(SIGTRAP);
        return 0;
}

void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
{
        unsigned char opcode;
        unsigned char __user *csp;
        unsigned char __user *ssp;
        unsigned short ip, sp, orig_flags;
        int data32, pref_done;
        struct vm86plus_info_struct *vmpi = &current->thread.vm86->vm86plus;

#define CHECK_IF_IN_TRAP \
        if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \
                newflags |= X86_EFLAGS_TF
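
/*
 * The vm86plus debugger can have a single-step pending while the
 * guest executes POPF or IRET; CHECK_IF_IN_TRAP forces TF back on in
 * the about-to-be-loaded flags so the emulated flag load cannot
 * cancel the pending trap.
 */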

        orig_flags = *(unsigned short *)&regs->pt.flags;

        csp = (unsigned char __user *) (regs->pt.cs << 4);
        ssp = (unsigned char __user *) (regs->pt.ss << 4);
        sp = SP(regs);
        ip = IP(regs);

        data32 = 0;
        pref_done = 0;
        do {
                switch (opcode = popb(csp, ip, simulate_sigsegv)) {
                case 0x66:      /* 32-bit data */     data32 = 1; break;
                case 0x67:      /* 32-bit address */  break;
                case 0x2e:      /* CS */              break;
                case 0x3e:      /* DS */              break;
                case 0x26:      /* ES */              break;
                case 0x36:      /* SS */              break;
                case 0x65:      /* GS */              break;
                case 0x64:      /* FS */              break;
                case 0xf2:      /* repnz */           break;
                case 0xf3:      /* rep */             break;
                default: pref_done = 1;
                }
        } while (!pref_done);

        switch (opcode) {

        /* pushf */
        case 0x9c:
                if (data32) {
                        pushl(ssp, sp, get_vflags(regs), simulate_sigsegv);
                        SP(regs) -= 4;
                } else {
                        pushw(ssp, sp, get_vflags(regs), simulate_sigsegv);
                        SP(regs) -= 2;
                }
                IP(regs) = ip;
                goto vm86_fault_return;

        /* popf */
        case 0x9d:
                {
                unsigned long newflags;
                if (data32) {
                        newflags = popl(ssp, sp, simulate_sigsegv);
                        SP(regs) += 4;
                } else {
                        newflags = popw(ssp, sp, simulate_sigsegv);
                        SP(regs) += 2;
                }
                IP(regs) = ip;
                CHECK_IF_IN_TRAP;
                if (data32)
                        set_vflags_long(newflags, regs);
                else
                        set_vflags_short(newflags, regs);

                goto check_vip;
                }

        /* int xx */
        case 0xcd: {
                int intno = popb(csp, ip, simulate_sigsegv);
                IP(regs) = ip;
                if (vmpi->vm86dbg_active) {
                        if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) {
                                save_v86_state(regs, VM86_INTx + (intno << 8));
                                return;
                        }
                }
                do_int(regs, intno, ssp, sp);
                return;
        }

        /* iret */
        case 0xcf:
                {
                unsigned long newip;
                unsigned long newcs;
                unsigned long newflags;
                if (data32) {
                        newip = popl(ssp, sp, simulate_sigsegv);
                        newcs = popl(ssp, sp, simulate_sigsegv);
                        newflags = popl(ssp, sp, simulate_sigsegv);
                        SP(regs) += 12;
                } else {
                        newip = popw(ssp, sp, simulate_sigsegv);
                        newcs = popw(ssp, sp, simulate_sigsegv);
                        newflags = popw(ssp, sp, simulate_sigsegv);
                        SP(regs) += 6;
                }
                IP(regs) = newip;
                regs->pt.cs = newcs;
                CHECK_IF_IN_TRAP;
                if (data32) {
                        set_vflags_long(newflags, regs);
                } else {
                        set_vflags_short(newflags, regs);
                }
                goto check_vip;
                }

        /* cli */
        case 0xfa:
                IP(regs) = ip;
                clear_IF(regs);
                goto vm86_fault_return;

        /* sti */
        /*
         * Damn. This is incorrect: the 'sti' instruction should actually
         * enable interrupts after the /next/ instruction. Not good.
         *
         * Probably needs some horsing around with the TF flag. Aiee..
         */
        case 0xfb:
                IP(regs) = ip;
                set_IF(regs);
                goto check_vip;

        default:
                save_v86_state(regs, VM86_UNKNOWN);
        }

        return;

check_vip:
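        /*
         * A pending virtual interrupt (VIP) together with a freshly
         * enabled virtual interrupt flag (VIF) means the monitor has
         * an interrupt queued for the guest: return with VM86_STI so
         * it can be delivered now.
         */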
        if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) ==
            (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) {
                save_v86_state(regs, VM86_STI);
                return;
        }

vm86_fault_return:
        if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) {
                save_v86_state(regs, VM86_PICRETURN);
                return;
        }
        if (orig_flags & X86_EFLAGS_TF)
                handle_vm86_trap(regs, 0, X86_TRAP_DB);
        return;

simulate_sigsegv:
        /* FIXME: After a long discussion with Stas we finally
         *        agreed that this is wrong. Here we should
         *        really send a SIGSEGV to the user program.
         *        But how do we create the correct context? We
         *        are inside a general protection fault handler
         *        and have just returned from a page fault handler.
         *        The correct context for the signal handler
         *        should be a mixture of the two, but how do we
         *        get the information? [KD]
         */
        save_v86_state(regs, VM86_UNKNOWN);
}

/* ---------------- vm86 special IRQ passing stuff ----------------- */

#define VM86_IRQNAME            "vm86irq"

static struct vm86_irqs {
        struct task_struct *tsk;
        int sig;
} vm86_irqs[16];

static DEFINE_SPINLOCK(irqbits_lock);
static int irqbits;
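
/*
 * irqbits has one bit set for each vm86 IRQ that has fired but has
 * not yet been collected via VM86_GET_AND_RESET_IRQ; the IRQ line
 * stays disabled until then.
 */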

#define ALLOWED_SIGS (1 /* 0 = don't send a signal */ \
        | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO)  | (1 << SIGURG) \
        | (1 << SIGUNUSED))

static irqreturn_t irq_handler(int intno, void *dev_id)
{
        int irq_bit;
        unsigned long flags;

        spin_lock_irqsave(&irqbits_lock, flags);
        irq_bit = 1 << intno;
        if ((irqbits & irq_bit) || !vm86_irqs[intno].tsk)
                goto out;
        irqbits |= irq_bit;
        if (vm86_irqs[intno].sig)
                send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1);
        /*
         * The IRQ will be re-enabled when the user asks for it again
         * (whether polling or as a result of the signal)
         */
        disable_irq_nosync(intno);
        spin_unlock_irqrestore(&irqbits_lock, flags);
        return IRQ_HANDLED;

out:
        spin_unlock_irqrestore(&irqbits_lock, flags);
        return IRQ_NONE;
}

static inline void free_vm86_irq(int irqnumber)
{
        unsigned long flags;

        free_irq(irqnumber, NULL);
        vm86_irqs[irqnumber].tsk = NULL;

        spin_lock_irqsave(&irqbits_lock, flags);
        irqbits &= ~(1 << irqnumber);
        spin_unlock_irqrestore(&irqbits_lock, flags);
}

void release_vm86_irqs(struct task_struct *task)
{
        int i;
        for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++)
            if (vm86_irqs[i].tsk == task)
                free_vm86_irq(i);
}

static inline int get_and_reset_irq(int irqnumber)
{
        int bit;
        unsigned long flags;
        int ret = 0;

        if (invalid_vm86_irq(irqnumber)) return 0;
        if (vm86_irqs[irqnumber].tsk != current) return 0;
        spin_lock_irqsave(&irqbits_lock, flags);
        bit = irqbits & (1 << irqnumber);
        irqbits &= ~bit;
        if (bit) {
                enable_irq(irqnumber);
                ret = 1;
        }

        spin_unlock_irqrestore(&irqbits_lock, flags);
        return ret;
}


static int do_vm86_irq_handling(int subfunction, int irqnumber)
{
        int ret;
        switch (subfunction) {
                case VM86_GET_AND_RESET_IRQ: {
                        return get_and_reset_irq(irqnumber);
                }
                case VM86_GET_IRQ_BITS: {
                        return irqbits;
                }
                case VM86_REQUEST_IRQ: {
                        int sig = irqnumber >> 8;
                        int irq = irqnumber & 255;
                        if (!capable(CAP_SYS_ADMIN)) return -EPERM;
                        if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM;
                        if (invalid_vm86_irq(irq)) return -EPERM;
                        if (vm86_irqs[irq].tsk) return -EPERM;
                        ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL);
                        if (ret) return ret;
                        vm86_irqs[irq].sig = sig;
                        vm86_irqs[irq].tsk = current;
                        return irq;
                }
                case VM86_FREE_IRQ: {
                        if (invalid_vm86_irq(irqnumber)) return -EPERM;
                        if (!vm86_irqs[irqnumber].tsk) return 0;
                        if (vm86_irqs[irqnumber].tsk != current) return -EPERM;
                        free_vm86_irq(irqnumber);
                        return 0;
                }
        }
        return -EINVAL;
}