linux/arch/x86/kernel/vm86_32.c
/*
 *  Copyright (C) 1994  Linus Torvalds
 *
 *  29 dec 2001 - Fixed oopses caused by unchecked access to the vm86
 *                stack - Manfred Spraul <manfred@colorfullife.com>
 *
 *  22 mar 2002 - Manfred detected the stackfaults, but didn't handle
 *                them correctly. Now the emulation will be in a
 *                consistent state after stackfaults - Kasper Dupont
 *                <kasperd@daimi.au.dk>
 *
 *  22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont
 *                <kasperd@daimi.au.dk>
 *
 *  ?? ??? 2002 - Fixed premature returns from handle_vm86_fault
 *                caused by Kasper Dupont's changes - Stas Sergeev
 *
 *   4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes.
 *                Kasper Dupont <kasperd@daimi.au.dk>
 *
 *   9 apr 2002 - Changed syntax of macros in handle_vm86_fault.
 *                Kasper Dupont <kasperd@daimi.au.dk>
 *
 *   9 apr 2002 - Changed stack access macros to jump to a label
 *                instead of returning to userspace. This simplifies
 *                do_int, and is needed by handle_vm86_fault. Kasper
 *                Dupont <kasperd@daimi.au.dk>
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
#include <linux/audit.h>
#include <linux/stddef.h>

#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/tlbflush.h>
#include <asm/irq.h>

/*
 * Known problems:
 *
 * Interrupt handling is not guaranteed:
 * - a real x86 will disable all interrupts for one instruction
 *   after a "mov ss,xx" to make stack handling atomic even without
 *   the 'lss' instruction. We can't guarantee this in v86 mode,
 *   as the next instruction might result in a page fault or similar.
 * - a real x86 will have interrupts disabled for one instruction
 *   past the 'sti' that enables them. We don't bother with all the
 *   details yet.
 *
 * Let's hope these problems do not actually matter for anything.
 */


#define KVM86   ((struct kernel_vm86_struct *)regs)
#define VMPI    KVM86->vm86plus


/*
 * 8- and 16-bit register defines..
 */
#define AL(regs)        (((unsigned char *)&((regs)->pt.ax))[0])
#define AH(regs)        (((unsigned char *)&((regs)->pt.ax))[1])
#define IP(regs)        (*(unsigned short *)&((regs)->pt.ip))
#define SP(regs)        (*(unsigned short *)&((regs)->pt.sp))

/*
 * virtual flags (16 and 32-bit versions)
 */
#define VFLAGS  (*(unsigned short *)&(current->thread.v86flags))
#define VEFLAGS (current->thread.v86flags)

#define set_flags(X, new, mask) \
((X) = ((X) & ~(mask)) | ((new) & (mask)))

#define SAFE_MASK       (0xDD5)
#define RETURN_MASK     (0xDFF)
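
/*
 * A worked example (annotation added here, not in the original source):
 * set_flags(X, new, mask) keeps the bits of X outside 'mask' and takes
 * the bits of 'new' inside 'mask'.  With X = 0x0246, new = 0x0083 and
 * mask = SAFE_MASK (0xDD5):
 *
 *      X & ~mask  = 0x0202   (IF and the always-set bit 1 survive from X)
 *      new & mask = 0x0081   (SF and CF are taken from the new value)
 *      result     = 0x0283
 *
 * do_sys_vm86() below uses the same masking so that a vm86 task controls
 * only the "safe" flag bits, while IF, IOPL, NT and VM stay under kernel
 * control.
 */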

/* convert kernel_vm86_regs to vm86_regs */
static int copy_vm86_regs_to_user(struct vm86_regs __user *user,
                                  const struct kernel_vm86_regs *regs)
{
        int ret = 0;

        /*
         * kernel_vm86_regs is missing gs, so copy everything up to
         * (but not including) orig_eax, and then the rest, including
         * orig_eax.
         */
        ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax));
        ret += copy_to_user(&user->orig_eax, &regs->pt.orig_ax,
                            sizeof(struct kernel_vm86_regs) -
                            offsetof(struct kernel_vm86_regs, pt.orig_ax));

        return ret;
}

/* convert vm86_regs to kernel_vm86_regs */
static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
                                    const struct vm86_regs __user *user,
                                    unsigned extra)
{
        int ret = 0;

        /* copy ax-fs inclusive */
        ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_ax));
        /* copy orig_ax-__gsh plus extra */
        ret += copy_from_user(&regs->pt.orig_ax, &user->orig_eax,
                              sizeof(struct kernel_vm86_regs) -
                              offsetof(struct kernel_vm86_regs, pt.orig_ax) +
                              extra);
        return ret;
}

struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
{
        struct tss_struct *tss;
        struct pt_regs *ret;
        unsigned long tmp;

        /*
         * This gets called from entry.S with interrupts disabled, but
         * from process context. Enable interrupts here, before trying
         * to access user space.
         */
        local_irq_enable();

        if (!current->thread.vm86_info) {
                pr_alert("no vm86_info: BAD\n");
                do_exit(SIGSEGV);
        }
        set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask);
        tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs, regs);
        tmp += put_user(current->thread.screen_bitmap, &current->thread.vm86_info->screen_bitmap);
        if (tmp) {
                pr_alert("could not access userspace vm86_info\n");
                do_exit(SIGSEGV);
        }

        tss = &per_cpu(init_tss, get_cpu());
        current->thread.sp0 = current->thread.saved_sp0;
        current->thread.sysenter_cs = __KERNEL_CS;
        load_sp0(tss, &current->thread);
        current->thread.saved_sp0 = 0;
        put_cpu();

        ret = KVM86->regs32;

        ret->fs = current->thread.saved_fs;
        set_user_gs(ret, current->thread.saved_gs);

        return ret;
}

static void mark_screen_rdonly(struct mm_struct *mm)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        spinlock_t *ptl;
        int i;

        down_write(&mm->mmap_sem);
        pgd = pgd_offset(mm, 0xA0000);
        if (pgd_none_or_clear_bad(pgd))
                goto out;
        pud = pud_offset(pgd, 0xA0000);
        if (pud_none_or_clear_bad(pud))
                goto out;
        pmd = pmd_offset(pud, 0xA0000);
        split_huge_page_pmd_mm(mm, 0xA0000, pmd);
        if (pmd_none_or_clear_bad(pmd))
                goto out;
        pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
        for (i = 0; i < 32; i++) {
                if (pte_present(*pte))
                        set_pte(pte, pte_wrprotect(*pte));
                pte++;
        }
        pte_unmap_unlock(pte, ptl);
out:
        up_write(&mm->mmap_sem);
        flush_tlb();
}



static int do_vm86_irq_handling(int subfunction, int irqnumber);
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);

SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, v86)
{
        struct kernel_vm86_struct info; /* declare this _on top_,
                                         * this avoids wasting stack space.
                                         * It remains on the stack until we
                                         * return to 32 bit user space.
                                         */
        struct task_struct *tsk = current;
        int tmp;

        if (tsk->thread.saved_sp0)
                return -EPERM;
        tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
                                       offsetof(struct kernel_vm86_struct, vm86plus) -
                                       sizeof(info.regs));
        if (tmp)
                return -EFAULT;
        memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
        info.regs32 = current_pt_regs();
        tsk->thread.vm86_info = v86;
        do_sys_vm86(&info, tsk);
        return 0;       /* we never return here */
}


SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
{
        struct kernel_vm86_struct info; /* declare this _on top_,
                                         * this avoids wasting stack space.
                                         * It remains on the stack until we
                                         * return to 32 bit user space.
                                         */
        struct task_struct *tsk;
        int tmp;
        struct vm86plus_struct __user *v86;

        tsk = current;
        switch (cmd) {
        case VM86_REQUEST_IRQ:
        case VM86_FREE_IRQ:
        case VM86_GET_IRQ_BITS:
        case VM86_GET_AND_RESET_IRQ:
                return do_vm86_irq_handling(cmd, (int)arg);
        case VM86_PLUS_INSTALL_CHECK:
                /*
                 * NOTE: on old vm86 stuff this will return the error
                 *  from access_ok(), because the subfunction is
                 *  interpreted as an (invalid) address to vm86_struct.
                 *  So the installation check works; see the usage
                 *  sketch after this function.
                 */
                return 0;
        }

        /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
        if (tsk->thread.saved_sp0)
                return -EPERM;
        v86 = (struct vm86plus_struct __user *)arg;
        tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
                                       offsetof(struct kernel_vm86_struct, regs32) -
                                       sizeof(info.regs));
        if (tmp)
                return -EFAULT;
        info.regs32 = current_pt_regs();
        info.vm86plus.is_vm86pus = 1;
        tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
        do_sys_vm86(&info, tsk);
        return 0;       /* we never return here */
}
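
/*
 * Usage sketch (annotation added here; hypothetical userspace code, not
 * part of this file).  A DOS emulator typically probes for the vm86plus
 * interface with VM86_PLUS_INSTALL_CHECK and then enters vm86 mode with
 * VM86_ENTER, where 0x1000:0x0000 below is just an example real-mode
 * entry point and a real monitor would also set up the stack, flags and
 * memory below 1 MB first:
 *
 *      struct vm86plus_struct v86;
 *      int status;
 *
 *      memset(&v86, 0, sizeof(v86));
 *      v86.regs.cs  = 0x1000;
 *      v86.regs.eip = 0x0000;
 *      if (syscall(SYS_vm86, VM86_PLUS_INSTALL_CHECK, 0) == 0)
 *              status = syscall(SYS_vm86, VM86_ENTER, &v86);
 *
 * The call "returns" only when the vm86 task faults or is interrupted;
 * the VM86_* value it returns then tells the monitor why (VM86_SIGNAL,
 * VM86_INTx, VM86_TRAP, ...).  On a kernel without vm86plus support the
 * install check fails with the access_ok() error instead of returning 0,
 * which is the trick the NOTE above relies on.
 */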


static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
{
        struct tss_struct *tss;
/*
 * make sure the vm86() system call doesn't try to do anything silly
 */
        info->regs.pt.ds = 0;
        info->regs.pt.es = 0;
        info->regs.pt.fs = 0;
#ifndef CONFIG_X86_32_LAZY_GS
        info->regs.pt.gs = 0;
#endif

/*
 * The flags register is also special: we cannot trust that the user
 * has set it up safely, so this makes sure interrupt etc flags are
 * inherited from protected mode.
 */
        VEFLAGS = info->regs.pt.flags;
        info->regs.pt.flags &= SAFE_MASK;
        info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK;
        info->regs.pt.flags |= X86_VM_MASK;

        switch (info->cpu_type) {
        case CPU_286:
                tsk->thread.v86mask = 0;
                break;
        case CPU_386:
                tsk->thread.v86mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                break;
        case CPU_486:
                tsk->thread.v86mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                break;
        default:
                tsk->thread.v86mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                break;
        }

/*
 * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL)
 */
        info->regs32->ax = VM86_SIGNAL;
        tsk->thread.saved_sp0 = tsk->thread.sp0;
        tsk->thread.saved_fs = info->regs32->fs;
        tsk->thread.saved_gs = get_user_gs(info->regs32);

        tss = &per_cpu(init_tss, get_cpu());
        tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
        if (cpu_has_sep)
                tsk->thread.sysenter_cs = 0;
        load_sp0(tss, &tsk->thread);
        put_cpu();

        tsk->thread.screen_bitmap = info->screen_bitmap;
        if (info->flags & VM86_SCREEN_BITMAP)
                mark_screen_rdonly(tsk->mm);

        /* call __audit_syscall_exit since we do not exit via the normal paths */
#ifdef CONFIG_AUDITSYSCALL
        if (unlikely(current->audit_context))
                __audit_syscall_exit(1, 0);
#endif

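        /*
         * Descriptive note (added annotation): the asm below switches the
         * kernel stack pointer to the kernel_vm86_regs image embedded in
         * 'info' (which lives on this kernel stack frame) and jumps into
         * the common exit path.  resume_userspace then restores that
         * register image, and because the saved flags have X86_VM_MASK
         * set, the IRET at the end of the exit path drops the CPU into
         * virtual-8086 mode.  do_sys_vm86() therefore never returns in
         * the normal way; the task re-enters the kernel later via a GP
         * fault, trap or interrupt.
         */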
        __asm__ __volatile__(
                "movl %0,%%esp\n\t"
                "movl %1,%%ebp\n\t"
#ifdef CONFIG_X86_32_LAZY_GS
                "mov  %2, %%gs\n\t"
#endif
                "jmp resume_userspace"
                : /* no outputs */
                :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));
        /* we never return here */
}

static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval)
{
        struct pt_regs *regs32;

        regs32 = save_v86_state(regs16);
        regs32->ax = retval;
        __asm__ __volatile__("movl %0,%%esp\n\t"
                "movl %1,%%ebp\n\t"
                "jmp resume_userspace"
                : : "r" (regs32), "r" (current_thread_info()));
}

static inline void set_IF(struct kernel_vm86_regs *regs)
{
        VEFLAGS |= X86_EFLAGS_VIF;
        if (VEFLAGS & X86_EFLAGS_VIP)
                return_to_32bit(regs, VM86_STI);
}

static inline void clear_IF(struct kernel_vm86_regs *regs)
{
        VEFLAGS &= ~X86_EFLAGS_VIF;
}

static inline void clear_TF(struct kernel_vm86_regs *regs)
{
        regs->pt.flags &= ~X86_EFLAGS_TF;
}

static inline void clear_AC(struct kernel_vm86_regs *regs)
{
        regs->pt.flags &= ~X86_EFLAGS_AC;
}

/*
 * It is correct to call set_IF(regs) from the set_vflags_*
 * functions. However someone forgot to call clear_IF(regs)
 * in the opposite case.
 * After the command sequence CLI PUSHF STI POPF you should
 * end up with interrupts disabled, but you ended up with
 * interrupts enabled.
 *  ( I was testing my own changes, but the only bug I
 *    could find was in a function I had not changed. )
 * [KD]
 */

static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs)
{
        set_flags(VEFLAGS, flags, current->thread.v86mask);
        set_flags(regs->pt.flags, flags, SAFE_MASK);
        if (flags & X86_EFLAGS_IF)
                set_IF(regs);
        else
                clear_IF(regs);
}

static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs)
{
        set_flags(VFLAGS, flags, current->thread.v86mask);
        set_flags(regs->pt.flags, flags, SAFE_MASK);
        if (flags & X86_EFLAGS_IF)
                set_IF(regs);
        else
                clear_IF(regs);
}

static inline unsigned long get_vflags(struct kernel_vm86_regs *regs)
{
        unsigned long flags = regs->pt.flags & RETURN_MASK;

        if (VEFLAGS & X86_EFLAGS_VIF)
                flags |= X86_EFLAGS_IF;
        flags |= X86_EFLAGS_IOPL;
        return flags | (VEFLAGS & current->thread.v86mask);
}
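
/*
 * Added annotation: get_vflags() builds the flag image the vm86 program
 * sees on PUSHF or INT.  The real IF bit in pt.flags is not what the
 * guest set (the kernel keeps real interrupts enabled while in vm86
 * mode), so the virtual interrupt flag VIF kept in VEFLAGS is reported
 * as IF instead, IOPL reads back as 3, and any bits in thread.v86mask
 * (NT, AC, ID depending on cpu_type) also come from VEFLAGS.  Worked
 * example: with pt.flags = 0x00023202 (VM and real IF set) and VEFLAGS
 * having VIF clear, a vm86 PUSHF stores 0x3002, i.e. IF clear, IOPL = 3.
 */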

static inline int is_revectored(int nr, struct revectored_struct *bitmap)
{
        __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0"
                :"=r" (nr)
                :"m" (*bitmap), "r" (nr));
        return nr;
}
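
/*
 * Added annotation: the asm above is a branch-free bit test.  "btl"
 * copies bit 'nr' of the revectored bitmap into the carry flag, and
 * "sbbl %0,%0" subtracts the register from itself with borrow, which
 * yields 0 if the bit was clear and -1 (all ones) if it was set.  A
 * non-zero return therefore means the interrupt vector is revectored
 * and must be reflected back to the monitor in userspace rather than
 * emulated here.
 */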

#define val_byte(val, n) (((__u8 *)&val)[n])

#define pushb(base, ptr, val, err_label) \
        do { \
                __u8 __val = val; \
                ptr--; \
                if (put_user(__val, base + ptr) < 0) \
                        goto err_label; \
        } while (0)

#define pushw(base, ptr, val, err_label) \
        do { \
                __u16 __val = val; \
                ptr--; \
                if (put_user(val_byte(__val, 1), base + ptr) < 0) \
                        goto err_label; \
                ptr--; \
                if (put_user(val_byte(__val, 0), base + ptr) < 0) \
                        goto err_label; \
        } while (0)

#define pushl(base, ptr, val, err_label) \
        do { \
                __u32 __val = val; \
                ptr--; \
                if (put_user(val_byte(__val, 3), base + ptr) < 0) \
                        goto err_label; \
                ptr--; \
                if (put_user(val_byte(__val, 2), base + ptr) < 0) \
                        goto err_label; \
                ptr--; \
                if (put_user(val_byte(__val, 1), base + ptr) < 0) \
                        goto err_label; \
                ptr--; \
                if (put_user(val_byte(__val, 0), base + ptr) < 0) \
                        goto err_label; \
        } while (0)

#define popb(base, ptr, err_label) \
        ({ \
                __u8 __res; \
                if (get_user(__res, base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                __res; \
        })

#define popw(base, ptr, err_label) \
        ({ \
                __u16 __res; \
                if (get_user(val_byte(__res, 0), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                if (get_user(val_byte(__res, 1), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                __res; \
        })

#define popl(base, ptr, err_label) \
        ({ \
                __u32 __res; \
                if (get_user(val_byte(__res, 0), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                if (get_user(val_byte(__res, 1), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                if (get_user(val_byte(__res, 2), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                if (get_user(val_byte(__res, 3), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                __res; \
        })
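
/*
 * Added annotation: the pushb/pushw/pushl and popb/popw/popl macros
 * transfer the operand one byte at a time on purpose.  'ptr' is the
 * 16-bit offset within the vm86 segment (SP or IP), so incrementing and
 * decrementing it wraps around at 64K exactly like a real-mode stack
 * pointer would, while 'base' is the flat user address of the segment
 * (segment << 4).  Each byte goes through put_user()/get_user(), and a
 * fault jumps to 'err_label', which lets do_int() and
 * handle_vm86_fault() recover cleanly from stack faults instead of
 * oopsing.
 */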

/* There are so many possible reasons for this function to return
 * VM86_INTx, so adding another doesn't bother me. We can expect
 * userspace programs to be able to handle it. (Getting a problem
 * in userspace is always better than an Oops anyway.) [KD]
 */
static void do_int(struct kernel_vm86_regs *regs, int i,
    unsigned char __user *ssp, unsigned short sp)
{
        unsigned long __user *intr_ptr;
        unsigned long segoffs;

        if (regs->pt.cs == BIOSSEG)
                goto cannot_handle;
        if (is_revectored(i, &KVM86->int_revectored))
                goto cannot_handle;
        if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored))
                goto cannot_handle;
        intr_ptr = (unsigned long __user *) (i << 2);
        if (get_user(segoffs, intr_ptr))
                goto cannot_handle;
        if ((segoffs >> 16) == BIOSSEG)
                goto cannot_handle;
        pushw(ssp, sp, get_vflags(regs), cannot_handle);
        pushw(ssp, sp, regs->pt.cs, cannot_handle);
        pushw(ssp, sp, IP(regs), cannot_handle);
        regs->pt.cs = segoffs >> 16;
        SP(regs) -= 6;
        IP(regs) = segoffs & 0xffff;
        clear_TF(regs);
        clear_IF(regs);
        clear_AC(regs);
        return;

cannot_handle:
        return_to_32bit(regs, VM86_INTx + (i << 8));
}
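
/*
 * Added annotation: do_int() emulates what the CPU itself would do for a
 * software interrupt in real mode: it reads the 4-byte segment:offset
 * vector for INT i from the real-mode IVT at linear address i * 4,
 * pushes FLAGS (as seen by the guest), CS and IP on the vm86 stack, and
 * transfers control to the handler with TF, IF and AC cleared.  Vectors
 * that are revectored, or that point into (or are issued from) the BIOS
 * segment, are not emulated; they are bounced back to the 32-bit monitor
 * as VM86_INTx so it can decide what to do.
 */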

int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
{
        if (VMPI.is_vm86pus) {
                if ((trapno == 3) || (trapno == 1)) {
                        KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
                        /* setting this flag forces the code in entry_32.S to
                           the path where we call save_v86_state() and change
                           the stack pointer to KVM86->regs32 */
                        set_thread_flag(TIF_NOTIFY_RESUME);
                        return 0;
                }
                do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
                return 0;
        }
        if (trapno != 1)
                return 1; /* we let this be handled by the calling routine */
        current->thread.trap_nr = trapno;
        current->thread.error_code = error_code;
        force_sig(SIGTRAP, current);
        return 0;
}

void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
{
        unsigned char opcode;
        unsigned char __user *csp;
        unsigned char __user *ssp;
        unsigned short ip, sp, orig_flags;
        int data32, pref_done;

#define CHECK_IF_IN_TRAP \
        if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \
                newflags |= X86_EFLAGS_TF
#define VM86_FAULT_RETURN do { \
        if (VMPI.force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \
                return_to_32bit(regs, VM86_PICRETURN); \
        if (orig_flags & X86_EFLAGS_TF) \
                handle_vm86_trap(regs, 0, 1); \
        return; } while (0)

        orig_flags = *(unsigned short *)&regs->pt.flags;

        csp = (unsigned char __user *) (regs->pt.cs << 4);
        ssp = (unsigned char __user *) (regs->pt.ss << 4);
        sp = SP(regs);
        ip = IP(regs);

        data32 = 0;
        pref_done = 0;
        do {
                switch (opcode = popb(csp, ip, simulate_sigsegv)) {
                case 0x66:      /* 32-bit data */     data32 = 1; break;
                case 0x67:      /* 32-bit address */  break;
                case 0x2e:      /* CS */              break;
                case 0x3e:      /* DS */              break;
                case 0x26:      /* ES */              break;
                case 0x36:      /* SS */              break;
                case 0x65:      /* GS */              break;
                case 0x64:      /* FS */              break;
                case 0xf2:      /* repnz */           break;
                case 0xf3:      /* rep */             break;
                default: pref_done = 1;
                }
        } while (!pref_done);

        switch (opcode) {

        /* pushf */
        case 0x9c:
                if (data32) {
                        pushl(ssp, sp, get_vflags(regs), simulate_sigsegv);
                        SP(regs) -= 4;
                } else {
                        pushw(ssp, sp, get_vflags(regs), simulate_sigsegv);
                        SP(regs) -= 2;
                }
                IP(regs) = ip;
                VM86_FAULT_RETURN;

        /* popf */
        case 0x9d:
                {
                unsigned long newflags;
                if (data32) {
                        newflags = popl(ssp, sp, simulate_sigsegv);
                        SP(regs) += 4;
                } else {
                        newflags = popw(ssp, sp, simulate_sigsegv);
                        SP(regs) += 2;
                }
                IP(regs) = ip;
                CHECK_IF_IN_TRAP;
                if (data32)
                        set_vflags_long(newflags, regs);
                else
                        set_vflags_short(newflags, regs);

                VM86_FAULT_RETURN;
                }

        /* int xx */
        case 0xcd: {
                int intno = popb(csp, ip, simulate_sigsegv);
                IP(regs) = ip;
                if (VMPI.vm86dbg_active) {
                        if ((1 << (intno & 7)) & VMPI.vm86dbg_intxxtab[intno >> 3])
                                return_to_32bit(regs, VM86_INTx + (intno << 8));
                }
                do_int(regs, intno, ssp, sp);
                return;
        }

        /* iret */
        case 0xcf:
                {
                unsigned long newip;
                unsigned long newcs;
                unsigned long newflags;
                if (data32) {
                        newip = popl(ssp, sp, simulate_sigsegv);
                        newcs = popl(ssp, sp, simulate_sigsegv);
                        newflags = popl(ssp, sp, simulate_sigsegv);
                        SP(regs) += 12;
                } else {
                        newip = popw(ssp, sp, simulate_sigsegv);
                        newcs = popw(ssp, sp, simulate_sigsegv);
                        newflags = popw(ssp, sp, simulate_sigsegv);
                        SP(regs) += 6;
                }
                IP(regs) = newip;
                regs->pt.cs = newcs;
                CHECK_IF_IN_TRAP;
                if (data32) {
                        set_vflags_long(newflags, regs);
                } else {
                        set_vflags_short(newflags, regs);
                }
                VM86_FAULT_RETURN;
                }

        /* cli */
        case 0xfa:
                IP(regs) = ip;
                clear_IF(regs);
                VM86_FAULT_RETURN;

        /* sti */
        /*
         * Damn. This is incorrect: the 'sti' instruction should actually
         * enable interrupts after the /next/ instruction. Not good.
         *
         * Probably needs some horsing around with the TF flag. Aiee..
         */
        case 0xfb:
                IP(regs) = ip;
                set_IF(regs);
                VM86_FAULT_RETURN;

        default:
                return_to_32bit(regs, VM86_UNKNOWN);
        }

        return;

simulate_sigsegv:
        /* FIXME: After a long discussion with Stas we finally
         *        agreed that this is wrong. Here we should
         *        really send a SIGSEGV to the user program.
         *        But how do we create the correct context? We
         *        are inside a general protection fault handler
         *        and have just returned from a page fault handler.
         *        The correct context for the signal handler
         *        should be a mixture of the two, but how do we
         *        get the information? [KD]
         */
        return_to_32bit(regs, VM86_UNKNOWN);
}

/* ---------------- vm86 special IRQ passing stuff ----------------- */

#define VM86_IRQNAME            "vm86irq"

static struct vm86_irqs {
        struct task_struct *tsk;
        int sig;
} vm86_irqs[16];

static DEFINE_SPINLOCK(irqbits_lock);
static int irqbits;

#define ALLOWED_SIGS (1 /* 0 = don't send a signal */ \
        | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO) | (1 << SIGURG) \
        | (1 << SIGUNUSED))

static irqreturn_t irq_handler(int intno, void *dev_id)
{
        int irq_bit;
        unsigned long flags;

        spin_lock_irqsave(&irqbits_lock, flags);
        irq_bit = 1 << intno;
        if ((irqbits & irq_bit) || !vm86_irqs[intno].tsk)
                goto out;
        irqbits |= irq_bit;
        if (vm86_irqs[intno].sig)
                send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1);
        /*
         * IRQ will be re-enabled when user asks for the irq (whether
         * polling or as a result of the signal)
         */
        disable_irq_nosync(intno);
        spin_unlock_irqrestore(&irqbits_lock, flags);
        return IRQ_HANDLED;

out:
        spin_unlock_irqrestore(&irqbits_lock, flags);
        return IRQ_NONE;
}

static inline void free_vm86_irq(int irqnumber)
{
        unsigned long flags;

        free_irq(irqnumber, NULL);
        vm86_irqs[irqnumber].tsk = NULL;

        spin_lock_irqsave(&irqbits_lock, flags);
        irqbits &= ~(1 << irqnumber);
        spin_unlock_irqrestore(&irqbits_lock, flags);
}

void release_vm86_irqs(struct task_struct *task)
{
        int i;

        for (i = FIRST_VM86_IRQ; i <= LAST_VM86_IRQ; i++)
                if (vm86_irqs[i].tsk == task)
                        free_vm86_irq(i);
}

static inline int get_and_reset_irq(int irqnumber)
{
        int bit;
        unsigned long flags;
        int ret = 0;

        if (invalid_vm86_irq(irqnumber))
                return 0;
        if (vm86_irqs[irqnumber].tsk != current)
                return 0;
        spin_lock_irqsave(&irqbits_lock, flags);
        bit = irqbits & (1 << irqnumber);
        irqbits &= ~bit;
        if (bit) {
                enable_irq(irqnumber);
                ret = 1;
        }

        spin_unlock_irqrestore(&irqbits_lock, flags);
        return ret;
}


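/*
 * Added annotation: for VM86_REQUEST_IRQ the 'irqnumber' argument is an
 * encoded pair, (signal << 8) | irq.  The chosen signal (or 0 for pure
 * polling) is delivered to the task when the IRQ fires, the IRQ is then
 * left disabled, and the task collects and re-enables it with
 * VM86_GET_AND_RESET_IRQ.  A hypothetical call from the monitor might
 * look like vm86(VM86_REQUEST_IRQ, (SIGIO << 8) | 5) to be notified of
 * IRQ 5 via SIGIO.
 */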
static int do_vm86_irq_handling(int subfunction, int irqnumber)
{
        int ret;

        switch (subfunction) {
        case VM86_GET_AND_RESET_IRQ:
                return get_and_reset_irq(irqnumber);
        case VM86_GET_IRQ_BITS:
                return irqbits;
        case VM86_REQUEST_IRQ: {
                int sig = irqnumber >> 8;
                int irq = irqnumber & 255;

                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
                if (!((1 << sig) & ALLOWED_SIGS))
                        return -EPERM;
                if (invalid_vm86_irq(irq))
                        return -EPERM;
                if (vm86_irqs[irq].tsk)
                        return -EPERM;
                ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL);
                if (ret)
                        return ret;
                vm86_irqs[irq].sig = sig;
                vm86_irqs[irq].tsk = current;
                return irq;
        }
        case VM86_FREE_IRQ:
                if (invalid_vm86_irq(irqnumber))
                        return -EPERM;
                if (!vm86_irqs[irqnumber].tsk)
                        return 0;
                if (vm86_irqs[irqnumber].tsk != current)
                        return -EPERM;
                free_vm86_irq(irqnumber);
                return 0;
        }
        return -EINVAL;
}