qemu/linux-user/i386/cpu_loop.c
<<
>>
Prefs
   1/*
   2 *  qemu user cpu loop
   3 *
   4 *  Copyright (c) 2003-2008 Fabrice Bellard
   5 *
   6 *  This program is free software; you can redistribute it and/or modify
   7 *  it under the terms of the GNU General Public License as published by
   8 *  the Free Software Foundation; either version 2 of the License, or
   9 *  (at your option) any later version.
  10 *
  11 *  This program is distributed in the hope that it will be useful,
  12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 *  GNU General Public License for more details.
  15 *
  16 *  You should have received a copy of the GNU General Public License
  17 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu.h"
  22#include "qemu/timer.h"
  23#include "user-internals.h"
  24#include "cpu_loop-common.h"
  25#include "signal-common.h"
  26#include "user-mmap.h"
  27
  28/***********************************************************/
  29/* CPUX86 core interface */
  30
  31uint64_t cpu_get_tsc(CPUX86State *env)
  32{
  33    return cpu_get_host_ticks();
  34}
  35
  36static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
  37              int flags)
  38{
  39    unsigned int e1, e2;
  40    uint32_t *p;
  41    e1 = (addr << 16) | (limit & 0xffff);
  42    e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000);
  43    e2 |= flags;
  44    p = ptr;
  45    p[0] = tswap32(e1);
  46    p[1] = tswap32(e2);
  47}
  48
/*
 * Host-side pointer to the guest interrupt descriptor table.  It is
 * assigned in target_cpu_copy_regs() from g2h_untagged(env->idt.base)
 * and indexed by set_idt() whenever a gate is written.
 */
static uint64_t *idt_table;
  50#ifdef TARGET_X86_64
  51static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
  52                       uint64_t addr, unsigned int sel)
  53{
  54    uint32_t *p, e1, e2;
  55    e1 = (addr & 0xffff) | (sel << 16);
  56    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
  57    p = ptr;
  58    p[0] = tswap32(e1);
  59    p[1] = tswap32(e2);
  60    p[2] = tswap32(addr >> 32);
  61    p[3] = 0;
  62}
  63/* only dpl matters as we do only user space emulation */
  64static void set_idt(int n, unsigned int dpl)
  65{
  66    set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
  67}
  68#else
  69static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
  70                     uint32_t addr, unsigned int sel)
  71{
  72    uint32_t *p, e1, e2;
  73    e1 = (addr & 0xffff) | (sel << 16);
  74    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
  75    p = ptr;
  76    p[0] = tswap32(e1);
  77    p[1] = tswap32(e2);
  78}
  79
  80/* only dpl matters as we do only user space emulation */
  81static void set_idt(int n, unsigned int dpl)
  82{
  83    set_gate(idt_table + n, 0, dpl, 0, 0);
  84}
  85#endif
  86
  87#ifdef TARGET_X86_64
  88static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
  89{
  90    /*
  91     * For all the vsyscalls, NULL means "don't write anything" not
  92     * "write it at address 0".
  93     */
  94    if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
  95        return true;
  96    }
  97
  98    env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
  99    force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
 100    return false;
 101}
 102
 103/*
 104 * Since v3.1, the kernel traps and emulates the vsyscall page.
 105 * Entry points other than the official generate SIGSEGV.
 106 */
 107static void emulate_vsyscall(CPUX86State *env)
 108{
 109    int syscall;
 110    abi_ulong ret;
 111    uint64_t caller;
 112
 113    /*
 114     * Validate the entry point.  We have already validated the page
 115     * during translation to get here; now verify the offset.
 116     */
 117    switch (env->eip & ~TARGET_PAGE_MASK) {
 118    case 0x000:
 119        syscall = TARGET_NR_gettimeofday;
 120        break;
 121    case 0x400:
 122        syscall = TARGET_NR_time;
 123        break;
 124    case 0x800:
 125        syscall = TARGET_NR_getcpu;
 126        break;
 127    default:
 128        goto sigsegv;
 129    }
 130
 131    /*
 132     * Validate the return address.
 133     * Note that the kernel treats this the same as an invalid entry point.
 134     */
 135    if (get_user_u64(caller, env->regs[R_ESP])) {
 136        goto sigsegv;
 137    }
 138
 139    /*
 140     * Validate the pointer arguments.
 141     */
 142    switch (syscall) {
 143    case TARGET_NR_gettimeofday:
 144        if (!write_ok_or_segv(env, env->regs[R_EDI],
 145                              sizeof(struct target_timeval)) ||
 146            !write_ok_or_segv(env, env->regs[R_ESI],
 147                              sizeof(struct target_timezone))) {
 148            return;
 149        }
 150        break;
 151    case TARGET_NR_time:
 152        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
 153            return;
 154        }
 155        break;
 156    case TARGET_NR_getcpu:
 157        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
 158            !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
 159            return;
 160        }
 161        break;
 162    default:
 163        g_assert_not_reached();
 164    }
 165
 166    /*
 167     * Perform the syscall.  None of the vsyscalls should need restarting.
 168     */
 169    ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
 170                     env->regs[R_EDX], env->regs[10], env->regs[8],
 171                     env->regs[9], 0, 0);
 172    g_assert(ret != -QEMU_ERESTARTSYS);
 173    g_assert(ret != -QEMU_ESIGRETURN);
 174    if (ret == -TARGET_EFAULT) {
 175        goto sigsegv;
 176    }
 177    env->regs[R_EAX] = ret;
 178
 179    /* Emulate a ret instruction to leave the vsyscall page.  */
 180    env->eip = caller;
 181    env->regs[R_ESP] += 8;
 182    return;
 183
 184 sigsegv:
 185    force_sig(TARGET_SIGSEGV);
 186}
 187#endif
 188
 189static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr)
 190{
 191#ifndef TARGET_X86_64
 192    if (env->eflags & VM_MASK) {
 193        handle_vm86_trap(env, trapnr);
 194        return true;
 195    }
 196#endif
 197    return false;
 198}
 199
 200void cpu_loop(CPUX86State *env)
 201{
 202    CPUState *cs = env_cpu(env);
 203    int trapnr;
 204    abi_ulong pc;
 205    abi_ulong ret;
 206
 207    for(;;) {
 208        cpu_exec_start(cs);
 209        trapnr = cpu_exec(cs);
 210        cpu_exec_end(cs);
 211        process_queued_cpu_work(cs);
 212
 213        switch(trapnr) {
 214        case 0x80:
 215            /* linux syscall from int $0x80 */
 216            ret = do_syscall(env,
 217                             env->regs[R_EAX],
 218                             env->regs[R_EBX],
 219                             env->regs[R_ECX],
 220                             env->regs[R_EDX],
 221                             env->regs[R_ESI],
 222                             env->regs[R_EDI],
 223                             env->regs[R_EBP],
 224                             0, 0);
 225            if (ret == -QEMU_ERESTARTSYS) {
 226                env->eip -= 2;
 227            } else if (ret != -QEMU_ESIGRETURN) {
 228                env->regs[R_EAX] = ret;
 229            }
 230            break;
 231#ifndef TARGET_ABI32
 232        case EXCP_SYSCALL:
 233            /* linux syscall from syscall instruction */
 234            ret = do_syscall(env,
 235                             env->regs[R_EAX],
 236                             env->regs[R_EDI],
 237                             env->regs[R_ESI],
 238                             env->regs[R_EDX],
 239                             env->regs[10],
 240                             env->regs[8],
 241                             env->regs[9],
 242                             0, 0);
 243            if (ret == -QEMU_ERESTARTSYS) {
 244                env->eip -= 2;
 245            } else if (ret != -QEMU_ESIGRETURN) {
 246                env->regs[R_EAX] = ret;
 247            }
 248            break;
 249#endif
 250#ifdef TARGET_X86_64
 251        case EXCP_VSYSCALL:
 252            emulate_vsyscall(env);
 253            break;
 254#endif
 255        case EXCP0B_NOSEG:
 256        case EXCP0C_STACK:
 257            force_sig(TARGET_SIGBUS);
 258            break;
 259        case EXCP0D_GPF:
 260            /* XXX: potential problem if ABI32 */
 261            if (maybe_handle_vm86_trap(env, trapnr)) {
 262                break;
 263            }
 264            force_sig(TARGET_SIGSEGV);
 265            break;
 266        case EXCP0E_PAGE:
 267            force_sig_fault(TARGET_SIGSEGV,
 268                            (env->error_code & PG_ERROR_P_MASK ?
 269                             TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
 270                            env->cr[2]);
 271            break;
 272        case EXCP00_DIVZ:
 273            if (maybe_handle_vm86_trap(env, trapnr)) {
 274                break;
 275            }
 276            force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
 277            break;
 278        case EXCP01_DB:
 279            if (maybe_handle_vm86_trap(env, trapnr)) {
 280                break;
 281            }
 282            force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
 283            break;
 284        case EXCP03_INT3:
 285            if (maybe_handle_vm86_trap(env, trapnr)) {
 286                break;
 287            }
 288            force_sig(TARGET_SIGTRAP);
 289            break;
 290        case EXCP04_INTO:
 291        case EXCP05_BOUND:
 292            if (maybe_handle_vm86_trap(env, trapnr)) {
 293                break;
 294            }
 295            force_sig(TARGET_SIGSEGV);
 296            break;
 297        case EXCP06_ILLOP:
 298            force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
 299            break;
 300        case EXCP_INTERRUPT:
 301            /* just indicate that signals should be handled asap */
 302            break;
 303        case EXCP_DEBUG:
 304            force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
 305            break;
 306        case EXCP_ATOMIC:
 307            cpu_exec_step_atomic(cs);
 308            break;
 309        default:
 310            pc = env->segs[R_CS].base + env->eip;
 311            EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
 312                      (long)pc, trapnr);
 313            abort();
 314        }
 315        process_pending_signals(env);
 316    }
 317}
 318
 319void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
 320{
 321    env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
 322    env->hflags |= HF_PE_MASK | HF_CPL_MASK;
 323    if (env->features[FEAT_1_EDX] & CPUID_SSE) {
 324        env->cr[4] |= CR4_OSFXSR_MASK;
 325        env->hflags |= HF_OSFXSR_MASK;
 326    }
 327#ifndef TARGET_ABI32
 328    /* enable 64 bit mode if possible */
 329    if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
 330        fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
 331        exit(EXIT_FAILURE);
 332    }
 333    env->cr[4] |= CR4_PAE_MASK;
 334    env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
 335    env->hflags |= HF_LMA_MASK;
 336#endif
 337
 338    /* flags setup : we activate the IRQs by default as in user mode */
 339    env->eflags |= IF_MASK;
 340
 341    /* linux register setup */
 342#ifndef TARGET_ABI32
 343    env->regs[R_EAX] = regs->rax;
 344    env->regs[R_EBX] = regs->rbx;
 345    env->regs[R_ECX] = regs->rcx;
 346    env->regs[R_EDX] = regs->rdx;
 347    env->regs[R_ESI] = regs->rsi;
 348    env->regs[R_EDI] = regs->rdi;
 349    env->regs[R_EBP] = regs->rbp;
 350    env->regs[R_ESP] = regs->rsp;
 351    env->eip = regs->rip;
 352#else
 353    env->regs[R_EAX] = regs->eax;
 354    env->regs[R_EBX] = regs->ebx;
 355    env->regs[R_ECX] = regs->ecx;
 356    env->regs[R_EDX] = regs->edx;
 357    env->regs[R_ESI] = regs->esi;
 358    env->regs[R_EDI] = regs->edi;
 359    env->regs[R_EBP] = regs->ebp;
 360    env->regs[R_ESP] = regs->esp;
 361    env->eip = regs->eip;
 362#endif
 363
 364    /* linux interrupt setup */
 365#ifndef TARGET_ABI32
 366    env->idt.limit = 511;
 367#else
 368    env->idt.limit = 255;
 369#endif
 370    env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
 371                                PROT_READ|PROT_WRITE,
 372                                MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
 373    idt_table = g2h_untagged(env->idt.base);
 374    set_idt(0, 0);
 375    set_idt(1, 0);
 376    set_idt(2, 0);
 377    set_idt(3, 3);
 378    set_idt(4, 3);
 379    set_idt(5, 0);
 380    set_idt(6, 0);
 381    set_idt(7, 0);
 382    set_idt(8, 0);
 383    set_idt(9, 0);
 384    set_idt(10, 0);
 385    set_idt(11, 0);
 386    set_idt(12, 0);
 387    set_idt(13, 0);
 388    set_idt(14, 0);
 389    set_idt(15, 0);
 390    set_idt(16, 0);
 391    set_idt(17, 0);
 392    set_idt(18, 0);
 393    set_idt(19, 0);
 394    set_idt(0x80, 3);
 395
 396    /* linux segment setup */
 397    {
 398        uint64_t *gdt_table;
 399        env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
 400                                    PROT_READ|PROT_WRITE,
 401                                    MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
 402        env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
 403        gdt_table = g2h_untagged(env->gdt.base);
 404#ifdef TARGET_ABI32
 405        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
 406                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
 407                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
 408#else
 409        /* 64 bit code segment */
 410        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
 411                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
 412                 DESC_L_MASK |
 413                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
 414#endif
 415        write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
 416                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
 417                 (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
 418    }
 419    cpu_x86_load_seg(env, R_CS, __USER_CS);
 420    cpu_x86_load_seg(env, R_SS, __USER_DS);
 421#ifdef TARGET_ABI32
 422    cpu_x86_load_seg(env, R_DS, __USER_DS);
 423    cpu_x86_load_seg(env, R_ES, __USER_DS);
 424    cpu_x86_load_seg(env, R_FS, __USER_DS);
 425    cpu_x86_load_seg(env, R_GS, __USER_DS);
 426    /* This hack makes Wine work... */
 427    env->segs[R_FS].selector = 0;
 428#else
 429    cpu_x86_load_seg(env, R_DS, 0);
 430    cpu_x86_load_seg(env, R_ES, 0);
 431    cpu_x86_load_seg(env, R_FS, 0);
 432    cpu_x86_load_seg(env, R_GS, 0);
 433#endif
 434}
 435