qemu/linux-user/i386/cpu_loop.c
<<
>>
Prefs
   1/*
   2 *  qemu user cpu loop
   3 *
   4 *  Copyright (c) 2003-2008 Fabrice Bellard
   5 *
   6 *  This program is free software; you can redistribute it and/or modify
   7 *  it under the terms of the GNU General Public License as published by
   8 *  the Free Software Foundation; either version 2 of the License, or
   9 *  (at your option) any later version.
  10 *
  11 *  This program is distributed in the hope that it will be useful,
  12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 *  GNU General Public License for more details.
  15 *
  16 *  You should have received a copy of the GNU General Public License
  17 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu-common.h"
  22#include "qemu.h"
  23#include "cpu_loop-common.h"
  24
  25/***********************************************************/
  26/* CPUX86 core interface */
  27
  28uint64_t cpu_get_tsc(CPUX86State *env)
  29{
  30    return cpu_get_host_ticks();
  31}
  32
  33static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
  34              int flags)
  35{
  36    unsigned int e1, e2;
  37    uint32_t *p;
  38    e1 = (addr << 16) | (limit & 0xffff);
  39    e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000);
  40    e2 |= flags;
  41    p = ptr;
  42    p[0] = tswap32(e1);
  43    p[1] = tswap32(e2);
  44}
  45
  46static uint64_t *idt_table;
  47#ifdef TARGET_X86_64
  48static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
  49                       uint64_t addr, unsigned int sel)
  50{
  51    uint32_t *p, e1, e2;
  52    e1 = (addr & 0xffff) | (sel << 16);
  53    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
  54    p = ptr;
  55    p[0] = tswap32(e1);
  56    p[1] = tswap32(e2);
  57    p[2] = tswap32(addr >> 32);
  58    p[3] = 0;
  59}
  60/* only dpl matters as we do only user space emulation */
  61static void set_idt(int n, unsigned int dpl)
  62{
  63    set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
  64}
  65#else
  66static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
  67                     uint32_t addr, unsigned int sel)
  68{
  69    uint32_t *p, e1, e2;
  70    e1 = (addr & 0xffff) | (sel << 16);
  71    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
  72    p = ptr;
  73    p[0] = tswap32(e1);
  74    p[1] = tswap32(e2);
  75}
  76
  77/* only dpl matters as we do only user space emulation */
  78static void set_idt(int n, unsigned int dpl)
  79{
  80    set_gate(idt_table + n, 0, dpl, 0, 0);
  81}
  82#endif
  83
  84static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr)
  85{
  86    target_siginfo_t info = {
  87        .si_signo = sig,
  88        .si_code = code,
  89        ._sifields._sigfault._addr = addr
  90    };
  91
  92    queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
  93}
  94
  95#ifdef TARGET_X86_64
  96static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
  97{
  98    /*
  99     * For all the vsyscalls, NULL means "don't write anything" not
 100     * "write it at address 0".
 101     */
 102    if (addr == 0 || access_ok(VERIFY_WRITE, addr, len)) {
 103        return true;
 104    }
 105
 106    env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
 107    gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
 108    return false;
 109}
 110
 111/*
 112 * Since v3.1, the kernel traps and emulates the vsyscall page.
 113 * Entry points other than the official generate SIGSEGV.
 114 */
 115static void emulate_vsyscall(CPUX86State *env)
 116{
 117    int syscall;
 118    abi_ulong ret;
 119    uint64_t caller;
 120
 121    /*
 122     * Validate the entry point.  We have already validated the page
 123     * during translation to get here; now verify the offset.
 124     */
 125    switch (env->eip & ~TARGET_PAGE_MASK) {
 126    case 0x000:
 127        syscall = TARGET_NR_gettimeofday;
 128        break;
 129    case 0x400:
 130        syscall = TARGET_NR_time;
 131        break;
 132    case 0x800:
 133        syscall = TARGET_NR_getcpu;
 134        break;
 135    default:
 136        goto sigsegv;
 137    }
 138
 139    /*
 140     * Validate the return address.
 141     * Note that the kernel treats this the same as an invalid entry point.
 142     */
 143    if (get_user_u64(caller, env->regs[R_ESP])) {
 144        goto sigsegv;
 145    }
 146
 147    /*
 148     * Validate the the pointer arguments.
 149     */
 150    switch (syscall) {
 151    case TARGET_NR_gettimeofday:
 152        if (!write_ok_or_segv(env, env->regs[R_EDI],
 153                              sizeof(struct target_timeval)) ||
 154            !write_ok_or_segv(env, env->regs[R_ESI],
 155                              sizeof(struct target_timezone))) {
 156            return;
 157        }
 158        break;
 159    case TARGET_NR_time:
 160        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
 161            return;
 162        }
 163        break;
 164    case TARGET_NR_getcpu:
 165        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
 166            !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
 167            return;
 168        }
 169        break;
 170    default:
 171        g_assert_not_reached();
 172    }
 173
 174    /*
 175     * Perform the syscall.  None of the vsyscalls should need restarting.
 176     */
 177    ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
 178                     env->regs[R_EDX], env->regs[10], env->regs[8],
 179                     env->regs[9], 0, 0);
 180    g_assert(ret != -TARGET_ERESTARTSYS);
 181    g_assert(ret != -TARGET_QEMU_ESIGRETURN);
 182    if (ret == -TARGET_EFAULT) {
 183        goto sigsegv;
 184    }
 185    env->regs[R_EAX] = ret;
 186
 187    /* Emulate a ret instruction to leave the vsyscall page.  */
 188    env->eip = caller;
 189    env->regs[R_ESP] += 8;
 190    return;
 191
 192 sigsegv:
 193    /* Like force_sig(SIGSEGV).  */
 194    gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
 195}
 196#endif
 197
 198void cpu_loop(CPUX86State *env)
 199{
 200    CPUState *cs = env_cpu(env);
 201    int trapnr;
 202    abi_ulong pc;
 203    abi_ulong ret;
 204
 205    for(;;) {
 206        cpu_exec_start(cs);
 207        trapnr = cpu_exec(cs);
 208        cpu_exec_end(cs);
 209        process_queued_cpu_work(cs);
 210
 211        switch(trapnr) {
 212        case 0x80:
 213            /* linux syscall from int $0x80 */
 214            ret = do_syscall(env,
 215                             env->regs[R_EAX],
 216                             env->regs[R_EBX],
 217                             env->regs[R_ECX],
 218                             env->regs[R_EDX],
 219                             env->regs[R_ESI],
 220                             env->regs[R_EDI],
 221                             env->regs[R_EBP],
 222                             0, 0);
 223            if (ret == -TARGET_ERESTARTSYS) {
 224                env->eip -= 2;
 225            } else if (ret != -TARGET_QEMU_ESIGRETURN) {
 226                env->regs[R_EAX] = ret;
 227            }
 228            break;
 229#ifndef TARGET_ABI32
 230        case EXCP_SYSCALL:
 231            /* linux syscall from syscall instruction */
 232            ret = do_syscall(env,
 233                             env->regs[R_EAX],
 234                             env->regs[R_EDI],
 235                             env->regs[R_ESI],
 236                             env->regs[R_EDX],
 237                             env->regs[10],
 238                             env->regs[8],
 239                             env->regs[9],
 240                             0, 0);
 241            if (ret == -TARGET_ERESTARTSYS) {
 242                env->eip -= 2;
 243            } else if (ret != -TARGET_QEMU_ESIGRETURN) {
 244                env->regs[R_EAX] = ret;
 245            }
 246            break;
 247#endif
 248#ifdef TARGET_X86_64
 249        case EXCP_VSYSCALL:
 250            emulate_vsyscall(env);
 251            break;
 252#endif
 253        case EXCP0B_NOSEG:
 254        case EXCP0C_STACK:
 255            gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0);
 256            break;
 257        case EXCP0D_GPF:
 258            /* XXX: potential problem if ABI32 */
 259#ifndef TARGET_X86_64
 260            if (env->eflags & VM_MASK) {
 261                handle_vm86_fault(env);
 262                break;
 263            }
 264#endif
 265            gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
 266            break;
 267        case EXCP0E_PAGE:
 268            gen_signal(env, TARGET_SIGSEGV,
 269                       (env->error_code & 1 ?
 270                        TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
 271                       env->cr[2]);
 272            break;
 273        case EXCP00_DIVZ:
 274#ifndef TARGET_X86_64
 275            if (env->eflags & VM_MASK) {
 276                handle_vm86_trap(env, trapnr);
 277                break;
 278            }
 279#endif
 280            gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
 281            break;
 282        case EXCP01_DB:
 283        case EXCP03_INT3:
 284#ifndef TARGET_X86_64
 285            if (env->eflags & VM_MASK) {
 286                handle_vm86_trap(env, trapnr);
 287                break;
 288            }
 289#endif
 290            if (trapnr == EXCP01_DB) {
 291                gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
 292            } else {
 293                gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0);
 294            }
 295            break;
 296        case EXCP04_INTO:
 297        case EXCP05_BOUND:
 298#ifndef TARGET_X86_64
 299            if (env->eflags & VM_MASK) {
 300                handle_vm86_trap(env, trapnr);
 301                break;
 302            }
 303#endif
 304            gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
 305            break;
 306        case EXCP06_ILLOP:
 307            gen_signal(env, TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
 308            break;
 309        case EXCP_INTERRUPT:
 310            /* just indicate that signals should be handled asap */
 311            break;
 312        case EXCP_DEBUG:
 313            gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0);
 314            break;
 315        case EXCP_ATOMIC:
 316            cpu_exec_step_atomic(cs);
 317            break;
 318        default:
 319            pc = env->segs[R_CS].base + env->eip;
 320            EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
 321                      (long)pc, trapnr);
 322            abort();
 323        }
 324        process_pending_signals(env);
 325    }
 326}
 327
 328void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
 329{
 330    env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
 331    env->hflags |= HF_PE_MASK | HF_CPL_MASK;
 332    if (env->features[FEAT_1_EDX] & CPUID_SSE) {
 333        env->cr[4] |= CR4_OSFXSR_MASK;
 334        env->hflags |= HF_OSFXSR_MASK;
 335    }
 336#ifndef TARGET_ABI32
 337    /* enable 64 bit mode if possible */
 338    if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
 339        fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
 340        exit(EXIT_FAILURE);
 341    }
 342    env->cr[4] |= CR4_PAE_MASK;
 343    env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
 344    env->hflags |= HF_LMA_MASK;
 345#endif
 346
 347    /* flags setup : we activate the IRQs by default as in user mode */
 348    env->eflags |= IF_MASK;
 349
 350    /* linux register setup */
 351#ifndef TARGET_ABI32
 352    env->regs[R_EAX] = regs->rax;
 353    env->regs[R_EBX] = regs->rbx;
 354    env->regs[R_ECX] = regs->rcx;
 355    env->regs[R_EDX] = regs->rdx;
 356    env->regs[R_ESI] = regs->rsi;
 357    env->regs[R_EDI] = regs->rdi;
 358    env->regs[R_EBP] = regs->rbp;
 359    env->regs[R_ESP] = regs->rsp;
 360    env->eip = regs->rip;
 361#else
 362    env->regs[R_EAX] = regs->eax;
 363    env->regs[R_EBX] = regs->ebx;
 364    env->regs[R_ECX] = regs->ecx;
 365    env->regs[R_EDX] = regs->edx;
 366    env->regs[R_ESI] = regs->esi;
 367    env->regs[R_EDI] = regs->edi;
 368    env->regs[R_EBP] = regs->ebp;
 369    env->regs[R_ESP] = regs->esp;
 370    env->eip = regs->eip;
 371#endif
 372
 373    /* linux interrupt setup */
 374#ifndef TARGET_ABI32
 375    env->idt.limit = 511;
 376#else
 377    env->idt.limit = 255;
 378#endif
 379    env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
 380                                PROT_READ|PROT_WRITE,
 381                                MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
 382    idt_table = g2h(env->idt.base);
 383    set_idt(0, 0);
 384    set_idt(1, 0);
 385    set_idt(2, 0);
 386    set_idt(3, 3);
 387    set_idt(4, 3);
 388    set_idt(5, 0);
 389    set_idt(6, 0);
 390    set_idt(7, 0);
 391    set_idt(8, 0);
 392    set_idt(9, 0);
 393    set_idt(10, 0);
 394    set_idt(11, 0);
 395    set_idt(12, 0);
 396    set_idt(13, 0);
 397    set_idt(14, 0);
 398    set_idt(15, 0);
 399    set_idt(16, 0);
 400    set_idt(17, 0);
 401    set_idt(18, 0);
 402    set_idt(19, 0);
 403    set_idt(0x80, 3);
 404
 405    /* linux segment setup */
 406    {
 407        uint64_t *gdt_table;
 408        env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
 409                                    PROT_READ|PROT_WRITE,
 410                                    MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
 411        env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
 412        gdt_table = g2h(env->gdt.base);
 413#ifdef TARGET_ABI32
 414        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
 415                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
 416                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
 417#else
 418        /* 64 bit code segment */
 419        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
 420                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
 421                 DESC_L_MASK |
 422                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
 423#endif
 424        write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
 425                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
 426                 (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
 427    }
 428    cpu_x86_load_seg(env, R_CS, __USER_CS);
 429    cpu_x86_load_seg(env, R_SS, __USER_DS);
 430#ifdef TARGET_ABI32
 431    cpu_x86_load_seg(env, R_DS, __USER_DS);
 432    cpu_x86_load_seg(env, R_ES, __USER_DS);
 433    cpu_x86_load_seg(env, R_FS, __USER_DS);
 434    cpu_x86_load_seg(env, R_GS, __USER_DS);
 435    /* This hack makes Wine work... */
 436    env->segs[R_FS].selector = 0;
 437#else
 438    cpu_x86_load_seg(env, R_DS, 0);
 439    cpu_x86_load_seg(env, R_ES, 0);
 440    cpu_x86_load_seg(env, R_FS, 0);
 441    cpu_x86_load_seg(env, R_GS, 0);
 442#endif
 443}
 444