qemu/linux-user/i386/cpu_loop.c
<<
>>
Prefs
   1/*
   2 *  qemu user cpu loop
   3 *
   4 *  Copyright (c) 2003-2008 Fabrice Bellard
   5 *
   6 *  This program is free software; you can redistribute it and/or modify
   7 *  it under the terms of the GNU General Public License as published by
   8 *  the Free Software Foundation; either version 2 of the License, or
   9 *  (at your option) any later version.
  10 *
  11 *  This program is distributed in the hope that it will be useful,
  12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 *  GNU General Public License for more details.
  15 *
  16 *  You should have received a copy of the GNU General Public License
  17 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu-common.h"
  22#include "qemu.h"
  23#include "user-internals.h"
  24#include "cpu_loop-common.h"
  25#include "signal-common.h"
  26#include "user-mmap.h"
  27
  28/***********************************************************/
  29/* CPUX86 core interface */
  30
  31uint64_t cpu_get_tsc(CPUX86State *env)
  32{
  33    return cpu_get_host_ticks();
  34}
  35
  36static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
  37              int flags)
  38{
  39    unsigned int e1, e2;
  40    uint32_t *p;
  41    e1 = (addr << 16) | (limit & 0xffff);
  42    e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000);
  43    e2 |= flags;
  44    p = ptr;
  45    p[0] = tswap32(e1);
  46    p[1] = tswap32(e2);
  47}
  48
  49static uint64_t *idt_table;
  50#ifdef TARGET_X86_64
  51static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
  52                       uint64_t addr, unsigned int sel)
  53{
  54    uint32_t *p, e1, e2;
  55    e1 = (addr & 0xffff) | (sel << 16);
  56    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
  57    p = ptr;
  58    p[0] = tswap32(e1);
  59    p[1] = tswap32(e2);
  60    p[2] = tswap32(addr >> 32);
  61    p[3] = 0;
  62}
  63/* only dpl matters as we do only user space emulation */
  64static void set_idt(int n, unsigned int dpl)
  65{
  66    set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
  67}
  68#else
  69static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
  70                     uint32_t addr, unsigned int sel)
  71{
  72    uint32_t *p, e1, e2;
  73    e1 = (addr & 0xffff) | (sel << 16);
  74    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
  75    p = ptr;
  76    p[0] = tswap32(e1);
  77    p[1] = tswap32(e2);
  78}
  79
  80/* only dpl matters as we do only user space emulation */
  81static void set_idt(int n, unsigned int dpl)
  82{
  83    set_gate(idt_table + n, 0, dpl, 0, 0);
  84}
  85#endif
  86
  87static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr)
  88{
  89    target_siginfo_t info = {
  90        .si_signo = sig,
  91        .si_code = code,
  92        ._sifields._sigfault._addr = addr
  93    };
  94
  95    queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
  96}
  97
  98#ifdef TARGET_X86_64
  99static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
 100{
 101    /*
 102     * For all the vsyscalls, NULL means "don't write anything" not
 103     * "write it at address 0".
 104     */
 105    if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
 106        return true;
 107    }
 108
 109    env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
 110    gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
 111    return false;
 112}
 113
 114/*
 115 * Since v3.1, the kernel traps and emulates the vsyscall page.
 116 * Entry points other than the official generate SIGSEGV.
 117 */
 118static void emulate_vsyscall(CPUX86State *env)
 119{
 120    int syscall;
 121    abi_ulong ret;
 122    uint64_t caller;
 123
 124    /*
 125     * Validate the entry point.  We have already validated the page
 126     * during translation to get here; now verify the offset.
 127     */
 128    switch (env->eip & ~TARGET_PAGE_MASK) {
 129    case 0x000:
 130        syscall = TARGET_NR_gettimeofday;
 131        break;
 132    case 0x400:
 133        syscall = TARGET_NR_time;
 134        break;
 135    case 0x800:
 136        syscall = TARGET_NR_getcpu;
 137        break;
 138    default:
 139        goto sigsegv;
 140    }
 141
 142    /*
 143     * Validate the return address.
 144     * Note that the kernel treats this the same as an invalid entry point.
 145     */
 146    if (get_user_u64(caller, env->regs[R_ESP])) {
 147        goto sigsegv;
 148    }
 149
 150    /*
 151     * Validate the the pointer arguments.
 152     */
 153    switch (syscall) {
 154    case TARGET_NR_gettimeofday:
 155        if (!write_ok_or_segv(env, env->regs[R_EDI],
 156                              sizeof(struct target_timeval)) ||
 157            !write_ok_or_segv(env, env->regs[R_ESI],
 158                              sizeof(struct target_timezone))) {
 159            return;
 160        }
 161        break;
 162    case TARGET_NR_time:
 163        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
 164            return;
 165        }
 166        break;
 167    case TARGET_NR_getcpu:
 168        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
 169            !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
 170            return;
 171        }
 172        break;
 173    default:
 174        g_assert_not_reached();
 175    }
 176
 177    /*
 178     * Perform the syscall.  None of the vsyscalls should need restarting.
 179     */
 180    ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
 181                     env->regs[R_EDX], env->regs[10], env->regs[8],
 182                     env->regs[9], 0, 0);
 183    g_assert(ret != -TARGET_ERESTARTSYS);
 184    g_assert(ret != -TARGET_QEMU_ESIGRETURN);
 185    if (ret == -TARGET_EFAULT) {
 186        goto sigsegv;
 187    }
 188    env->regs[R_EAX] = ret;
 189
 190    /* Emulate a ret instruction to leave the vsyscall page.  */
 191    env->eip = caller;
 192    env->regs[R_ESP] += 8;
 193    return;
 194
 195 sigsegv:
 196    /* Like force_sig(SIGSEGV).  */
 197    gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
 198}
 199#endif
 200
 201void cpu_loop(CPUX86State *env)
 202{
 203    CPUState *cs = env_cpu(env);
 204    int trapnr;
 205    abi_ulong pc;
 206    abi_ulong ret;
 207
 208    for(;;) {
 209        cpu_exec_start(cs);
 210        trapnr = cpu_exec(cs);
 211        cpu_exec_end(cs);
 212        process_queued_cpu_work(cs);
 213
 214        switch(trapnr) {
 215        case 0x80:
 216            /* linux syscall from int $0x80 */
 217            ret = do_syscall(env,
 218                             env->regs[R_EAX],
 219                             env->regs[R_EBX],
 220                             env->regs[R_ECX],
 221                             env->regs[R_EDX],
 222                             env->regs[R_ESI],
 223                             env->regs[R_EDI],
 224                             env->regs[R_EBP],
 225                             0, 0);
 226            if (ret == -TARGET_ERESTARTSYS) {
 227                env->eip -= 2;
 228            } else if (ret != -TARGET_QEMU_ESIGRETURN) {
 229                env->regs[R_EAX] = ret;
 230            }
 231            break;
 232#ifndef TARGET_ABI32
 233        case EXCP_SYSCALL:
 234            /* linux syscall from syscall instruction */
 235            ret = do_syscall(env,
 236                             env->regs[R_EAX],
 237                             env->regs[R_EDI],
 238                             env->regs[R_ESI],
 239                             env->regs[R_EDX],
 240                             env->regs[10],
 241                             env->regs[8],
 242                             env->regs[9],
 243                             0, 0);
 244            if (ret == -TARGET_ERESTARTSYS) {
 245                env->eip -= 2;
 246            } else if (ret != -TARGET_QEMU_ESIGRETURN) {
 247                env->regs[R_EAX] = ret;
 248            }
 249            break;
 250#endif
 251#ifdef TARGET_X86_64
 252        case EXCP_VSYSCALL:
 253            emulate_vsyscall(env);
 254            break;
 255#endif
 256        case EXCP0B_NOSEG:
 257        case EXCP0C_STACK:
 258            gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0);
 259            break;
 260        case EXCP0D_GPF:
 261            /* XXX: potential problem if ABI32 */
 262#ifndef TARGET_X86_64
 263            if (env->eflags & VM_MASK) {
 264                handle_vm86_fault(env);
 265                break;
 266            }
 267#endif
 268            gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
 269            break;
 270        case EXCP0E_PAGE:
 271            gen_signal(env, TARGET_SIGSEGV,
 272                       (env->error_code & 1 ?
 273                        TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
 274                       env->cr[2]);
 275            break;
 276        case EXCP00_DIVZ:
 277#ifndef TARGET_X86_64
 278            if (env->eflags & VM_MASK) {
 279                handle_vm86_trap(env, trapnr);
 280                break;
 281            }
 282#endif
 283            gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
 284            break;
 285        case EXCP01_DB:
 286        case EXCP03_INT3:
 287#ifndef TARGET_X86_64
 288            if (env->eflags & VM_MASK) {
 289                handle_vm86_trap(env, trapnr);
 290                break;
 291            }
 292#endif
 293            if (trapnr == EXCP01_DB) {
 294                gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
 295            } else {
 296                gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0);
 297            }
 298            break;
 299        case EXCP04_INTO:
 300        case EXCP05_BOUND:
 301#ifndef TARGET_X86_64
 302            if (env->eflags & VM_MASK) {
 303                handle_vm86_trap(env, trapnr);
 304                break;
 305            }
 306#endif
 307            gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
 308            break;
 309        case EXCP06_ILLOP:
 310            gen_signal(env, TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
 311            break;
 312        case EXCP_INTERRUPT:
 313            /* just indicate that signals should be handled asap */
 314            break;
 315        case EXCP_DEBUG:
 316            gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0);
 317            break;
 318        case EXCP_ATOMIC:
 319            cpu_exec_step_atomic(cs);
 320            break;
 321        default:
 322            pc = env->segs[R_CS].base + env->eip;
 323            EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
 324                      (long)pc, trapnr);
 325            abort();
 326        }
 327        process_pending_signals(env);
 328    }
 329}
 330
 331void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
 332{
 333    env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
 334    env->hflags |= HF_PE_MASK | HF_CPL_MASK;
 335    if (env->features[FEAT_1_EDX] & CPUID_SSE) {
 336        env->cr[4] |= CR4_OSFXSR_MASK;
 337        env->hflags |= HF_OSFXSR_MASK;
 338    }
 339#ifndef TARGET_ABI32
 340    /* enable 64 bit mode if possible */
 341    if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
 342        fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
 343        exit(EXIT_FAILURE);
 344    }
 345    env->cr[4] |= CR4_PAE_MASK;
 346    env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
 347    env->hflags |= HF_LMA_MASK;
 348#endif
 349
 350    /* flags setup : we activate the IRQs by default as in user mode */
 351    env->eflags |= IF_MASK;
 352
 353    /* linux register setup */
 354#ifndef TARGET_ABI32
 355    env->regs[R_EAX] = regs->rax;
 356    env->regs[R_EBX] = regs->rbx;
 357    env->regs[R_ECX] = regs->rcx;
 358    env->regs[R_EDX] = regs->rdx;
 359    env->regs[R_ESI] = regs->rsi;
 360    env->regs[R_EDI] = regs->rdi;
 361    env->regs[R_EBP] = regs->rbp;
 362    env->regs[R_ESP] = regs->rsp;
 363    env->eip = regs->rip;
 364#else
 365    env->regs[R_EAX] = regs->eax;
 366    env->regs[R_EBX] = regs->ebx;
 367    env->regs[R_ECX] = regs->ecx;
 368    env->regs[R_EDX] = regs->edx;
 369    env->regs[R_ESI] = regs->esi;
 370    env->regs[R_EDI] = regs->edi;
 371    env->regs[R_EBP] = regs->ebp;
 372    env->regs[R_ESP] = regs->esp;
 373    env->eip = regs->eip;
 374#endif
 375
 376    /* linux interrupt setup */
 377#ifndef TARGET_ABI32
 378    env->idt.limit = 511;
 379#else
 380    env->idt.limit = 255;
 381#endif
 382    env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
 383                                PROT_READ|PROT_WRITE,
 384                                MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
 385    idt_table = g2h_untagged(env->idt.base);
 386    set_idt(0, 0);
 387    set_idt(1, 0);
 388    set_idt(2, 0);
 389    set_idt(3, 3);
 390    set_idt(4, 3);
 391    set_idt(5, 0);
 392    set_idt(6, 0);
 393    set_idt(7, 0);
 394    set_idt(8, 0);
 395    set_idt(9, 0);
 396    set_idt(10, 0);
 397    set_idt(11, 0);
 398    set_idt(12, 0);
 399    set_idt(13, 0);
 400    set_idt(14, 0);
 401    set_idt(15, 0);
 402    set_idt(16, 0);
 403    set_idt(17, 0);
 404    set_idt(18, 0);
 405    set_idt(19, 0);
 406    set_idt(0x80, 3);
 407
 408    /* linux segment setup */
 409    {
 410        uint64_t *gdt_table;
 411        env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
 412                                    PROT_READ|PROT_WRITE,
 413                                    MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
 414        env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
 415        gdt_table = g2h_untagged(env->gdt.base);
 416#ifdef TARGET_ABI32
 417        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
 418                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
 419                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
 420#else
 421        /* 64 bit code segment */
 422        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
 423                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
 424                 DESC_L_MASK |
 425                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
 426#endif
 427        write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
 428                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
 429                 (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
 430    }
 431    cpu_x86_load_seg(env, R_CS, __USER_CS);
 432    cpu_x86_load_seg(env, R_SS, __USER_DS);
 433#ifdef TARGET_ABI32
 434    cpu_x86_load_seg(env, R_DS, __USER_DS);
 435    cpu_x86_load_seg(env, R_ES, __USER_DS);
 436    cpu_x86_load_seg(env, R_FS, __USER_DS);
 437    cpu_x86_load_seg(env, R_GS, __USER_DS);
 438    /* This hack makes Wine work... */
 439    env->segs[R_FS].selector = 0;
 440#else
 441    cpu_x86_load_seg(env, R_DS, 0);
 442    cpu_x86_load_seg(env, R_ES, 0);
 443    cpu_x86_load_seg(env, R_FS, 0);
 444    cpu_x86_load_seg(env, R_GS, 0);
 445#endif
 446}
 447