qemu/target-ppc/kvm.c
   1/*
   2 * PowerPC implementation of KVM hooks
   3 *
   4 * Copyright IBM Corp. 2007
   5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6 *
   7 * Authors:
   8 *  Jerone Young <jyoung5@us.ibm.com>
   9 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10 *  Hollis Blanchard <hollisb@us.ibm.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13 * See the COPYING file in the top-level directory.
  14 *
  15 */
  16
  17#include "qemu/osdep.h"
  18#include <dirent.h>
  19#include <sys/ioctl.h>
  20#include <sys/mman.h>
  21#include <sys/vfs.h>
  22
  23#include <linux/kvm.h>
  24
  25#include "qemu-common.h"
  26#include "qemu/error-report.h"
  27#include "qemu/timer.h"
  28#include "sysemu/sysemu.h"
  29#include "sysemu/kvm.h"
  30#include "kvm_ppc.h"
  31#include "cpu.h"
  32#include "sysemu/cpus.h"
  33#include "sysemu/device_tree.h"
  34#include "mmu-hash64.h"
  35
  36#include "hw/sysbus.h"
  37#include "hw/ppc/spapr.h"
  38#include "hw/ppc/spapr_vio.h"
  39#include "hw/ppc/ppc.h"
  40#include "sysemu/watchdog.h"
  41#include "trace.h"
  42#include "exec/gdbstub.h"
  43#include "exec/memattrs.h"
  44#include "sysemu/hostmem.h"
  45#include "qemu/cutils.h"
  46
  47//#define DEBUG_KVM
  48
  49#ifdef DEBUG_KVM
  50#define DPRINTF(fmt, ...) \
  51    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  52#else
  53#define DPRINTF(fmt, ...) \
  54    do { } while (0)
  55#endif
  56
  57#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  58
  59const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  60    KVM_CAP_LAST_INFO
  61};
  62
  63static int cap_interrupt_unset = false;
  64static int cap_interrupt_level = false;
  65static int cap_segstate;
  66static int cap_booke_sregs;
  67static int cap_ppc_smt;
  68static int cap_ppc_rma;
  69static int cap_spapr_tce;
  70static int cap_spapr_multitce;
  71static int cap_spapr_vfio;
  72static int cap_hior;
  73static int cap_one_reg;
  74static int cap_epr;
  75static int cap_ppc_watchdog;
  76static int cap_papr;
  77static int cap_htab_fd;
  78static int cap_fixup_hcalls;
  79
  80static uint32_t debug_inst_opcode;
  81
  82/* XXX We have a race condition where we actually have a level triggered
  83 *     interrupt, but the infrastructure can't expose that yet, so the guest
  84 *     takes but ignores it, goes to sleep and never gets notified that there's
  85 *     still an interrupt pending.
  86 *
  87 *     As a quick workaround, let's just wake up again 20 ms after we injected
  88 *     an interrupt. That way we can ensure that we're always reinjecting
  89 *     interrupts in case the guest swallowed them.
  90 */
  91static QEMUTimer *idle_timer;
  92
  93static void kvm_kick_cpu(void *opaque)
  94{
  95    PowerPCCPU *cpu = opaque;
  96
  97    qemu_cpu_kick(CPU(cpu));
  98}
  99
 100static int kvm_ppc_register_host_cpu_type(void);
 101
 102int kvm_arch_init(MachineState *ms, KVMState *s)
 103{
 104    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 105    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 106    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 107    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 108    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 109    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 110    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 111    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 112    cap_spapr_vfio = false;
 113    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 114    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 115    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 116    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 117    /* Note: we don't set cap_papr here, because this capability is
 118     * only activated later, by kvmppc_set_papr() */
 119    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 120    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 121
 122    if (!cap_interrupt_level) {
 123        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 124                        "VM to stall at times!\n");
 125    }
 126
 127    kvm_ppc_register_host_cpu_type();
 128
 129    return 0;
 130}
 131
 132static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 133{
 134    CPUPPCState *cenv = &cpu->env;
 135    CPUState *cs = CPU(cpu);
 136    struct kvm_sregs sregs;
 137    int ret;
 138
 139    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 140        /* What we're really trying to say is "if we're on BookE, we use
 141           the native PVR for now". This is the only sane way to check
 142           it though, so we potentially mislead users into thinking they
 143           can run BookE guests on BookS. Let's hope nobody tries :) */
 144        return 0;
 145    } else {
 146        if (!cap_segstate) {
 147            fprintf(stderr, "kvm error: missing PVR setting capability\n");
 148            return -ENOSYS;
 149        }
 150    }
 151
 152    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 153    if (ret) {
 154        return ret;
 155    }
 156
 157    sregs.pvr = cenv->spr[SPR_PVR];
 158    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 159}
 160
 161/* Set up a shared TLB array with KVM */
 162static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 163{
 164    CPUPPCState *env = &cpu->env;
 165    CPUState *cs = CPU(cpu);
 166    struct kvm_book3e_206_tlb_params params = {};
 167    struct kvm_config_tlb cfg = {};
 168    unsigned int entries = 0;
 169    int ret, i;
 170
 171    if (!kvm_enabled() ||
 172        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 173        return 0;
 174    }
 175
 176    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 177
 178    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 179        params.tlb_sizes[i] = booke206_tlb_size(env, i);
 180        params.tlb_ways[i] = booke206_tlb_ways(env, i);
 181        entries += params.tlb_sizes[i];
 182    }
 183
 184    assert(entries == env->nb_tlb);
 185    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 186
 187    env->tlb_dirty = true;
 188
 189    cfg.array = (uintptr_t)env->tlb.tlbm;
 190    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 191    cfg.params = (uintptr_t)&params;
 192    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 193
 194    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 195    if (ret < 0) {
 196        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 197                __func__, strerror(-ret));
 198        return ret;
 199    }
 200
 201    env->kvm_sw_tlb = true;
 202    return 0;
 203}
 204
 205
 206#if defined(TARGET_PPC64)
 207static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 208                                       struct kvm_ppc_smmu_info *info)
 209{
 210    CPUPPCState *env = &cpu->env;
 211    CPUState *cs = CPU(cpu);
 212
 213    memset(info, 0, sizeof(*info));
 214
 215    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
 216     * need to "guess" what the supported page sizes are.
 217     *
 218     * For that to work we make a few assumptions:
 219     *
 220     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 221     *   KVM which only supports 4K and 16M pages, but supports them
 222     *   regardless of the backing store characteristics. We also don't
 223     *   support 1T segments.
 224     *
 225     *   This is safe as if HV KVM ever supports that capability or PR
 226     *   KVM grows support for more page/segment sizes, those versions
 227     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 228     *   will not hit this fallback
 229     *
 230     * - Else we are running HV KVM. This means we only support page
 231     *   sizes that fit in the backing store. Additionally we only
 232     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
 233     *   P7 encodings for the SLB and hash table. Here too, we assume
 234     *   support for any newer processor will mean a kernel that
 235     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 236     *   this fallback.
 237     */
 238    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 239        /* No flags */
 240        info->flags = 0;
 241        info->slb_size = 64;
 242
 243        /* Standard 4k base page size segment */
 244        info->sps[0].page_shift = 12;
 245        info->sps[0].slb_enc = 0;
 246        info->sps[0].enc[0].page_shift = 12;
 247        info->sps[0].enc[0].pte_enc = 0;
 248
 249        /* Standard 16M large page size segment */
 250        info->sps[1].page_shift = 24;
 251        info->sps[1].slb_enc = SLB_VSID_L;
 252        info->sps[1].enc[0].page_shift = 24;
 253        info->sps[1].enc[0].pte_enc = 0;
 254    } else {
 255        int i = 0;
 256
 257        /* HV KVM has backing store size restrictions */
 258        info->flags = KVM_PPC_PAGE_SIZES_REAL;
 259
 260        if (env->mmu_model & POWERPC_MMU_1TSEG) {
 261            info->flags |= KVM_PPC_1T_SEGMENTS;
 262        }
 263
 264        if (env->mmu_model == POWERPC_MMU_2_06 ||
 265            env->mmu_model == POWERPC_MMU_2_07) {
 266            info->slb_size = 32;
 267        } else {
 268            info->slb_size = 64;
 269        }
 270
 271        /* Standard 4k base page size segment */
 272        info->sps[i].page_shift = 12;
 273        info->sps[i].slb_enc = 0;
 274        info->sps[i].enc[0].page_shift = 12;
 275        info->sps[i].enc[0].pte_enc = 0;
 276        i++;
 277
 278        /* 64K on MMU 2.06 and later */
 279        if (env->mmu_model == POWERPC_MMU_2_06 ||
 280            env->mmu_model == POWERPC_MMU_2_07) {
 281            info->sps[i].page_shift = 16;
 282            info->sps[i].slb_enc = 0x110;
 283            info->sps[i].enc[0].page_shift = 16;
 284            info->sps[i].enc[0].pte_enc = 1;
 285            i++;
 286        }
 287
 288        /* Standard 16M large page size segment */
 289        info->sps[i].page_shift = 24;
 290        info->sps[i].slb_enc = SLB_VSID_L;
 291        info->sps[i].enc[0].page_shift = 24;
 292        info->sps[i].enc[0].pte_enc = 0;
 293    }
 294}
 295
 296static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 297{
 298    CPUState *cs = CPU(cpu);
 299    int ret;
 300
 301    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 302        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 303        if (ret == 0) {
 304            return;
 305        }
 306    }
 307
 308    kvm_get_fallback_smmu_info(cpu, info);
 309}
 310
 311static long gethugepagesize(const char *mem_path)
 312{
 313    struct statfs fs;
 314    int ret;
 315
 316    do {
 317        ret = statfs(mem_path, &fs);
 318    } while (ret != 0 && errno == EINTR);
 319
 320    if (ret != 0) {
 321        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 322                strerror(errno));
 323        exit(1);
 324    }
 325
 326#define HUGETLBFS_MAGIC       0x958458f6
 327
 328    if (fs.f_type != HUGETLBFS_MAGIC) {
 329        /* Explicit mempath, but it's ordinary pages */
 330        return getpagesize();
 331    }
 332
 333    /* It's a hugepage filesystem, return the huge page size */
 334    return fs.f_bsize;
 335}
 336
 337/*
 338 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 339 * may or may not name the same files / on the same filesystem now as
 340 * when we actually open and map them.  Iterate over the file
 341 * descriptors instead, and use qemu_fd_getpagesize().
 342 */
 343static int find_max_supported_pagesize(Object *obj, void *opaque)
 344{
 345    char *mem_path;
 346    long *hpsize_min = opaque;
 347
 348    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 349        mem_path = object_property_get_str(obj, "mem-path", NULL);
 350        if (mem_path) {
 351            long hpsize = gethugepagesize(mem_path);
 352            if (hpsize < *hpsize_min) {
 353                *hpsize_min = hpsize;
 354            }
 355        } else {
 356            *hpsize_min = getpagesize();
 357        }
 358    }
 359
 360    return 0;
 361}
 362
 363static long getrampagesize(void)
 364{
 365    long hpsize = LONG_MAX;
 366    Object *memdev_root;
 367
 368    if (mem_path) {
 369        return gethugepagesize(mem_path);
 370    }
 371
 372    /* it's possible we have memory-backend objects with
 373     * hugepage-backed RAM. these may get mapped into system
 374     * address space via -numa parameters or memory hotplug
 375     * hooks. we want to take these into account, but we
 376     * also want to make sure these supported hugepage
 377     * sizes are applicable across the entire range of memory
 378     * we may boot from, so we take the min across all
 379     * backends, and assume normal pages in cases where a
 380     * backend isn't backed by hugepages.
 381     */
 382    memdev_root = object_resolve_path("/objects", NULL);
 383    if (!memdev_root) {
 384        return getpagesize();
 385    }
 386
 387    object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 388
 389    return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
 390}
 391
 392static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 393{
 394    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 395        return true;
 396    }
 397
 398    return (1ul << shift) <= rampgsize;
 399}
 400
 401static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 402{
 403    static struct kvm_ppc_smmu_info smmu_info;
 404    static bool has_smmu_info;
 405    CPUPPCState *env = &cpu->env;
 406    long rampagesize;
 407    int iq, ik, jq, jk;
 408
 409    /* We only handle page sizes for 64-bit server guests for now */
 410    if (!(env->mmu_model & POWERPC_MMU_64)) {
 411        return;
 412    }
 413
 414    /* Collect MMU info from kernel if not already */
 415    if (!has_smmu_info) {
 416        kvm_get_smmu_info(cpu, &smmu_info);
 417        has_smmu_info = true;
 418    }
 419
 420    rampagesize = getrampagesize();
 421
 422    /* Convert to QEMU form */
 423    memset(&env->sps, 0, sizeof(env->sps));
 424
 425    /* If we have HV KVM, we need to forbid CI large pages if our
 426     * host page size is smaller than 64K.
 427     */
 428    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 429        env->ci_large_pages = getpagesize() >= 0x10000;
 430    }
 431
 432    /*
 433     * XXX This loop should be an entry wide AND of the capabilities that
 434     *     the selected CPU has with the capabilities that KVM supports.
 435     */
 436    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 437        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 438        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 439
 440        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 441                                 ksps->page_shift)) {
 442            continue;
 443        }
 444        qsps->page_shift = ksps->page_shift;
 445        qsps->slb_enc = ksps->slb_enc;
 446        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 447            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 448                                     ksps->enc[jk].page_shift)) {
 449                continue;
 450            }
 451            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 452            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 453            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 454                break;
 455            }
 456        }
 457        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 458            break;
 459        }
 460    }
 461    env->slb_nr = smmu_info.slb_size;
 462    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 463        env->mmu_model &= ~POWERPC_MMU_1TSEG;
 464    }
 465}
 466#else /* defined (TARGET_PPC64) */
 467
 468static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 469{
 470}
 471
 472#endif /* !defined (TARGET_PPC64) */
 473
 474unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 475{
 476    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 477}
 478
 479/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 480 * book3s supports only 1 watchpoint, so an array size
 481 * of 4 is sufficient for now.
 482 */
 483#define MAX_HW_BKPTS 4
 484
 485static struct HWBreakpoint {
 486    target_ulong addr;
 487    int type;
 488} hw_debug_points[MAX_HW_BKPTS];
 489
 490static CPUWatchpoint hw_watchpoint;
 491
 492/* By default no breakpoints or watchpoints are supported */
 493static int max_hw_breakpoint;
 494static int max_hw_watchpoint;
 495static int nb_hw_breakpoint;
 496static int nb_hw_watchpoint;
 497
 498static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 499{
 500    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 501        max_hw_breakpoint = 2;
 502        max_hw_watchpoint = 2;
 503    }
 504
 505    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 506        fprintf(stderr, "Error initializing h/w breakpoints\n");
 507        return;
 508    }
 509}
 510
 511int kvm_arch_init_vcpu(CPUState *cs)
 512{
 513    PowerPCCPU *cpu = POWERPC_CPU(cs);
 514    CPUPPCState *cenv = &cpu->env;
 515    int ret;
 516
 517    /* Gather server mmu info from KVM and update the CPU state */
 518    kvm_fixup_page_sizes(cpu);
 519
 520    /* Synchronize sregs with kvm */
 521    ret = kvm_arch_sync_sregs(cpu);
 522    if (ret) {
 523        if (ret == -EINVAL) {
 524            error_report("Register sync failed... If you're using kvm-hv.ko,"
 525                         " only \"-cpu host\" is possible");
 526        }
 527        return ret;
 528    }
 529
 530    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 531
 532    /* Some targets support access to KVM's guest TLB. */
 533    switch (cenv->mmu_model) {
 534    case POWERPC_MMU_BOOKE206:
 535        ret = kvm_booke206_tlb_init(cpu);
 536        break;
 537    default:
 538        break;
 539    }
 540
 541    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 542    kvmppc_hw_debug_points_init(cenv);
 543
 544    return ret;
 545}
 546
 547static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 548{
 549    CPUPPCState *env = &cpu->env;
 550    CPUState *cs = CPU(cpu);
 551    struct kvm_dirty_tlb dirty_tlb;
 552    unsigned char *bitmap;
 553    int ret;
 554
 555    if (!env->kvm_sw_tlb) {
 556        return;
 557    }
 558
 559    bitmap = g_malloc((env->nb_tlb + 7) / 8);
 560    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 561
 562    dirty_tlb.bitmap = (uintptr_t)bitmap;
 563    dirty_tlb.num_dirty = env->nb_tlb;
 564
 565    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 566    if (ret) {
 567        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 568                __func__, strerror(-ret));
 569    }
 570
 571    g_free(bitmap);
 572}
 573
 574static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 575{
 576    PowerPCCPU *cpu = POWERPC_CPU(cs);
 577    CPUPPCState *env = &cpu->env;
 578    union {
 579        uint32_t u32;
 580        uint64_t u64;
 581    } val;
 582    struct kvm_one_reg reg = {
 583        .id = id,
 584        .addr = (uintptr_t) &val,
 585    };
 586    int ret;
 587
 588    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 589    if (ret != 0) {
 590        trace_kvm_failed_spr_get(spr, strerror(errno));
 591    } else {
 592        switch (id & KVM_REG_SIZE_MASK) {
 593        case KVM_REG_SIZE_U32:
 594            env->spr[spr] = val.u32;
 595            break;
 596
 597        case KVM_REG_SIZE_U64:
 598            env->spr[spr] = val.u64;
 599            break;
 600
 601        default:
 602            /* Don't handle this size yet */
 603            abort();
 604        }
 605    }
 606}
 607
 608static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 609{
 610    PowerPCCPU *cpu = POWERPC_CPU(cs);
 611    CPUPPCState *env = &cpu->env;
 612    union {
 613        uint32_t u32;
 614        uint64_t u64;
 615    } val;
 616    struct kvm_one_reg reg = {
 617        .id = id,
 618        .addr = (uintptr_t) &val,
 619    };
 620    int ret;
 621
 622    switch (id & KVM_REG_SIZE_MASK) {
 623    case KVM_REG_SIZE_U32:
 624        val.u32 = env->spr[spr];
 625        break;
 626
 627    case KVM_REG_SIZE_U64:
 628        val.u64 = env->spr[spr];
 629        break;
 630
 631    default:
 632        /* Don't handle this size yet */
 633        abort();
 634    }
 635
 636    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 637    if (ret != 0) {
 638        trace_kvm_failed_spr_set(spr, strerror(errno));
 639    }
 640}
 641
 642static int kvm_put_fp(CPUState *cs)
 643{
 644    PowerPCCPU *cpu = POWERPC_CPU(cs);
 645    CPUPPCState *env = &cpu->env;
 646    struct kvm_one_reg reg;
 647    int i;
 648    int ret;
 649
 650    if (env->insns_flags & PPC_FLOAT) {
 651        uint64_t fpscr = env->fpscr;
 652        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 653
 654        reg.id = KVM_REG_PPC_FPSCR;
 655        reg.addr = (uintptr_t)&fpscr;
 656        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 657        if (ret < 0) {
 658            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 659            return ret;
 660        }
 661
 662        for (i = 0; i < 32; i++) {
 663            uint64_t vsr[2];
 664
 665#ifdef HOST_WORDS_BIGENDIAN
 666            vsr[0] = float64_val(env->fpr[i]);
 667            vsr[1] = env->vsr[i];
 668#else
 669            vsr[0] = env->vsr[i];
 670            vsr[1] = float64_val(env->fpr[i]);
 671#endif
 672            reg.addr = (uintptr_t) &vsr;
 673            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 674
 675            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 676            if (ret < 0) {
 677                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 678                        i, strerror(errno));
 679                return ret;
 680            }
 681        }
 682    }
 683
 684    if (env->insns_flags & PPC_ALTIVEC) {
 685        reg.id = KVM_REG_PPC_VSCR;
 686        reg.addr = (uintptr_t)&env->vscr;
 687        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 688        if (ret < 0) {
 689            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 690            return ret;
 691        }
 692
 693        for (i = 0; i < 32; i++) {
 694            reg.id = KVM_REG_PPC_VR(i);
 695            reg.addr = (uintptr_t)&env->avr[i];
 696            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 697            if (ret < 0) {
 698                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 699                return ret;
 700            }
 701        }
 702    }
 703
 704    return 0;
 705}
 706
 707static int kvm_get_fp(CPUState *cs)
 708{
 709    PowerPCCPU *cpu = POWERPC_CPU(cs);
 710    CPUPPCState *env = &cpu->env;
 711    struct kvm_one_reg reg;
 712    int i;
 713    int ret;
 714
 715    if (env->insns_flags & PPC_FLOAT) {
 716        uint64_t fpscr;
 717        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 718
 719        reg.id = KVM_REG_PPC_FPSCR;
 720        reg.addr = (uintptr_t)&fpscr;
 721        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 722        if (ret < 0) {
 723            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 724            return ret;
 725        } else {
 726            env->fpscr = fpscr;
 727        }
 728
 729        for (i = 0; i < 32; i++) {
 730            uint64_t vsr[2];
 731
 732            reg.addr = (uintptr_t) &vsr;
 733            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 734
 735            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 736            if (ret < 0) {
 737                DPRINTF("Unable to get %s%d from KVM: %s\n",
 738                        vsx ? "VSR" : "FPR", i, strerror(errno));
 739                return ret;
 740            } else {
 741#ifdef HOST_WORDS_BIGENDIAN
 742                env->fpr[i] = vsr[0];
 743                if (vsx) {
 744                    env->vsr[i] = vsr[1];
 745                }
 746#else
 747                env->fpr[i] = vsr[1];
 748                if (vsx) {
 749                    env->vsr[i] = vsr[0];
 750                }
 751#endif
 752            }
 753        }
 754    }
 755
 756    if (env->insns_flags & PPC_ALTIVEC) {
 757        reg.id = KVM_REG_PPC_VSCR;
 758        reg.addr = (uintptr_t)&env->vscr;
 759        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 760        if (ret < 0) {
 761            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 762            return ret;
 763        }
 764
 765        for (i = 0; i < 32; i++) {
 766            reg.id = KVM_REG_PPC_VR(i);
 767            reg.addr = (uintptr_t)&env->avr[i];
 768            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 769            if (ret < 0) {
 770                DPRINTF("Unable to get VR%d from KVM: %s\n",
 771                        i, strerror(errno));
 772                return ret;
 773            }
 774        }
 775    }
 776
 777    return 0;
 778}
 779
 780#if defined(TARGET_PPC64)
 781static int kvm_get_vpa(CPUState *cs)
 782{
 783    PowerPCCPU *cpu = POWERPC_CPU(cs);
 784    CPUPPCState *env = &cpu->env;
 785    struct kvm_one_reg reg;
 786    int ret;
 787
 788    reg.id = KVM_REG_PPC_VPA_ADDR;
 789    reg.addr = (uintptr_t)&env->vpa_addr;
 790    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 791    if (ret < 0) {
 792        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 793        return ret;
 794    }
 795
 796    assert((uintptr_t)&env->slb_shadow_size
 797           == ((uintptr_t)&env->slb_shadow_addr + 8));
 798    reg.id = KVM_REG_PPC_VPA_SLB;
 799    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 800    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 801    if (ret < 0) {
 802        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 803                strerror(errno));
 804        return ret;
 805    }
 806
 807    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 808    reg.id = KVM_REG_PPC_VPA_DTL;
 809    reg.addr = (uintptr_t)&env->dtl_addr;
 810    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 811    if (ret < 0) {
 812        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 813                strerror(errno));
 814        return ret;
 815    }
 816
 817    return 0;
 818}
 819
 820static int kvm_put_vpa(CPUState *cs)
 821{
 822    PowerPCCPU *cpu = POWERPC_CPU(cs);
 823    CPUPPCState *env = &cpu->env;
 824    struct kvm_one_reg reg;
 825    int ret;
 826
 827    /* SLB shadow or DTL can't be registered unless a master VPA is
 828     * registered.  That means when restoring state, if a VPA *is*
 829     * registered, we need to set that up first.  If not, we need to
 830     * deregister the others before deregistering the master VPA */
 831    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 832
 833    if (env->vpa_addr) {
 834        reg.id = KVM_REG_PPC_VPA_ADDR;
 835        reg.addr = (uintptr_t)&env->vpa_addr;
 836        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 837        if (ret < 0) {
 838            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 839            return ret;
 840        }
 841    }
 842
 843    assert((uintptr_t)&env->slb_shadow_size
 844           == ((uintptr_t)&env->slb_shadow_addr + 8));
 845    reg.id = KVM_REG_PPC_VPA_SLB;
 846    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 847    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 848    if (ret < 0) {
 849        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 850        return ret;
 851    }
 852
 853    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 854    reg.id = KVM_REG_PPC_VPA_DTL;
 855    reg.addr = (uintptr_t)&env->dtl_addr;
 856    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 857    if (ret < 0) {
 858        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 859                strerror(errno));
 860        return ret;
 861    }
 862
 863    if (!env->vpa_addr) {
 864        reg.id = KVM_REG_PPC_VPA_ADDR;
 865        reg.addr = (uintptr_t)&env->vpa_addr;
 866        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 867        if (ret < 0) {
 868            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 869            return ret;
 870        }
 871    }
 872
 873    return 0;
 874}
 875#endif /* TARGET_PPC64 */
 876
 877int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 878{
 879    CPUPPCState *env = &cpu->env;
 880    struct kvm_sregs sregs;
 881    int i;
 882
 883    sregs.pvr = env->spr[SPR_PVR];
 884
 885    sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 886
 887    /* Sync SLB */
 888#ifdef TARGET_PPC64
 889    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 890        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 891        if (env->slb[i].esid & SLB_ESID_V) {
 892            sregs.u.s.ppc64.slb[i].slbe |= i;
 893        }
 894        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 895    }
 896#endif
 897
 898    /* Sync SRs */
 899    for (i = 0; i < 16; i++) {
 900        sregs.u.s.ppc32.sr[i] = env->sr[i];
 901    }
 902
 903    /* Sync BATs */
 904    for (i = 0; i < 8; i++) {
 905        /* Beware. We have to swap upper and lower bits here */
 906        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 907            | env->DBAT[1][i];
 908        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 909            | env->IBAT[1][i];
 910    }
 911
 912    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 913}
 914
 915int kvm_arch_put_registers(CPUState *cs, int level)
 916{
 917    PowerPCCPU *cpu = POWERPC_CPU(cs);
 918    CPUPPCState *env = &cpu->env;
 919    struct kvm_regs regs;
 920    int ret;
 921    int i;
 922
 923    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 924    if (ret < 0) {
 925        return ret;
 926    }
 927
 928    regs.ctr = env->ctr;
 929    regs.lr  = env->lr;
 930    regs.xer = cpu_read_xer(env);
 931    regs.msr = env->msr;
 932    regs.pc = env->nip;
 933
 934    regs.srr0 = env->spr[SPR_SRR0];
 935    regs.srr1 = env->spr[SPR_SRR1];
 936
 937    regs.sprg0 = env->spr[SPR_SPRG0];
 938    regs.sprg1 = env->spr[SPR_SPRG1];
 939    regs.sprg2 = env->spr[SPR_SPRG2];
 940    regs.sprg3 = env->spr[SPR_SPRG3];
 941    regs.sprg4 = env->spr[SPR_SPRG4];
 942    regs.sprg5 = env->spr[SPR_SPRG5];
 943    regs.sprg6 = env->spr[SPR_SPRG6];
 944    regs.sprg7 = env->spr[SPR_SPRG7];
 945
 946    regs.pid = env->spr[SPR_BOOKE_PID];
 947
 948    for (i = 0;i < 32; i++)
 949        regs.gpr[i] = env->gpr[i];
 950
 951    regs.cr = 0;
 952    for (i = 0; i < 8; i++) {
 953        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 954    }
 955
 956    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 957    if (ret < 0)
 958        return ret;
 959
 960    kvm_put_fp(cs);
 961
 962    if (env->tlb_dirty) {
 963        kvm_sw_tlb_put(cpu);
 964        env->tlb_dirty = false;
 965    }
 966
 967    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 968        ret = kvmppc_put_books_sregs(cpu);
 969        if (ret < 0) {
 970            return ret;
 971        }
 972    }
 973
 974    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 975        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 976    }
 977
 978    if (cap_one_reg) {
 979        int i;
 980
 981        /* We deliberately ignore errors here: for kernels which have
 982         * the ONE_REG calls but don't support the specific
 983         * registers, there's a reasonable chance things will still
 984         * work, at least until we try to migrate. */
 985        for (i = 0; i < 1024; i++) {
 986            uint64_t id = env->spr_cb[i].one_reg_id;
 987
 988            if (id != 0) {
 989                kvm_put_one_spr(cs, id, i);
 990            }
 991        }
 992
 993#ifdef TARGET_PPC64
 994        if (msr_ts) {
 995            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 996                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 997            }
 998            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 999                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1000            }
1001            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1002            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1003            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1004            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1005            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1006            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1007            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1008            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1009            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1010            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1011        }
1012
1013        if (cap_papr) {
1014            if (kvm_put_vpa(cs) < 0) {
1015                DPRINTF("Warning: Unable to set VPA information to KVM\n");
1016            }
1017        }
1018
1019        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1020#endif /* TARGET_PPC64 */
1021    }
1022
1023    return ret;
1024}
1025
1026static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1027{
1028     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1029}
1030
1031static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1032{
1033    CPUPPCState *env = &cpu->env;
1034    struct kvm_sregs sregs;
1035    int ret;
1036
1037    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1038    if (ret < 0) {
1039        return ret;
1040    }
1041
1042    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1043        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1044        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1045        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1046        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1047        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1048        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1049        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1050        env->spr[SPR_DECR] = sregs.u.e.dec;
1051        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1052        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1053        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1054    }
1055
1056    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1057        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1058        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1059        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1060        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1061        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1062    }
1063
1064    if (sregs.u.e.features & KVM_SREGS_E_64) {
1065        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1066    }
1067
1068    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1069        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1070    }
1071
1072    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1073        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1074        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1075        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1076        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1077        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1078        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1079        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1080        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1081        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1082        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1083        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1084        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1085        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1086        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1087        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1088        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1089        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1090        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1091        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1092        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1093        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1094        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1095        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1096        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1097        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1098        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1099        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1100        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1101        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1102        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1103        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1104        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1105
1106        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1107            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1108            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1109            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1110            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1111            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1112            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1113        }
1114
1115        if (sregs.u.e.features & KVM_SREGS_E_PM) {
1116            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1117            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1118        }
1119
1120        if (sregs.u.e.features & KVM_SREGS_E_PC) {
1121            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1122            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1123            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1124            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1125        }
1126    }
1127
1128    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1129        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1130        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1131        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1132        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1133        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1134        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1135        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1136        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1137        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1138        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1139    }
1140
1141    if (sregs.u.e.features & KVM_SREGS_EXP) {
1142        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1143    }
1144
1145    if (sregs.u.e.features & KVM_SREGS_E_PD) {
1146        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1147        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1148    }
1149
1150    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1151        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1152        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1153        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1154
1155        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1156            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1157            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1158        }
1159    }
1160
1161    return 0;
1162}
1163
1164static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1165{
1166    CPUPPCState *env = &cpu->env;
1167    struct kvm_sregs sregs;
1168    int ret;
1169    int i;
1170
1171    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1172    if (ret < 0) {
1173        return ret;
1174    }
1175
1176    if (!env->external_htab) {
1177        ppc_store_sdr1(env, sregs.u.s.sdr1);
1178    }
1179
1180    /* Sync SLB */
1181#ifdef TARGET_PPC64
1182    /*
1183     * The packed SLB array we get from KVM_GET_SREGS only contains
1184     * information about valid entries. So we flush our internal copy
1185     * to get rid of stale ones, then put all valid SLB entries back
1186     * in.
1187     */
1188    memset(env->slb, 0, sizeof(env->slb));
1189    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1190        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1191        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1192        /*
1193         * Only restore valid entries
1194         */
1195        if (rb & SLB_ESID_V) {
1196            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1197        }
1198    }
1199#endif
1200
1201    /* Sync SRs */
1202    for (i = 0; i < 16; i++) {
1203        env->sr[i] = sregs.u.s.ppc32.sr[i];
1204    }
1205
1206    /* Sync BATs */
1207    for (i = 0; i < 8; i++) {
1208        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1209        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1210        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1211        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1212    }
1213
1214    return 0;
1215}
1216
1217int kvm_arch_get_registers(CPUState *cs)
1218{
1219    PowerPCCPU *cpu = POWERPC_CPU(cs);
1220    CPUPPCState *env = &cpu->env;
1221    struct kvm_regs regs;
1222    uint32_t cr;
1223    int i, ret;
1224
1225    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1226    if (ret < 0)
1227        return ret;
1228
1229    cr = regs.cr;
1230    for (i = 7; i >= 0; i--) {
1231        env->crf[i] = cr & 15;
1232        cr >>= 4;
1233    }
1234
1235    env->ctr = regs.ctr;
1236    env->lr = regs.lr;
1237    cpu_write_xer(env, regs.xer);
1238    env->msr = regs.msr;
1239    env->nip = regs.pc;
1240
1241    env->spr[SPR_SRR0] = regs.srr0;
1242    env->spr[SPR_SRR1] = regs.srr1;
1243
1244    env->spr[SPR_SPRG0] = regs.sprg0;
1245    env->spr[SPR_SPRG1] = regs.sprg1;
1246    env->spr[SPR_SPRG2] = regs.sprg2;
1247    env->spr[SPR_SPRG3] = regs.sprg3;
1248    env->spr[SPR_SPRG4] = regs.sprg4;
1249    env->spr[SPR_SPRG5] = regs.sprg5;
1250    env->spr[SPR_SPRG6] = regs.sprg6;
1251    env->spr[SPR_SPRG7] = regs.sprg7;
1252
1253    env->spr[SPR_BOOKE_PID] = regs.pid;
1254
1255    for (i = 0;i < 32; i++)
1256        env->gpr[i] = regs.gpr[i];
1257
1258    kvm_get_fp(cs);
1259
1260    if (cap_booke_sregs) {
1261        ret = kvmppc_get_booke_sregs(cpu);
1262        if (ret < 0) {
1263            return ret;
1264        }
1265    }
1266
1267    if (cap_segstate) {
1268        ret = kvmppc_get_books_sregs(cpu);
1269        if (ret < 0) {
1270            return ret;
1271        }
1272    }
1273
1274    if (cap_hior) {
1275        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1276    }
1277
1278    if (cap_one_reg) {
1279        int i;
1280
1281        /* We deliberately ignore errors here: for kernels which have
1282         * the ONE_REG calls but don't support the specific
1283         * registers, there's a reasonable chance things will still
1284         * work, at least until we try to migrate. */
1285        for (i = 0; i < 1024; i++) {
1286            uint64_t id = env->spr_cb[i].one_reg_id;
1287
1288            if (id != 0) {
1289                kvm_get_one_spr(cs, id, i);
1290            }
1291        }
1292
1293#ifdef TARGET_PPC64
1294        if (msr_ts) {
1295            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1296                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1297            }
1298            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1299                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1300            }
1301            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1302            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1303            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1304            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1305            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1306            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1307            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1308            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1309            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1310            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1311        }
1312
1313        if (cap_papr) {
1314            if (kvm_get_vpa(cs) < 0) {
1315                DPRINTF("Warning: Unable to get VPA information from KVM\n");
1316            }
1317        }
1318
1319        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1320#endif
1321    }
1322
1323    return 0;
1324}
1325
1326int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1327{
1328    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1329
1330    if (irq != PPC_INTERRUPT_EXT) {
1331        return 0;
1332    }
1333
1334    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1335        return 0;
1336    }
1337
1338    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1339
1340    return 0;
1341}
1342
1343#if defined(TARGET_PPCEMB)
1344#define PPC_INPUT_INT PPC40x_INPUT_INT
1345#elif defined(TARGET_PPC64)
1346#define PPC_INPUT_INT PPC970_INPUT_INT
1347#else
1348#define PPC_INPUT_INT PPC6xx_INPUT_INT
1349#endif
1350
1351void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1352{
1353    PowerPCCPU *cpu = POWERPC_CPU(cs);
1354    CPUPPCState *env = &cpu->env;
1355    int r;
1356    unsigned irq;
1357
1358    qemu_mutex_lock_iothread();
1359
1360    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1361     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1362    if (!cap_interrupt_level &&
1363        run->ready_for_interrupt_injection &&
1364        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1365        (env->irq_input_state & (1<<PPC_INPUT_INT)))
1366    {
1367        /* For now KVM disregards the 'irq' argument. However, in the
1368         * future KVM could cache it in-kernel to avoid a heavyweight exit
1369         * when reading the UIC.
1370         */
1371        irq = KVM_INTERRUPT_SET;
1372
1373        DPRINTF("injected interrupt %d\n", irq);
1374        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1375        if (r < 0) {
1376            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1377        }
1378
1379        /* Always wake up soon in case the interrupt was level based */
1380        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1381                       (NANOSECONDS_PER_SECOND / 50));
1382    }
1383
1384    /* We don't know if there are more interrupts pending after this. However,
1385     * the guest will return to userspace in the course of handling this one
1386     * anyway, so we will get a chance to deliver the rest. */
1387
1388    qemu_mutex_unlock_iothread();
1389}
1390
1391MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1392{
1393    return MEMTXATTRS_UNSPECIFIED;
1394}
1395
1396int kvm_arch_process_async_events(CPUState *cs)
1397{
1398    return cs->halted;
1399}
1400
1401static int kvmppc_handle_halt(PowerPCCPU *cpu)
1402{
1403    CPUState *cs = CPU(cpu);
1404    CPUPPCState *env = &cpu->env;
1405
1406    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1407        cs->halted = 1;
1408        cs->exception_index = EXCP_HLT;
1409    }
1410
1411    return 0;
1412}
1413
1414/* map dcr access to existing qemu dcr emulation */
1415static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1416{
1417    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1418        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1419
1420    return 0;
1421}
1422
1423static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1424{
1425    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1426        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1427
1428    return 0;
1429}
1430
1431int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1432{
1433    /* Mixed endian case is not handled */
1434    uint32_t sc = debug_inst_opcode;
1435
1436    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1437                            sizeof(sc), 0) ||
1438        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1439        return -EINVAL;
1440    }
1441
1442    return 0;
1443}
1444
1445int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1446{
1447    uint32_t sc;
1448
1449    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1450        sc != debug_inst_opcode ||
1451        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1452                            sizeof(sc), 1)) {
1453        return -EINVAL;
1454    }
1455
1456    return 0;
1457}
1458
1459static int find_hw_breakpoint(target_ulong addr, int type)
1460{
1461    int n;
1462
1463    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1464           <= ARRAY_SIZE(hw_debug_points));
1465
1466    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1467        if (hw_debug_points[n].addr == addr &&
1468             hw_debug_points[n].type == type) {
1469            return n;
1470        }
1471    }
1472
1473    return -1;
1474}
1475
1476static int find_hw_watchpoint(target_ulong addr, int *flag)
1477{
1478    int n;
1479
1480    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1481    if (n >= 0) {
1482        *flag = BP_MEM_ACCESS;
1483        return n;
1484    }
1485
1486    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1487    if (n >= 0) {
1488        *flag = BP_MEM_WRITE;
1489        return n;
1490    }
1491
1492    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1493    if (n >= 0) {
1494        *flag = BP_MEM_READ;
1495        return n;
1496    }
1497
1498    return -1;
1499}
1500
1501int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1502                                  target_ulong len, int type)
1503{
1504    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1505        return -ENOBUFS;
1506    }
1507
1508    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1509    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1510
1511    switch (type) {
1512    case GDB_BREAKPOINT_HW:
1513        if (nb_hw_breakpoint >= max_hw_breakpoint) {
1514            return -ENOBUFS;
1515        }
1516
1517        if (find_hw_breakpoint(addr, type) >= 0) {
1518            return -EEXIST;
1519        }
1520
1521        nb_hw_breakpoint++;
1522        break;
1523
1524    case GDB_WATCHPOINT_WRITE:
1525    case GDB_WATCHPOINT_READ:
1526    case GDB_WATCHPOINT_ACCESS:
1527        if (nb_hw_watchpoint >= max_hw_watchpoint) {
1528            return -ENOBUFS;
1529        }
1530
1531        if (find_hw_breakpoint(addr, type) >= 0) {
1532            return -EEXIST;
1533        }
1534
1535        nb_hw_watchpoint++;
1536        break;
1537
1538    default:
1539        return -ENOSYS;
1540    }
1541
1542    return 0;
1543}
1544
1545int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1546                                  target_ulong len, int type)
1547{
1548    int n;
1549
1550    n = find_hw_breakpoint(addr, type);
1551    if (n < 0) {
1552        return -ENOENT;
1553    }
1554
1555    switch (type) {
1556    case GDB_BREAKPOINT_HW:
1557        nb_hw_breakpoint--;
1558        break;
1559
1560    case GDB_WATCHPOINT_WRITE:
1561    case GDB_WATCHPOINT_READ:
1562    case GDB_WATCHPOINT_ACCESS:
1563        nb_hw_watchpoint--;
1564        break;
1565
1566    default:
1567        return -ENOSYS;
1568    }
1569    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1570
1571    return 0;
1572}
1573
1574void kvm_arch_remove_all_hw_breakpoints(void)
1575{
1576    nb_hw_breakpoint = nb_hw_watchpoint = 0;
1577}
1578
1579void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1580{
1581    int n;
1582
1583    /* Software Breakpoint updates */
1584    if (kvm_sw_breakpoints_active(cs)) {
1585        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1586    }
1587
1588    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1589           <= ARRAY_SIZE(hw_debug_points));
1590    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1591
1592    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1593        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1594        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1595        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1596            switch (hw_debug_points[n].type) {
1597            case GDB_BREAKPOINT_HW:
1598                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1599                break;
1600            case GDB_WATCHPOINT_WRITE:
1601                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1602                break;
1603            case GDB_WATCHPOINT_READ:
1604                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1605                break;
1606            case GDB_WATCHPOINT_ACCESS:
1607                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1608                                        KVMPPC_DEBUG_WATCH_READ;
1609                break;
1610            default:
1611                cpu_abort(cs, "Unsupported breakpoint type\n");
1612            }
1613            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1614        }
1615    }
1616}
1617
1618static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1619{
1620    CPUState *cs = CPU(cpu);
1621    CPUPPCState *env = &cpu->env;
1622    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1623    int handle = 0;
1624    int n;
1625    int flag = 0;
1626
1627    if (cs->singlestep_enabled) {
1628        handle = 1;
1629    } else if (arch_info->status) {
1630        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1631            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1632                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1633                if (n >= 0) {
1634                    handle = 1;
1635                }
1636            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1637                                            KVMPPC_DEBUG_WATCH_WRITE)) {
1638                n = find_hw_watchpoint(arch_info->address,  &flag);
1639                if (n >= 0) {
1640                    handle = 1;
1641                    cs->watchpoint_hit = &hw_watchpoint;
1642                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
1643                    hw_watchpoint.flags = flag;
1644                }
1645            }
1646        }
1647    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1648        handle = 1;
1649    } else {
1650        /* QEMU is not able to handle this debug exception, so inject
1651         * a program exception into the guest;
1652         * yes, a program exception, NOT a debug exception !!
1653         * When QEMU is using the debug resources then the debug
1654         * exception must always be set. To achieve this we set MSR_DE
1655         * and also set MSRP_DEP so the guest cannot change MSR_DE.
1656         * When emulating debug resources for the guest we want the
1657         * guest to control MSR_DE (enable/disable the debug interrupt
1658         * on demand). Supporting both configurations is NOT possible,
1659         * so the result is that we cannot share debug resources
1660         * between QEMU and the guest on BOOKE architecture.
1661         * In the current design QEMU gets priority over the guest:
1662         * if QEMU is using the debug resources then the guest
1663         * cannot use them.
1664         * For software breakpoints QEMU uses a privileged instruction,
1665         * so there is no way we get here because the guest set a debug
1666         * exception; the only possibility is that the guest executed a
1667         * privileged / illegal instruction, and that is why we are
1668         * injecting a program interrupt.
1669         */
1670
1671        cpu_synchronize_state(cs);
1672        /* env->nip is the PC, so increment it by 4 before calling
1673         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1674         */
1675        env->nip += 4;
1676        cs->exception_index = POWERPC_EXCP_PROGRAM;
1677        env->error_code = POWERPC_EXCP_INVAL;
1678        ppc_cpu_do_interrupt(cs);
1679    }
1680
1681    return handle;
1682}
1683
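/* Top-level KVM exit dispatcher.  Returning 0 resumes the guest, returning
 * EXCP_DEBUG hands control to the debugger, and a negative value makes the
 * generic KVM loop treat the exit as an error.
 */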
1684int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1685{
1686    PowerPCCPU *cpu = POWERPC_CPU(cs);
1687    CPUPPCState *env = &cpu->env;
1688    int ret;
1689
1690    qemu_mutex_lock_iothread();
1691
1692    switch (run->exit_reason) {
1693    case KVM_EXIT_DCR:
1694        if (run->dcr.is_write) {
1695            DPRINTF("handle dcr write\n");
1696            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1697        } else {
1698            DPRINTF("handle dcr read\n");
1699            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1700        }
1701        break;
1702    case KVM_EXIT_HLT:
1703        DPRINTF("handle halt\n");
1704        ret = kvmppc_handle_halt(cpu);
1705        break;
1706#if defined(TARGET_PPC64)
1707    case KVM_EXIT_PAPR_HCALL:
1708        DPRINTF("handle PAPR hypercall\n");
1709        run->papr_hcall.ret = spapr_hypercall(cpu,
1710                                              run->papr_hcall.nr,
1711                                              run->papr_hcall.args);
1712        ret = 0;
1713        break;
1714#endif
1715    case KVM_EXIT_EPR:
1716        DPRINTF("handle epr\n");
1717        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1718        ret = 0;
1719        break;
1720    case KVM_EXIT_WATCHDOG:
1721        DPRINTF("handle watchdog expiry\n");
1722        watchdog_perform_action();
1723        ret = 0;
1724        break;
1725
1726    case KVM_EXIT_DEBUG:
1727        DPRINTF("handle debug exception\n");
1728        if (kvm_handle_debug(cpu, run)) {
1729            ret = EXCP_DEBUG;
1730            break;
1731        }
1732        /* re-enter, this exception was guest-internal */
1733        ret = 0;
1734        break;
1735
1736    default:
1737        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1738        ret = -1;
1739        break;
1740    }
1741
1742    qemu_mutex_unlock_iothread();
1743    return ret;
1744}
1745
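/* The BookE timer/watchdog helpers below go through KVM's ONE_REG
 * interface: KVM_REG_PPC_OR_TSR ORs bits into the guest's TSR,
 * KVM_REG_PPC_CLEAR_TSR clears them, and KVM_REG_PPC_TCR replaces the TCR
 * wholesale.  A typical (illustrative) call would be something like
 *     kvmppc_or_tsr_bits(cpu, TSR_ENW | TSR_WIS);
 * to assert the watchdog status bits.
 */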
1746int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1747{
1748    CPUState *cs = CPU(cpu);
1749    uint32_t bits = tsr_bits;
1750    struct kvm_one_reg reg = {
1751        .id = KVM_REG_PPC_OR_TSR,
1752        .addr = (uintptr_t) &bits,
1753    };
1754
1755    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1756}
1757
1758int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1759{
1760
1761    CPUState *cs = CPU(cpu);
1762    uint32_t bits = tsr_bits;
1763    struct kvm_one_reg reg = {
1764        .id = KVM_REG_PPC_CLEAR_TSR,
1765        .addr = (uintptr_t) &bits,
1766    };
1767
1768    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1769}
1770
1771int kvmppc_set_tcr(PowerPCCPU *cpu)
1772{
1773    CPUState *cs = CPU(cpu);
1774    CPUPPCState *env = &cpu->env;
1775    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1776
1777    struct kvm_one_reg reg = {
1778        .id = KVM_REG_PPC_TCR,
1779        .addr = (uintptr_t) &tcr,
1780    };
1781
1782    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1783}
1784
1785int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1786{
1787    CPUState *cs = CPU(cpu);
1788    int ret;
1789
1790    if (!kvm_enabled()) {
1791        return -1;
1792    }
1793
1794    if (!cap_ppc_watchdog) {
1795        printf("warning: KVM does not support watchdog\n");
1796        return -1;
1797    }
1798
1799    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1800    if (ret < 0) {
1801        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1802                __func__, strerror(-ret));
1803        return ret;
1804    }
1805
1806    return ret;
1807}
1808
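/* Scan /proc/cpuinfo for a line starting with 'field' and copy the whole
 * matching line (field name included) into 'value'.  Returns 0 on success,
 * -1 if the file can't be opened or the field isn't present.
 */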
1809static int read_cpuinfo(const char *field, char *value, int len)
1810{
1811    FILE *f;
1812    int ret = -1;
1813    int field_len = strlen(field);
1814    char line[512];
1815
1816    f = fopen("/proc/cpuinfo", "r");
1817    if (!f) {
1818        return -1;
1819    }
1820
1821    do {
1822        if (!fgets(line, sizeof(line), f)) {
1823            break;
1824        }
1825        if (!strncmp(line, field, field_len)) {
1826            pstrcpy(value, len, line);
1827            ret = 0;
1828            break;
1829        }
1830    } while (*line);
1831
1832    fclose(f);
1833
1834    return ret;
1835}
1836
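/* Parse the host timebase frequency out of /proc/cpuinfo.  The line
 * typically looks like "timebase : 512000000"; everything after the colon
 * is handed to atoi().  Falls back to NANOSECONDS_PER_SECOND if the field
 * can't be read.
 */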
1837uint32_t kvmppc_get_tbfreq(void)
1838{
1839    char line[512];
1840    char *ns;
1841    uint32_t retval = NANOSECONDS_PER_SECOND;
1842
1843    if (read_cpuinfo("timebase", line, sizeof(line))) {
1844        return retval;
1845    }
1846
1847    if (!(ns = strchr(line, ':'))) {
1848        return retval;
1849    }
1850
1851    ns++;
1852
1853    return atoi(ns);
1854}
1855
1856bool kvmppc_get_host_serial(char **value)
1857{
1858    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1859                               NULL);
1860}
1861
1862bool kvmppc_get_host_model(char **value)
1863{
1864    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1865}
1866
1867/* Try to find a device tree node for a CPU with a clock-frequency property */
1868static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1869{
1870    struct dirent *dirp;
1871    DIR *dp;
1872
1873    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1874        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1875        return -1;
1876    }
1877
1878    buf[0] = '\0';
1879    while ((dirp = readdir(dp)) != NULL) {
1880        FILE *f;
1881        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1882                 dirp->d_name);
1883        f = fopen(buf, "r");
1884        if (f) {
1885            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1886            fclose(f);
1887            break;
1888        }
1889        buf[0] = '\0';
1890    }
1891    closedir(dp);
1892    if (buf[0] == '\0') {
1893        printf("Unknown host!\n");
1894        return -1;
1895    }
1896
1897    return 0;
1898}
1899
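/* Read a single big-endian integer property (4 or 8 bytes) from a host
 * device-tree file and return it in host byte order.  Returns -1 if the
 * file can't be opened and 0 if the property size isn't one we understand.
 */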
1900static uint64_t kvmppc_read_int_dt(const char *filename)
1901{
1902    union {
1903        uint32_t v32;
1904        uint64_t v64;
1905    } u;
1906    FILE *f;
1907    int len;
1908
1909    f = fopen(filename, "rb");
1910    if (!f) {
1911        return -1;
1912    }
1913
1914    len = fread(&u, 1, sizeof(u), f);
1915    fclose(f);
1916    switch (len) {
1917    case 4:
1918        /* property is a 32-bit quantity */
1919        return be32_to_cpu(u.v32);
1920    case 8:
1921        return be64_to_cpu(u.v64);
1922    }
1923
1924    return 0;
1925}
1926
1927/* Read a CPU node property from the host device tree that's a single
1928 * integer (32-bit or 64-bit).  Returns (uint64_t)-1 if the node or
1929 * property can't be found or opened, and 0 if the property isn't in a
1930 * format we understand. */
1931static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1932{
1933    char buf[PATH_MAX], *tmp;
1934    uint64_t val;
1935
1936    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1937        return -1;
1938    }
1939
1940    tmp = g_strdup_printf("%s/%s", buf, propname);
1941    val = kvmppc_read_int_dt(tmp);
1942    g_free(tmp);
1943
1944    return val;
1945}
1946
1947uint64_t kvmppc_get_clockfreq(void)
1948{
1949    return kvmppc_read_int_cpu_dt("clock-frequency");
1950}
1951
1952uint32_t kvmppc_get_vmx(void)
1953{
1954    return kvmppc_read_int_cpu_dt("ibm,vmx");
1955}
1956
1957uint32_t kvmppc_get_dfp(void)
1958{
1959    return kvmppc_read_int_cpu_dt("ibm,dfp");
1960}
1961
1962static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1963{
1964    PowerPCCPU *cpu = ppc_env_get_cpu(env);
1965    CPUState *cs = CPU(cpu);
1966
1967    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1968        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1969        return 0;
1970    }
1971
1972    return 1;
1973}
1974
1975int kvmppc_get_hasidle(CPUPPCState *env)
1976{
1977    struct kvm_ppc_pvinfo pvinfo;
1978
1979    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1980        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1981        return 1;
1982    }
1983
1984    return 0;
1985}
1986
1987int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1988{
1989    uint32_t *hc = (uint32_t *)buf;
1990    struct kvm_ppc_pvinfo pvinfo;
1991
1992    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1993        memcpy(buf, pvinfo.hcall, buf_len);
1994        return 0;
1995    }
1996
1997    /*
1998     * Fall back to hypercalls that always fail, regardless of endianness:
1999     *
2000     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2001     *     li r3, -1
2002     *     b .+8       (becomes nop in wrong endian)
2003     *     bswap32(li r3, -1)
2004     */
2005
2006    hc[0] = cpu_to_be32(0x08000048);
2007    hc[1] = cpu_to_be32(0x3860ffff);
2008    hc[2] = cpu_to_be32(0x48000008);
2009    hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2010
2011    return 1;
2012}
2013
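/* Whitelist a single hypercall with the kernel.  KVM_CAP_PPC_ENABLE_HCALL
 * is a vm-level capability whose arguments are the hcall number and an
 * enable flag, so in-kernel handling is switched on one hcall at a time.
 */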
2014static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2015{
2016    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2017}
2018
2019void kvmppc_enable_logical_ci_hcalls(void)
2020{
2021    /*
2022     * FIXME: it would be nice if we could detect the cases where
2023     * we're using a device which requires the in-kernel
2024     * implementation of these hcalls but the kernel lacks them, and
2025     * produce a warning.
2026     */
2027    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2028    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2029}
2030
2031void kvmppc_enable_set_mode_hcall(void)
2032{
2033    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2034}
2035
2036void kvmppc_set_papr(PowerPCCPU *cpu)
2037{
2038    CPUState *cs = CPU(cpu);
2039    int ret;
2040
2041    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2042    if (ret) {
2043        error_report("This vCPU type or KVM version does not support PAPR");
2044        exit(1);
2045    }
2046
2047    /* Update the capability flag so we sync the right information
2048     * with kvm */
2049    cap_papr = 1;
2050}
2051
2052int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2053{
2054    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2055}
2056
2057void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2058{
2059    CPUState *cs = CPU(cpu);
2060    int ret;
2061
2062    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2063    if (ret && mpic_proxy) {
2064        error_report("This KVM version does not support EPR");
2065        exit(1);
2066    }
2067}
2068
2069int kvmppc_smt_threads(void)
2070{
2071    return cap_ppc_smt ? cap_ppc_smt : 1;
2072}
2073
2074#ifdef TARGET_PPC64
2075off_t kvmppc_alloc_rma(void **rma)
2076{
2077    off_t size;
2078    int fd;
2079    struct kvm_allocate_rma ret;
2080
2081    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2082     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2083     *                      not necessary on this hardware
2084     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2085     *
2086     * FIXME: We should allow the user to force contiguous RMA
2087     * allocation in the cap_ppc_rma==1 case.
2088     */
2089    if (cap_ppc_rma < 2) {
2090        return 0;
2091    }
2092
2093    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2094    if (fd < 0) {
2095        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2096                strerror(errno));
2097        return -1;
2098    }
2099
2100    size = MIN(ret.rma_size, 256ul << 20);
2101
2102    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2103    if (*rma == MAP_FAILED) {
2104        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2105        return -1;
2106    }
2107
2108    return size;
2109}
2110
2111uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2112{
2113    struct kvm_ppc_smmu_info info;
2114    long rampagesize, best_page_shift;
2115    int i;
2116
2117    if (cap_ppc_rma >= 2) {
2118        return current_size;
2119    }
2120
2121    /* Find the largest hardware supported page size that's less than
2122     * or equal to the (logical) backing page size of guest RAM */
2123    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2124    rampagesize = getrampagesize();
2125    best_page_shift = 0;
2126
2127    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2128        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2129
2130        if (!sps->page_shift) {
2131            continue;
2132        }
2133
2134        if ((sps->page_shift > best_page_shift)
2135            && ((1UL << sps->page_shift) <= rampagesize)) {
2136            best_page_shift = sps->page_shift;
2137        }
2138    }
2139
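    /* The cap below amounts to one guest page per 128-byte PTEG in the
     * hash table.  As a worked example (hypothetical values): 64KiB
     * backing pages (best_page_shift == 16) and a 16MiB HPT
     * (hash_shift == 24) give 1ULL << (16 + 24 - 7), an 8GiB ceiling,
     * so any smaller current_size is returned unchanged.
     */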
2140    return MIN(current_size,
2141               1ULL << (best_page_shift + hash_shift - 7));
2142}
2143#endif
2144
2145bool kvmppc_spapr_use_multitce(void)
2146{
2147    return cap_spapr_multitce;
2148}
2149
2150void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2151                              bool need_vfio)
2152{
2153    struct kvm_create_spapr_tce args = {
2154        .liobn = liobn,
2155        .window_size = window_size,
2156    };
2157    long len;
2158    int fd;
2159    void *table;
2160
2161    /* Must set fd to -1 so we don't try to munmap when called for
2162     * destroying the table, which the upper layers -will- do
2163     */
2164    *pfd = -1;
2165    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2166        return NULL;
2167    }
2168
2169    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2170    if (fd < 0) {
2171        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2172                liobn);
2173        return NULL;
2174    }
2175
2176    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2177    /* FIXME: round this up to page size */
2178
2179    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2180    if (table == MAP_FAILED) {
2181        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2182                liobn);
2183        close(fd);
2184        return NULL;
2185    }
2186
2187    *pfd = fd;
2188    return table;
2189}
2190
2191int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2192{
2193    long len;
2194
2195    if (fd < 0) {
2196        return -1;
2197    }
2198
2199    len = nb_table * sizeof(uint64_t);
2200    if ((munmap(table, len) < 0) ||
2201        (close(fd) < 0)) {
2202        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2203                strerror(errno));
2204        /* Leak the table */
2205    }
2206
2207    return 0;
2208}
2209
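/* (Re)allocate the guest hash page table.  Returns 0 when the caller
 * (QEMU) must allocate the HPT itself, a positive shift when the kernel
 * owns an HPT of 2^shift bytes, or a negative errno on failure.
 */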
2210int kvmppc_reset_htab(int shift_hint)
2211{
2212    uint32_t shift = shift_hint;
2213
2214    if (!kvm_enabled()) {
2215        /* Full emulation, tell caller to allocate htab itself */
2216        return 0;
2217    }
2218    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2219        int ret;
2220        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2221        if (ret == -ENOTTY) {
2222            /* At least some versions of PR KVM advertise the
2223             * capability, but don't implement the ioctl().  Oops.
2224             * Return 0 so that we allocate the htab in qemu, as is
2225             * correct for PR. */
2226            return 0;
2227        } else if (ret < 0) {
2228            return ret;
2229        }
2230        return shift;
2231    }
2232
2233    /* We have a kernel that predates the htab reset calls.  For PR
2234     * KVM, we need to allocate the htab ourselves; an HV KVM of
2235     * this era has already allocated a fixed-size 16MB hash table.
2236     * Kernels of this era have the GET_PVINFO capability only on
2237     * PR, so we use this hack to determine the right
2238     * answer. */
2239    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2240        /* PR - tell caller to allocate htab */
2241        return 0;
2242    } else {
2243        /* HV - assume 16MB kernel allocated htab */
2244        return 24;
2245    }
2246}
2247
2248static inline uint32_t mfpvr(void)
2249{
2250    uint32_t pvr;
2251
2252    asm ("mfpvr %0"
2253         : "=r"(pvr));
2254    return pvr;
2255}
2256
2257static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2258{
2259    if (on) {
2260        *word |= flags;
2261    } else {
2262        *word &= ~flags;
2263    }
2264}
2265
2266static void kvmppc_host_cpu_initfn(Object *obj)
2267{
2268    assert(kvm_enabled());
2269}
2270
2271static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2272{
2273    DeviceClass *dc = DEVICE_CLASS(oc);
2274    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2275    uint32_t vmx = kvmppc_get_vmx();
2276    uint32_t dfp = kvmppc_get_dfp();
2277    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2278    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2279
2280    /* Now fix up the class with information we can query from the host */
2281    pcc->pvr = mfpvr();
2282
2283    if (vmx != -1) {
2284        /* Only override when we know what the host supports */
2285        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2286        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2287    }
2288    if (dfp != -1) {
2289        /* Only override when we know what the host supports */
2290        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2291    }
2292
2293    if (dcache_size != -1) {
2294        pcc->l1_dcache_size = dcache_size;
2295    }
2296
2297    if (icache_size != -1) {
2298        pcc->l1_icache_size = icache_size;
2299    }
2300
2301    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2302    dc->cannot_destroy_with_object_finalize_yet = true;
2303}
2304
2305bool kvmppc_has_cap_epr(void)
2306{
2307    return cap_epr;
2308}
2309
2310bool kvmppc_has_cap_htab_fd(void)
2311{
2312    return cap_htab_fd;
2313}
2314
2315bool kvmppc_has_cap_fixup_hcalls(void)
2316{
2317    return cap_fixup_hcalls;
2318}
2319
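/* Walk up the QOM hierarchy to the first abstract ancestor, which for the
 * PowerPC CPU classes is the generic family class (e.g. a POWER7 family
 * class rather than a specific PVR revision).
 */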
2320static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2321{
2322    ObjectClass *oc = OBJECT_CLASS(pcc);
2323
2324    while (oc && !object_class_is_abstract(oc)) {
2325        oc = object_class_get_parent(oc);
2326    }
2327    assert(oc);
2328
2329    return POWERPC_CPU_CLASS(oc);
2330}
2331
2332static int kvm_ppc_register_host_cpu_type(void)
2333{
2334    TypeInfo type_info = {
2335        .name = TYPE_HOST_POWERPC_CPU,
2336        .instance_init = kvmppc_host_cpu_initfn,
2337        .class_init = kvmppc_host_cpu_class_init,
2338    };
2339    uint32_t host_pvr = mfpvr();
2340    PowerPCCPUClass *pvr_pcc;
2341    DeviceClass *dc;
2342
2343    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2344    if (pvr_pcc == NULL) {
2345        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2346    }
2347    if (pvr_pcc == NULL) {
2348        return -1;
2349    }
2350    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2351    type_register(&type_info);
2352
2353    /* Register generic family CPU class for a family */
2354    pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2355    dc = DEVICE_CLASS(pvr_pcc);
2356    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2357    type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2358    type_register(&type_info);
2359
2360    return 0;
2361}
2362
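/* Associate an RTAS token with a named RTAS call so that the kernel can
 * handle the call itself.  Returns -ENOENT when KVM lacks KVM_CAP_PPC_RTAS,
 * otherwise the result of the KVM_PPC_RTAS_DEFINE_TOKEN ioctl.
 */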
2363int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2364{
2365    struct kvm_rtas_token_args args = {
2366        .token = token,
2367    };
2368
2369    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2370        return -ENOENT;
2371    }
2372
2373    strncpy(args.name, function, sizeof(args.name));
2374
2375    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2376}
2377
2378int kvmppc_get_htab_fd(bool write)
2379{
2380    struct kvm_get_htab_fd s = {
2381        .flags = write ? KVM_GET_HTAB_WRITE : 0,
2382        .start_index = 0,
2383    };
2384
2385    if (!cap_htab_fd) {
2386        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2387        return -1;
2388    }
2389
2390    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2391}
2392
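/* Stream the hash page table out to 'f' for migration.  The kernel HTAB fd
 * yields a sequence of kvm_get_htab_header records, each followed by
 * n_valid HPTEs of HASH_PTE_SIZE_64 bytes; these are re-emitted to the
 * QEMUFile as index / n_valid / n_invalid / PTE data.  Reading stops once
 * max_ns nanoseconds have elapsed (if max_ns is non-negative).  Returns 1
 * when the whole table has been read, 0 if there is more to do, or a
 * negative error from read().
 */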
2393int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2394{
2395    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2396    uint8_t buf[bufsize];
2397    ssize_t rc;
2398
2399    do {
2400        rc = read(fd, buf, bufsize);
2401        if (rc < 0) {
2402            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2403                    strerror(errno));
2404            return rc;
2405        } else if (rc) {
2406            uint8_t *buffer = buf;
2407            ssize_t n = rc;
2408            while (n) {
2409                struct kvm_get_htab_header *head =
2410                    (struct kvm_get_htab_header *) buffer;
2411                size_t chunksize = sizeof(*head) +
2412                     HASH_PTE_SIZE_64 * head->n_valid;
2413
2414                qemu_put_be32(f, head->index);
2415                qemu_put_be16(f, head->n_valid);
2416                qemu_put_be16(f, head->n_invalid);
2417                qemu_put_buffer(f, (void *)(head + 1),
2418                                HASH_PTE_SIZE_64 * head->n_valid);
2419
2420                buffer += chunksize;
2421                n -= chunksize;
2422            }
2423        }
2424    } while ((rc != 0)
2425             && ((max_ns < 0)
2426                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2427
2428    return (rc == 0) ? 1 : 0;
2429}
2430
2431int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2432                           uint16_t n_valid, uint16_t n_invalid)
2433{
2434    struct kvm_get_htab_header *buf;
2435    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2436    ssize_t rc;
2437
2438    buf = alloca(chunksize);
2439    buf->index = index;
2440    buf->n_valid = n_valid;
2441    buf->n_invalid = n_invalid;
2442
2443    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2444
2445    rc = write(fd, buf, chunksize);
2446    if (rc < 0) {
2447        fprintf(stderr, "Error writing KVM hash table: %s\n",
2448                strerror(errno));
2449        return rc;
2450    }
2451    if (rc != chunksize) {
2452        /* We should never get a short write on a single chunk */
2453        fprintf(stderr, "Short write, restoring KVM hash table\n");
2454        return -1;
2455    }
2456    return 0;
2457}
2458
2459bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2460{
2461    return true;
2462}
2463
2464int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2465{
2466    return 1;
2467}
2468
2469int kvm_arch_on_sigbus(int code, void *addr)
2470{
2471    return 1;
2472}
2473
2474void kvm_arch_init_irq_routing(KVMState *s)
2475{
2476}
2477
2478struct kvm_get_htab_buf {
2479    struct kvm_get_htab_header header;
2480    /*
2481     * We require one extra byte for the read
2482     */
2483    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2484};
2485
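/* Read one full PTE group through a temporary HTAB fd.  The returned value
 * is really a pointer to the hpte[] array inside a freshly allocated
 * kvm_get_htab_buf; the caller passes it back to kvmppc_hash64_free_pteg()
 * to release it.  Returns 0 on any failure.
 */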
2486uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2487{
2488    int htab_fd;
2489    struct kvm_get_htab_fd ghf;
2490    struct kvm_get_htab_buf  *hpte_buf;
2491
2492    ghf.flags = 0;
2493    ghf.start_index = pte_index;
2494    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2495    if (htab_fd < 0) {
2496        goto error_out;
2497    }
2498
2499    hpte_buf = g_malloc0(sizeof(*hpte_buf));
2500    /*
2501     * Read the hpte group
2502     */
2503    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2504        goto out_close;
2505    }
2506
2507    close(htab_fd);
2508    return (uint64_t)(uintptr_t) hpte_buf->hpte;
2509
2510out_close:
2511    g_free(hpte_buf);
2512    close(htab_fd);
2513error_out:
2514    return 0;
2515}
2516
2517void kvmppc_hash64_free_pteg(uint64_t token)
2518{
2519    struct kvm_get_htab_buf *htab_buf;
2520
2521    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2522                            hpte);
2523    g_free(htab_buf);
2524    return;
2525}
2526
2527void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2528                             target_ulong pte0, target_ulong pte1)
2529{
2530    int htab_fd;
2531    struct kvm_get_htab_fd ghf;
2532    struct kvm_get_htab_buf hpte_buf;
2533
2534    ghf.flags = 0;
2535    ghf.start_index = 0;     /* Ignored */
2536    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2537    if (htab_fd < 0) {
2538        goto error_out;
2539    }
2540
2541    hpte_buf.header.n_valid = 1;
2542    hpte_buf.header.n_invalid = 0;
2543    hpte_buf.header.index = pte_index;
2544    hpte_buf.hpte[0] = pte0;
2545    hpte_buf.hpte[1] = pte1;
2546    /*
2547     * Write the hpte entry.
2548     * CAUTION: write() has the warn_unused_result attribute. Hence we
2549     * need to check the return value, even though we don't act on it.
2550     */
2551    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2552        goto out_close;
2553    }
2554
2555out_close:
2556    close(htab_fd);
2557    return;
2558
2559error_out:
2560    return;
2561}
2562
2563int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2564                             uint64_t address, uint32_t data, PCIDevice *dev)
2565{
2566    return 0;
2567}
2568
2569int kvm_arch_msi_data_to_gsi(uint32_t data)
2570{
2571    return data & 0xffff;
2572}
2573
2574int kvmppc_enable_hwrng(void)
2575{
2576    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2577        return -1;
2578    }
2579
2580    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2581}
2582