qemu/target-ppc/kvm.c
   1/*
   2 * PowerPC implementation of KVM hooks
   3 *
   4 * Copyright IBM Corp. 2007
   5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6 *
   7 * Authors:
   8 *  Jerone Young <jyoung5@us.ibm.com>
   9 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10 *  Hollis Blanchard <hollisb@us.ibm.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13 * See the COPYING file in the top-level directory.
  14 *
  15 */
  16
  17#include "qemu/osdep.h"
  18#include <dirent.h>
  19#include <sys/ioctl.h>
  20#include <sys/vfs.h>
  21
  22#include <linux/kvm.h>
  23
  24#include "qemu-common.h"
  25#include "qemu/error-report.h"
  26#include "cpu.h"
  27#include "qemu/timer.h"
  28#include "sysemu/sysemu.h"
  29#include "sysemu/kvm.h"
  30#include "sysemu/numa.h"
  31#include "kvm_ppc.h"
  32#include "sysemu/cpus.h"
  33#include "sysemu/device_tree.h"
  34#include "mmu-hash64.h"
  35
  36#include "hw/sysbus.h"
  37#include "hw/ppc/spapr.h"
  38#include "hw/ppc/spapr_vio.h"
  39#include "hw/ppc/spapr_cpu_core.h"
  40#include "hw/ppc/ppc.h"
  41#include "sysemu/watchdog.h"
  42#include "trace.h"
  43#include "exec/gdbstub.h"
  44#include "exec/memattrs.h"
  45#include "sysemu/hostmem.h"
  46#include "qemu/cutils.h"
  47#if defined(TARGET_PPC64)
  48#include "hw/ppc/spapr_cpu_core.h"
  49#endif
  50
  51//#define DEBUG_KVM
  52
  53#ifdef DEBUG_KVM
  54#define DPRINTF(fmt, ...) \
  55    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  56#else
  57#define DPRINTF(fmt, ...) \
  58    do { } while (0)
  59#endif
  60
  61#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  62
  63const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  64    KVM_CAP_LAST_INFO
  65};
  66
  67static int cap_interrupt_unset = false;
  68static int cap_interrupt_level = false;
  69static int cap_segstate;
  70static int cap_booke_sregs;
  71static int cap_ppc_smt;
  72static int cap_ppc_rma;
  73static int cap_spapr_tce;
  74static int cap_spapr_multitce;
  75static int cap_spapr_vfio;
  76static int cap_hior;
  77static int cap_one_reg;
  78static int cap_epr;
  79static int cap_ppc_watchdog;
  80static int cap_papr;
  81static int cap_htab_fd;
  82static int cap_fixup_hcalls;
  83static int cap_htm;             /* Hardware transactional memory support */
  84
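    /* Trap opcode used for software breakpoints; fetched from the kernel
     * via KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu() and patched into
     * guest memory by kvm_arch_insert_sw_breakpoint(). */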
  85static uint32_t debug_inst_opcode;
  86
  87/* XXX We have a race condition where we actually have a level triggered
  88 *     interrupt, but the infrastructure can't expose that yet, so the guest
  89 *     takes but ignores it, goes to sleep and never gets notified that there's
  90 *     still an interrupt pending.
  91 *
  92 *     As a quick workaround, let's just wake up again 20 ms after we injected
  93 *     an interrupt. That way we can ensure that we're always reinjecting
  94 *     interrupts in case the guest swallowed them.
  95 */
  96static QEMUTimer *idle_timer;
  97
  98static void kvm_kick_cpu(void *opaque)
  99{
 100    PowerPCCPU *cpu = opaque;
 101
 102    qemu_cpu_kick(CPU(cpu));
 103}
 104
 105/* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 106 * should only be used for fallback tests - generally we should use
 107 * explicit capabilities for the features we want, rather than
 108 * assuming what is/isn't available depending on the KVM variant. */
 109static bool kvmppc_is_pr(KVMState *ks)
 110{
 111    /* Assume KVM-PR if the GET_PVINFO capability is available */
 112    return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 113}
 114
 115static int kvm_ppc_register_host_cpu_type(void);
 116
 117int kvm_arch_init(MachineState *ms, KVMState *s)
 118{
 119    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 120    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 121    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 122    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 123    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 124    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 125    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 126    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 127    cap_spapr_vfio = false;
 128    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 129    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 130    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 131    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 132    /* Note: we don't set cap_papr here, because this capability is
 133     * only activated after this by kvmppc_set_papr() */
 134    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 135    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 136    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 137
 138    if (!cap_interrupt_level) {
 139        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 140                        "VM to stall at times!\n");
 141    }
 142
 143    kvm_ppc_register_host_cpu_type();
 144
 145    return 0;
 146}
 147
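    /* Push the guest's PVR to KVM via the SREGS interface.  On BookE this
     * is skipped and the guest simply runs with the native PVR, as the
     * comment below explains. */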
 148static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 149{
 150    CPUPPCState *cenv = &cpu->env;
 151    CPUState *cs = CPU(cpu);
 152    struct kvm_sregs sregs;
 153    int ret;
 154
 155    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 156        /* What we're really trying to say is "if we're on BookE, we use
 157           the native PVR for now". This is the only sane way to check
 158           it though, so we may mislead users into thinking they can run
 159           BookE guests on BookS. Let's hope nobody dares to try :) */
 160        return 0;
 161    } else {
 162        if (!cap_segstate) {
 163            fprintf(stderr, "kvm error: missing PVR setting capability\n");
 164            return -ENOSYS;
 165        }
 166    }
 167
 168    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 169    if (ret) {
 170        return ret;
 171    }
 172
 173    sregs.pvr = cenv->spr[SPR_PVR];
 174    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 175}
 176
 177/* Set up a shared TLB array with KVM */
 178static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 179{
 180    CPUPPCState *env = &cpu->env;
 181    CPUState *cs = CPU(cpu);
 182    struct kvm_book3e_206_tlb_params params = {};
 183    struct kvm_config_tlb cfg = {};
 184    unsigned int entries = 0;
 185    int ret, i;
 186
 187    if (!kvm_enabled() ||
 188        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 189        return 0;
 190    }
 191
 192    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 193
 194    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 195        params.tlb_sizes[i] = booke206_tlb_size(env, i);
 196        params.tlb_ways[i] = booke206_tlb_ways(env, i);
 197        entries += params.tlb_sizes[i];
 198    }
 199
 200    assert(entries == env->nb_tlb);
 201    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 202
 203    env->tlb_dirty = true;
 204
 205    cfg.array = (uintptr_t)env->tlb.tlbm;
 206    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 207    cfg.params = (uintptr_t)&params;
 208    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 209
 210    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 211    if (ret < 0) {
 212        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 213                __func__, strerror(-ret));
 214        return ret;
 215    }
 216
 217    env->kvm_sw_tlb = true;
 218    return 0;
 219}
 220
 221
 222#if defined(TARGET_PPC64)
 223static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 224                                       struct kvm_ppc_smmu_info *info)
 225{
 226    CPUPPCState *env = &cpu->env;
 227    CPUState *cs = CPU(cpu);
 228
 229    memset(info, 0, sizeof(*info));
 230
 231    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 232     * need to "guess" what the supported page sizes are.
 233     *
 234     * For that to work we make a few assumptions:
 235     *
 236     * - Check whether we are running "PR" KVM which only supports 4K
 237     *   and 16M pages, but supports them regardless of the backing
 238     *   store characteristics. We also don't support 1T segments.
 239     *
 240     *   This is safe because if HV KVM ever supports that capability or PR
 241     *   KVM grows support for more page/segment sizes, those versions
 242     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 243     *   will not hit this fallback.
 244     *
 245     * - Else we are running HV KVM. This means we only support page
 246     *   sizes that fit in the backing store. Additionally we only
 247     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
 248     *   P7 encodings for the SLB and hash table. Here too, we assume
 249     *   support for any newer processor will mean a kernel that
 250     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 251     *   this fallback.
 252     */
 253    if (kvmppc_is_pr(cs->kvm_state)) {
 254        /* No flags */
 255        info->flags = 0;
 256        info->slb_size = 64;
 257
 258        /* Standard 4k base page size segment */
 259        info->sps[0].page_shift = 12;
 260        info->sps[0].slb_enc = 0;
 261        info->sps[0].enc[0].page_shift = 12;
 262        info->sps[0].enc[0].pte_enc = 0;
 263
 264        /* Standard 16M large page size segment */
 265        info->sps[1].page_shift = 24;
 266        info->sps[1].slb_enc = SLB_VSID_L;
 267        info->sps[1].enc[0].page_shift = 24;
 268        info->sps[1].enc[0].pte_enc = 0;
 269    } else {
 270        int i = 0;
 271
 272        /* HV KVM has backing store size restrictions */
 273        info->flags = KVM_PPC_PAGE_SIZES_REAL;
 274
 275        if (env->mmu_model & POWERPC_MMU_1TSEG) {
 276            info->flags |= KVM_PPC_1T_SEGMENTS;
 277        }
 278
 279        if (env->mmu_model == POWERPC_MMU_2_06 ||
 280            env->mmu_model == POWERPC_MMU_2_07) {
 281            info->slb_size = 32;
 282        } else {
 283            info->slb_size = 64;
 284        }
 285
 286        /* Standard 4k base page size segment */
 287        info->sps[i].page_shift = 12;
 288        info->sps[i].slb_enc = 0;
 289        info->sps[i].enc[0].page_shift = 12;
 290        info->sps[i].enc[0].pte_enc = 0;
 291        i++;
 292
 293        /* 64K on MMU 2.06 and later */
 294        if (env->mmu_model == POWERPC_MMU_2_06 ||
 295            env->mmu_model == POWERPC_MMU_2_07) {
 296            info->sps[i].page_shift = 16;
 297            info->sps[i].slb_enc = 0x110;
 298            info->sps[i].enc[0].page_shift = 16;
 299            info->sps[i].enc[0].pte_enc = 1;
 300            i++;
 301        }
 302
 303        /* Standard 16M large page size segment */
 304        info->sps[i].page_shift = 24;
 305        info->sps[i].slb_enc = SLB_VSID_L;
 306        info->sps[i].enc[0].page_shift = 24;
 307        info->sps[i].enc[0].pte_enc = 0;
 308    }
 309}
 310
 311static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 312{
 313    CPUState *cs = CPU(cpu);
 314    int ret;
 315
 316    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 317        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 318        if (ret == 0) {
 319            return;
 320        }
 321    }
 322
 323    kvm_get_fallback_smmu_info(cpu, info);
 324}
 325
 326static long gethugepagesize(const char *mem_path)
 327{
 328    struct statfs fs;
 329    int ret;
 330
 331    do {
 332        ret = statfs(mem_path, &fs);
 333    } while (ret != 0 && errno == EINTR);
 334
 335    if (ret != 0) {
 336        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 337                strerror(errno));
 338        exit(1);
 339    }
 340
 341#define HUGETLBFS_MAGIC       0x958458f6
 342
 343    if (fs.f_type != HUGETLBFS_MAGIC) {
 344        /* Explicit mempath, but it's ordinary pages */
 345        return getpagesize();
 346    }
 347
 348    /* It's hugetlbfs, so return the huge page size */
 349    return fs.f_bsize;
 350}
 351
 352/*
 353 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 354 * may or may not name the same files / on the same filesystem now as
 355 * when we actually open and map them.  Iterate over the file
 356 * descriptors instead, and use qemu_fd_getpagesize().
 357 */
 358static int find_max_supported_pagesize(Object *obj, void *opaque)
 359{
 360    char *mem_path;
 361    long *hpsize_min = opaque;
 362
 363    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 364        mem_path = object_property_get_str(obj, "mem-path", NULL);
 365        if (mem_path) {
 366            long hpsize = gethugepagesize(mem_path);
 367            if (hpsize < *hpsize_min) {
 368                *hpsize_min = hpsize;
 369            }
 370        } else {
 371            *hpsize_min = getpagesize();
 372        }
 373    }
 374
 375    return 0;
 376}
 377
 378static long getrampagesize(void)
 379{
 380    long hpsize = LONG_MAX;
 381    long mainrampagesize;
 382    Object *memdev_root;
 383
 384    if (mem_path) {
 385        mainrampagesize = gethugepagesize(mem_path);
 386    } else {
 387        mainrampagesize = getpagesize();
 388    }
 389
 390    /* It's possible we have memory-backend objects with
 391     * hugepage-backed RAM. These may get mapped into system
 392     * address space via -numa parameters or memory hotplug
 393     * hooks. We want to take these into account, but we
 394     * also want to make sure these supported hugepage
 395     * sizes are applicable across the entire range of memory
 396     * we may boot from, so we take the min across all
 397     * backends, and assume normal pages in cases where a
 398     * backend isn't backed by hugepages.
 399     */
 400    memdev_root = object_resolve_path("/objects", NULL);
 401    if (memdev_root) {
 402        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 403    }
 404    if (hpsize == LONG_MAX) {
 405        /* No additional memory regions found ==> Report main RAM page size */
 406        return mainrampagesize;
 407    }
 408
 409    /* If NUMA is disabled or the NUMA nodes are not backed with a
 410     * memory-backend, then there is at least one node using "normal" RAM,
 411     * so if its page size is smaller we have got to report that size instead.
 412     */
 413    if (hpsize > mainrampagesize &&
 414        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
 415        static bool warned;
 416        if (!warned) {
 417            error_report("Huge page support disabled (n/a for main memory).");
 418            warned = true;
 419        }
 420        return mainrampagesize;
 421    }
 422
 423    return hpsize;
 424}
 425
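    /* With HV KVM (KVM_PPC_PAGE_SIZES_REAL) a guest page size is only
     * usable if the RAM backing it is mapped with pages at least that
     * large; without that flag any size reported by KVM is fine. */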
 426static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 427{
 428    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 429        return true;
 430    }
 431
 432    return (1ul << shift) <= rampgsize;
 433}
 434
 435static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 436{
 437    static struct kvm_ppc_smmu_info smmu_info;
 438    static bool has_smmu_info;
 439    CPUPPCState *env = &cpu->env;
 440    long rampagesize;
 441    int iq, ik, jq, jk;
 442    bool has_64k_pages = false;
 443
 444    /* We only handle page sizes for 64-bit server guests for now */
 445    if (!(env->mmu_model & POWERPC_MMU_64)) {
 446        return;
 447    }
 448
 449    /* Collect MMU info from kernel if not already */
 450    if (!has_smmu_info) {
 451        kvm_get_smmu_info(cpu, &smmu_info);
 452        has_smmu_info = true;
 453    }
 454
 455    rampagesize = getrampagesize();
 456
 457    /* Convert to QEMU form */
 458    memset(&env->sps, 0, sizeof(env->sps));
 459
 460    /* If we have HV KVM, we need to forbid CI large pages if our
 461     * host page size is smaller than 64K.
 462     */
 463    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 464        env->ci_large_pages = getpagesize() >= 0x10000;
 465    }
 466
 467    /*
 468     * XXX This loop should be an entry wide AND of the capabilities that
 469     *     the selected CPU has with the capabilities that KVM supports.
 470     */
 471    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 472        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 473        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 474
 475        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 476                                 ksps->page_shift)) {
 477            continue;
 478        }
 479        qsps->page_shift = ksps->page_shift;
 480        qsps->slb_enc = ksps->slb_enc;
 481        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 482            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 483                                     ksps->enc[jk].page_shift)) {
 484                continue;
 485            }
 486            if (ksps->enc[jk].page_shift == 16) {
 487                has_64k_pages = true;
 488            }
 489            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 490            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 491            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 492                break;
 493            }
 494        }
 495        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 496            break;
 497        }
 498    }
 499    env->slb_nr = smmu_info.slb_size;
 500    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 501        env->mmu_model &= ~POWERPC_MMU_1TSEG;
 502    }
 503    if (!has_64k_pages) {
 504        env->mmu_model &= ~POWERPC_MMU_64K;
 505    }
 506}
 507#else /* defined (TARGET_PPC64) */
 508
 509static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 510{
 511}
 512
 513#endif /* !defined (TARGET_PPC64) */
 514
 515unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 516{
 517    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 518}
 519
 520/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 521 * book3s supports only 1 watchpoint, so array size
 522 * of 4 is sufficient for now.
 523 */
 524#define MAX_HW_BKPTS 4
 525
 526static struct HWBreakpoint {
 527    target_ulong addr;
 528    int type;
 529} hw_debug_points[MAX_HW_BKPTS];
 530
 531static CPUWatchpoint hw_watchpoint;
 532
 533/* By default no hardware breakpoints or watchpoints are supported */
 534static int max_hw_breakpoint;
 535static int max_hw_watchpoint;
 536static int nb_hw_breakpoint;
 537static int nb_hw_watchpoint;
 538
 539static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 540{
 541    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 542        max_hw_breakpoint = 2;
 543        max_hw_watchpoint = 2;
 544    }
 545
 546    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 547        fprintf(stderr, "Error initializing h/w breakpoints\n");
 548        return;
 549    }
 550}
 551
 552int kvm_arch_init_vcpu(CPUState *cs)
 553{
 554    PowerPCCPU *cpu = POWERPC_CPU(cs);
 555    CPUPPCState *cenv = &cpu->env;
 556    int ret;
 557
 558    /* Gather server mmu info from KVM and update the CPU state */
 559    kvm_fixup_page_sizes(cpu);
 560
 561    /* Synchronize sregs with kvm */
 562    ret = kvm_arch_sync_sregs(cpu);
 563    if (ret) {
 564        if (ret == -EINVAL) {
 565            error_report("Register sync failed... If you're using kvm-hv.ko,"
 566                         " only \"-cpu host\" is possible");
 567        }
 568        return ret;
 569    }
 570
 571    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 572
 573    switch (cenv->mmu_model) {
 574    case POWERPC_MMU_BOOKE206:
 575        /* This target supports access to KVM's guest TLB */
 576        ret = kvm_booke206_tlb_init(cpu);
 577        break;
 578    case POWERPC_MMU_2_07:
 579        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 580            /* KVM-HV has transactional memory on POWER8 even without the
 581             * KVM_CAP_PPC_HTM extension, so enable it here instead. */
 582            cap_htm = true;
 583        }
 584        break;
 585    default:
 586        break;
 587    }
 588
 589    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 590    kvmppc_hw_debug_points_init(cenv);
 591
 592    return ret;
 593}
 594
 595static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 596{
 597    CPUPPCState *env = &cpu->env;
 598    CPUState *cs = CPU(cpu);
 599    struct kvm_dirty_tlb dirty_tlb;
 600    unsigned char *bitmap;
 601    int ret;
 602
 603    if (!env->kvm_sw_tlb) {
 604        return;
 605    }
 606
 607    bitmap = g_malloc((env->nb_tlb + 7) / 8);
 608    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 609
 610    dirty_tlb.bitmap = (uintptr_t)bitmap;
 611    dirty_tlb.num_dirty = env->nb_tlb;
 612
 613    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 614    if (ret) {
 615        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 616                __func__, strerror(-ret));
 617    }
 618
 619    g_free(bitmap);
 620}
 621
 622static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 623{
 624    PowerPCCPU *cpu = POWERPC_CPU(cs);
 625    CPUPPCState *env = &cpu->env;
 626    union {
 627        uint32_t u32;
 628        uint64_t u64;
 629    } val;
 630    struct kvm_one_reg reg = {
 631        .id = id,
 632        .addr = (uintptr_t) &val,
 633    };
 634    int ret;
 635
 636    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 637    if (ret != 0) {
 638        trace_kvm_failed_spr_get(spr, strerror(errno));
 639    } else {
 640        switch (id & KVM_REG_SIZE_MASK) {
 641        case KVM_REG_SIZE_U32:
 642            env->spr[spr] = val.u32;
 643            break;
 644
 645        case KVM_REG_SIZE_U64:
 646            env->spr[spr] = val.u64;
 647            break;
 648
 649        default:
 650            /* Don't handle this size yet */
 651            abort();
 652        }
 653    }
 654}
 655
 656static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 657{
 658    PowerPCCPU *cpu = POWERPC_CPU(cs);
 659    CPUPPCState *env = &cpu->env;
 660    union {
 661        uint32_t u32;
 662        uint64_t u64;
 663    } val;
 664    struct kvm_one_reg reg = {
 665        .id = id,
 666        .addr = (uintptr_t) &val,
 667    };
 668    int ret;
 669
 670    switch (id & KVM_REG_SIZE_MASK) {
 671    case KVM_REG_SIZE_U32:
 672        val.u32 = env->spr[spr];
 673        break;
 674
 675    case KVM_REG_SIZE_U64:
 676        val.u64 = env->spr[spr];
 677        break;
 678
 679    default:
 680        /* Don't handle this size yet */
 681        abort();
 682    }
 683
 684    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 685    if (ret != 0) {
 686        trace_kvm_failed_spr_set(spr, strerror(errno));
 687    }
 688}
 689
 690static int kvm_put_fp(CPUState *cs)
 691{
 692    PowerPCCPU *cpu = POWERPC_CPU(cs);
 693    CPUPPCState *env = &cpu->env;
 694    struct kvm_one_reg reg;
 695    int i;
 696    int ret;
 697
 698    if (env->insns_flags & PPC_FLOAT) {
 699        uint64_t fpscr = env->fpscr;
 700        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 701
 702        reg.id = KVM_REG_PPC_FPSCR;
 703        reg.addr = (uintptr_t)&fpscr;
 704        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 705        if (ret < 0) {
 706            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 707            return ret;
 708        }
 709
 710        for (i = 0; i < 32; i++) {
 711            uint64_t vsr[2];
 712
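                /* vsr[] is the 128-bit VSR image handed to KVM.  The
                 * scalar FP value lives in the most significant doubleword
                 * of the VSR, so which array slot it occupies depends on
                 * host endianness. */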
 713#ifdef HOST_WORDS_BIGENDIAN
 714            vsr[0] = float64_val(env->fpr[i]);
 715            vsr[1] = env->vsr[i];
 716#else
 717            vsr[0] = env->vsr[i];
 718            vsr[1] = float64_val(env->fpr[i]);
 719#endif
 720            reg.addr = (uintptr_t) &vsr;
 721            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 722
 723            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 724            if (ret < 0) {
 725                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 726                        i, strerror(errno));
 727                return ret;
 728            }
 729        }
 730    }
 731
 732    if (env->insns_flags & PPC_ALTIVEC) {
 733        reg.id = KVM_REG_PPC_VSCR;
 734        reg.addr = (uintptr_t)&env->vscr;
 735        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 736        if (ret < 0) {
 737            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 738            return ret;
 739        }
 740
 741        for (i = 0; i < 32; i++) {
 742            reg.id = KVM_REG_PPC_VR(i);
 743            reg.addr = (uintptr_t)&env->avr[i];
 744            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 745            if (ret < 0) {
 746                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 747                return ret;
 748            }
 749        }
 750    }
 751
 752    return 0;
 753}
 754
 755static int kvm_get_fp(CPUState *cs)
 756{
 757    PowerPCCPU *cpu = POWERPC_CPU(cs);
 758    CPUPPCState *env = &cpu->env;
 759    struct kvm_one_reg reg;
 760    int i;
 761    int ret;
 762
 763    if (env->insns_flags & PPC_FLOAT) {
 764        uint64_t fpscr;
 765        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 766
 767        reg.id = KVM_REG_PPC_FPSCR;
 768        reg.addr = (uintptr_t)&fpscr;
 769        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 770        if (ret < 0) {
 771            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 772            return ret;
 773        } else {
 774            env->fpscr = fpscr;
 775        }
 776
 777        for (i = 0; i < 32; i++) {
 778            uint64_t vsr[2];
 779
 780            reg.addr = (uintptr_t) &vsr;
 781            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 782
 783            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 784            if (ret < 0) {
 785                DPRINTF("Unable to get %s%d from KVM: %s\n",
 786                        vsx ? "VSR" : "FPR", i, strerror(errno));
 787                return ret;
 788            } else {
 789#ifdef HOST_WORDS_BIGENDIAN
 790                env->fpr[i] = vsr[0];
 791                if (vsx) {
 792                    env->vsr[i] = vsr[1];
 793                }
 794#else
 795                env->fpr[i] = vsr[1];
 796                if (vsx) {
 797                    env->vsr[i] = vsr[0];
 798                }
 799#endif
 800            }
 801        }
 802    }
 803
 804    if (env->insns_flags & PPC_ALTIVEC) {
 805        reg.id = KVM_REG_PPC_VSCR;
 806        reg.addr = (uintptr_t)&env->vscr;
 807        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 808        if (ret < 0) {
 809            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 810            return ret;
 811        }
 812
 813        for (i = 0; i < 32; i++) {
 814            reg.id = KVM_REG_PPC_VR(i);
 815            reg.addr = (uintptr_t)&env->avr[i];
 816            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 817            if (ret < 0) {
 818                DPRINTF("Unable to get VR%d from KVM: %s\n",
 819                        i, strerror(errno));
 820                return ret;
 821            }
 822        }
 823    }
 824
 825    return 0;
 826}
 827
 828#if defined(TARGET_PPC64)
 829static int kvm_get_vpa(CPUState *cs)
 830{
 831    PowerPCCPU *cpu = POWERPC_CPU(cs);
 832    CPUPPCState *env = &cpu->env;
 833    struct kvm_one_reg reg;
 834    int ret;
 835
 836    reg.id = KVM_REG_PPC_VPA_ADDR;
 837    reg.addr = (uintptr_t)&env->vpa_addr;
 838    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 839    if (ret < 0) {
 840        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 841        return ret;
 842    }
 843
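        /* KVM transfers the SLB shadow and DTL registrations as single
         * 128-bit values holding address and size back to back, so the
         * corresponding CPUPPCState fields must be adjacent - hence the
         * asserts below. */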
 844    assert((uintptr_t)&env->slb_shadow_size
 845           == ((uintptr_t)&env->slb_shadow_addr + 8));
 846    reg.id = KVM_REG_PPC_VPA_SLB;
 847    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 848    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 849    if (ret < 0) {
 850        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 851                strerror(errno));
 852        return ret;
 853    }
 854
 855    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 856    reg.id = KVM_REG_PPC_VPA_DTL;
 857    reg.addr = (uintptr_t)&env->dtl_addr;
 858    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 859    if (ret < 0) {
 860        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 861                strerror(errno));
 862        return ret;
 863    }
 864
 865    return 0;
 866}
 867
 868static int kvm_put_vpa(CPUState *cs)
 869{
 870    PowerPCCPU *cpu = POWERPC_CPU(cs);
 871    CPUPPCState *env = &cpu->env;
 872    struct kvm_one_reg reg;
 873    int ret;
 874
 875    /* SLB shadow or DTL can't be registered unless a master VPA is
 876     * registered.  That means when restoring state, if a VPA *is*
 877     * registered, we need to set that up first.  If not, we need to
 878     * deregister the others before deregistering the master VPA */
 879    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 880
 881    if (env->vpa_addr) {
 882        reg.id = KVM_REG_PPC_VPA_ADDR;
 883        reg.addr = (uintptr_t)&env->vpa_addr;
 884        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 885        if (ret < 0) {
 886            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 887            return ret;
 888        }
 889    }
 890
 891    assert((uintptr_t)&env->slb_shadow_size
 892           == ((uintptr_t)&env->slb_shadow_addr + 8));
 893    reg.id = KVM_REG_PPC_VPA_SLB;
 894    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 895    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 896    if (ret < 0) {
 897        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 898        return ret;
 899    }
 900
 901    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 902    reg.id = KVM_REG_PPC_VPA_DTL;
 903    reg.addr = (uintptr_t)&env->dtl_addr;
 904    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 905    if (ret < 0) {
 906        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 907                strerror(errno));
 908        return ret;
 909    }
 910
 911    if (!env->vpa_addr) {
 912        reg.id = KVM_REG_PPC_VPA_ADDR;
 913        reg.addr = (uintptr_t)&env->vpa_addr;
 914        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 915        if (ret < 0) {
 916            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 917            return ret;
 918        }
 919    }
 920
 921    return 0;
 922}
 923#endif /* TARGET_PPC64 */
 924
 925int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 926{
 927    CPUPPCState *env = &cpu->env;
 928    struct kvm_sregs sregs;
 929    int i;
 930
 931    sregs.pvr = env->spr[SPR_PVR];
 932
 933    sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 934
 935    /* Sync SLB */
 936#ifdef TARGET_PPC64
 937    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 938        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 939        if (env->slb[i].esid & SLB_ESID_V) {
 940            sregs.u.s.ppc64.slb[i].slbe |= i;
 941        }
 942        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 943    }
 944#endif
 945
 946    /* Sync SRs */
 947    for (i = 0; i < 16; i++) {
 948        sregs.u.s.ppc32.sr[i] = env->sr[i];
 949    }
 950
 951    /* Sync BATs */
 952    for (i = 0; i < 8; i++) {
 953        /* Beware: BATU goes into the upper 32 bits, BATL into the lower */
 954        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 955            | env->DBAT[1][i];
 956        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 957            | env->IBAT[1][i];
 958    }
 959
 960    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 961}
 962
 963int kvm_arch_put_registers(CPUState *cs, int level)
 964{
 965    PowerPCCPU *cpu = POWERPC_CPU(cs);
 966    CPUPPCState *env = &cpu->env;
 967    struct kvm_regs regs;
 968    int ret;
 969    int i;
 970
 971    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 972    if (ret < 0) {
 973        return ret;
 974    }
 975
 976    regs.ctr = env->ctr;
 977    regs.lr  = env->lr;
 978    regs.xer = cpu_read_xer(env);
 979    regs.msr = env->msr;
 980    regs.pc = env->nip;
 981
 982    regs.srr0 = env->spr[SPR_SRR0];
 983    regs.srr1 = env->spr[SPR_SRR1];
 984
 985    regs.sprg0 = env->spr[SPR_SPRG0];
 986    regs.sprg1 = env->spr[SPR_SPRG1];
 987    regs.sprg2 = env->spr[SPR_SPRG2];
 988    regs.sprg3 = env->spr[SPR_SPRG3];
 989    regs.sprg4 = env->spr[SPR_SPRG4];
 990    regs.sprg5 = env->spr[SPR_SPRG5];
 991    regs.sprg6 = env->spr[SPR_SPRG6];
 992    regs.sprg7 = env->spr[SPR_SPRG7];
 993
 994    regs.pid = env->spr[SPR_BOOKE_PID];
 995
 996    for (i = 0; i < 32; i++)
 997        regs.gpr[i] = env->gpr[i];
 998
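        /* env->crf[] stores CR as eight 4-bit fields with CR0 first;
         * fold them back into the single 32-bit CR image KVM expects. */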
 999    regs.cr = 0;
1000    for (i = 0; i < 8; i++) {
1001        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1002    }
1003
1004    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1005    if (ret < 0)
1006        return ret;
1007
1008    kvm_put_fp(cs);
1009
1010    if (env->tlb_dirty) {
1011        kvm_sw_tlb_put(cpu);
1012        env->tlb_dirty = false;
1013    }
1014
1015    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1016        ret = kvmppc_put_books_sregs(cpu);
1017        if (ret < 0) {
1018            return ret;
1019        }
1020    }
1021
1022    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1023        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1024    }
1025
1026    if (cap_one_reg) {
1027        int i;
1028
1029        /* We deliberately ignore errors here, for kernels which have
1030         * the ONE_REG calls, but don't support the specific
1031         * registers, there's a reasonable chance things will still
1032         * work, at least until we try to migrate. */
1033        for (i = 0; i < 1024; i++) {
1034            uint64_t id = env->spr_cb[i].one_reg_id;
1035
1036            if (id != 0) {
1037                kvm_put_one_spr(cs, id, i);
1038            }
1039        }
1040
1041#ifdef TARGET_PPC64
1042        if (msr_ts) {
1043            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1044                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1045            }
1046            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1047                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1048            }
1049            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1050            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1051            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1052            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1053            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1054            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1055            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1056            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1057            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1058            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1059        }
1060
1061        if (cap_papr) {
1062            if (kvm_put_vpa(cs) < 0) {
1063                DPRINTF("Warning: Unable to set VPA information to KVM\n");
1064            }
1065        }
1066
1067        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1068#endif /* TARGET_PPC64 */
1069    }
1070
1071    return ret;
1072}
1073
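    /* A BookE exception vector is IVPR (the common prefix) plus the
     * per-exception IVOR offset; keep QEMU's cached vector in sync. */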
1074static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1075{
1076     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1077}
1078
1079static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1080{
1081    CPUPPCState *env = &cpu->env;
1082    struct kvm_sregs sregs;
1083    int ret;
1084
1085    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1086    if (ret < 0) {
1087        return ret;
1088    }
1089
1090    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1091        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1092        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1093        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1094        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1095        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1096        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1097        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1098        env->spr[SPR_DECR] = sregs.u.e.dec;
1099        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1100        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1101        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1102    }
1103
1104    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1105        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1106        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1107        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1108        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1109        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1110    }
1111
1112    if (sregs.u.e.features & KVM_SREGS_E_64) {
1113        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1114    }
1115
1116    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1117        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1118    }
1119
1120    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1121        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1122        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1123        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1124        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1125        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1126        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1127        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1128        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1129        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1130        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1131        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1132        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1133        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1134        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1135        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1136        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1137        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1138        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1139        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1140        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1141        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1142        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1143        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1144        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1145        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1146        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1147        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1148        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1149        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1150        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1151        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1152        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1153
1154        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1155            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1156            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1157            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1158            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1159            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1160            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1161        }
1162
1163        if (sregs.u.e.features & KVM_SREGS_E_PM) {
1164            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1165            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1166        }
1167
1168        if (sregs.u.e.features & KVM_SREGS_E_PC) {
1169            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1170            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1171            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1172            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1173        }
1174    }
1175
1176    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1177        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1178        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1179        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1180        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1181        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1182        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1183        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1184        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1185        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1186        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1187    }
1188
1189    if (sregs.u.e.features & KVM_SREGS_EXP) {
1190        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1191    }
1192
1193    if (sregs.u.e.features & KVM_SREGS_E_PD) {
1194        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1195        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1196    }
1197
1198    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1199        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1200        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1201        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1202
1203        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1204            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1205            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1206        }
1207    }
1208
1209    return 0;
1210}
1211
1212static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1213{
1214    CPUPPCState *env = &cpu->env;
1215    struct kvm_sregs sregs;
1216    int ret;
1217    int i;
1218
1219    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1220    if (ret < 0) {
1221        return ret;
1222    }
1223
1224    if (!env->external_htab) {
1225        ppc_store_sdr1(env, sregs.u.s.sdr1);
1226    }
1227
1228    /* Sync SLB */
1229#ifdef TARGET_PPC64
1230    /*
1231     * The packed SLB array we get from KVM_GET_SREGS only contains
1232     * information about valid entries. So we flush our internal copy
1233     * to get rid of stale ones, then put all valid SLB entries back
1234     * in.
1235     */
1236    memset(env->slb, 0, sizeof(env->slb));
1237    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1238        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1239        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1240        /*
1241         * Only restore valid entries
1242         */
1243        if (rb & SLB_ESID_V) {
1244            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1245        }
1246    }
1247#endif
1248
1249    /* Sync SRs */
1250    for (i = 0; i < 16; i++) {
1251        env->sr[i] = sregs.u.s.ppc32.sr[i];
1252    }
1253
1254    /* Sync BATs */
1255    for (i = 0; i < 8; i++) {
1256        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1257        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1258        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1259        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1260    }
1261
1262    return 0;
1263}
1264
1265int kvm_arch_get_registers(CPUState *cs)
1266{
1267    PowerPCCPU *cpu = POWERPC_CPU(cs);
1268    CPUPPCState *env = &cpu->env;
1269    struct kvm_regs regs;
1270    uint32_t cr;
1271    int i, ret;
1272
1273    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1274    if (ret < 0)
1275        return ret;
1276
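        /* Unpack the 32-bit CR image into the eight 4-bit env->crf[]
         * fields, walking from CR7 in the low nibble up to CR0. */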
1277    cr = regs.cr;
1278    for (i = 7; i >= 0; i--) {
1279        env->crf[i] = cr & 15;
1280        cr >>= 4;
1281    }
1282
1283    env->ctr = regs.ctr;
1284    env->lr = regs.lr;
1285    cpu_write_xer(env, regs.xer);
1286    env->msr = regs.msr;
1287    env->nip = regs.pc;
1288
1289    env->spr[SPR_SRR0] = regs.srr0;
1290    env->spr[SPR_SRR1] = regs.srr1;
1291
1292    env->spr[SPR_SPRG0] = regs.sprg0;
1293    env->spr[SPR_SPRG1] = regs.sprg1;
1294    env->spr[SPR_SPRG2] = regs.sprg2;
1295    env->spr[SPR_SPRG3] = regs.sprg3;
1296    env->spr[SPR_SPRG4] = regs.sprg4;
1297    env->spr[SPR_SPRG5] = regs.sprg5;
1298    env->spr[SPR_SPRG6] = regs.sprg6;
1299    env->spr[SPR_SPRG7] = regs.sprg7;
1300
1301    env->spr[SPR_BOOKE_PID] = regs.pid;
1302
1303    for (i = 0; i < 32; i++)
1304        env->gpr[i] = regs.gpr[i];
1305
1306    kvm_get_fp(cs);
1307
1308    if (cap_booke_sregs) {
1309        ret = kvmppc_get_booke_sregs(cpu);
1310        if (ret < 0) {
1311            return ret;
1312        }
1313    }
1314
1315    if (cap_segstate) {
1316        ret = kvmppc_get_books_sregs(cpu);
1317        if (ret < 0) {
1318            return ret;
1319        }
1320    }
1321
1322    if (cap_hior) {
1323        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1324    }
1325
1326    if (cap_one_reg) {
1327        int i;
1328
1329        /* We deliberately ignore errors here, for kernels which have
1330         * the ONE_REG calls, but don't support the specific
1331         * registers, there's a reasonable chance things will still
1332         * work, at least until we try to migrate. */
1333        for (i = 0; i < 1024; i++) {
1334            uint64_t id = env->spr_cb[i].one_reg_id;
1335
1336            if (id != 0) {
1337                kvm_get_one_spr(cs, id, i);
1338            }
1339        }
1340
1341#ifdef TARGET_PPC64
1342        if (msr_ts) {
1343            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1344                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1345            }
1346            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1347                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1348            }
1349            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1350            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1351            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1352            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1353            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1354            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1355            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1356            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1357            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1358            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1359        }
1360
1361        if (cap_papr) {
1362            if (kvm_get_vpa(cs) < 0) {
1363                DPRINTF("Warning: Unable to get VPA information from KVM\n");
1364            }
1365        }
1366
1367        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1368#endif
1369    }
1370
1371    return 0;
1372}
1373
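    /* Only the external interrupt pin is forwarded to KVM here, and only
     * when the level-triggered IRQ capabilities are present; for any other
     * interrupt source this is a no-op. */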
1374int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1375{
1376    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1377
1378    if (irq != PPC_INTERRUPT_EXT) {
1379        return 0;
1380    }
1381
1382    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1383        return 0;
1384    }
1385
1386    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1387
1388    return 0;
1389}
1390
1391#if defined(TARGET_PPCEMB)
1392#define PPC_INPUT_INT PPC40x_INPUT_INT
1393#elif defined(TARGET_PPC64)
1394#define PPC_INPUT_INT PPC970_INPUT_INT
1395#else
1396#define PPC_INPUT_INT PPC6xx_INPUT_INT
1397#endif
1398
1399void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1400{
1401    PowerPCCPU *cpu = POWERPC_CPU(cs);
1402    CPUPPCState *env = &cpu->env;
1403    int r;
1404    unsigned irq;
1405
1406    qemu_mutex_lock_iothread();
1407
1408    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1409     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1410    if (!cap_interrupt_level &&
1411        run->ready_for_interrupt_injection &&
1412        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1413        (env->irq_input_state & (1<<PPC_INPUT_INT)))
1414    {
1415        /* For now KVM disregards the 'irq' argument. However, in the
1416         * future KVM could cache it in-kernel to avoid a heavyweight exit
1417         * when reading the UIC.
1418         */
1419        irq = KVM_INTERRUPT_SET;
1420
1421        DPRINTF("injected interrupt %d\n", irq);
1422        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1423        if (r < 0) {
1424            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1425        }
1426
1427        /* Always wake up soon in case the interrupt was level based */
1428        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1429                       (NANOSECONDS_PER_SECOND / 50));
1430    }
1431
1432    /* We don't know if there are more interrupts pending after this. However,
1433     * the guest will return to userspace in the course of handling this one
1434     * anyway, so we will get a chance to deliver the rest. */
1435
1436    qemu_mutex_unlock_iothread();
1437}
1438
1439MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1440{
1441    return MEMTXATTRS_UNSPECIFIED;
1442}
1443
1444int kvm_arch_process_async_events(CPUState *cs)
1445{
1446    return cs->halted;
1447}
1448
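    /* The guest halted: only go idle if external interrupts are enabled
     * (MSR_EE) and none is pending; otherwise resume the vCPU right away. */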
1449static int kvmppc_handle_halt(PowerPCCPU *cpu)
1450{
1451    CPUState *cs = CPU(cpu);
1452    CPUPPCState *env = &cpu->env;
1453
1454    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1455        cs->halted = 1;
1456        cs->exception_index = EXCP_HLT;
1457    }
1458
1459    return 0;
1460}
1461
1462/* Map DCR accesses to the existing QEMU DCR emulation */
1463static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1464{
1465    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1466        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1467
1468    return 0;
1469}
1470
1471static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1472{
1473    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1474        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1475
1476    return 0;
1477}
1478
1479int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1480{
1481    /* Mixed endian case is not handled */
1482    uint32_t sc = debug_inst_opcode;
1483
1484    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1485                            sizeof(sc), 0) ||
1486        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1487        return -EINVAL;
1488    }
1489
1490    return 0;
1491}
1492
1493int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1494{
1495    uint32_t sc;
1496
1497    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1498        sc != debug_inst_opcode ||
1499        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1500                            sizeof(sc), 1)) {
1501        return -EINVAL;
1502    }
1503
1504    return 0;
1505}
1506
1507static int find_hw_breakpoint(target_ulong addr, int type)
1508{
1509    int n;
1510
1511    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1512           <= ARRAY_SIZE(hw_debug_points));
1513
1514    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1515        if (hw_debug_points[n].addr == addr &&
1516             hw_debug_points[n].type == type) {
1517            return n;
1518        }
1519    }
1520
1521    return -1;
1522}
1523
1524static int find_hw_watchpoint(target_ulong addr, int *flag)
1525{
1526    int n;
1527
1528    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1529    if (n >= 0) {
1530        *flag = BP_MEM_ACCESS;
1531        return n;
1532    }
1533
1534    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1535    if (n >= 0) {
1536        *flag = BP_MEM_WRITE;
1537        return n;
1538    }
1539
1540    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1541    if (n >= 0) {
1542        *flag = BP_MEM_READ;
1543        return n;
1544    }
1545
1546    return -1;
1547}
1548
1549int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1550                                  target_ulong len, int type)
1551{
1552    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1553        return -ENOBUFS;
1554    }
1555
1556    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1557    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1558
1559    switch (type) {
1560    case GDB_BREAKPOINT_HW:
1561        if (nb_hw_breakpoint >= max_hw_breakpoint) {
1562            return -ENOBUFS;
1563        }
1564
1565        if (find_hw_breakpoint(addr, type) >= 0) {
1566            return -EEXIST;
1567        }
1568
1569        nb_hw_breakpoint++;
1570        break;
1571
1572    case GDB_WATCHPOINT_WRITE:
1573    case GDB_WATCHPOINT_READ:
1574    case GDB_WATCHPOINT_ACCESS:
1575        if (nb_hw_watchpoint >= max_hw_watchpoint) {
1576            return -ENOBUFS;
1577        }
1578
1579        if (find_hw_breakpoint(addr, type) >= 0) {
1580            return -EEXIST;
1581        }
1582
1583        nb_hw_watchpoint++;
1584        break;
1585
1586    default:
1587        return -ENOSYS;
1588    }
1589
1590    return 0;
1591}
1592
1593int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1594                                  target_ulong len, int type)
1595{
1596    int n;
1597
1598    n = find_hw_breakpoint(addr, type);
1599    if (n < 0) {
1600        return -ENOENT;
1601    }
1602
1603    switch (type) {
1604    case GDB_BREAKPOINT_HW:
1605        nb_hw_breakpoint--;
1606        break;
1607
1608    case GDB_WATCHPOINT_WRITE:
1609    case GDB_WATCHPOINT_READ:
1610    case GDB_WATCHPOINT_ACCESS:
1611        nb_hw_watchpoint--;
1612        break;
1613
1614    default:
1615        return -ENOSYS;
1616    }
1617    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1618
1619    return 0;
1620}
1621
1622void kvm_arch_remove_all_hw_breakpoints(void)
1623{
1624    nb_hw_breakpoint = nb_hw_watchpoint = 0;
1625}
1626
1627void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1628{
1629    int n;
1630
1631    /* Software Breakpoint updates */
1632    if (kvm_sw_breakpoints_active(cs)) {
1633        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1634    }
1635
1636    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1637           <= ARRAY_SIZE(hw_debug_points));
1638    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1639
1640    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1641        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1642        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1643        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1644            switch (hw_debug_points[n].type) {
1645            case GDB_BREAKPOINT_HW:
1646                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1647                break;
1648            case GDB_WATCHPOINT_WRITE:
1649                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1650                break;
1651            case GDB_WATCHPOINT_READ:
1652                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1653                break;
1654            case GDB_WATCHPOINT_ACCESS:
1655                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1656                                        KVMPPC_DEBUG_WATCH_READ;
1657                break;
1658            default:
1659                cpu_abort(cs, "Unsupported breakpoint type\n");
1660            }
1661            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1662        }
1663    }
1664}
1665
1666static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1667{
1668    CPUState *cs = CPU(cpu);
1669    CPUPPCState *env = &cpu->env;
1670    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1671    int handle = 0;
1672    int n;
1673    int flag = 0;
1674
1675    if (cs->singlestep_enabled) {
1676        handle = 1;
1677    } else if (arch_info->status) {
1678        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1679            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1680                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1681                if (n >= 0) {
1682                    handle = 1;
1683                }
1684            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1685                                            KVMPPC_DEBUG_WATCH_WRITE)) {
1686                n = find_hw_watchpoint(arch_info->address, &flag);
1687                if (n >= 0) {
1688                    handle = 1;
1689                    cs->watchpoint_hit = &hw_watchpoint;
1690                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
1691                    hw_watchpoint.flags = flag;
1692                }
1693            }
1694        }
1695    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1696        handle = 1;
1697    } else {
1698        /* QEMU cannot handle this debug exception, so inject a
1699         * program exception into the guest instead;
1700         * yes, a program exception, NOT a debug exception!
1701         * While QEMU owns the debug resources, debug exceptions must
1702         * always be delivered to QEMU.  To achieve this we set MSR_DE
1703         * and also MSRP_DEP so the guest cannot change MSR_DE.
1704         * When instead emulating the debug resources for the guest, the
1705         * guest must control MSR_DE (enabling/disabling the debug
1706         * interrupt as needed).
1707         * Supporting both configurations at once is not possible, so
1708         * debug resources cannot be shared between QEMU and the guest
1709         * on the BookE architecture.
1710         * In the current design QEMU takes priority over the guest: if
1711         * QEMU is using the debug resources then the guest cannot use
1712         * them.
1713         * For software breakpoints QEMU uses a privileged instruction, so
1714         * this exit cannot have been caused by a guest-set debug exception;
1715         * the only possibility is that the guest executed a privileged or
1716         * illegal instruction, which is why we inject a program interrupt.
1717         */
1718
1719        cpu_synchronize_state(cs);
1720        /* env->nip is the PC, so increment it by 4 before calling
1721         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1722         */
1723        env->nip += 4;
1724        cs->exception_index = POWERPC_EXCP_PROGRAM;
1725        env->error_code = POWERPC_EXCP_INVAL;
1726        ppc_cpu_do_interrupt(cs);
1727    }
1728
1729    return handle;
1730}
1731
1732int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1733{
1734    PowerPCCPU *cpu = POWERPC_CPU(cs);
1735    CPUPPCState *env = &cpu->env;
1736    int ret;
1737
1738    qemu_mutex_lock_iothread();
1739
1740    switch (run->exit_reason) {
1741    case KVM_EXIT_DCR:
1742        if (run->dcr.is_write) {
1743            DPRINTF("handle dcr write\n");
1744            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1745        } else {
1746            DPRINTF("handle dcr read\n");
1747            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1748        }
1749        break;
1750    case KVM_EXIT_HLT:
1751        DPRINTF("handle halt\n");
1752        ret = kvmppc_handle_halt(cpu);
1753        break;
1754#if defined(TARGET_PPC64)
1755    case KVM_EXIT_PAPR_HCALL:
1756        DPRINTF("handle PAPR hypercall\n");
1757        run->papr_hcall.ret = spapr_hypercall(cpu,
1758                                              run->papr_hcall.nr,
1759                                              run->papr_hcall.args);
1760        ret = 0;
1761        break;
1762#endif
1763    case KVM_EXIT_EPR:
1764        DPRINTF("handle epr\n");
1765        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1766        ret = 0;
1767        break;
1768    case KVM_EXIT_WATCHDOG:
1769        DPRINTF("handle watchdog expiry\n");
1770        watchdog_perform_action();
1771        ret = 0;
1772        break;
1773
1774    case KVM_EXIT_DEBUG:
1775        DPRINTF("handle debug exception\n");
1776        if (kvm_handle_debug(cpu, run)) {
1777            ret = EXCP_DEBUG;
1778            break;
1779        }
1780        /* re-enter, this exception was guest-internal */
1781        ret = 0;
1782        break;
1783
1784    default:
1785        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1786        ret = -1;
1787        break;
1788    }
1789
1790    qemu_mutex_unlock_iothread();
1791    return ret;
1792}
1793
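/* Set the given bits in the guest's Timer Status Register (TSR) via the
 * KVM ONE_REG interface */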
1794int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1795{
1796    CPUState *cs = CPU(cpu);
1797    uint32_t bits = tsr_bits;
1798    struct kvm_one_reg reg = {
1799        .id = KVM_REG_PPC_OR_TSR,
1800        .addr = (uintptr_t) &bits,
1801    };
1802
1803    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1804}
1805
1806int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1807{
1808
1809    CPUState *cs = CPU(cpu);
1810    uint32_t bits = tsr_bits;
1811    struct kvm_one_reg reg = {
1812        .id = KVM_REG_PPC_CLEAR_TSR,
1813        .addr = (uintptr_t) &bits,
1814    };
1815
1816    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1817}
1818
1819int kvmppc_set_tcr(PowerPCCPU *cpu)
1820{
1821    CPUState *cs = CPU(cpu);
1822    CPUPPCState *env = &cpu->env;
1823    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1824
1825    struct kvm_one_reg reg = {
1826        .id = KVM_REG_PPC_TCR,
1827        .addr = (uintptr_t) &tcr,
1828    };
1829
1830    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1831}
1832
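/* Enable KVM's in-kernel BookE watchdog emulation for this vCPU */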
1833int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1834{
1835    CPUState *cs = CPU(cpu);
1836    int ret;
1837
1838    if (!kvm_enabled()) {
1839        return -1;
1840    }
1841
1842    if (!cap_ppc_watchdog) {
1843        printf("warning: KVM does not support watchdog\n");
1844        return -1;
1845    }
1846
1847    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1848    if (ret < 0) {
1849        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1850                __func__, strerror(-ret));
1851        return ret;
1852    }
1853
1854    return ret;
1855}
1856
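/* Look up @field in /proc/cpuinfo and copy the matching line into @value.
 * Returns 0 on success, -1 if the file cannot be opened or the field is
 * not found. */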
1857static int read_cpuinfo(const char *field, char *value, int len)
1858{
1859    FILE *f;
1860    int ret = -1;
1861    int field_len = strlen(field);
1862    char line[512];
1863
1864    f = fopen("/proc/cpuinfo", "r");
1865    if (!f) {
1866        return -1;
1867    }
1868
1869    do {
1870        if (!fgets(line, sizeof(line), f)) {
1871            break;
1872        }
1873        if (!strncmp(line, field, field_len)) {
1874            pstrcpy(value, len, line);
1875            ret = 0;
1876            break;
1877        }
1878    } while (*line);
1879
1880    fclose(f);
1881
1882    return ret;
1883}
1884
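/* Return the host timebase frequency parsed from the "timebase" line of
 * /proc/cpuinfo, falling back to NANOSECONDS_PER_SECOND if it cannot be
 * determined */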
1885uint32_t kvmppc_get_tbfreq(void)
1886{
1887    char line[512];
1888    char *ns;
1889    uint32_t retval = NANOSECONDS_PER_SECOND;
1890
1891    if (read_cpuinfo("timebase", line, sizeof(line))) {
1892        return retval;
1893    }
1894
1895    if (!(ns = strchr(line, ':'))) {
1896        return retval;
1897    }
1898
1899    ns++;
1900
1901    return atoi(ns);
1902}
1903
1904bool kvmppc_get_host_serial(char **value)
1905{
1906    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1907                               NULL);
1908}
1909
1910bool kvmppc_get_host_model(char **value)
1911{
1912    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1913}
1914
1915/* Try to find a device tree node for a CPU with clock-frequency property */
1916static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1917{
1918    struct dirent *dirp;
1919    DIR *dp;
1920
1921    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1922        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1923        return -1;
1924    }
1925
1926    buf[0] = '\0';
1927    while ((dirp = readdir(dp)) != NULL) {
1928        FILE *f;
1929        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1930                 dirp->d_name);
1931        f = fopen(buf, "r");
1932        if (f) {
1933            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1934            fclose(f);
1935            break;
1936        }
1937        buf[0] = '\0';
1938    }
1939    closedir(dp);
1940    if (buf[0] == '\0') {
1941        printf("Unknown host!\n");
1942        return -1;
1943    }
1944
1945    return 0;
1946}
1947
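/* Read a host device tree property file containing a single big-endian
 * integer (4 or 8 bytes).  Returns -1 if the file cannot be opened, or 0
 * if the property length is not understood. */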
1948static uint64_t kvmppc_read_int_dt(const char *filename)
1949{
1950    union {
1951        uint32_t v32;
1952        uint64_t v64;
1953    } u;
1954    FILE *f;
1955    int len;
1956
1957    f = fopen(filename, "rb");
1958    if (!f) {
1959        return -1;
1960    }
1961
1962    len = fread(&u, 1, sizeof(u), f);
1963    fclose(f);
1964    switch (len) {
1965    case 4:
1966        /* property is a 32-bit quantity */
1967        return be32_to_cpu(u.v32);
1968    case 8:
1969        return be64_to_cpu(u.v64);
1970    }
1971
1972    return 0;
1973}
1974
1975/* Read a CPU node property from the host device tree that's a single
1976 * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1977 * (can't find or open the property, or doesn't understand the
1978 * format) */
1979static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1980{
1981    char buf[PATH_MAX], *tmp;
1982    uint64_t val;
1983
1984    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1985        return -1;
1986    }
1987
1988    tmp = g_strdup_printf("%s/%s", buf, propname);
1989    val = kvmppc_read_int_dt(tmp);
1990    g_free(tmp);
1991
1992    return val;
1993}
1994
1995uint64_t kvmppc_get_clockfreq(void)
1996{
1997    return kvmppc_read_int_cpu_dt("clock-frequency");
1998}
1999
2000uint32_t kvmppc_get_vmx(void)
2001{
2002    return kvmppc_read_int_cpu_dt("ibm,vmx");
2003}
2004
2005uint32_t kvmppc_get_dfp(void)
2006{
2007    return kvmppc_read_int_cpu_dt("ibm,dfp");
2008}
2009
2010static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2011{
2012    PowerPCCPU *cpu = ppc_env_get_cpu(env);
2013    CPUState *cs = CPU(cpu);
2014
2015    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2016        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2017        return 0;
2018    }
2019
2020    return 1;
2021}
2022
2023int kvmppc_get_hasidle(CPUPPCState *env)
2024{
2025    struct kvm_ppc_pvinfo pvinfo;
2026
2027    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2028        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2029        return 1;
2030    }
2031
2032    return 0;
2033}
2034
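/* Copy the KVM-provided hypercall instruction sequence into @buf.  If the
 * kernel does not provide one, install the always-failing fallback
 * sequence below and return 1. */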
2035int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2036{
2037    uint32_t *hc = (uint32_t *)buf;
2038    struct kvm_ppc_pvinfo pvinfo;
2039
2040    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2041        memcpy(buf, pvinfo.hcall, buf_len);
2042        return 0;
2043    }
2044
2045    /*
2046     * Fall back to hypercalls that always fail, regardless of endianness:
2047     *
2048     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2049     *     li r3, -1
2050     *     b .+8       (becomes nop in wrong endian)
2051     *     bswap32(li r3, -1)
2052     */
2053
2054    hc[0] = cpu_to_be32(0x08000048);
2055    hc[1] = cpu_to_be32(0x3860ffff);
2056    hc[2] = cpu_to_be32(0x48000008);
2057    hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2058
2059    return 1;
2060}
2061
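/* Enable the in-kernel implementation of a single sPAPR hypercall */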
2062static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2063{
2064    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2065}
2066
2067void kvmppc_enable_logical_ci_hcalls(void)
2068{
2069    /*
2070     * FIXME: it would be nice if we could detect the cases where
2071     * we're using a device which requires the in-kernel
2072     * implementation of these hcalls but the kernel lacks it, and
2073     * produce a warning in that case.
2074     */
2075    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2076    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2077}
2078
2079void kvmppc_enable_set_mode_hcall(void)
2080{
2081    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2082}
2083
2084void kvmppc_enable_clear_ref_mod_hcalls(void)
2085{
2086    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2087    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2088}
2089
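/* Switch the vCPU into PAPR (pseries) mode; this is required for the
 * sPAPR machine, so failure is fatal */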
2090void kvmppc_set_papr(PowerPCCPU *cpu)
2091{
2092    CPUState *cs = CPU(cpu);
2093    int ret;
2094
2095    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2096    if (ret) {
2097        error_report("This vCPU type or KVM version does not support PAPR");
2098        exit(1);
2099    }
2100
2101    /* Update the capability flag so we sync the right information
2102     * with kvm */
2103    cap_papr = 1;
2104}
2105
2106int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2107{
2108    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2109}
2110
2111void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2112{
2113    CPUState *cs = CPU(cpu);
2114    int ret;
2115
2116    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2117    if (ret && mpic_proxy) {
2118        error_report("This KVM version does not support EPR");
2119        exit(1);
2120    }
2121}
2122
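/* Number of SMT threads per core reported by KVM, or 1 if the capability
 * is not advertised */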
2123int kvmppc_smt_threads(void)
2124{
2125    return cap_ppc_smt ? cap_ppc_smt : 1;
2126}
2127
2128#ifdef TARGET_PPC64
2129off_t kvmppc_alloc_rma(void **rma)
2130{
2131    off_t size;
2132    int fd;
2133    struct kvm_allocate_rma ret;
2134
2135    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2136     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2137     *                      not necessary on this hardware
2138     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2139     *
2140     * FIXME: We should allow the user to force contiguous RMA
2141     * allocation in the cap_ppc_rma==1 case.
2142     */
2143    if (cap_ppc_rma < 2) {
2144        return 0;
2145    }
2146
2147    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2148    if (fd < 0) {
2149        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2150                strerror(errno));
2151        return -1;
2152    }
2153
2154    size = MIN(ret.rma_size, 256ul << 20);
2155
2156    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2157    if (*rma == MAP_FAILED) {
2158        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2159        return -1;
2160    }
2161
2162    return size;
2163}
2164
2165uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2166{
2167    struct kvm_ppc_smmu_info info;
2168    long rampagesize, best_page_shift;
2169    int i;
2170
2171    if (cap_ppc_rma >= 2) {
2172        return current_size;
2173    }
2174
2175    /* Find the largest hardware supported page size that's less than
2176     * or equal to the (logical) backing page size of guest RAM */
2177    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2178    rampagesize = getrampagesize();
2179    best_page_shift = 0;
2180
2181    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2182        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2183
2184        if (!sps->page_shift) {
2185            continue;
2186        }
2187
2188        if ((sps->page_shift > best_page_shift)
2189            && ((1UL << sps->page_shift) <= rampagesize)) {
2190            best_page_shift = sps->page_shift;
2191        }
2192    }
2193
2194    return MIN(current_size,
2195               1ULL << (best_page_shift + hash_shift - 7));
2196}
2197#endif
2198
2199bool kvmppc_spapr_use_multitce(void)
2200{
2201    return cap_spapr_multitce;
2202}
2203
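/* Create an in-kernel TCE table for @liobn and mmap() it into QEMU's
 * address space.  Returns the mapped table and stores its file descriptor
 * in *pfd, or NULL if an in-kernel table cannot be used. */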
2204void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2205                              bool need_vfio)
2206{
2207    struct kvm_create_spapr_tce args = {
2208        .liobn = liobn,
2209        .window_size = window_size,
2210    };
2211    long len;
2212    int fd;
2213    void *table;
2214
2215    /* Must set fd to -1 so we don't try to munmap when called for
2216     * destroying the table, which the upper layers -will- do
2217     */
2218    *pfd = -1;
2219    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2220        return NULL;
2221    }
2222
2223    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2224    if (fd < 0) {
2225        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2226                liobn);
2227        return NULL;
2228    }
2229
2230    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2231    /* FIXME: round this up to page size */
2232
2233    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2234    if (table == MAP_FAILED) {
2235        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2236                liobn);
2237        close(fd);
2238        return NULL;
2239    }
2240
2241    *pfd = fd;
2242    return table;
2243}
2244
2245int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2246{
2247    long len;
2248
2249    if (fd < 0) {
2250        return -1;
2251    }
2252
2253    len = nb_table * sizeof(uint64_t);
2254    if ((munmap(table, len) < 0) ||
2255        (close(fd) < 0)) {
2256        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2257                strerror(errno));
2258        /* Leak the table */
2259    }
2260
2261    return 0;
2262}
2263
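/* Ask the kernel to allocate the guest hash page table.  Returns the log2
 * size of a kernel-allocated HTAB, 0 if the caller (QEMU) should allocate
 * the HTAB itself, or a negative errno on failure. */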
2264int kvmppc_reset_htab(int shift_hint)
2265{
2266    uint32_t shift = shift_hint;
2267
2268    if (!kvm_enabled()) {
2269        /* Full emulation, tell caller to allocate htab itself */
2270        return 0;
2271    }
2272    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2273        int ret;
2274        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2275        if (ret == -ENOTTY) {
2276            /* At least some versions of PR KVM advertise the
2277             * capability, but don't implement the ioctl().  Oops.
2278             * Return 0 so that we allocate the htab in qemu, as is
2279             * correct for PR. */
2280            return 0;
2281        } else if (ret < 0) {
2282            return ret;
2283        }
2284        return shift;
2285    }
2286
2287    /* We have a kernel that predates the htab reset calls.  For PR
2288     * KVM, we need to allocate the htab ourselves; an HV KVM of this
2289     * era will already have allocated a 16MB fixed-size hash table. */
2290    if (kvmppc_is_pr(kvm_state)) {
2291        /* PR - tell caller to allocate htab */
2292        return 0;
2293    } else {
2294        /* HV - assume 16MB kernel allocated htab */
2295        return 24;
2296    }
2297}
2298
2299static inline uint32_t mfpvr(void)
2300{
2301    uint32_t pvr;
2302
2303    asm ("mfpvr %0"
2304         : "=r"(pvr));
2305    return pvr;
2306}
2307
2308static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2309{
2310    if (on) {
2311        *word |= flags;
2312    } else {
2313        *word &= ~flags;
2314    }
2315}
2316
2317static void kvmppc_host_cpu_initfn(Object *obj)
2318{
2319    assert(kvm_enabled());
2320}
2321
2322static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2323{
2324    DeviceClass *dc = DEVICE_CLASS(oc);
2325    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2326    uint32_t vmx = kvmppc_get_vmx();
2327    uint32_t dfp = kvmppc_get_dfp();
2328    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2329    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2330
2331    /* Now fix up the class with information we can query from the host */
2332    pcc->pvr = mfpvr();
2333
2334    if (vmx != -1) {
2335        /* Only override when we know what the host supports */
2336        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2337        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2338    }
2339    if (dfp != -1) {
2340        /* Only override when we know what the host supports */
2341        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2342    }
2343
2344    if (dcache_size != -1) {
2345        pcc->l1_dcache_size = dcache_size;
2346    }
2347
2348    if (icache_size != -1) {
2349        pcc->l1_icache_size = icache_size;
2350    }
2351
2352    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2353    dc->cannot_destroy_with_object_finalize_yet = true;
2354}
2355
2356bool kvmppc_has_cap_epr(void)
2357{
2358    return cap_epr;
2359}
2360
2361bool kvmppc_has_cap_htab_fd(void)
2362{
2363    return cap_htab_fd;
2364}
2365
2366bool kvmppc_has_cap_fixup_hcalls(void)
2367{
2368    return cap_fixup_hcalls;
2369}
2370
2371bool kvmppc_has_cap_htm(void)
2372{
2373    return cap_htm;
2374}
2375
2376static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2377{
2378    ObjectClass *oc = OBJECT_CLASS(pcc);
2379
2380    while (oc && !object_class_is_abstract(oc)) {
2381        oc = object_class_get_parent(oc);
2382    }
2383    assert(oc);
2384
2385    return POWERPC_CPU_CLASS(oc);
2386}
2387
2388PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2389{
2390    uint32_t host_pvr = mfpvr();
2391    PowerPCCPUClass *pvr_pcc;
2392
2393    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2394    if (pvr_pcc == NULL) {
2395        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2396    }
2397
2398    return pvr_pcc;
2399}
2400
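/* Register the "host" CPU type (and, on PPC64, the matching sPAPR core
 * types) based on the CPU class that matches the host's PVR */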
2401static int kvm_ppc_register_host_cpu_type(void)
2402{
2403    TypeInfo type_info = {
2404        .name = TYPE_HOST_POWERPC_CPU,
2405        .instance_init = kvmppc_host_cpu_initfn,
2406        .class_init = kvmppc_host_cpu_class_init,
2407    };
2408    PowerPCCPUClass *pvr_pcc;
2409    DeviceClass *dc;
2410
2411    pvr_pcc = kvm_ppc_get_host_cpu_class();
2412    if (pvr_pcc == NULL) {
2413        return -1;
2414    }
2415    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2416    type_register(&type_info);
2417
2418    /* Register a generic CPU class for the whole CPU family */
2419    pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2420    dc = DEVICE_CLASS(pvr_pcc);
2421    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2422    type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2423    type_register(&type_info);
2424
2425#if defined(TARGET_PPC64)
2426    type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2427    type_info.parent = TYPE_SPAPR_CPU_CORE;
2428    type_info.instance_size = sizeof(sPAPRCPUCore);
2429    type_info.instance_init = NULL;
2430    type_info.class_init = spapr_cpu_core_class_init;
2431    type_info.class_data = (void *) "host";
2432    type_register(&type_info);
2433    g_free((void *)type_info.name);
2434
2435    /* Register generic spapr CPU family class for current host CPU type */
2436    type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2437    type_info.class_data = (void *) dc->desc;
2438    type_register(&type_info);
2439    g_free((void *)type_info.name);
2440#endif
2441
2442    return 0;
2443}
2444
2445int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2446{
2447    struct kvm_rtas_token_args args = {
2448        .token = token,
2449    };
2450
2451    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2452        return -ENOENT;
2453    }
2454
2455    strncpy(args.name, function, sizeof(args.name));
2456
2457    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2458}
2459
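/* Open a file descriptor for streaming the guest hash page table out of
 * (or, with @write, back into) the kernel */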
2460int kvmppc_get_htab_fd(bool write)
2461{
2462    struct kvm_get_htab_fd s = {
2463        .flags = write ? KVM_GET_HTAB_WRITE : 0,
2464        .start_index = 0,
2465    };
2466
2467    if (!cap_htab_fd) {
2468        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2469        return -1;
2470    }
2471
2472    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2473}
2474
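/* Read HTAB chunks from the kernel's HTAB fd and write them to the
 * migration stream @f, stopping after roughly @max_ns nanoseconds when
 * @max_ns is non-negative.  Returns 1 once the whole table has been read,
 * 0 if more data remains. */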
2475int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2476{
2477    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2478    uint8_t buf[bufsize];
2479    ssize_t rc;
2480
2481    do {
2482        rc = read(fd, buf, bufsize);
2483        if (rc < 0) {
2484            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2485                    strerror(errno));
2486            return rc;
2487        } else if (rc) {
2488            uint8_t *buffer = buf;
2489            ssize_t n = rc;
2490            while (n) {
2491                struct kvm_get_htab_header *head =
2492                    (struct kvm_get_htab_header *) buffer;
2493                size_t chunksize = sizeof(*head) +
2494                     HASH_PTE_SIZE_64 * head->n_valid;
2495
2496                qemu_put_be32(f, head->index);
2497                qemu_put_be16(f, head->n_valid);
2498                qemu_put_be16(f, head->n_invalid);
2499                qemu_put_buffer(f, (void *)(head + 1),
2500                                HASH_PTE_SIZE_64 * head->n_valid);
2501
2502                buffer += chunksize;
2503                n -= chunksize;
2504            }
2505        }
2506    } while ((rc != 0)
2507             && ((max_ns < 0)
2508                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2509
2510    return (rc == 0) ? 1 : 0;
2511}
2512
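/* Write one chunk of HTAB entries received from the migration stream back
 * to the kernel through the HTAB fd */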
2513int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2514                           uint16_t n_valid, uint16_t n_invalid)
2515{
2516    struct kvm_get_htab_header *buf;
2517    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2518    ssize_t rc;
2519
2520    buf = alloca(chunksize);
2521    buf->index = index;
2522    buf->n_valid = n_valid;
2523    buf->n_invalid = n_invalid;
2524
2525    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2526
2527    rc = write(fd, buf, chunksize);
2528    if (rc < 0) {
2529        fprintf(stderr, "Error writing KVM hash table: %s\n",
2530                strerror(errno));
2531        return rc;
2532    }
2533    if (rc != chunksize) {
2534        /* We should never get a short write on a single chunk */
2535        fprintf(stderr, "Short write, restoring KVM hash table\n");
2536        return -1;
2537    }
2538    return 0;
2539}
2540
2541bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2542{
2543    return true;
2544}
2545
2546int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2547{
2548    return 1;
2549}
2550
2551int kvm_arch_on_sigbus(int code, void *addr)
2552{
2553    return 1;
2554}
2555
2556void kvm_arch_init_irq_routing(KVMState *s)
2557{
2558}
2559
2560struct kvm_get_htab_buf {
2561    struct kvm_get_htab_header header;
2562    /*
2563     * We require one extra byte for read
2564     */
2565    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2566};
2567
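/* Read one HPTE group starting at @pte_index through a temporary HTAB fd.
 * Returns a token pointing at the HPTE data, to be released with
 * kvmppc_hash64_free_pteg(), or 0 on failure. */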
2568uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2569{
2570    int htab_fd;
2571    struct kvm_get_htab_fd ghf;
2572    struct kvm_get_htab_buf  *hpte_buf;
2573
2574    ghf.flags = 0;
2575    ghf.start_index = pte_index;
2576    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2577    if (htab_fd < 0) {
2578        goto error_out;
2579    }
2580
2581    hpte_buf = g_malloc0(sizeof(*hpte_buf));
2582    /*
2583     * Read the hpte group
2584     */
2585    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2586        goto out_close;
2587    }
2588
2589    close(htab_fd);
2590    return (uint64_t)(uintptr_t) hpte_buf->hpte;
2591
2592out_close:
2593    g_free(hpte_buf);
2594    close(htab_fd);
2595error_out:
2596    return 0;
2597}
2598
2599void kvmppc_hash64_free_pteg(uint64_t token)
2600{
2601    struct kvm_get_htab_buf *htab_buf;
2602
2603    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2604                            hpte);
2605    g_free(htab_buf);
2606    return;
2607}
2608
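/* Write a single HPTE (@pte0/@pte1) at @pte_index through a temporary
 * HTAB fd; errors are silently ignored */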
2609void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2610                             target_ulong pte0, target_ulong pte1)
2611{
2612    int htab_fd;
2613    struct kvm_get_htab_fd ghf;
2614    struct kvm_get_htab_buf hpte_buf;
2615
2616    ghf.flags = 0;
2617    ghf.start_index = 0;     /* Ignored */
2618    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2619    if (htab_fd < 0) {
2620        goto error_out;
2621    }
2622
2623    hpte_buf.header.n_valid = 1;
2624    hpte_buf.header.n_invalid = 0;
2625    hpte_buf.header.index = pte_index;
2626    hpte_buf.hpte[0] = pte0;
2627    hpte_buf.hpte[1] = pte1;
2628    /*
2629     * Write the hpte entry.
2630     * CAUTION: write() has the warn_unused_result attribute. Hence we
2631     * need to check the return value, even though we don't act on it.
2632     */
2633    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2634        goto out_close;
2635    }
2636
2637out_close:
2638    close(htab_fd);
2639    return;
2640
2641error_out:
2642    return;
2643}
2644
2645int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2646                             uint64_t address, uint32_t data, PCIDevice *dev)
2647{
2648    return 0;
2649}
2650
2651int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2652                                int vector, PCIDevice *dev)
2653{
2654    return 0;
2655}
2656
2657int kvm_arch_release_virq_post(int virq)
2658{
2659    return 0;
2660}
2661
2662int kvm_arch_msi_data_to_gsi(uint32_t data)
2663{
2664    return data & 0xffff;
2665}
2666
2667int kvmppc_enable_hwrng(void)
2668{
2669    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2670        return -1;
2671    }
2672
2673    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2674}
2675