qemu/target-ppc/kvm.c
   1/*
   2 * PowerPC implementation of KVM hooks
   3 *
   4 * Copyright IBM Corp. 2007
   5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6 *
   7 * Authors:
   8 *  Jerone Young <jyoung5@us.ibm.com>
   9 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10 *  Hollis Blanchard <hollisb@us.ibm.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13 * See the COPYING file in the top-level directory.
  14 *
  15 */
  16
  17#include "qemu/osdep.h"
  18#include <dirent.h>
  19#include <sys/ioctl.h>
  20#include <sys/vfs.h>
  21
  22#include <linux/kvm.h>
  23
  24#include "qemu-common.h"
  25#include "qemu/error-report.h"
  26#include "cpu.h"
  27#include "qemu/timer.h"
  28#include "sysemu/sysemu.h"
  29#include "sysemu/kvm.h"
  30#include "sysemu/numa.h"
  31#include "kvm_ppc.h"
  32#include "sysemu/cpus.h"
  33#include "sysemu/device_tree.h"
  34#include "mmu-hash64.h"
  35
  36#include "hw/sysbus.h"
  37#include "hw/ppc/spapr.h"
  38#include "hw/ppc/spapr_vio.h"
  39#include "hw/ppc/ppc.h"
  40#include "sysemu/watchdog.h"
  41#include "trace.h"
  42#include "exec/gdbstub.h"
  43#include "exec/memattrs.h"
  44#include "sysemu/hostmem.h"
  45#include "qemu/cutils.h"
  46#if defined(TARGET_PPC64)
  47#include "hw/ppc/spapr_cpu_core.h"
  48#endif
  49
  50//#define DEBUG_KVM
  51
  52#ifdef DEBUG_KVM
  53#define DPRINTF(fmt, ...) \
  54    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  55#else
  56#define DPRINTF(fmt, ...) \
  57    do { } while (0)
  58#endif
  59
  60#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  61
  62const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  63    KVM_CAP_LAST_INFO
  64};
  65
  66static int cap_interrupt_unset = false;
  67static int cap_interrupt_level = false;
  68static int cap_segstate;
  69static int cap_booke_sregs;
  70static int cap_ppc_smt;
  71static int cap_ppc_rma;
  72static int cap_spapr_tce;
  73static int cap_spapr_multitce;
  74static int cap_spapr_vfio;
  75static int cap_hior;
  76static int cap_one_reg;
  77static int cap_epr;
  78static int cap_ppc_watchdog;
  79static int cap_papr;
  80static int cap_htab_fd;
  81static int cap_fixup_hcalls;
  82static int cap_htm;             /* Hardware transactional memory support */
  83
  84static uint32_t debug_inst_opcode;
  85
  86/* XXX We have a race condition where we actually have a level triggered
  87 *     interrupt, but the infrastructure can't expose that yet, so the guest
  88 *     takes but ignores it, goes to sleep and never gets notified that there's
  89 *     still an interrupt pending.
  90 *
  91 *     As a quick workaround, let's just wake up again 20 ms after we injected
  92 *     an interrupt. That way we ensure that we're always reinjecting
  93 *     interrupts in case the guest swallowed them.
  94 */
  95static QEMUTimer *idle_timer;
  96
  97static void kvm_kick_cpu(void *opaque)
  98{
  99    PowerPCCPU *cpu = opaque;
 100
 101    qemu_cpu_kick(CPU(cpu));
 102}
 103
 104static int kvm_ppc_register_host_cpu_type(void);
 105
 106int kvm_arch_init(MachineState *ms, KVMState *s)
 107{
 108    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 109    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 110    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 111    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 112    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 113    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 114    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 115    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 116    cap_spapr_vfio = false;
 117    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 118    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 119    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 120    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 121    /* Note: we don't set cap_papr here, because this capability is
 122     * only activated after this by kvmppc_set_papr() */
 123    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 124    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 125    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 126
 127    if (!cap_interrupt_level) {
 128        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 129                        "VM to stall at times!\n");
 130    }
 131
 132    kvm_ppc_register_host_cpu_type();
 133
 134    return 0;
 135}
 136
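    /* Sync the guest's PVR to KVM via KVM_SET_SREGS (Book S only; skipped on BookE) */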
 137static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 138{
 139    CPUPPCState *cenv = &cpu->env;
 140    CPUState *cs = CPU(cpu);
 141    struct kvm_sregs sregs;
 142    int ret;
 143
 144    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 145        /* What we're really trying to say is "if we're on BookE, we use
 146           the native PVR for now". This is the only sane way to check
 147           it though, so it may mislead users into thinking they can run
 148           BookE guests on BookS. Let's hope nobody dares to try. :) */
 149        return 0;
 150    } else {
 151        if (!cap_segstate) {
 152            fprintf(stderr, "kvm error: missing PVR setting capability\n");
 153            return -ENOSYS;
 154        }
 155    }
 156
 157    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 158    if (ret) {
 159        return ret;
 160    }
 161
 162    sregs.pvr = cenv->spr[SPR_PVR];
 163    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 164}
 165
 166/* Set up a shared TLB array with KVM */
 167static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 168{
 169    CPUPPCState *env = &cpu->env;
 170    CPUState *cs = CPU(cpu);
 171    struct kvm_book3e_206_tlb_params params = {};
 172    struct kvm_config_tlb cfg = {};
 173    unsigned int entries = 0;
 174    int ret, i;
 175
 176    if (!kvm_enabled() ||
 177        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 178        return 0;
 179    }
 180
 181    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 182
 183    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 184        params.tlb_sizes[i] = booke206_tlb_size(env, i);
 185        params.tlb_ways[i] = booke206_tlb_ways(env, i);
 186        entries += params.tlb_sizes[i];
 187    }
 188
 189    assert(entries == env->nb_tlb);
 190    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 191
 192    env->tlb_dirty = true;
 193
 194    cfg.array = (uintptr_t)env->tlb.tlbm;
 195    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 196    cfg.params = (uintptr_t)&params;
 197    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 198
 199    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 200    if (ret < 0) {
 201        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 202                __func__, strerror(-ret));
 203        return ret;
 204    }
 205
 206    env->kvm_sw_tlb = true;
 207    return 0;
 208}
 209
 210
 211#if defined(TARGET_PPC64)
 212static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 213                                       struct kvm_ppc_smmu_info *info)
 214{
 215    CPUPPCState *env = &cpu->env;
 216    CPUState *cs = CPU(cpu);
 217
 218    memset(info, 0, sizeof(*info));
 219
 220    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
 221     * need to "guess" what the supported page sizes are.
 222     *
 223     * For that to work we make a few assumptions:
 224     *
 225     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 226     *   KVM, which only supports 4K and 16M pages, but supports them
 227     *   regardless of the backing store characteristics. We also don't
 228     *   support 1T segments.
 229     *
 230     *   This is safe because if HV KVM ever supports that capability, or
 231     *   PR KVM grows support for more page/segment sizes, those versions
 232     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 233     *   will not hit this fallback.
 234     *
 235     * - Else we are running HV KVM. This means we only support page
 236     *   sizes that fit in the backing store. Additionally we only
 237     *   advertise 64K pages if the processor is ARCH 2.06, and we assume
 238     *   P7 encodings for the SLB and hash table. Here too, we assume
 239     *   support for any newer processor will mean a kernel that
 240     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 241     *   this fallback.
 242     */
 243    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 244        /* No flags */
 245        info->flags = 0;
 246        info->slb_size = 64;
 247
 248        /* Standard 4k base page size segment */
 249        info->sps[0].page_shift = 12;
 250        info->sps[0].slb_enc = 0;
 251        info->sps[0].enc[0].page_shift = 12;
 252        info->sps[0].enc[0].pte_enc = 0;
 253
 254        /* Standard 16M large page size segment */
 255        info->sps[1].page_shift = 24;
 256        info->sps[1].slb_enc = SLB_VSID_L;
 257        info->sps[1].enc[0].page_shift = 24;
 258        info->sps[1].enc[0].pte_enc = 0;
 259    } else {
 260        int i = 0;
 261
 262        /* HV KVM has backing store size restrictions */
 263        info->flags = KVM_PPC_PAGE_SIZES_REAL;
 264
 265        if (env->mmu_model & POWERPC_MMU_1TSEG) {
 266            info->flags |= KVM_PPC_1T_SEGMENTS;
 267        }
 268
 269        if (env->mmu_model == POWERPC_MMU_2_06 ||
 270            env->mmu_model == POWERPC_MMU_2_07) {
 271            info->slb_size = 32;
 272        } else {
 273            info->slb_size = 64;
 274        }
 275
 276        /* Standard 4k base page size segment */
 277        info->sps[i].page_shift = 12;
 278        info->sps[i].slb_enc = 0;
 279        info->sps[i].enc[0].page_shift = 12;
 280        info->sps[i].enc[0].pte_enc = 0;
 281        i++;
 282
 283        /* 64K on MMU 2.06 and later */
 284        if (env->mmu_model == POWERPC_MMU_2_06 ||
 285            env->mmu_model == POWERPC_MMU_2_07) {
 286            info->sps[i].page_shift = 16;
 287            info->sps[i].slb_enc = 0x110;
 288            info->sps[i].enc[0].page_shift = 16;
 289            info->sps[i].enc[0].pte_enc = 1;
 290            i++;
 291        }
 292
 293        /* Standard 16M large page size segment */
 294        info->sps[i].page_shift = 24;
 295        info->sps[i].slb_enc = SLB_VSID_L;
 296        info->sps[i].enc[0].page_shift = 24;
 297        info->sps[i].enc[0].pte_enc = 0;
 298    }
 299}
 300
 301static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 302{
 303    CPUState *cs = CPU(cpu);
 304    int ret;
 305
 306    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 307        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 308        if (ret == 0) {
 309            return;
 310        }
 311    }
 312
 313    kvm_get_fallback_smmu_info(cpu, info);
 314}
 315
 316static long gethugepagesize(const char *mem_path)
 317{
 318    struct statfs fs;
 319    int ret;
 320
 321    do {
 322        ret = statfs(mem_path, &fs);
 323    } while (ret != 0 && errno == EINTR);
 324
 325    if (ret != 0) {
 326        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 327                strerror(errno));
 328        exit(1);
 329    }
 330
 331#define HUGETLBFS_MAGIC       0x958458f6
 332
 333    if (fs.f_type != HUGETLBFS_MAGIC) {
 334        /* Explicit mempath, but it's ordinary pages */
 335        return getpagesize();
 336    }
 337
 338    /* It's a hugetlbfs mount; return the huge page size */
 339    return fs.f_bsize;
 340}
 341
 342/*
 343 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 344 * may or may not name the same files / on the same filesystem now as
 345 * when we actually open and map them.  Iterate over the file
 346 * descriptors instead, and use qemu_fd_getpagesize().
 347 */
 348static int find_max_supported_pagesize(Object *obj, void *opaque)
 349{
 350    char *mem_path;
 351    long *hpsize_min = opaque;
 352
 353    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 354        mem_path = object_property_get_str(obj, "mem-path", NULL);
 355        if (mem_path) {
 356            long hpsize = gethugepagesize(mem_path);
 357            if (hpsize < *hpsize_min) {
 358                *hpsize_min = hpsize;
 359            }
 360        } else {
 361            *hpsize_min = getpagesize();
 362        }
 363    }
 364
 365    return 0;
 366}
 367
 368static long getrampagesize(void)
 369{
 370    long hpsize = LONG_MAX;
 371    long mainrampagesize;
 372    Object *memdev_root;
 373
 374    if (mem_path) {
 375        mainrampagesize = gethugepagesize(mem_path);
 376    } else {
 377        mainrampagesize = getpagesize();
 378    }
 379
 380    /* It's possible we have memory-backend objects with
 381     * hugepage-backed RAM. These may get mapped into the system
 382     * address space via -numa parameters or memory hotplug
 383     * hooks. We want to take these into account, but we
 384     * also want to make sure the supported hugepage
 385     * sizes are applicable across the entire range of memory
 386     * we may boot from, so we take the min across all
 387     * backends, and assume normal pages in cases where a
 388     * backend isn't backed by hugepages.
 389     */
 390    memdev_root = object_resolve_path("/objects", NULL);
 391    if (memdev_root) {
 392        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 393    }
 394    if (hpsize == LONG_MAX) {
 395        /* No additional memory regions found ==> Report main RAM page size */
 396        return mainrampagesize;
 397    }
 398
 399    /* If NUMA is disabled or the NUMA nodes are not backed with a
 400     * memory-backend, then there is at least one node using "normal" RAM,
 401     * so if its page size is smaller we have got to report that size instead.
 402     */
 403    if (hpsize > mainrampagesize &&
 404        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
 405        static bool warned;
 406        if (!warned) {
 407            error_report("Huge page support disabled (n/a for main memory).");
 408            warned = true;
 409        }
 410        return mainrampagesize;
 411    }
 412
 413    return hpsize;
 414}
 415
 416static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 417{
 418    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 419        return true;
 420    }
 421
 422    return (1ul << shift) <= rampgsize;
 423}
 424
 425static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 426{
 427    static struct kvm_ppc_smmu_info smmu_info;
 428    static bool has_smmu_info;
 429    CPUPPCState *env = &cpu->env;
 430    long rampagesize;
 431    int iq, ik, jq, jk;
 432    bool has_64k_pages = false;
 433
 434    /* We only handle page sizes for 64-bit server guests for now */
 435    if (!(env->mmu_model & POWERPC_MMU_64)) {
 436        return;
 437    }
 438
 439    /* Collect MMU info from kernel if not already */
 440    if (!has_smmu_info) {
 441        kvm_get_smmu_info(cpu, &smmu_info);
 442        has_smmu_info = true;
 443    }
 444
 445    rampagesize = getrampagesize();
 446
 447    /* Convert to QEMU form */
 448    memset(&env->sps, 0, sizeof(env->sps));
 449
 450    /* If we have HV KVM, we need to forbid CI large pages if our
 451     * host page size is smaller than 64K.
 452     */
 453    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 454        env->ci_large_pages = getpagesize() >= 0x10000;
 455    }
 456
 457    /*
 458     * XXX This loop should be an entry wide AND of the capabilities that
 459     *     the selected CPU has with the capabilities that KVM supports.
 460     */
 461    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 462        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 463        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 464
 465        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 466                                 ksps->page_shift)) {
 467            continue;
 468        }
 469        qsps->page_shift = ksps->page_shift;
 470        qsps->slb_enc = ksps->slb_enc;
 471        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 472            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 473                                     ksps->enc[jk].page_shift)) {
 474                continue;
 475            }
 476            if (ksps->enc[jk].page_shift == 16) {
 477                has_64k_pages = true;
 478            }
 479            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 480            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 481            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 482                break;
 483            }
 484        }
 485        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 486            break;
 487        }
 488    }
 489    env->slb_nr = smmu_info.slb_size;
 490    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 491        env->mmu_model &= ~POWERPC_MMU_1TSEG;
 492    }
 493    if (!has_64k_pages) {
 494        env->mmu_model &= ~POWERPC_MMU_64K;
 495    }
 496}
 497#else /* defined (TARGET_PPC64) */
 498
 499static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 500{
 501}
 502
 503#endif /* !defined (TARGET_PPC64) */
 504
 505unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 506{
 507    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 508}
 509
 510/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 511 * book3s supports only 1 watchpoint, so an array size
 512 * of 4 is sufficient for now.
 513 */
 514#define MAX_HW_BKPTS 4
 515
 516static struct HWBreakpoint {
 517    target_ulong addr;
 518    int type;
 519} hw_debug_points[MAX_HW_BKPTS];
 520
 521static CPUWatchpoint hw_watchpoint;
 522
 523/* By default, no breakpoints or watchpoints are supported */
 524static int max_hw_breakpoint;
 525static int max_hw_watchpoint;
 526static int nb_hw_breakpoint;
 527static int nb_hw_watchpoint;
 528
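    /* Record how many h/w breakpoints and watchpoints this CPU model provides (BookE: 2 + 2) */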
 529static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 530{
 531    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 532        max_hw_breakpoint = 2;
 533        max_hw_watchpoint = 2;
 534    }
 535
 536    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 537        fprintf(stderr, "Error initializing h/w breakpoints\n");
 538        return;
 539    }
 540}
 541
 542int kvm_arch_init_vcpu(CPUState *cs)
 543{
 544    PowerPCCPU *cpu = POWERPC_CPU(cs);
 545    CPUPPCState *cenv = &cpu->env;
 546    int ret;
 547
 548    /* Gather server mmu info from KVM and update the CPU state */
 549    kvm_fixup_page_sizes(cpu);
 550
 551    /* Synchronize sregs with kvm */
 552    ret = kvm_arch_sync_sregs(cpu);
 553    if (ret) {
 554        if (ret == -EINVAL) {
 555            error_report("Register sync failed... If you're using kvm-hv.ko,"
 556                         " only \"-cpu host\" is possible");
 557        }
 558        return ret;
 559    }
 560
 561    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 562
 563    /* Some targets support access to KVM's guest TLB. */
 564    switch (cenv->mmu_model) {
 565    case POWERPC_MMU_BOOKE206:
 566        ret = kvm_booke206_tlb_init(cpu);
 567        break;
 568    default:
 569        break;
 570    }
 571
 572    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 573    kvmppc_hw_debug_points_init(cenv);
 574
 575    return ret;
 576}
 577
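    /* Flush QEMU's shadow TLB to KVM by marking every entry dirty */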
 578static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 579{
 580    CPUPPCState *env = &cpu->env;
 581    CPUState *cs = CPU(cpu);
 582    struct kvm_dirty_tlb dirty_tlb;
 583    unsigned char *bitmap;
 584    int ret;
 585
 586    if (!env->kvm_sw_tlb) {
 587        return;
 588    }
 589
 590    bitmap = g_malloc((env->nb_tlb + 7) / 8);
 591    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 592
 593    dirty_tlb.bitmap = (uintptr_t)bitmap;
 594    dirty_tlb.num_dirty = env->nb_tlb;
 595
 596    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 597    if (ret) {
 598        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 599                __func__, strerror(-ret));
 600    }
 601
 602    g_free(bitmap);
 603}
 604
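    /* Read a single SPR from KVM via KVM_GET_ONE_REG and store it in env->spr[] */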
 605static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 606{
 607    PowerPCCPU *cpu = POWERPC_CPU(cs);
 608    CPUPPCState *env = &cpu->env;
 609    union {
 610        uint32_t u32;
 611        uint64_t u64;
 612    } val;
 613    struct kvm_one_reg reg = {
 614        .id = id,
 615        .addr = (uintptr_t) &val,
 616    };
 617    int ret;
 618
 619    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 620    if (ret != 0) {
 621        trace_kvm_failed_spr_get(spr, strerror(errno));
 622    } else {
 623        switch (id & KVM_REG_SIZE_MASK) {
 624        case KVM_REG_SIZE_U32:
 625            env->spr[spr] = val.u32;
 626            break;
 627
 628        case KVM_REG_SIZE_U64:
 629            env->spr[spr] = val.u64;
 630            break;
 631
 632        default:
 633            /* Don't handle this size yet */
 634            abort();
 635        }
 636    }
 637}
 638
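    /* Write a single SPR from env->spr[] to KVM via KVM_SET_ONE_REG */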
 639static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 640{
 641    PowerPCCPU *cpu = POWERPC_CPU(cs);
 642    CPUPPCState *env = &cpu->env;
 643    union {
 644        uint32_t u32;
 645        uint64_t u64;
 646    } val;
 647    struct kvm_one_reg reg = {
 648        .id = id,
 649        .addr = (uintptr_t) &val,
 650    };
 651    int ret;
 652
 653    switch (id & KVM_REG_SIZE_MASK) {
 654    case KVM_REG_SIZE_U32:
 655        val.u32 = env->spr[spr];
 656        break;
 657
 658    case KVM_REG_SIZE_U64:
 659        val.u64 = env->spr[spr];
 660        break;
 661
 662    default:
 663        /* Don't handle this size yet */
 664        abort();
 665    }
 666
 667    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 668    if (ret != 0) {
 669        trace_kvm_failed_spr_set(spr, strerror(errno));
 670    }
 671}
 672
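    /* Upload FPSCR, FPR/VSR and Altivec state to KVM, one ONE_REG at a time */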
 673static int kvm_put_fp(CPUState *cs)
 674{
 675    PowerPCCPU *cpu = POWERPC_CPU(cs);
 676    CPUPPCState *env = &cpu->env;
 677    struct kvm_one_reg reg;
 678    int i;
 679    int ret;
 680
 681    if (env->insns_flags & PPC_FLOAT) {
 682        uint64_t fpscr = env->fpscr;
 683        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 684
 685        reg.id = KVM_REG_PPC_FPSCR;
 686        reg.addr = (uintptr_t)&fpscr;
 687        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 688        if (ret < 0) {
 689            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 690            return ret;
 691        }
 692
 693        for (i = 0; i < 32; i++) {
 694            uint64_t vsr[2];
 695
 696#ifdef HOST_WORDS_BIGENDIAN
 697            vsr[0] = float64_val(env->fpr[i]);
 698            vsr[1] = env->vsr[i];
 699#else
 700            vsr[0] = env->vsr[i];
 701            vsr[1] = float64_val(env->fpr[i]);
 702#endif
 703            reg.addr = (uintptr_t) &vsr;
 704            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 705
 706            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 707            if (ret < 0) {
 708                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 709                        i, strerror(errno));
 710                return ret;
 711            }
 712        }
 713    }
 714
 715    if (env->insns_flags & PPC_ALTIVEC) {
 716        reg.id = KVM_REG_PPC_VSCR;
 717        reg.addr = (uintptr_t)&env->vscr;
 718        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 719        if (ret < 0) {
 720            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 721            return ret;
 722        }
 723
 724        for (i = 0; i < 32; i++) {
 725            reg.id = KVM_REG_PPC_VR(i);
 726            reg.addr = (uintptr_t)&env->avr[i];
 727            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 728            if (ret < 0) {
 729                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 730                return ret;
 731            }
 732        }
 733    }
 734
 735    return 0;
 736}
 737
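    /* Fetch FPSCR, FPR/VSR and Altivec state back from KVM */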
 738static int kvm_get_fp(CPUState *cs)
 739{
 740    PowerPCCPU *cpu = POWERPC_CPU(cs);
 741    CPUPPCState *env = &cpu->env;
 742    struct kvm_one_reg reg;
 743    int i;
 744    int ret;
 745
 746    if (env->insns_flags & PPC_FLOAT) {
 747        uint64_t fpscr;
 748        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 749
 750        reg.id = KVM_REG_PPC_FPSCR;
 751        reg.addr = (uintptr_t)&fpscr;
 752        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 753        if (ret < 0) {
 754            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 755            return ret;
 756        } else {
 757            env->fpscr = fpscr;
 758        }
 759
 760        for (i = 0; i < 32; i++) {
 761            uint64_t vsr[2];
 762
 763            reg.addr = (uintptr_t) &vsr;
 764            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 765
 766            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 767            if (ret < 0) {
 768                DPRINTF("Unable to get %s%d from KVM: %s\n",
 769                        vsx ? "VSR" : "FPR", i, strerror(errno));
 770                return ret;
 771            } else {
 772#ifdef HOST_WORDS_BIGENDIAN
 773                env->fpr[i] = vsr[0];
 774                if (vsx) {
 775                    env->vsr[i] = vsr[1];
 776                }
 777#else
 778                env->fpr[i] = vsr[1];
 779                if (vsx) {
 780                    env->vsr[i] = vsr[0];
 781                }
 782#endif
 783            }
 784        }
 785    }
 786
 787    if (env->insns_flags & PPC_ALTIVEC) {
 788        reg.id = KVM_REG_PPC_VSCR;
 789        reg.addr = (uintptr_t)&env->vscr;
 790        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 791        if (ret < 0) {
 792            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 793            return ret;
 794        }
 795
 796        for (i = 0; i < 32; i++) {
 797            reg.id = KVM_REG_PPC_VR(i);
 798            reg.addr = (uintptr_t)&env->avr[i];
 799            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 800            if (ret < 0) {
 801                DPRINTF("Unable to get VR%d from KVM: %s\n",
 802                        i, strerror(errno));
 803                return ret;
 804            }
 805        }
 806    }
 807
 808    return 0;
 809}
 810
 811#if defined(TARGET_PPC64)
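    /* Read the VPA, SLB shadow and dispatch trace log registrations back from KVM */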
 812static int kvm_get_vpa(CPUState *cs)
 813{
 814    PowerPCCPU *cpu = POWERPC_CPU(cs);
 815    CPUPPCState *env = &cpu->env;
 816    struct kvm_one_reg reg;
 817    int ret;
 818
 819    reg.id = KVM_REG_PPC_VPA_ADDR;
 820    reg.addr = (uintptr_t)&env->vpa_addr;
 821    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 822    if (ret < 0) {
 823        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 824        return ret;
 825    }
 826
 827    assert((uintptr_t)&env->slb_shadow_size
 828           == ((uintptr_t)&env->slb_shadow_addr + 8));
 829    reg.id = KVM_REG_PPC_VPA_SLB;
 830    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 831    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 832    if (ret < 0) {
 833        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 834                strerror(errno));
 835        return ret;
 836    }
 837
 838    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 839    reg.id = KVM_REG_PPC_VPA_DTL;
 840    reg.addr = (uintptr_t)&env->dtl_addr;
 841    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 842    if (ret < 0) {
 843        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 844                strerror(errno));
 845        return ret;
 846    }
 847
 848    return 0;
 849}
 850
 851static int kvm_put_vpa(CPUState *cs)
 852{
 853    PowerPCCPU *cpu = POWERPC_CPU(cs);
 854    CPUPPCState *env = &cpu->env;
 855    struct kvm_one_reg reg;
 856    int ret;
 857
 858    /* SLB shadow or DTL can't be registered unless a master VPA is
 859     * registered.  That means when restoring state, if a VPA *is*
 860     * registered, we need to set that up first.  If not, we need to
 861     * deregister the others before deregistering the master VPA */
 862    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 863
 864    if (env->vpa_addr) {
 865        reg.id = KVM_REG_PPC_VPA_ADDR;
 866        reg.addr = (uintptr_t)&env->vpa_addr;
 867        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 868        if (ret < 0) {
 869            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 870            return ret;
 871        }
 872    }
 873
 874    assert((uintptr_t)&env->slb_shadow_size
 875           == ((uintptr_t)&env->slb_shadow_addr + 8));
 876    reg.id = KVM_REG_PPC_VPA_SLB;
 877    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 878    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 879    if (ret < 0) {
 880        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 881        return ret;
 882    }
 883
 884    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 885    reg.id = KVM_REG_PPC_VPA_DTL;
 886    reg.addr = (uintptr_t)&env->dtl_addr;
 887    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 888    if (ret < 0) {
 889        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 890                strerror(errno));
 891        return ret;
 892    }
 893
 894    if (!env->vpa_addr) {
 895        reg.id = KVM_REG_PPC_VPA_ADDR;
 896        reg.addr = (uintptr_t)&env->vpa_addr;
 897        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 898        if (ret < 0) {
 899            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 900            return ret;
 901        }
 902    }
 903
 904    return 0;
 905}
 906#endif /* TARGET_PPC64 */
 907
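    /* Push PVR, SDR1, SLB, segment registers and BATs to KVM (Book S sregs) */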
 908int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 909{
 910    CPUPPCState *env = &cpu->env;
 911    struct kvm_sregs sregs;
 912    int i;
 913
 914    sregs.pvr = env->spr[SPR_PVR];
 915
 916    sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 917
 918    /* Sync SLB */
 919#ifdef TARGET_PPC64
 920    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 921        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 922        if (env->slb[i].esid & SLB_ESID_V) {
 923            sregs.u.s.ppc64.slb[i].slbe |= i;
 924        }
 925        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 926    }
 927#endif
 928
 929    /* Sync SRs */
 930    for (i = 0; i < 16; i++) {
 931        sregs.u.s.ppc32.sr[i] = env->sr[i];
 932    }
 933
 934    /* Sync BATs */
 935    for (i = 0; i < 8; i++) {
 936        /* Beware. We have to swap upper and lower bits here */
 937        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 938            | env->DBAT[1][i];
 939        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 940            | env->IBAT[1][i];
 941    }
 942
 943    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 944}
 945
 946int kvm_arch_put_registers(CPUState *cs, int level)
 947{
 948    PowerPCCPU *cpu = POWERPC_CPU(cs);
 949    CPUPPCState *env = &cpu->env;
 950    struct kvm_regs regs;
 951    int ret;
 952    int i;
 953
 954    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 955    if (ret < 0) {
 956        return ret;
 957    }
 958
 959    regs.ctr = env->ctr;
 960    regs.lr  = env->lr;
 961    regs.xer = cpu_read_xer(env);
 962    regs.msr = env->msr;
 963    regs.pc = env->nip;
 964
 965    regs.srr0 = env->spr[SPR_SRR0];
 966    regs.srr1 = env->spr[SPR_SRR1];
 967
 968    regs.sprg0 = env->spr[SPR_SPRG0];
 969    regs.sprg1 = env->spr[SPR_SPRG1];
 970    regs.sprg2 = env->spr[SPR_SPRG2];
 971    regs.sprg3 = env->spr[SPR_SPRG3];
 972    regs.sprg4 = env->spr[SPR_SPRG4];
 973    regs.sprg5 = env->spr[SPR_SPRG5];
 974    regs.sprg6 = env->spr[SPR_SPRG6];
 975    regs.sprg7 = env->spr[SPR_SPRG7];
 976
 977    regs.pid = env->spr[SPR_BOOKE_PID];
 978
 979    for (i = 0; i < 32; i++)
 980        regs.gpr[i] = env->gpr[i];
 981
 982    regs.cr = 0;
 983    for (i = 0; i < 8; i++) {
 984        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 985    }
 986
 987    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 988    if (ret < 0)
 989        return ret;
 990
 991    kvm_put_fp(cs);
 992
 993    if (env->tlb_dirty) {
 994        kvm_sw_tlb_put(cpu);
 995        env->tlb_dirty = false;
 996    }
 997
 998    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 999        ret = kvmppc_put_books_sregs(cpu);
1000        if (ret < 0) {
1001            return ret;
1002        }
1003    }
1004
1005    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1006        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1007    }
1008
1009    if (cap_one_reg) {
1010        int i;
1011
1012        /* We deliberately ignore errors here, for kernels which have
1013         * the ONE_REG calls, but don't support the specific
1014         * registers, there's a reasonable chance things will still
1015         * work, at least until we try to migrate. */
1016        for (i = 0; i < 1024; i++) {
1017            uint64_t id = env->spr_cb[i].one_reg_id;
1018
1019            if (id != 0) {
1020                kvm_put_one_spr(cs, id, i);
1021            }
1022        }
1023
1024#ifdef TARGET_PPC64
1025        if (msr_ts) {
1026            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1027                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1028            }
1029            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1030                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1031            }
1032            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1033            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1034            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1035            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1036            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1037            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1038            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1039            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1040            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1041            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1042        }
1043
1044        if (cap_papr) {
1045            if (kvm_put_vpa(cs) < 0) {
1046                DPRINTF("Warning: Unable to set VPA information to KVM\n");
1047            }
1048        }
1049
1050        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1051#endif /* TARGET_PPC64 */
1052    }
1053
1054    return ret;
1055}
1056
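    /* Recompute a BookE exception vector from its IVOR and the IVPR base */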
1057static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1058{
1059     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1060}
1061
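    /* Read the BookE sregs from KVM and update the corresponding SPRs and vectors */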
1062static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1063{
1064    CPUPPCState *env = &cpu->env;
1065    struct kvm_sregs sregs;
1066    int ret;
1067
1068    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1069    if (ret < 0) {
1070        return ret;
1071    }
1072
1073    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1074        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1075        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1076        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1077        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1078        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1079        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1080        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1081        env->spr[SPR_DECR] = sregs.u.e.dec;
1082        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1083        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1084        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1085    }
1086
1087    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1088        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1089        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1090        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1091        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1092        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1093    }
1094
1095    if (sregs.u.e.features & KVM_SREGS_E_64) {
1096        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1097    }
1098
1099    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1100        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1101    }
1102
1103    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1104        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1105        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1106        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1107        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1108        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1109        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1110        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1111        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1112        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1113        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1114        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1115        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1116        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1117        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1118        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1119        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1120        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1121        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1122        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1123        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1124        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1125        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1126        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1127        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1128        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1129        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1130        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1131        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1132        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1133        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1134        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1135        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1136
1137        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1138            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1139            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1140            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1141            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1142            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1143            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1144        }
1145
1146        if (sregs.u.e.features & KVM_SREGS_E_PM) {
1147            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1148            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1149        }
1150
1151        if (sregs.u.e.features & KVM_SREGS_E_PC) {
1152            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1153            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1154            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1155            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1156        }
1157    }
1158
1159    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1160        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1161        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1162        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1163        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1164        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1165        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1166        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1167        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1168        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1169        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1170    }
1171
1172    if (sregs.u.e.features & KVM_SREGS_EXP) {
1173        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1174    }
1175
1176    if (sregs.u.e.features & KVM_SREGS_E_PD) {
1177        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1178        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1179    }
1180
1181    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1182        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1183        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1184        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1185
1186        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1187            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1188            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1189        }
1190    }
1191
1192    return 0;
1193}
1194
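    /* Read the Book S sregs (SDR1, SLB, SRs, BATs) back from KVM */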
1195static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1196{
1197    CPUPPCState *env = &cpu->env;
1198    struct kvm_sregs sregs;
1199    int ret;
1200    int i;
1201
1202    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1203    if (ret < 0) {
1204        return ret;
1205    }
1206
1207    if (!env->external_htab) {
1208        ppc_store_sdr1(env, sregs.u.s.sdr1);
1209    }
1210
1211    /* Sync SLB */
1212#ifdef TARGET_PPC64
1213    /*
1214     * The packed SLB array we get from KVM_GET_SREGS only contains
1215     * information about valid entries. So we flush our internal copy
1216     * to get rid of stale ones, then put all valid SLB entries back
1217     * in.
1218     */
1219    memset(env->slb, 0, sizeof(env->slb));
1220    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1221        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1222        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1223        /*
1224         * Only restore valid entries
1225         */
1226        if (rb & SLB_ESID_V) {
1227            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1228        }
1229    }
1230#endif
1231
1232    /* Sync SRs */
1233    for (i = 0; i < 16; i++) {
1234        env->sr[i] = sregs.u.s.ppc32.sr[i];
1235    }
1236
1237    /* Sync BATs */
1238    for (i = 0; i < 8; i++) {
1239        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1240        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1241        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1242        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1243    }
1244
1245    return 0;
1246}
1247
1248int kvm_arch_get_registers(CPUState *cs)
1249{
1250    PowerPCCPU *cpu = POWERPC_CPU(cs);
1251    CPUPPCState *env = &cpu->env;
1252    struct kvm_regs regs;
1253    uint32_t cr;
1254    int i, ret;
1255
1256    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1257    if (ret < 0)
1258        return ret;
1259
1260    cr = regs.cr;
1261    for (i = 7; i >= 0; i--) {
1262        env->crf[i] = cr & 15;
1263        cr >>= 4;
1264    }
1265
1266    env->ctr = regs.ctr;
1267    env->lr = regs.lr;
1268    cpu_write_xer(env, regs.xer);
1269    env->msr = regs.msr;
1270    env->nip = regs.pc;
1271
1272    env->spr[SPR_SRR0] = regs.srr0;
1273    env->spr[SPR_SRR1] = regs.srr1;
1274
1275    env->spr[SPR_SPRG0] = regs.sprg0;
1276    env->spr[SPR_SPRG1] = regs.sprg1;
1277    env->spr[SPR_SPRG2] = regs.sprg2;
1278    env->spr[SPR_SPRG3] = regs.sprg3;
1279    env->spr[SPR_SPRG4] = regs.sprg4;
1280    env->spr[SPR_SPRG5] = regs.sprg5;
1281    env->spr[SPR_SPRG6] = regs.sprg6;
1282    env->spr[SPR_SPRG7] = regs.sprg7;
1283
1284    env->spr[SPR_BOOKE_PID] = regs.pid;
1285
1286    for (i = 0; i < 32; i++)
1287        env->gpr[i] = regs.gpr[i];
1288
1289    kvm_get_fp(cs);
1290
1291    if (cap_booke_sregs) {
1292        ret = kvmppc_get_booke_sregs(cpu);
1293        if (ret < 0) {
1294            return ret;
1295        }
1296    }
1297
1298    if (cap_segstate) {
1299        ret = kvmppc_get_books_sregs(cpu);
1300        if (ret < 0) {
1301            return ret;
1302        }
1303    }
1304
1305    if (cap_hior) {
1306        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1307    }
1308
1309    if (cap_one_reg) {
1310        int i;
1311
1312        /* We deliberately ignore errors here, for kernels which have
1313         * the ONE_REG calls, but don't support the specific
1314         * registers, there's a reasonable chance things will still
1315         * work, at least until we try to migrate. */
1316        for (i = 0; i < 1024; i++) {
1317            uint64_t id = env->spr_cb[i].one_reg_id;
1318
1319            if (id != 0) {
1320                kvm_get_one_spr(cs, id, i);
1321            }
1322        }
1323
1324#ifdef TARGET_PPC64
1325        if (msr_ts) {
1326            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1327                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1328            }
1329            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1330                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1331            }
1332            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1333            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1334            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1335            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1336            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1337            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1338            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1339            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1340            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1341            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1342        }
1343
1344        if (cap_papr) {
1345            if (kvm_get_vpa(cs) < 0) {
1346                DPRINTF("Warning: Unable to get VPA information from KVM\n");
1347            }
1348        }
1349
1350        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1351#endif
1352    }
1353
1354    return 0;
1355}
1356
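    /* Raise or lower the guest's external interrupt line via KVM_INTERRUPT */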
1357int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1358{
1359    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1360
1361    if (irq != PPC_INTERRUPT_EXT) {
1362        return 0;
1363    }
1364
1365    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1366        return 0;
1367    }
1368
1369    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1370
1371    return 0;
1372}
1373
1374#if defined(TARGET_PPCEMB)
1375#define PPC_INPUT_INT PPC40x_INPUT_INT
1376#elif defined(TARGET_PPC64)
1377#define PPC_INPUT_INT PPC970_INPUT_INT
1378#else
1379#define PPC_INPUT_INT PPC6xx_INPUT_INT
1380#endif
1381
1382void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1383{
1384    PowerPCCPU *cpu = POWERPC_CPU(cs);
1385    CPUPPCState *env = &cpu->env;
1386    int r;
1387    unsigned irq;
1388
1389    qemu_mutex_lock_iothread();
1390
1391    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1392     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1393    if (!cap_interrupt_level &&
1394        run->ready_for_interrupt_injection &&
1395        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1396        (env->irq_input_state & (1<<PPC_INPUT_INT)))
1397    {
1398        /* For now KVM disregards the 'irq' argument. However, in the
1399         * future KVM could cache it in-kernel to avoid a heavyweight exit
1400         * when reading the UIC.
1401         */
1402        irq = KVM_INTERRUPT_SET;
1403
1404        DPRINTF("injected interrupt %d\n", irq);
1405        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1406        if (r < 0) {
1407            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1408        }
1409
1410        /* Always wake up soon in case the interrupt was level based */
1411        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1412                       (NANOSECONDS_PER_SECOND / 50));
1413    }
1414
1415    /* We don't know if there are more interrupts pending after this. However,
1416     * the guest will return to userspace in the course of handling this one
1417     * anyway, so we will get a chance to deliver the rest. */
1418
1419    qemu_mutex_unlock_iothread();
1420}
1421
1422MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1423{
1424    return MEMTXATTRS_UNSPECIFIED;
1425}
1426
1427int kvm_arch_process_async_events(CPUState *cs)
1428{
1429    return cs->halted;
1430}
1431
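    /* The guest idled: halt the vCPU unless an interrupt is pending or MSR_EE is clear */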
1432static int kvmppc_handle_halt(PowerPCCPU *cpu)
1433{
1434    CPUState *cs = CPU(cpu);
1435    CPUPPCState *env = &cpu->env;
1436
1437    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1438        cs->halted = 1;
1439        cs->exception_index = EXCP_HLT;
1440    }
1441
1442    return 0;
1443}
1444
1445/* map dcr access to existing qemu dcr emulation */
1446static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1447{
1448    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1449        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
1450
1451    return 0;
1452}
1453
1454static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1455{
1456    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1457        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1458
1459    return 0;
1460}
1461
1462int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1463{
1464    /* Mixed endian case is not handled */
1465    uint32_t sc = debug_inst_opcode;
1466
1467    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1468                            sizeof(sc), 0) ||
1469        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1470        return -EINVAL;
1471    }
1472
1473    return 0;
1474}
1475
1476int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1477{
1478    uint32_t sc;
1479
1480    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1481        sc != debug_inst_opcode ||
1482        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1483                            sizeof(sc), 1)) {
1484        return -EINVAL;
1485    }
1486
1487    return 0;
1488}
1489
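    /* Return the index of the registered h/w debug point matching addr and type, or -1 */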
1490static int find_hw_breakpoint(target_ulong addr, int type)
1491{
1492    int n;
1493
1494    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1495           <= ARRAY_SIZE(hw_debug_points));
1496
1497    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1498        if (hw_debug_points[n].addr == addr &&
1499             hw_debug_points[n].type == type) {
1500            return n;
1501        }
1502    }
1503
1504    return -1;
1505}
1506
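    /* Find a watchpoint at addr, checking access, write and read types in turn */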
1507static int find_hw_watchpoint(target_ulong addr, int *flag)
1508{
1509    int n;
1510
1511    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1512    if (n >= 0) {
1513        *flag = BP_MEM_ACCESS;
1514        return n;
1515    }
1516
1517    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1518    if (n >= 0) {
1519        *flag = BP_MEM_WRITE;
1520        return n;
1521    }
1522
1523    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1524    if (n >= 0) {
1525        *flag = BP_MEM_READ;
1526        return n;
1527    }
1528
1529    return -1;
1530}
1531
1532int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1533                                  target_ulong len, int type)
1534{
1535    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1536        return -ENOBUFS;
1537    }
1538
1539    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1540    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1541
1542    switch (type) {
1543    case GDB_BREAKPOINT_HW:
1544        if (nb_hw_breakpoint >= max_hw_breakpoint) {
1545            return -ENOBUFS;
1546        }
1547
1548        if (find_hw_breakpoint(addr, type) >= 0) {
1549            return -EEXIST;
1550        }
1551
1552        nb_hw_breakpoint++;
1553        break;
1554
1555    case GDB_WATCHPOINT_WRITE:
1556    case GDB_WATCHPOINT_READ:
1557    case GDB_WATCHPOINT_ACCESS:
1558        if (nb_hw_watchpoint >= max_hw_watchpoint) {
1559            return -ENOBUFS;
1560        }
1561
1562        if (find_hw_breakpoint(addr, type) >= 0) {
1563            return -EEXIST;
1564        }
1565
1566        nb_hw_watchpoint++;
1567        break;
1568
1569    default:
1570        return -ENOSYS;
1571    }
1572
1573    return 0;
1574}
1575
1576int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1577                                  target_ulong len, int type)
1578{
1579    int n;
1580
1581    n = find_hw_breakpoint(addr, type);
1582    if (n < 0) {
1583        return -ENOENT;
1584    }
1585
1586    switch (type) {
1587    case GDB_BREAKPOINT_HW:
1588        nb_hw_breakpoint--;
1589        break;
1590
1591    case GDB_WATCHPOINT_WRITE:
1592    case GDB_WATCHPOINT_READ:
1593    case GDB_WATCHPOINT_ACCESS:
1594        nb_hw_watchpoint--;
1595        break;
1596
1597    default:
1598        return -ENOSYS;
1599    }
1600    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1601
1602    return 0;
1603}
1604
1605void kvm_arch_remove_all_hw_breakpoints(void)
1606{
1607    nb_hw_breakpoint = nb_hw_watchpoint = 0;
1608}
1609
1610void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1611{
1612    int n;
1613
1614    /* Software Breakpoint updates */
1615    if (kvm_sw_breakpoints_active(cs)) {
1616        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1617    }
1618
1619    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1620           <= ARRAY_SIZE(hw_debug_points));
1621    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1622
1623    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1624        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1625        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1626        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1627            switch (hw_debug_points[n].type) {
1628            case GDB_BREAKPOINT_HW:
1629                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1630                break;
1631            case GDB_WATCHPOINT_WRITE:
1632                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1633                break;
1634            case GDB_WATCHPOINT_READ:
1635                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1636                break;
1637            case GDB_WATCHPOINT_ACCESS:
1638                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1639                                        KVMPPC_DEBUG_WATCH_READ;
1640                break;
1641            default:
1642                cpu_abort(cs, "Unsupported breakpoint type\n");
1643            }
1644            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1645        }
1646    }
1647}
1648
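    /* Decide whether a KVM_EXIT_DEBUG belongs to QEMU (gdbstub) or must be reflected into the guest */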
1649static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1650{
1651    CPUState *cs = CPU(cpu);
1652    CPUPPCState *env = &cpu->env;
1653    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1654    int handle = 0;
1655    int n;
1656    int flag = 0;
1657
1658    if (cs->singlestep_enabled) {
1659        handle = 1;
1660    } else if (arch_info->status) {
1661        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1662            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1663                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1664                if (n >= 0) {
1665                    handle = 1;
1666                }
1667            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1668                                            KVMPPC_DEBUG_WATCH_WRITE)) {
1669                n = find_hw_watchpoint(arch_info->address,  &flag);
1670                if (n >= 0) {
1671                    handle = 1;
1672                    cs->watchpoint_hit = &hw_watchpoint;
1673                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
1674                    hw_watchpoint.flags = flag;
1675                }
1676            }
1677        }
1678    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1679        handle = 1;
1680    } else {
1681        /* QEMU is not able to handle this debug exception, so inject
1682         * a program exception into the guest;
1683         * yes, a program exception, NOT a debug exception!
1684         * When QEMU uses the debug resources, the debug exception must
1685         * always be enabled. To achieve this we set MSR_DE and also set
1686         * MSRP_DEP so the guest cannot change MSR_DE.
1687         * When emulating debug resources for the guest, we want the guest
1688         * to control MSR_DE (enable/disable the debug interrupt on demand).
1689         * Supporting both configurations at once is NOT possible.
1690         * So the result is that we cannot share debug resources
1691         * between QEMU and the guest on the BookE architecture.
1692         * In the current design QEMU gets priority over the guest;
1693         * this means that if QEMU is using the debug resources then the
1694         * guest cannot use them.
1695         * For software breakpoints QEMU uses a privileged instruction,
1696         * so there is no way we can be here because the guest raised a
1697         * debug exception; the only possibility is that the guest executed
1698         * a privileged / illegal instruction, and that is why we are
1699         * injecting a program interrupt.
1700         */
1701
1702        cpu_synchronize_state(cs);
1703        /* env->nip is the PC, so advance it by 4 before calling
1704         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1705         */
1706        env->nip += 4;
1707        cs->exception_index = POWERPC_EXCP_PROGRAM;
1708        env->error_code = POWERPC_EXCP_INVAL;
1709        ppc_cpu_do_interrupt(cs);
1710    }
1711
1712    return handle;
1713}
1714
1715int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1716{
1717    PowerPCCPU *cpu = POWERPC_CPU(cs);
1718    CPUPPCState *env = &cpu->env;
1719    int ret;
1720
1721    qemu_mutex_lock_iothread();
1722
1723    switch (run->exit_reason) {
1724    case KVM_EXIT_DCR:
1725        if (run->dcr.is_write) {
1726            DPRINTF("handle dcr write\n");
1727            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1728        } else {
1729            DPRINTF("handle dcr read\n");
1730            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1731        }
1732        break;
1733    case KVM_EXIT_HLT:
1734        DPRINTF("handle halt\n");
1735        ret = kvmppc_handle_halt(cpu);
1736        break;
1737#if defined(TARGET_PPC64)
1738    case KVM_EXIT_PAPR_HCALL:
1739        DPRINTF("handle PAPR hypercall\n");
1740        run->papr_hcall.ret = spapr_hypercall(cpu,
1741                                              run->papr_hcall.nr,
1742                                              run->papr_hcall.args);
1743        ret = 0;
1744        break;
1745#endif
1746    case KVM_EXIT_EPR:
1747        DPRINTF("handle epr\n");
1748        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1749        ret = 0;
1750        break;
1751    case KVM_EXIT_WATCHDOG:
1752        DPRINTF("handle watchdog expiry\n");
1753        watchdog_perform_action();
1754        ret = 0;
1755        break;
1756
1757    case KVM_EXIT_DEBUG:
1758        DPRINTF("handle debug exception\n");
1759        if (kvm_handle_debug(cpu, run)) {
1760            ret = EXCP_DEBUG;
1761            break;
1762        }
1763        /* re-enter, this exception was guest-internal */
1764        ret = 0;
1765        break;
1766
1767    default:
1768        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1769        ret = -1;
1770        break;
1771    }
1772
1773    qemu_mutex_unlock_iothread();
1774    return ret;
1775}
1776
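/* Set bits in the BookE Timer Status Register through the KVM one-reg
 * interface (KVM_REG_PPC_OR_TSR ORs the given bits into TSR). */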
1777int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1778{
1779    CPUState *cs = CPU(cpu);
1780    uint32_t bits = tsr_bits;
1781    struct kvm_one_reg reg = {
1782        .id = KVM_REG_PPC_OR_TSR,
1783        .addr = (uintptr_t) &bits,
1784    };
1785
1786    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1787}
1788
1789int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1790{
1791
1792    CPUState *cs = CPU(cpu);
1793    uint32_t bits = tsr_bits;
1794    struct kvm_one_reg reg = {
1795        .id = KVM_REG_PPC_CLEAR_TSR,
1796        .addr = (uintptr_t) &bits,
1797    };
1798
1799    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1800}
1801
1802int kvmppc_set_tcr(PowerPCCPU *cpu)
1803{
1804    CPUState *cs = CPU(cpu);
1805    CPUPPCState *env = &cpu->env;
1806    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1807
1808    struct kvm_one_reg reg = {
1809        .id = KVM_REG_PPC_TCR,
1810        .addr = (uintptr_t) &tcr,
1811    };
1812
1813    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1814}
1815
1816int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1817{
1818    CPUState *cs = CPU(cpu);
1819    int ret;
1820
1821    if (!kvm_enabled()) {
1822        return -1;
1823    }
1824
1825    if (!cap_ppc_watchdog) {
1826        fprintf(stderr, "warning: KVM does not support watchdog\n");
1827        return -1;
1828    }
1829
1830    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1831    if (ret < 0) {
1832        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1833                __func__, strerror(-ret));
1834        return ret;
1835    }
1836
1837    return ret;
1838}
1839
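/* Copy the first /proc/cpuinfo line that starts with @field (including
 * the field name itself) into @value.  Returns 0 on success, -1 if the
 * field is not found or the file cannot be opened. */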
1840static int read_cpuinfo(const char *field, char *value, int len)
1841{
1842    FILE *f;
1843    int ret = -1;
1844    int field_len = strlen(field);
1845    char line[512];
1846
1847    f = fopen("/proc/cpuinfo", "r");
1848    if (!f) {
1849        return -1;
1850    }
1851
1852    do {
1853        if (!fgets(line, sizeof(line), f)) {
1854            break;
1855        }
1856        if (!strncmp(line, field, field_len)) {
1857            pstrcpy(value, len, line);
1858            ret = 0;
1859            break;
1860        }
1861    } while (*line);
1862
1863    fclose(f);
1864
1865    return ret;
1866}
1867
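/* Return the host timebase frequency parsed from the "timebase" line in
 * /proc/cpuinfo, falling back to NANOSECONDS_PER_SECOND (1 GHz) if the
 * line is missing or malformed. */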
1868uint32_t kvmppc_get_tbfreq(void)
1869{
1870    char line[512];
1871    char *ns;
1872    uint32_t retval = NANOSECONDS_PER_SECOND;
1873
1874    if (read_cpuinfo("timebase", line, sizeof(line))) {
1875        return retval;
1876    }
1877
1878    if (!(ns = strchr(line, ':'))) {
1879        return retval;
1880    }
1881
1882    ns++;
1883
1884    return atoi(ns);
1885}
1886
1887bool kvmppc_get_host_serial(char **value)
1888{
1889    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1890                               NULL);
1891}
1892
1893bool kvmppc_get_host_model(char **value)
1894{
1895    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1896}
1897
1898/* Try to find a device tree node for a CPU with clock-frequency property */
1899static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1900{
1901    struct dirent *dirp;
1902    DIR *dp;
1903
1904    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1905        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1906        return -1;
1907    }
1908
1909    buf[0] = '\0';
1910    while ((dirp = readdir(dp)) != NULL) {
1911        FILE *f;
1912        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1913                 dirp->d_name);
1914        f = fopen(buf, "r");
1915        if (f) {
1916            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1917            fclose(f);
1918            break;
1919        }
1920        buf[0] = '\0';
1921    }
1922    closedir(dp);
1923    if (buf[0] == '\0') {
1924        printf("Unknown host!\n");
1925        return -1;
1926    }
1927
1928    return 0;
1929}
1930
1931static uint64_t kvmppc_read_int_dt(const char *filename)
1932{
1933    union {
1934        uint32_t v32;
1935        uint64_t v64;
1936    } u;
1937    FILE *f;
1938    int len;
1939
1940    f = fopen(filename, "rb");
1941    if (!f) {
1942        return -1;
1943    }
1944
1945    len = fread(&u, 1, sizeof(u), f);
1946    fclose(f);
1947    switch (len) {
1948    case 4:
1949        /* property is a 32-bit quantity */
1950        return be32_to_cpu(u.v32);
1951    case 8:
1952        return be64_to_cpu(u.v64);
1953    }
1954
1955    return 0;
1956}
1957
1958/* Read a CPU node property from the host device tree that's a single
1959 * integer (32-bit or 64-bit).  Returns -1 if the node or property
1960 * can't be found or opened, and 0 if the property's format isn't
1961 * understood. */
1962static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1963{
1964    char buf[PATH_MAX], *tmp;
1965    uint64_t val;
1966
1967    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1968        return -1;
1969    }
1970
1971    tmp = g_strdup_printf("%s/%s", buf, propname);
1972    val = kvmppc_read_int_dt(tmp);
1973    g_free(tmp);
1974
1975    return val;
1976}
1977
1978uint64_t kvmppc_get_clockfreq(void)
1979{
1980    return kvmppc_read_int_cpu_dt("clock-frequency");
1981}
1982
1983uint32_t kvmppc_get_vmx(void)
1984{
1985    return kvmppc_read_int_cpu_dt("ibm,vmx");
1986}
1987
1988uint32_t kvmppc_get_dfp(void)
1989{
1990    return kvmppc_read_int_cpu_dt("ibm,dfp");
1991}
1992
1993static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1994{
1995    PowerPCCPU *cpu = ppc_env_get_cpu(env);
1996    CPUState *cs = CPU(cpu);
1997
1998    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1999        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2000        return 0;
2001    }
2002
2003    return 1;
2004}
2005
2006int kvmppc_get_hasidle(CPUPPCState *env)
2007{
2008    struct kvm_ppc_pvinfo pvinfo;
2009
2010    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2011        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2012        return 1;
2013    }
2014
2015    return 0;
2016}
2017
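/* Fill @buf with the hypercall instruction sequence the guest should
 * use, as reported by KVM_PPC_GET_PVINFO; returns 0 on success.  If the
 * kernel cannot tell us, fill in a sequence that always fails and
 * return 1. */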
2018int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2019{
2020    uint32_t *hc = (uint32_t *)buf;
2021    struct kvm_ppc_pvinfo pvinfo;
2022
2023    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2024        memcpy(buf, pvinfo.hcall, buf_len);
2025        return 0;
2026    }
2027
2028    /*
2029     * Fall back to always-failing hypercalls regardless of endianness:
2030     *
2031     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2032     *     li r3, -1
2033     *     b .+8       (becomes nop in wrong endian)
2034     *     bswap32(li r3, -1)
2035     */
2036
2037    hc[0] = cpu_to_be32(0x08000048);
2038    hc[1] = cpu_to_be32(0x3860ffff);
2039    hc[2] = cpu_to_be32(0x48000008);
2040    hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2041
2042    return 1;
2043}
2044
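/* Enable in-kernel handling of a single hypercall via
 * KVM_CAP_PPC_ENABLE_HCALL (the trailing argument 1 means "enable"). */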
2045static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2046{
2047    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2048}
2049
2050void kvmppc_enable_logical_ci_hcalls(void)
2051{
2052    /*
2053     * FIXME: it would be nice if we could detect the case where
2054     * a device in use requires the in-kernel implementation of
2055     * these hcalls but the kernel lacks it, and produce a
2056     * warning in that case.
2057     */
2058    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2059    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2060}
2061
2062void kvmppc_enable_set_mode_hcall(void)
2063{
2064    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2065}
2066
2067void kvmppc_set_papr(PowerPCCPU *cpu)
2068{
2069    CPUState *cs = CPU(cpu);
2070    int ret;
2071
2072    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2073    if (ret) {
2074        error_report("This vCPU type or KVM version does not support PAPR");
2075        exit(1);
2076    }
2077
2078    /* Update the capability flag so we sync the right information
2079     * with kvm */
2080    cap_papr = 1;
2081}
2082
2083int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2084{
2085    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2086}
2087
2088void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2089{
2090    CPUState *cs = CPU(cpu);
2091    int ret;
2092
2093    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2094    if (ret && mpic_proxy) {
2095        error_report("This KVM version does not support EPR");
2096        exit(1);
2097    }
2098}
2099
2100int kvmppc_smt_threads(void)
2101{
2102    return cap_ppc_smt ? cap_ppc_smt : 1;
2103}
2104
2105#ifdef TARGET_PPC64
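/* Allocate a contiguous Real Mode Area through KVM_ALLOCATE_RMA when the
 * host requires one (cap_ppc_rma == 2) and map it into QEMU.  Returns the
 * mapped size, 0 if no explicit RMA is needed, or -1 on failure. */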
2106off_t kvmppc_alloc_rma(void **rma)
2107{
2108    off_t size;
2109    int fd;
2110    struct kvm_allocate_rma ret;
2111
2112    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2113     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2114     *                      not necessary on this hardware
2115     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2116     *
2117     * FIXME: We should allow the user to force contiguous RMA
2118     * allocation in the cap_ppc_rma==1 case.
2119     */
2120    if (cap_ppc_rma < 2) {
2121        return 0;
2122    }
2123
2124    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2125    if (fd < 0) {
2126        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2127                strerror(errno));
2128        return -1;
2129    }
2130
2131    size = MIN(ret.rma_size, 256ul << 20);
2132
2133    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2134    if (*rma == MAP_FAILED) {
2135        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2136        return -1;
2137    }
2138
2139    return size;
2140}
2141
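/* Return the usable RMA size: the requested size, clamped according to
 * the largest host page size backing guest RAM and the hash table size
 * implied by @hash_shift. */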
2142uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2143{
2144    struct kvm_ppc_smmu_info info;
2145    long rampagesize, best_page_shift;
2146    int i;
2147
2148    if (cap_ppc_rma >= 2) {
2149        return current_size;
2150    }
2151
2152    /* Find the largest hardware supported page size that's less than
2153     * or equal to the (logical) backing page size of guest RAM */
2154    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2155    rampagesize = getrampagesize();
2156    best_page_shift = 0;
2157
2158    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2159        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2160
2161        if (!sps->page_shift) {
2162            continue;
2163        }
2164
2165        if ((sps->page_shift > best_page_shift)
2166            && ((1UL << sps->page_shift) <= rampagesize)) {
2167            best_page_shift = sps->page_shift;
2168        }
2169    }
2170
2171    return MIN(current_size,
2172               1ULL << (best_page_shift + hash_shift - 7));
2173}
2174#endif
2175
2176bool kvmppc_spapr_use_multitce(void)
2177{
2178    return cap_spapr_multitce;
2179}
2180
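/* Create an in-kernel TCE table for @liobn and mmap it into QEMU.
 * Returns the mapped table with its fd stored in *pfd, or NULL (with
 * *pfd = -1) if kernel acceleration is unavailable, so the caller can
 * fall back to a userspace table. */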
2181void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2182                              bool need_vfio)
2183{
2184    struct kvm_create_spapr_tce args = {
2185        .liobn = liobn,
2186        .window_size = window_size,
2187    };
2188    long len;
2189    int fd;
2190    void *table;
2191
2192    /* Must set fd to -1 so we don't try to munmap when called for
2193     * destroying the table, which the upper layers -will- do
2194     */
2195    *pfd = -1;
2196    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2197        return NULL;
2198    }
2199
2200    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2201    if (fd < 0) {
2202        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2203                liobn);
2204        return NULL;
2205    }
2206
2207    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2208    /* FIXME: round this up to page size */
2209
2210    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2211    if (table == MAP_FAILED) {
2212        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2213                liobn);
2214        close(fd);
2215        return NULL;
2216    }
2217
2218    *pfd = fd;
2219    return table;
2220}
2221
2222int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2223{
2224    long len;
2225
2226    if (fd < 0) {
2227        return -1;
2228    }
2229
2230    len = nb_table * sizeof(uint64_t);
2231    if ((munmap(table, len) < 0) ||
2232        (close(fd) < 0)) {
2233        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2234                strerror(errno));
2235        /* Leak the table */
2236    }
2237
2238    return 0;
2239}
2240
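/* Ask the kernel to allocate (or reset) the guest hash page table.
 * Returns the log2 size of the kernel-allocated HTAB, 0 if QEMU should
 * allocate the HTAB itself, or a negative errno on failure. */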
2241int kvmppc_reset_htab(int shift_hint)
2242{
2243    uint32_t shift = shift_hint;
2244
2245    if (!kvm_enabled()) {
2246        /* Full emulation, tell caller to allocate htab itself */
2247        return 0;
2248    }
2249    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2250        int ret;
2251        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2252        if (ret == -ENOTTY) {
2253            /* At least some versions of PR KVM advertise the
2254             * capability, but don't implement the ioctl().  Oops.
2255             * Return 0 so that we allocate the htab in qemu, as is
2256             * correct for PR. */
2257            return 0;
2258        } else if (ret < 0) {
2259            return ret;
2260        }
2261        return shift;
2262    }
2263
2264    /* We have a kernel that predates the htab reset calls.  For PR
2265     * KVM, we need to allocate the htab ourselves, for an HV KVM of
2266     * this era, it has allocated a 16MB fixed size hash table
2267     * already.  Kernels of this era have the GET_PVINFO capability
2268     * only on PR, so we use this hack to determine the right
2269     * answer */
2270    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2271        /* PR - tell caller to allocate htab */
2272        return 0;
2273    } else {
2274        /* HV - assume 16MB kernel allocated htab */
2275        return 24;
2276    }
2277}
2278
2279static inline uint32_t mfpvr(void)
2280{
2281    uint32_t pvr;
2282
2283    asm ("mfpvr %0"
2284         : "=r"(pvr));
2285    return pvr;
2286}
2287
2288static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2289{
2290    if (on) {
2291        *word |= flags;
2292    } else {
2293        *word &= ~flags;
2294    }
2295}
2296
2297static void kvmppc_host_cpu_initfn(Object *obj)
2298{
2299    assert(kvm_enabled());
2300}
2301
2302static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2303{
2304    DeviceClass *dc = DEVICE_CLASS(oc);
2305    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2306    uint32_t vmx = kvmppc_get_vmx();
2307    uint32_t dfp = kvmppc_get_dfp();
2308    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2309    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2310
2311    /* Now fix up the class with information we can query from the host */
2312    pcc->pvr = mfpvr();
2313
2314    if (vmx != -1) {
2315        /* Only override when we know what the host supports */
2316        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2317        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2318    }
2319    if (dfp != -1) {
2320        /* Only override when we know what the host supports */
2321        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2322    }
2323
2324    if (dcache_size != -1) {
2325        pcc->l1_dcache_size = dcache_size;
2326    }
2327
2328    if (icache_size != -1) {
2329        pcc->l1_icache_size = icache_size;
2330    }
2331
2332    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2333    dc->cannot_destroy_with_object_finalize_yet = true;
2334}
2335
2336bool kvmppc_has_cap_epr(void)
2337{
2338    return cap_epr;
2339}
2340
2341bool kvmppc_has_cap_htab_fd(void)
2342{
2343    return cap_htab_fd;
2344}
2345
2346bool kvmppc_has_cap_fixup_hcalls(void)
2347{
2348    return cap_fixup_hcalls;
2349}
2350
2351bool kvmppc_has_cap_htm(void)
2352{
2353    return cap_htm;
2354}
2355
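/* Walk up the QOM class hierarchy to the first abstract ancestor, which
 * is the CPU family class for @pcc. */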
2356static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2357{
2358    ObjectClass *oc = OBJECT_CLASS(pcc);
2359
2360    while (oc && !object_class_is_abstract(oc)) {
2361        oc = object_class_get_parent(oc);
2362    }
2363    assert(oc);
2364
2365    return POWERPC_CPU_CLASS(oc);
2366}
2367
2368PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2369{
2370    uint32_t host_pvr = mfpvr();
2371    PowerPCCPUClass *pvr_pcc;
2372
2373    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2374    if (pvr_pcc == NULL) {
2375        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2376    }
2377
2378    return pvr_pcc;
2379}
2380
2381#if defined(TARGET_PPC64)
2382static void spapr_cpu_core_host_initfn(Object *obj)
2383{
2384    sPAPRCPUCore *core = SPAPR_CPU_CORE(obj);
2385    char *name = g_strdup_printf("%s-" TYPE_POWERPC_CPU, "host");
2386    ObjectClass *oc = object_class_by_name(name);
2387
2388    g_assert(oc);
2389    g_free((void *)name);
2390    core->cpu_class = oc;
2391}
2392#endif
2393
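/* Register the "host" CPU type derived from the host PVR, plus an alias
 * class named after the CPU family (and the matching sPAPR CPU core
 * types on ppc64). */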
2394static int kvm_ppc_register_host_cpu_type(void)
2395{
2396    TypeInfo type_info = {
2397        .name = TYPE_HOST_POWERPC_CPU,
2398        .instance_init = kvmppc_host_cpu_initfn,
2399        .class_init = kvmppc_host_cpu_class_init,
2400    };
2401    PowerPCCPUClass *pvr_pcc;
2402    DeviceClass *dc;
2403
2404    pvr_pcc = kvm_ppc_get_host_cpu_class();
2405    if (pvr_pcc == NULL) {
2406        return -1;
2407    }
2408    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2409    type_register(&type_info);
2410
2411    /* Also register a generic CPU class for the host CPU's family */
2412    pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2413    dc = DEVICE_CLASS(pvr_pcc);
2414    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2415    type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2416    type_register(&type_info);
2417
2418#if defined(TARGET_PPC64)
2419    type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2420    type_info.parent = TYPE_SPAPR_CPU_CORE;
2421    type_info.instance_size = sizeof(sPAPRCPUCore);
2422    type_info.instance_init = spapr_cpu_core_host_initfn;
2423    type_info.class_init = NULL;
2424    type_register(&type_info);
2425    g_free((void *)type_info.name);
2426
2427    /* Register a generic sPAPR CPU core class for the host CPU family */
2428    type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2429    type_register(&type_info);
2430    g_free((void *)type_info.name);
2431#endif
2432
2433    return 0;
2434}
2435
2436int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2437{
2438    struct kvm_rtas_token_args args = {
2439        .token = token,
2440    };
2441
2442    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2443        return -ENOENT;
2444    }
2445
2446    strncpy(args.name, function, sizeof(args.name));
2447
2448    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2449}
2450
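/* Obtain a file descriptor for streaming the guest hash table, for
 * reading or (if @write) writing, via KVM_PPC_GET_HTAB_FD. */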
2451int kvmppc_get_htab_fd(bool write)
2452{
2453    struct kvm_get_htab_fd s = {
2454        .flags = write ? KVM_GET_HTAB_WRITE : 0,
2455        .start_index = 0,
2456    };
2457
2458    if (!cap_htab_fd) {
2459        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2460        return -1;
2461    }
2462
2463    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2464}
2465
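/* Read HTAB chunks from the kernel htab fd and emit them to the
 * migration stream until the fd reports no more data or @max_ns
 * elapses.  Returns 1 when the whole table has been sent, 0 if more
 * remains, or a negative value on read errors. */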
2466int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2467{
2468    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2469    uint8_t buf[bufsize];
2470    ssize_t rc;
2471
2472    do {
2473        rc = read(fd, buf, bufsize);
2474        if (rc < 0) {
2475            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2476                    strerror(errno));
2477            return rc;
2478        } else if (rc) {
2479            uint8_t *buffer = buf;
2480            ssize_t n = rc;
2481            while (n) {
2482                struct kvm_get_htab_header *head =
2483                    (struct kvm_get_htab_header *) buffer;
2484                size_t chunksize = sizeof(*head) +
2485                     HASH_PTE_SIZE_64 * head->n_valid;
2486
2487                qemu_put_be32(f, head->index);
2488                qemu_put_be16(f, head->n_valid);
2489                qemu_put_be16(f, head->n_invalid);
2490                qemu_put_buffer(f, (void *)(head + 1),
2491                                HASH_PTE_SIZE_64 * head->n_valid);
2492
2493                buffer += chunksize;
2494                n -= chunksize;
2495            }
2496        }
2497    } while ((rc != 0)
2498             && ((max_ns < 0)
2499                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2500
2501    return (rc == 0) ? 1 : 0;
2502}
2503
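/* Rebuild one HTAB chunk (header plus n_valid HPTEs) from the migration
 * stream and write it back to the kernel through the htab fd. */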
2504int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2505                           uint16_t n_valid, uint16_t n_invalid)
2506{
2507    struct kvm_get_htab_header *buf;
2508    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2509    ssize_t rc;
2510
2511    buf = alloca(chunksize);
2512    buf->index = index;
2513    buf->n_valid = n_valid;
2514    buf->n_invalid = n_invalid;
2515
2516    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2517
2518    rc = write(fd, buf, chunksize);
2519    if (rc < 0) {
2520        fprintf(stderr, "Error writing KVM hash table: %s\n",
2521                strerror(errno));
2522        return rc;
2523    }
2524    if (rc != chunksize) {
2525        /* We should never get a short write on a single chunk */
2526        fprintf(stderr, "Short write, restoring KVM hash table\n");
2527        return -1;
2528    }
2529    return 0;
2530}
2531
2532bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2533{
2534    return true;
2535}
2536
2537int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2538{
2539    return 1;
2540}
2541
2542int kvm_arch_on_sigbus(int code, void *addr)
2543{
2544    return 1;
2545}
2546
2547void kvm_arch_init_irq_routing(KVMState *s)
2548{
2549}
2550
2551struct kvm_get_htab_buf {
2552    struct kvm_get_htab_header header;
2553    /*
2554     * We require one extra entry for the read
2555     */
2556    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2557};
2558
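/* Read the HPTE group starting at @pte_index through a temporary htab
 * fd.  Returns a token pointing at the buffered HPTEs, to be released
 * with kvmppc_hash64_free_pteg(), or 0 on failure. */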
2559uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2560{
2561    int htab_fd;
2562    struct kvm_get_htab_fd ghf;
2563    struct kvm_get_htab_buf  *hpte_buf;
2564
2565    ghf.flags = 0;
2566    ghf.start_index = pte_index;
2567    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2568    if (htab_fd < 0) {
2569        goto error_out;
2570    }
2571
2572    hpte_buf = g_malloc0(sizeof(*hpte_buf));
2573    /*
2574     * Read the hpte group
2575     */
2576    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2577        goto out_close;
2578    }
2579
2580    close(htab_fd);
2581    return (uint64_t)(uintptr_t) hpte_buf->hpte;
2582
2583out_close:
2584    g_free(hpte_buf);
2585    close(htab_fd);
2586error_out:
2587    return 0;
2588}
2589
2590void kvmppc_hash64_free_pteg(uint64_t token)
2591{
2592    struct kvm_get_htab_buf *htab_buf;
2593
2594    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2595                            hpte);
2596    g_free(htab_buf);
2597    return;
2598}
2599
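/* Write a single HPTE (@pte0/@pte1) at @pte_index back to the kernel
 * hash table through a temporary htab fd. */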
2600void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2601                             target_ulong pte0, target_ulong pte1)
2602{
2603    int htab_fd;
2604    struct kvm_get_htab_fd ghf;
2605    struct kvm_get_htab_buf hpte_buf;
2606
2607    ghf.flags = 0;
2608    ghf.start_index = 0;     /* Ignored */
2609    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2610    if (htab_fd < 0) {
2611        goto error_out;
2612    }
2613
2614    hpte_buf.header.n_valid = 1;
2615    hpte_buf.header.n_invalid = 0;
2616    hpte_buf.header.index = pte_index;
2617    hpte_buf.hpte[0] = pte0;
2618    hpte_buf.hpte[1] = pte1;
2619    /*
2620     * Write the hpte entry.
2621     * CAUTION: write() has the warn_unused_result attribute, so we
2622     * must check the return value even though we take no action on it.
2623     */
2624    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2625        goto out_close;
2626    }
2627
2628out_close:
2629    close(htab_fd);
2630    return;
2631
2632error_out:
2633    return;
2634}
2635
2636int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2637                             uint64_t address, uint32_t data, PCIDevice *dev)
2638{
2639    return 0;
2640}
2641
2642int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2643                                int vector, PCIDevice *dev)
2644{
2645    return 0;
2646}
2647
2648int kvm_arch_release_virq_post(int virq)
2649{
2650    return 0;
2651}
2652
2653int kvm_arch_msi_data_to_gsi(uint32_t data)
2654{
2655    return data & 0xffff;
2656}
2657
2658int kvmppc_enable_hwrng(void)
2659{
2660    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2661        return -1;
2662    }
2663
2664    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2665}
2666