qemu/target-ppc/kvm.c
   1/*
   2 * PowerPC implementation of KVM hooks
   3 *
   4 * Copyright IBM Corp. 2007
   5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6 *
   7 * Authors:
   8 *  Jerone Young <jyoung5@us.ibm.com>
   9 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10 *  Hollis Blanchard <hollisb@us.ibm.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13 * See the COPYING file in the top-level directory.
  14 *
  15 */
  16
  17#include "qemu/osdep.h"
  18#include <dirent.h>
  19#include <sys/ioctl.h>
  20#include <sys/vfs.h>
  21
  22#include <linux/kvm.h>
  23
  24#include "qemu-common.h"
  25#include "qemu/error-report.h"
  26#include "cpu.h"
  27#include "qemu/timer.h"
  28#include "sysemu/sysemu.h"
  29#include "sysemu/kvm.h"
  30#include "sysemu/numa.h"
  31#include "kvm_ppc.h"
  32#include "sysemu/cpus.h"
  33#include "sysemu/device_tree.h"
  34#include "mmu-hash64.h"
  35
  36#include "hw/sysbus.h"
  37#include "hw/ppc/spapr.h"
  38#include "hw/ppc/spapr_vio.h"
  39#include "hw/ppc/ppc.h"
  40#include "sysemu/watchdog.h"
  41#include "trace.h"
  42#include "exec/gdbstub.h"
  43#include "exec/memattrs.h"
  44#include "sysemu/hostmem.h"
  45#include "qemu/cutils.h"
  46#if defined(TARGET_PPC64)
  47#include "hw/ppc/spapr_cpu_core.h"
  48#endif
  49
  50//#define DEBUG_KVM
  51
  52#ifdef DEBUG_KVM
  53#define DPRINTF(fmt, ...) \
  54    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  55#else
  56#define DPRINTF(fmt, ...) \
  57    do { } while (0)
  58#endif
  59
  60#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  61
  62const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  63    KVM_CAP_LAST_INFO
  64};
  65
  66static int cap_interrupt_unset = false;
  67static int cap_interrupt_level = false;
  68static int cap_segstate;
  69static int cap_booke_sregs;
  70static int cap_ppc_smt;
  71static int cap_ppc_rma;
  72static int cap_spapr_tce;
  73static int cap_spapr_multitce;
  74static int cap_spapr_vfio;
  75static int cap_hior;
  76static int cap_one_reg;
  77static int cap_epr;
  78static int cap_ppc_watchdog;
  79static int cap_papr;
  80static int cap_htab_fd;
  81static int cap_fixup_hcalls;
  82
  83static uint32_t debug_inst_opcode;
  84
  85/* XXX We have a race condition where we actually have a level triggered
  86 *     interrupt, but the infrastructure can't expose that yet, so the guest
  87 *     takes but ignores it, goes to sleep and never gets notified that there's
  88 *     still an interrupt pending.
  89 *
  90 *     As a quick workaround, let's just wake up again 20 ms after we injected
  91 * an interrupt. That way we can ensure that we're always reinjecting
  92 *     interrupts in case the guest swallowed them.
  93 */
  94static QEMUTimer *idle_timer;
  95
  96static void kvm_kick_cpu(void *opaque)
  97{
  98    PowerPCCPU *cpu = opaque;
  99
 100    qemu_cpu_kick(CPU(cpu));
 101}
 102
 103static int kvm_ppc_register_host_cpu_type(void);
 104
 105int kvm_arch_init(MachineState *ms, KVMState *s)
 106{
 107    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 108    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 109    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 110    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 111    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 112    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 113    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 114    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 115    cap_spapr_vfio = false;
 116    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 117    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 118    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 119    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 120    /* Note: we don't set cap_papr here, because this capability is
 121     * only activated later, by kvmppc_set_papr() */
 122    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 123    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 124
 125    if (!cap_interrupt_level) {
 126        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 127                        "VM to stall at times!\n");
 128    }
 129
 130    kvm_ppc_register_host_cpu_type();
 131
 132    return 0;
 133}
 134
 135static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 136{
 137    CPUPPCState *cenv = &cpu->env;
 138    CPUState *cs = CPU(cpu);
 139    struct kvm_sregs sregs;
 140    int ret;
 141
 142    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 143        /* What we're really trying to say is "if we're on BookE, we use
 144           the native PVR for now". This is the only sane way to check
 145           it though, so we may mislead users into thinking they can run
 146           BookE guests on BookS. Let's hope nobody dares enough :) */
 147        return 0;
 148    } else {
 149        if (!cap_segstate) {
 150            fprintf(stderr, "kvm error: missing PVR setting capability\n");
 151            return -ENOSYS;
 152        }
 153    }
 154
 155    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 156    if (ret) {
 157        return ret;
 158    }
 159
 160    sregs.pvr = cenv->spr[SPR_PVR];
 161    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 162}
 163
 164/* Set up a shared TLB array with KVM */
 165static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 166{
 167    CPUPPCState *env = &cpu->env;
 168    CPUState *cs = CPU(cpu);
 169    struct kvm_book3e_206_tlb_params params = {};
 170    struct kvm_config_tlb cfg = {};
 171    unsigned int entries = 0;
 172    int ret, i;
 173
 174    if (!kvm_enabled() ||
 175        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 176        return 0;
 177    }
 178
 179    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 180
 181    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 182        params.tlb_sizes[i] = booke206_tlb_size(env, i);
 183        params.tlb_ways[i] = booke206_tlb_ways(env, i);
 184        entries += params.tlb_sizes[i];
 185    }
 186
 187    assert(entries == env->nb_tlb);
 188    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 189
 190    env->tlb_dirty = true;
 191
 192    cfg.array = (uintptr_t)env->tlb.tlbm;
 193    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 194    cfg.params = (uintptr_t)&params;
 195    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 196
 197    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 198    if (ret < 0) {
 199        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 200                __func__, strerror(-ret));
 201        return ret;
 202    }
 203
 204    env->kvm_sw_tlb = true;
 205    return 0;
 206}
 207
 208
 209#if defined(TARGET_PPC64)
 210static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 211                                       struct kvm_ppc_smmu_info *info)
 212{
 213    CPUPPCState *env = &cpu->env;
 214    CPUState *cs = CPU(cpu);
 215
 216    memset(info, 0, sizeof(*info));
 217
 218    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
 219     * need to "guess" what the supported page sizes are.
 220     *
 221     * For that to work we make a few assumptions:
 222     *
 223     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 224     *   KVM which only supports 4K and 16M pages, but supports them
 225     *   regardless of the backing store characteristics. We also don't
 226     *   support 1T segments.
 227     *
 228     *   This is safe because if HV KVM ever supports that capability, or
 229     *   PR KVM grows support for more page/segment sizes, those versions
 230     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 231     *   will not hit this fallback.
 232     *
 233     * - Else we are running HV KVM. This means we only support page
 234     *   sizes that fit in the backing store. Additionally we only
 235     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
 236     *   P7 encodings for the SLB and hash table. Here too, we assume
 237     *   support for any newer processor will mean a kernel that
 238     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 239     *   this fallback.
 240     */
 241    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 242        /* No flags */
 243        info->flags = 0;
 244        info->slb_size = 64;
 245
 246        /* Standard 4k base page size segment */
 247        info->sps[0].page_shift = 12;
 248        info->sps[0].slb_enc = 0;
 249        info->sps[0].enc[0].page_shift = 12;
 250        info->sps[0].enc[0].pte_enc = 0;
 251
 252        /* Standard 16M large page size segment */
 253        info->sps[1].page_shift = 24;
 254        info->sps[1].slb_enc = SLB_VSID_L;
 255        info->sps[1].enc[0].page_shift = 24;
 256        info->sps[1].enc[0].pte_enc = 0;
 257    } else {
 258        int i = 0;
 259
 260        /* HV KVM has backing store size restrictions */
 261        info->flags = KVM_PPC_PAGE_SIZES_REAL;
 262
 263        if (env->mmu_model & POWERPC_MMU_1TSEG) {
 264            info->flags |= KVM_PPC_1T_SEGMENTS;
 265        }
 266
 267        if (env->mmu_model == POWERPC_MMU_2_06 ||
 268            env->mmu_model == POWERPC_MMU_2_07) {
 269            info->slb_size = 32;
 270        } else {
 271            info->slb_size = 64;
 272        }
 273
 274        /* Standard 4k base page size segment */
 275        info->sps[i].page_shift = 12;
 276        info->sps[i].slb_enc = 0;
 277        info->sps[i].enc[0].page_shift = 12;
 278        info->sps[i].enc[0].pte_enc = 0;
 279        i++;
 280
 281        /* 64K on MMU 2.06 and later */
 282        if (env->mmu_model == POWERPC_MMU_2_06 ||
 283            env->mmu_model == POWERPC_MMU_2_07) {
 284            info->sps[i].page_shift = 16;
 285            info->sps[i].slb_enc = 0x110;
 286            info->sps[i].enc[0].page_shift = 16;
 287            info->sps[i].enc[0].pte_enc = 1;
 288            i++;
 289        }
 290
 291        /* Standard 16M large page size segment */
 292        info->sps[i].page_shift = 24;
 293        info->sps[i].slb_enc = SLB_VSID_L;
 294        info->sps[i].enc[0].page_shift = 24;
 295        info->sps[i].enc[0].pte_enc = 0;
 296    }
 297}
 298
 299static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 300{
 301    CPUState *cs = CPU(cpu);
 302    int ret;
 303
 304    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 305        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 306        if (ret == 0) {
 307            return;
 308        }
 309    }
 310
 311    kvm_get_fallback_smmu_info(cpu, info);
 312}
 313
 314static long gethugepagesize(const char *mem_path)
 315{
 316    struct statfs fs;
 317    int ret;
 318
 319    do {
 320        ret = statfs(mem_path, &fs);
 321    } while (ret != 0 && errno == EINTR);
 322
 323    if (ret != 0) {
 324        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 325                strerror(errno));
 326        exit(1);
 327    }
 328
 329#define HUGETLBFS_MAGIC       0x958458f6
 330
 331    if (fs.f_type != HUGETLBFS_MAGIC) {
 332        /* Explicit mempath, but it's ordinary pages */
 333        return getpagesize();
 334    }
 335
 336    /* It's hugetlbfs, so return the huge page size */
 337    return fs.f_bsize;
 338}
 339
 340/*
 341 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 342 * may or may not name the same files / on the same filesystem now as
 343 * when we actually open and map them.  Iterate over the file
 344 * descriptors instead, and use qemu_fd_getpagesize().
 345 */
 346static int find_max_supported_pagesize(Object *obj, void *opaque)
 347{
 348    char *mem_path;
 349    long *hpsize_min = opaque;
 350
 351    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 352        mem_path = object_property_get_str(obj, "mem-path", NULL);
 353        if (mem_path) {
 354            long hpsize = gethugepagesize(mem_path);
 355            if (hpsize < *hpsize_min) {
 356                *hpsize_min = hpsize;
 357            }
 358        } else {
 359            *hpsize_min = getpagesize();
 360        }
 361    }
 362
 363    return 0;
 364}
 365
 366static long getrampagesize(void)
 367{
 368    long hpsize = LONG_MAX;
 369    long mainrampagesize;
 370    Object *memdev_root;
 371
 372    if (mem_path) {
 373        mainrampagesize = gethugepagesize(mem_path);
 374    } else {
 375        mainrampagesize = getpagesize();
 376    }
 377
 378    /* It's possible we have memory-backend objects with
 379     * hugepage-backed RAM. These may get mapped into system
 380     * address space via -numa parameters or memory hotplug
 381     * hooks. We want to take these into account, but we
 382     * also want to make sure these supported hugepage
 383     * sizes are applicable across the entire range of memory
 384     * we may boot from, so we take the min across all
 385     * backends, and assume normal pages in cases where a
 386     * backend isn't backed by hugepages.
 387     */
 388    memdev_root = object_resolve_path("/objects", NULL);
 389    if (memdev_root) {
 390        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 391    }
 392    if (hpsize == LONG_MAX) {
 393        /* No additional memory regions found ==> Report main RAM page size */
 394        return mainrampagesize;
 395    }
 396
 397    /* If NUMA is disabled or the NUMA nodes are not backed with a
 398     * memory-backend, then there is at least one node using "normal" RAM,
 399     * so if its page size is smaller we have got to report that size instead.
 400     */
 401    if (hpsize > mainrampagesize &&
 402        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
 403        static bool warned;
 404        if (!warned) {
 405            error_report("Huge page support disabled (n/a for main memory).");
 406            warned = true;
 407        }
 408        return mainrampagesize;
 409    }
 410
 411    return hpsize;
 412}
 413
 414static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 415{
 416    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 417        return true;
 418    }
 419
 420    return (1ul << shift) <= rampgsize;
 421}
 422
 423static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 424{
 425    static struct kvm_ppc_smmu_info smmu_info;
 426    static bool has_smmu_info;
 427    CPUPPCState *env = &cpu->env;
 428    long rampagesize;
 429    int iq, ik, jq, jk;
 430
 431    /* We only handle page sizes for 64-bit server guests for now */
 432    if (!(env->mmu_model & POWERPC_MMU_64)) {
 433        return;
 434    }
 435
 436    /* Collect MMU info from kernel if not already */
 437    if (!has_smmu_info) {
 438        kvm_get_smmu_info(cpu, &smmu_info);
 439        has_smmu_info = true;
 440    }
 441
 442    rampagesize = getrampagesize();
 443
 444    /* Convert to QEMU form */
 445    memset(&env->sps, 0, sizeof(env->sps));
 446
 447    /* If we have HV KVM, we need to forbid CI large pages if our
 448     * host page size is smaller than 64K.
 449     */
 450    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 451        env->ci_large_pages = getpagesize() >= 0x10000;
 452    }
 453
 454    /*
 455     * XXX This loop should be an entry-wide AND of the capabilities that
 456     *     the selected CPU has with the capabilities that KVM supports.
 457     */
 458    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 459        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 460        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 461
 462        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 463                                 ksps->page_shift)) {
 464            continue;
 465        }
 466        qsps->page_shift = ksps->page_shift;
 467        qsps->slb_enc = ksps->slb_enc;
 468        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 469            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 470                                     ksps->enc[jk].page_shift)) {
 471                continue;
 472            }
 473            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 474            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 475            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 476                break;
 477            }
 478        }
 479        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 480            break;
 481        }
 482    }
 483    env->slb_nr = smmu_info.slb_size;
 484    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 485        env->mmu_model &= ~POWERPC_MMU_1TSEG;
 486    }
 487}
 488#else /* defined (TARGET_PPC64) */
 489
 490static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 491{
 492}
 493
 494#endif /* !defined (TARGET_PPC64) */
 495
 496unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 497{
 498    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 499}
 500
 501/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 502 * book3s supports only 1 watchpoint, so an array size
 503 * of 4 is sufficient for now.
 504 */
 505#define MAX_HW_BKPTS 4
 506
 507static struct HWBreakpoint {
 508    target_ulong addr;
 509    int type;
 510} hw_debug_points[MAX_HW_BKPTS];
 511
 512static CPUWatchpoint hw_watchpoint;
 513
 514/* By default, no breakpoints or watchpoints are supported */
 515static int max_hw_breakpoint;
 516static int max_hw_watchpoint;
 517static int nb_hw_breakpoint;
 518static int nb_hw_watchpoint;
 519
 520static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 521{
 522    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 523        max_hw_breakpoint = 2;
 524        max_hw_watchpoint = 2;
 525    }
 526
 527    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 528        fprintf(stderr, "Error initializing h/w breakpoints\n");
 529        return;
 530    }
 531}
 532
 533int kvm_arch_init_vcpu(CPUState *cs)
 534{
 535    PowerPCCPU *cpu = POWERPC_CPU(cs);
 536    CPUPPCState *cenv = &cpu->env;
 537    int ret;
 538
 539    /* Gather server mmu info from KVM and update the CPU state */
 540    kvm_fixup_page_sizes(cpu);
 541
 542    /* Synchronize sregs with kvm */
 543    ret = kvm_arch_sync_sregs(cpu);
 544    if (ret) {
 545        if (ret == -EINVAL) {
 546            error_report("Register sync failed... If you're using kvm-hv.ko,"
 547                         " only \"-cpu host\" is possible");
 548        }
 549        return ret;
 550    }
 551
 552    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 553
 554    /* Some targets support access to KVM's guest TLB. */
 555    switch (cenv->mmu_model) {
 556    case POWERPC_MMU_BOOKE206:
 557        ret = kvm_booke206_tlb_init(cpu);
 558        break;
 559    default:
 560        break;
 561    }
 562
 563    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 564    kvmppc_hw_debug_points_init(cenv);
 565
 566    return ret;
 567}
 568
 569static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 570{
 571    CPUPPCState *env = &cpu->env;
 572    CPUState *cs = CPU(cpu);
 573    struct kvm_dirty_tlb dirty_tlb;
 574    unsigned char *bitmap;
 575    int ret;
 576
 577    if (!env->kvm_sw_tlb) {
 578        return;
 579    }
 580
 581    bitmap = g_malloc((env->nb_tlb + 7) / 8);
 582    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 583
 584    dirty_tlb.bitmap = (uintptr_t)bitmap;
 585    dirty_tlb.num_dirty = env->nb_tlb;
 586
 587    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 588    if (ret) {
 589        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 590                __func__, strerror(-ret));
 591    }
 592
 593    g_free(bitmap);
 594}
 595
 596static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 597{
 598    PowerPCCPU *cpu = POWERPC_CPU(cs);
 599    CPUPPCState *env = &cpu->env;
 600    union {
 601        uint32_t u32;
 602        uint64_t u64;
 603    } val;
 604    struct kvm_one_reg reg = {
 605        .id = id,
 606        .addr = (uintptr_t) &val,
 607    };
 608    int ret;
 609
 610    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 611    if (ret != 0) {
 612        trace_kvm_failed_spr_get(spr, strerror(errno));
 613    } else {
 614        switch (id & KVM_REG_SIZE_MASK) {
 615        case KVM_REG_SIZE_U32:
 616            env->spr[spr] = val.u32;
 617            break;
 618
 619        case KVM_REG_SIZE_U64:
 620            env->spr[spr] = val.u64;
 621            break;
 622
 623        default:
 624            /* Don't handle this size yet */
 625            abort();
 626        }
 627    }
 628}
 629
 630static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 631{
 632    PowerPCCPU *cpu = POWERPC_CPU(cs);
 633    CPUPPCState *env = &cpu->env;
 634    union {
 635        uint32_t u32;
 636        uint64_t u64;
 637    } val;
 638    struct kvm_one_reg reg = {
 639        .id = id,
 640        .addr = (uintptr_t) &val,
 641    };
 642    int ret;
 643
 644    switch (id & KVM_REG_SIZE_MASK) {
 645    case KVM_REG_SIZE_U32:
 646        val.u32 = env->spr[spr];
 647        break;
 648
 649    case KVM_REG_SIZE_U64:
 650        val.u64 = env->spr[spr];
 651        break;
 652
 653    default:
 654        /* Don't handle this size yet */
 655        abort();
 656    }
 657
 658    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 659    if (ret != 0) {
 660        trace_kvm_failed_spr_set(spr, strerror(errno));
 661    }
 662}
 663
 664static int kvm_put_fp(CPUState *cs)
 665{
 666    PowerPCCPU *cpu = POWERPC_CPU(cs);
 667    CPUPPCState *env = &cpu->env;
 668    struct kvm_one_reg reg;
 669    int i;
 670    int ret;
 671
 672    if (env->insns_flags & PPC_FLOAT) {
 673        uint64_t fpscr = env->fpscr;
 674        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 675
 676        reg.id = KVM_REG_PPC_FPSCR;
 677        reg.addr = (uintptr_t)&fpscr;
 678        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 679        if (ret < 0) {
 680            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 681            return ret;
 682        }
 683
 684        for (i = 0; i < 32; i++) {
 685            uint64_t vsr[2];
 686
 687#ifdef HOST_WORDS_BIGENDIAN
 688            vsr[0] = float64_val(env->fpr[i]);
 689            vsr[1] = env->vsr[i];
 690#else
 691            vsr[0] = env->vsr[i];
 692            vsr[1] = float64_val(env->fpr[i]);
 693#endif
 694            reg.addr = (uintptr_t) &vsr;
 695            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 696
 697            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 698            if (ret < 0) {
 699                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 700                        i, strerror(errno));
 701                return ret;
 702            }
 703        }
 704    }
 705
 706    if (env->insns_flags & PPC_ALTIVEC) {
 707        reg.id = KVM_REG_PPC_VSCR;
 708        reg.addr = (uintptr_t)&env->vscr;
 709        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 710        if (ret < 0) {
 711            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 712            return ret;
 713        }
 714
 715        for (i = 0; i < 32; i++) {
 716            reg.id = KVM_REG_PPC_VR(i);
 717            reg.addr = (uintptr_t)&env->avr[i];
 718            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 719            if (ret < 0) {
 720                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 721                return ret;
 722            }
 723        }
 724    }
 725
 726    return 0;
 727}
 728
 729static int kvm_get_fp(CPUState *cs)
 730{
 731    PowerPCCPU *cpu = POWERPC_CPU(cs);
 732    CPUPPCState *env = &cpu->env;
 733    struct kvm_one_reg reg;
 734    int i;
 735    int ret;
 736
 737    if (env->insns_flags & PPC_FLOAT) {
 738        uint64_t fpscr;
 739        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 740
 741        reg.id = KVM_REG_PPC_FPSCR;
 742        reg.addr = (uintptr_t)&fpscr;
 743        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 744        if (ret < 0) {
 745            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 746            return ret;
 747        } else {
 748            env->fpscr = fpscr;
 749        }
 750
 751        for (i = 0; i < 32; i++) {
 752            uint64_t vsr[2];
 753
 754            reg.addr = (uintptr_t) &vsr;
 755            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 756
 757            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 758            if (ret < 0) {
 759                DPRINTF("Unable to get %s%d from KVM: %s\n",
 760                        vsx ? "VSR" : "FPR", i, strerror(errno));
 761                return ret;
 762            } else {
 763#ifdef HOST_WORDS_BIGENDIAN
 764                env->fpr[i] = vsr[0];
 765                if (vsx) {
 766                    env->vsr[i] = vsr[1];
 767                }
 768#else
 769                env->fpr[i] = vsr[1];
 770                if (vsx) {
 771                    env->vsr[i] = vsr[0];
 772                }
 773#endif
 774            }
 775        }
 776    }
 777
 778    if (env->insns_flags & PPC_ALTIVEC) {
 779        reg.id = KVM_REG_PPC_VSCR;
 780        reg.addr = (uintptr_t)&env->vscr;
 781        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 782        if (ret < 0) {
 783            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 784            return ret;
 785        }
 786
 787        for (i = 0; i < 32; i++) {
 788            reg.id = KVM_REG_PPC_VR(i);
 789            reg.addr = (uintptr_t)&env->avr[i];
 790            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 791            if (ret < 0) {
 792                DPRINTF("Unable to get VR%d from KVM: %s\n",
 793                        i, strerror(errno));
 794                return ret;
 795            }
 796        }
 797    }
 798
 799    return 0;
 800}
 801
 802#if defined(TARGET_PPC64)
 803static int kvm_get_vpa(CPUState *cs)
 804{
 805    PowerPCCPU *cpu = POWERPC_CPU(cs);
 806    CPUPPCState *env = &cpu->env;
 807    struct kvm_one_reg reg;
 808    int ret;
 809
 810    reg.id = KVM_REG_PPC_VPA_ADDR;
 811    reg.addr = (uintptr_t)&env->vpa_addr;
 812    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 813    if (ret < 0) {
 814        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 815        return ret;
 816    }
 817
 818    assert((uintptr_t)&env->slb_shadow_size
 819           == ((uintptr_t)&env->slb_shadow_addr + 8));
 820    reg.id = KVM_REG_PPC_VPA_SLB;
 821    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 822    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 823    if (ret < 0) {
 824        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 825                strerror(errno));
 826        return ret;
 827    }
 828
 829    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 830    reg.id = KVM_REG_PPC_VPA_DTL;
 831    reg.addr = (uintptr_t)&env->dtl_addr;
 832    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 833    if (ret < 0) {
 834        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 835                strerror(errno));
 836        return ret;
 837    }
 838
 839    return 0;
 840}
 841
 842static int kvm_put_vpa(CPUState *cs)
 843{
 844    PowerPCCPU *cpu = POWERPC_CPU(cs);
 845    CPUPPCState *env = &cpu->env;
 846    struct kvm_one_reg reg;
 847    int ret;
 848
 849    /* SLB shadow or DTL can't be registered unless a master VPA is
 850     * registered.  That means when restoring state, if a VPA *is*
 851     * registered, we need to set that up first.  If not, we need to
 852     * deregister the others before deregistering the master VPA */
 853    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 854
 855    if (env->vpa_addr) {
 856        reg.id = KVM_REG_PPC_VPA_ADDR;
 857        reg.addr = (uintptr_t)&env->vpa_addr;
 858        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 859        if (ret < 0) {
 860            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 861            return ret;
 862        }
 863    }
 864
 865    assert((uintptr_t)&env->slb_shadow_size
 866           == ((uintptr_t)&env->slb_shadow_addr + 8));
 867    reg.id = KVM_REG_PPC_VPA_SLB;
 868    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 869    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 870    if (ret < 0) {
 871        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 872        return ret;
 873    }
 874
 875    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 876    reg.id = KVM_REG_PPC_VPA_DTL;
 877    reg.addr = (uintptr_t)&env->dtl_addr;
 878    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 879    if (ret < 0) {
 880        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 881                strerror(errno));
 882        return ret;
 883    }
 884
 885    if (!env->vpa_addr) {
 886        reg.id = KVM_REG_PPC_VPA_ADDR;
 887        reg.addr = (uintptr_t)&env->vpa_addr;
 888        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 889        if (ret < 0) {
 890            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 891            return ret;
 892        }
 893    }
 894
 895    return 0;
 896}
 897#endif /* TARGET_PPC64 */
 898
 899int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 900{
 901    CPUPPCState *env = &cpu->env;
 902    struct kvm_sregs sregs;
 903    int i;
 904
 905    sregs.pvr = env->spr[SPR_PVR];
 906
 907    sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 908
 909    /* Sync SLB */
 910#ifdef TARGET_PPC64
 911    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 912        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 913        if (env->slb[i].esid & SLB_ESID_V) {
 914            sregs.u.s.ppc64.slb[i].slbe |= i;
 915        }
 916        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 917    }
 918#endif
 919
 920    /* Sync SRs */
 921    for (i = 0; i < 16; i++) {
 922        sregs.u.s.ppc32.sr[i] = env->sr[i];
 923    }
 924
 925    /* Sync BATs */
 926    for (i = 0; i < 8; i++) {
 927        /* Beware. We have to swap the upper and lower 32-bit halves here */
 928        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 929            | env->DBAT[1][i];
 930        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 931            | env->IBAT[1][i];
 932    }
 933
 934    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 935}
 936
 937int kvm_arch_put_registers(CPUState *cs, int level)
 938{
 939    PowerPCCPU *cpu = POWERPC_CPU(cs);
 940    CPUPPCState *env = &cpu->env;
 941    struct kvm_regs regs;
 942    int ret;
 943    int i;
 944
 945    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 946    if (ret < 0) {
 947        return ret;
 948    }
 949
 950    regs.ctr = env->ctr;
 951    regs.lr  = env->lr;
 952    regs.xer = cpu_read_xer(env);
 953    regs.msr = env->msr;
 954    regs.pc = env->nip;
 955
 956    regs.srr0 = env->spr[SPR_SRR0];
 957    regs.srr1 = env->spr[SPR_SRR1];
 958
 959    regs.sprg0 = env->spr[SPR_SPRG0];
 960    regs.sprg1 = env->spr[SPR_SPRG1];
 961    regs.sprg2 = env->spr[SPR_SPRG2];
 962    regs.sprg3 = env->spr[SPR_SPRG3];
 963    regs.sprg4 = env->spr[SPR_SPRG4];
 964    regs.sprg5 = env->spr[SPR_SPRG5];
 965    regs.sprg6 = env->spr[SPR_SPRG6];
 966    regs.sprg7 = env->spr[SPR_SPRG7];
 967
 968    regs.pid = env->spr[SPR_BOOKE_PID];
 969
 970    for (i = 0; i < 32; i++)
 971        regs.gpr[i] = env->gpr[i];
 972
 973    regs.cr = 0;
 974    for (i = 0; i < 8; i++) {
 975        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 976    }
 977
 978    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 979    if (ret < 0)
 980        return ret;
 981
 982    kvm_put_fp(cs);
 983
 984    if (env->tlb_dirty) {
 985        kvm_sw_tlb_put(cpu);
 986        env->tlb_dirty = false;
 987    }
 988
 989    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 990        ret = kvmppc_put_books_sregs(cpu);
 991        if (ret < 0) {
 992            return ret;
 993        }
 994    }
 995
 996    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 997        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 998    }
 999
1000    if (cap_one_reg) {
1001        int i;
1002
1003        /* We deliberately ignore errors here: for kernels which have
1004         * the ONE_REG calls but don't support the specific
1005         * registers, there's a reasonable chance things will still
1006         * work, at least until we try to migrate. */
1007        for (i = 0; i < 1024; i++) {
1008            uint64_t id = env->spr_cb[i].one_reg_id;
1009
1010            if (id != 0) {
1011                kvm_put_one_spr(cs, id, i);
1012            }
1013        }
1014
1015#ifdef TARGET_PPC64
1016        if (msr_ts) {
1017            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1018                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1019            }
1020            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1021                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1022            }
1023            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1024            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1025            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1026            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1027            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1028            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1029            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1030            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1031            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1032            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1033        }
1034
1035        if (cap_papr) {
1036            if (kvm_put_vpa(cs) < 0) {
1037                DPRINTF("Warning: Unable to set VPA information to KVM\n");
1038            }
1039        }
1040
1041        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1042#endif /* TARGET_PPC64 */
1043    }
1044
1045    return ret;
1046}
1047
1048static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1049{
1050     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1051}
1052
1053static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1054{
1055    CPUPPCState *env = &cpu->env;
1056    struct kvm_sregs sregs;
1057    int ret;
1058
1059    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1060    if (ret < 0) {
1061        return ret;
1062    }
1063
1064    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1065        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1066        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1067        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1068        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1069        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1070        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1071        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1072        env->spr[SPR_DECR] = sregs.u.e.dec;
1073        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1074        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1075        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1076    }
1077
1078    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1079        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1080        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1081        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1082        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1083        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1084    }
1085
1086    if (sregs.u.e.features & KVM_SREGS_E_64) {
1087        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1088    }
1089
1090    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1091        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1092    }
1093
1094    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1095        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1096        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1097        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1098        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1099        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1100        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1101        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1102        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1103        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1104        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1105        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1106        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1107        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1108        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1109        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1110        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1111        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1112        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1113        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1114        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1115        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1116        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1117        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1118        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1119        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1120        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1121        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1122        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1123        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1124        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1125        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1126        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1127
1128        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1129            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1130            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1131            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1132            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1133            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1134            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1135        }
1136
1137        if (sregs.u.e.features & KVM_SREGS_E_PM) {
1138            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1139            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1140        }
1141
1142        if (sregs.u.e.features & KVM_SREGS_E_PC) {
1143            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1144            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1145            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1146            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1147        }
1148    }
1149
1150    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1151        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1152        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1153        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1154        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1155        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1156        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1157        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1158        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1159        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1160        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1161    }
1162
1163    if (sregs.u.e.features & KVM_SREGS_EXP) {
1164        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1165    }
1166
1167    if (sregs.u.e.features & KVM_SREGS_E_PD) {
1168        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1169        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1170    }
1171
1172    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1173        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1174        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1175        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1176
1177        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1178            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1179            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1180        }
1181    }
1182
1183    return 0;
1184}
1185
1186static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1187{
1188    CPUPPCState *env = &cpu->env;
1189    struct kvm_sregs sregs;
1190    int ret;
1191    int i;
1192
1193    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1194    if (ret < 0) {
1195        return ret;
1196    }
1197
1198    if (!env->external_htab) {
1199        ppc_store_sdr1(env, sregs.u.s.sdr1);
1200    }
1201
1202    /* Sync SLB */
1203#ifdef TARGET_PPC64
1204    /*
1205     * The packed SLB array we get from KVM_GET_SREGS only contains
1206     * information about valid entries. So we flush our internal copy
1207     * to get rid of stale ones, then put all valid SLB entries back
1208     * in.
1209     */
1210    memset(env->slb, 0, sizeof(env->slb));
1211    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1212        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1213        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1214        /*
1215         * Only restore valid entries
1216         */
1217        if (rb & SLB_ESID_V) {
1218            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1219        }
1220    }
1221#endif
1222
1223    /* Sync SRs */
1224    for (i = 0; i < 16; i++) {
1225        env->sr[i] = sregs.u.s.ppc32.sr[i];
1226    }
1227
1228    /* Sync BATs */
1229    for (i = 0; i < 8; i++) {
1230        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1231        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1232        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1233        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1234    }
1235
1236    return 0;
1237}
1238
1239int kvm_arch_get_registers(CPUState *cs)
1240{
1241    PowerPCCPU *cpu = POWERPC_CPU(cs);
1242    CPUPPCState *env = &cpu->env;
1243    struct kvm_regs regs;
1244    uint32_t cr;
1245    int i, ret;
1246
1247    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1248    if (ret < 0)
1249        return ret;
1250
1251    cr = regs.cr;
1252    for (i = 7; i >= 0; i--) {
1253        env->crf[i] = cr & 15;
1254        cr >>= 4;
1255    }
1256
1257    env->ctr = regs.ctr;
1258    env->lr = regs.lr;
1259    cpu_write_xer(env, regs.xer);
1260    env->msr = regs.msr;
1261    env->nip = regs.pc;
1262
1263    env->spr[SPR_SRR0] = regs.srr0;
1264    env->spr[SPR_SRR1] = regs.srr1;
1265
1266    env->spr[SPR_SPRG0] = regs.sprg0;
1267    env->spr[SPR_SPRG1] = regs.sprg1;
1268    env->spr[SPR_SPRG2] = regs.sprg2;
1269    env->spr[SPR_SPRG3] = regs.sprg3;
1270    env->spr[SPR_SPRG4] = regs.sprg4;
1271    env->spr[SPR_SPRG5] = regs.sprg5;
1272    env->spr[SPR_SPRG6] = regs.sprg6;
1273    env->spr[SPR_SPRG7] = regs.sprg7;
1274
1275    env->spr[SPR_BOOKE_PID] = regs.pid;
1276
1277    for (i = 0; i < 32; i++)
1278        env->gpr[i] = regs.gpr[i];
1279
1280    kvm_get_fp(cs);
1281
1282    if (cap_booke_sregs) {
1283        ret = kvmppc_get_booke_sregs(cpu);
1284        if (ret < 0) {
1285            return ret;
1286        }
1287    }
1288
1289    if (cap_segstate) {
1290        ret = kvmppc_get_books_sregs(cpu);
1291        if (ret < 0) {
1292            return ret;
1293        }
1294    }
1295
1296    if (cap_hior) {
1297        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1298    }
1299
1300    if (cap_one_reg) {
1301        int i;
1302
1303        /* We deliberately ignore errors here: for kernels which have
1304         * the ONE_REG calls but don't support the specific
1305         * registers, there's a reasonable chance things will still
1306         * work, at least until we try to migrate. */
1307        for (i = 0; i < 1024; i++) {
1308            uint64_t id = env->spr_cb[i].one_reg_id;
1309
1310            if (id != 0) {
1311                kvm_get_one_spr(cs, id, i);
1312            }
1313        }
1314
1315#ifdef TARGET_PPC64
1316        if (msr_ts) {
1317            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1318                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1319            }
1320            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1321                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1322            }
1323            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1324            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1325            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1326            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1327            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1328            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1329            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1330            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1331            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1332            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1333        }
1334
1335        if (cap_papr) {
1336            if (kvm_get_vpa(cs) < 0) {
1337                DPRINTF("Warning: Unable to get VPA information from KVM\n");
1338            }
1339        }
1340
1341        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1342#endif
1343    }
1344
1345    return 0;
1346}
1347
1348int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1349{
1350    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1351
1352    if (irq != PPC_INTERRUPT_EXT) {
1353        return 0;
1354    }
1355
1356    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1357        return 0;
1358    }
1359
1360    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1361
1362    return 0;
1363}
1364
1365#if defined(TARGET_PPCEMB)
1366#define PPC_INPUT_INT PPC40x_INPUT_INT
1367#elif defined(TARGET_PPC64)
1368#define PPC_INPUT_INT PPC970_INPUT_INT
1369#else
1370#define PPC_INPUT_INT PPC6xx_INPUT_INT
1371#endif
1372
1373void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1374{
1375    PowerPCCPU *cpu = POWERPC_CPU(cs);
1376    CPUPPCState *env = &cpu->env;
1377    int r;
1378    unsigned irq;
1379
1380    qemu_mutex_lock_iothread();
1381
1382    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1383     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1384    if (!cap_interrupt_level &&
1385        run->ready_for_interrupt_injection &&
1386        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1387        (env->irq_input_state & (1<<PPC_INPUT_INT)))
1388    {
1389        /* For now KVM disregards the 'irq' argument. However, in the
1390         * future KVM could cache it in-kernel to avoid a heavyweight exit
1391         * when reading the UIC.
1392         */
1393        irq = KVM_INTERRUPT_SET;
1394
1395        DPRINTF("injected interrupt %d\n", irq);
1396        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1397        if (r < 0) {
1398            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1399        }
1400
1401        /* Always wake up soon in case the interrupt was level based */
1402        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1403                       (NANOSECONDS_PER_SECOND / 50));
1404    }
1405
1406    /* We don't know if there are more interrupts pending after this. However,
1407     * the guest will return to userspace in the course of handling this one
1408     * anyway, so we will get a chance to deliver the rest. */
1409
1410    qemu_mutex_unlock_iothread();
1411}
1412
1413MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1414{
1415    return MEMTXATTRS_UNSPECIFIED;
1416}
1417
1418int kvm_arch_process_async_events(CPUState *cs)
1419{
1420    return cs->halted;
1421}
1422
1423static int kvmppc_handle_halt(PowerPCCPU *cpu)
1424{
1425    CPUState *cs = CPU(cpu);
1426    CPUPPCState *env = &cpu->env;
1427
1428    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1429        cs->halted = 1;
1430        cs->exception_index = EXCP_HLT;
1431    }
1432
1433    return 0;
1434}
1435
1436/* Map DCR access to the existing QEMU DCR emulation */
1437static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1438{
1439    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1440        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1441
1442    return 0;
1443}
1444
1445static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1446{
1447    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1448        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1449
1450    return 0;
1451}
1452
1453int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1454{
1455    /* Mixed endian case is not handled */
1456    uint32_t sc = debug_inst_opcode;
1457
1458    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1459                            sizeof(sc), 0) ||
1460        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1461        return -EINVAL;
1462    }
1463
1464    return 0;
1465}
1466
1467int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1468{
1469    uint32_t sc;
1470
1471    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1472        sc != debug_inst_opcode ||
1473        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1474                            sizeof(sc), 1)) {
1475        return -EINVAL;
1476    }
1477
1478    return 0;
1479}
1480
1481static int find_hw_breakpoint(target_ulong addr, int type)
1482{
1483    int n;
1484
1485    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1486           <= ARRAY_SIZE(hw_debug_points));
1487
1488    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1489        if (hw_debug_points[n].addr == addr &&
1490             hw_debug_points[n].type == type) {
1491            return n;
1492        }
1493    }
1494
1495    return -1;
1496}
1497
1498static int find_hw_watchpoint(target_ulong addr, int *flag)
1499{
1500    int n;
1501
1502    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1503    if (n >= 0) {
1504        *flag = BP_MEM_ACCESS;
1505        return n;
1506    }
1507
1508    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1509    if (n >= 0) {
1510        *flag = BP_MEM_WRITE;
1511        return n;
1512    }
1513
1514    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1515    if (n >= 0) {
1516        *flag = BP_MEM_READ;
1517        return n;
1518    }
1519
1520    return -1;
1521}
1522
1523int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1524                                  target_ulong len, int type)
1525{
1526    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1527        return -ENOBUFS;
1528    }
1529
1530    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1531    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1532
1533    switch (type) {
1534    case GDB_BREAKPOINT_HW:
1535        if (nb_hw_breakpoint >= max_hw_breakpoint) {
1536            return -ENOBUFS;
1537        }
1538
1539        if (find_hw_breakpoint(addr, type) >= 0) {
1540            return -EEXIST;
1541        }
1542
1543        nb_hw_breakpoint++;
1544        break;
1545
1546    case GDB_WATCHPOINT_WRITE:
1547    case GDB_WATCHPOINT_READ:
1548    case GDB_WATCHPOINT_ACCESS:
1549        if (nb_hw_watchpoint >= max_hw_watchpoint) {
1550            return -ENOBUFS;
1551        }
1552
1553        if (find_hw_breakpoint(addr, type) >= 0) {
1554            return -EEXIST;
1555        }
1556
1557        nb_hw_watchpoint++;
1558        break;
1559
1560    default:
1561        return -ENOSYS;
1562    }
1563
1564    return 0;
1565}
1566
1567int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1568                                  target_ulong len, int type)
1569{
1570    int n;
1571
1572    n = find_hw_breakpoint(addr, type);
1573    if (n < 0) {
1574        return -ENOENT;
1575    }
1576
1577    switch (type) {
1578    case GDB_BREAKPOINT_HW:
1579        nb_hw_breakpoint--;
1580        break;
1581
1582    case GDB_WATCHPOINT_WRITE:
1583    case GDB_WATCHPOINT_READ:
1584    case GDB_WATCHPOINT_ACCESS:
1585        nb_hw_watchpoint--;
1586        break;
1587
1588    default:
1589        return -ENOSYS;
1590    }
1591    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1592
1593    return 0;
1594}
1595
1596void kvm_arch_remove_all_hw_breakpoints(void)
1597{
1598    nb_hw_breakpoint = nb_hw_watchpoint = 0;
1599}
1600
1601void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1602{
1603    int n;
1604
1605    /* Software Breakpoint updates */
1606    if (kvm_sw_breakpoints_active(cs)) {
1607        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1608    }
1609
1610    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1611           <= ARRAY_SIZE(hw_debug_points));
1612    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1613
1614    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1615        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1616        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1617        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1618            switch (hw_debug_points[n].type) {
1619            case GDB_BREAKPOINT_HW:
1620                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1621                break;
1622            case GDB_WATCHPOINT_WRITE:
1623                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1624                break;
1625            case GDB_WATCHPOINT_READ:
1626                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1627                break;
1628            case GDB_WATCHPOINT_ACCESS:
1629                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1630                                        KVMPPC_DEBUG_WATCH_READ;
1631                break;
1632            default:
1633                cpu_abort(cs, "Unsupported breakpoint type\n");
1634            }
1635            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1636        }
1637    }
1638}
1639
1640static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1641{
1642    CPUState *cs = CPU(cpu);
1643    CPUPPCState *env = &cpu->env;
1644    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1645    int handle = 0;
1646    int n;
1647    int flag = 0;
1648
1649    if (cs->singlestep_enabled) {
1650        handle = 1;
1651    } else if (arch_info->status) {
1652        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1653            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1654                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1655                if (n >= 0) {
1656                    handle = 1;
1657                }
1658            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1659                                            KVMPPC_DEBUG_WATCH_WRITE)) {
1660                n = find_hw_watchpoint(arch_info->address,  &flag);
1661                if (n >= 0) {
1662                    handle = 1;
1663                    cs->watchpoint_hit = &hw_watchpoint;
1664                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
1665                    hw_watchpoint.flags = flag;
1666                }
1667            }
1668        }
1669    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1670        handle = 1;
1671    } else {
1672        /* QEMU is not able to handle this debug exception, so inject
1673         * a program exception into the guest;
1674         * yes, a program exception, NOT a debug exception!
1675         * While QEMU is using the debug resources, the debug exception
1676         * must always be enabled. To achieve this we set MSR_DE and
1677         * also set MSRP_DEP so the guest cannot change MSR_DE.
1678         * When emulating debug resources for the guest, we want the
1679         * guest to control MSR_DE (enable/disable the debug interrupt
1680         * as needed). Supporting both configurations at once is NOT
1681         * possible, so we cannot share debug resources between QEMU
1682         * and the guest on the BookE architecture.
1683         * In the current design QEMU gets priority over the guest:
1684         * if QEMU is using the debug resources, the guest cannot use
1685         * them.
1686         * For software breakpoints QEMU uses a privileged instruction,
1687         * so there is no way we got here because the guest set a
1688         * debug exception; the only possibility is that the guest
1689         * executed a privileged / illegal instruction, and that is why
1690         * we are injecting a program interrupt.
1691         */
1692
1693        cpu_synchronize_state(cs);
1694        /* env->nip is the PC, so increment it by 4 before calling
1695         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1696         */
1697        env->nip += 4;
1698        cs->exception_index = POWERPC_EXCP_PROGRAM;
1699        env->error_code = POWERPC_EXCP_INVAL;
1700        ppc_cpu_do_interrupt(cs);
1701    }
1702
1703    return handle;
1704}
1705
1706int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1707{
1708    PowerPCCPU *cpu = POWERPC_CPU(cs);
1709    CPUPPCState *env = &cpu->env;
1710    int ret;
1711
1712    qemu_mutex_lock_iothread();
1713
1714    switch (run->exit_reason) {
1715    case KVM_EXIT_DCR:
1716        if (run->dcr.is_write) {
1717            DPRINTF("handle dcr write\n");
1718            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1719        } else {
1720            DPRINTF("handle dcr read\n");
1721            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1722        }
1723        break;
1724    case KVM_EXIT_HLT:
1725        DPRINTF("handle halt\n");
1726        ret = kvmppc_handle_halt(cpu);
1727        break;
1728#if defined(TARGET_PPC64)
1729    case KVM_EXIT_PAPR_HCALL:
1730        DPRINTF("handle PAPR hypercall\n");
1731        run->papr_hcall.ret = spapr_hypercall(cpu,
1732                                              run->papr_hcall.nr,
1733                                              run->papr_hcall.args);
1734        ret = 0;
1735        break;
1736#endif
1737    case KVM_EXIT_EPR:
1738        DPRINTF("handle epr\n");
1739        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1740        ret = 0;
1741        break;
1742    case KVM_EXIT_WATCHDOG:
1743        DPRINTF("handle watchdog expiry\n");
1744        watchdog_perform_action();
1745        ret = 0;
1746        break;
1747
1748    case KVM_EXIT_DEBUG:
1749        DPRINTF("handle debug exception\n");
1750        if (kvm_handle_debug(cpu, run)) {
1751            ret = EXCP_DEBUG;
1752            break;
1753        }
1754        /* re-enter, this exception was guest-internal */
1755        ret = 0;
1756        break;
1757
1758    default:
1759        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1760        ret = -1;
1761        break;
1762    }
1763
1764    qemu_mutex_unlock_iothread();
1765    return ret;
1766}
1767
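/*
 * BookE timer helpers: TSR updates go through the KVM_REG_PPC_OR_TSR /
 * KVM_REG_PPC_CLEAR_TSR ONE_REG interface so that individual bits can be
 * set or cleared without overwriting the kernel's own view of the register
 * (e.g. the bits maintained by the in-kernel watchdog).
 */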
1768int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1769{
1770    CPUState *cs = CPU(cpu);
1771    uint32_t bits = tsr_bits;
1772    struct kvm_one_reg reg = {
1773        .id = KVM_REG_PPC_OR_TSR,
1774        .addr = (uintptr_t) &bits,
1775    };
1776
1777    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1778}
1779
1780int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1781{
1782
1783    CPUState *cs = CPU(cpu);
1784    uint32_t bits = tsr_bits;
1785    struct kvm_one_reg reg = {
1786        .id = KVM_REG_PPC_CLEAR_TSR,
1787        .addr = (uintptr_t) &bits,
1788    };
1789
1790    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1791}
1792
1793int kvmppc_set_tcr(PowerPCCPU *cpu)
1794{
1795    CPUState *cs = CPU(cpu);
1796    CPUPPCState *env = &cpu->env;
1797    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1798
1799    struct kvm_one_reg reg = {
1800        .id = KVM_REG_PPC_TCR,
1801        .addr = (uintptr_t) &tcr,
1802    };
1803
1804    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1805}
1806
1807int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1808{
1809    CPUState *cs = CPU(cpu);
1810    int ret;
1811
1812    if (!kvm_enabled()) {
1813        return -1;
1814    }
1815
1816    if (!cap_ppc_watchdog) {
1817        printf("warning: KVM does not support watchdog\n");
1818        return -1;
1819    }
1820
1821    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1822    if (ret < 0) {
1823        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1824                __func__, strerror(-ret));
1825        return ret;
1826    }
1827
1828    return ret;
1829}
1830
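/*
 * Find the first /proc/cpuinfo line that starts with 'field' and copy the
 * whole line (still including the "field : ..." prefix) into 'value'.
 * Returns 0 on a match, -1 if the file can't be opened or no line matches.
 */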
1831static int read_cpuinfo(const char *field, char *value, int len)
1832{
1833    FILE *f;
1834    int ret = -1;
1835    int field_len = strlen(field);
1836    char line[512];
1837
1838    f = fopen("/proc/cpuinfo", "r");
1839    if (!f) {
1840        return -1;
1841    }
1842
1843    do {
1844        if (!fgets(line, sizeof(line), f)) {
1845            break;
1846        }
1847        if (!strncmp(line, field, field_len)) {
1848            pstrcpy(value, len, line);
1849            ret = 0;
1850            break;
1851        }
1852    } while (*line);
1853
1854    fclose(f);
1855
1856    return ret;
1857}
1858
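/*
 * Host timebase frequency, parsed from the "timebase" line of
 * /proc/cpuinfo (e.g. "timebase : 512000000").  Falls back to
 * NANOSECONDS_PER_SECOND if the line is missing or has no ':' separator.
 */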
1859uint32_t kvmppc_get_tbfreq(void)
1860{
1861    char line[512];
1862    char *ns;
1863    uint32_t retval = NANOSECONDS_PER_SECOND;
1864
1865    if (read_cpuinfo("timebase", line, sizeof(line))) {
1866        return retval;
1867    }
1868
1869    if (!(ns = strchr(line, ':'))) {
1870        return retval;
1871    }
1872
1873    ns++;
1874
1875    return atoi(ns);
1876}
1877
1878bool kvmppc_get_host_serial(char **value)
1879{
1880    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1881                               NULL);
1882}
1883
1884bool kvmppc_get_host_model(char **value)
1885{
1886    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1887}
1888
1889/* Try to find a device tree node for a CPU with a clock-frequency property */
1890static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1891{
1892    struct dirent *dirp;
1893    DIR *dp;
1894
1895    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1896        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1897        return -1;
1898    }
1899
1900    buf[0] = '\0';
1901    while ((dirp = readdir(dp)) != NULL) {
1902        FILE *f;
1903        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1904                 dirp->d_name);
1905        f = fopen(buf, "r");
1906        if (f) {
1907            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1908            fclose(f);
1909            break;
1910        }
1911        buf[0] = '\0';
1912    }
1913    closedir(dp);
1914    if (buf[0] == '\0') {
1915        printf("Unknown host!\n");
1916        return -1;
1917    }
1918
1919    return 0;
1920}
1921
1922static uint64_t kvmppc_read_int_dt(const char *filename)
1923{
1924    union {
1925        uint32_t v32;
1926        uint64_t v64;
1927    } u;
1928    FILE *f;
1929    int len;
1930
1931    f = fopen(filename, "rb");
1932    if (!f) {
1933        return -1;
1934    }
1935
1936    len = fread(&u, 1, sizeof(u), f);
1937    fclose(f);
1938    switch (len) {
1939    case 4:
1940        /* property is a 32-bit quantity */
1941        return be32_to_cpu(u.v32);
1942    case 8:
1943        return be64_to_cpu(u.v64);
1944    }
1945
1946    return 0;
1947}
1948
1949/* Read a CPU node property from the host device tree that's a single
1950 * integer (32-bit or 64-bit).  Returns -1 if the node or property
1951 * can't be found or opened, and 0 if the property has an unexpected
1952 * length. */
1953static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1954{
1955    char buf[PATH_MAX], *tmp;
1956    uint64_t val;
1957
1958    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1959        return -1;
1960    }
1961
1962    tmp = g_strdup_printf("%s/%s", buf, propname);
1963    val = kvmppc_read_int_dt(tmp);
1964    g_free(tmp);
1965
1966    return val;
1967}
1968
1969uint64_t kvmppc_get_clockfreq(void)
1970{
1971    return kvmppc_read_int_cpu_dt("clock-frequency");
1972}
1973
1974uint32_t kvmppc_get_vmx(void)
1975{
1976    return kvmppc_read_int_cpu_dt("ibm,vmx");
1977}
1978
1979uint32_t kvmppc_get_dfp(void)
1980{
1981    return kvmppc_read_int_cpu_dt("ibm,dfp");
1982}
1983
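/* Returns 0 and fills in *pvinfo on success, 1 if KVM_PPC_GET_PVINFO is
 * unavailable or fails (note the inverted sense compared to most helpers). */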
1984static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1985{
1986    PowerPCCPU *cpu = ppc_env_get_cpu(env);
1987    CPUState *cs = CPU(cpu);
1988
1989    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1990        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1991        return 0;
1992    }
1993
1994    return 1;
1995}
1996
1997int kvmppc_get_hasidle(CPUPPCState *env)
1998{
1999    struct kvm_ppc_pvinfo pvinfo;
2000
2001    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2002        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2003        return 1;
2004    }
2005
2006    return 0;
2007}
2008
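/*
 * Copy the paravirt hypercall sequence the kernel wants guests to use into
 * 'buf' (the caller places it in the guest's hypercall page).  If the
 * kernel can't provide one, emit the endian-agnostic fallback below, which
 * simply makes every hypercall return -1, and return 1 instead of 0.
 */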
2009int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2010{
2011    uint32_t *hc = (uint32_t *)buf;
2012    struct kvm_ppc_pvinfo pvinfo;
2013
2014    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2015        memcpy(buf, pvinfo.hcall, buf_len);
2016        return 0;
2017    }
2018
2019    /*
2020     * Fallback to always fail hypercalls regardless of endianness:
2021     *
2022     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2023     *     li r3, -1
2024     *     b .+8       (becomes nop in wrong endian)
2025     *     bswap32(li r3, -1)
2026     */
2027
2028    hc[0] = cpu_to_be32(0x08000048);
2029    hc[1] = cpu_to_be32(0x3860ffff);
2030    hc[2] = cpu_to_be32(0x48000008);
2031    hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2032
2033    return 1;
2034}
2035
2036static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2037{
2038    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2039}
2040
2041void kvmppc_enable_logical_ci_hcalls(void)
2042{
2043    /*
2044     * FIXME: it would be nice if we could detect the case where
2045     * a device in use requires the in-kernel implementation of
2046     * these hcalls but the kernel lacks it, and produce a warning
2047     * in that situation.
2048     */
2049    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2050    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2051}
2052
2053void kvmppc_enable_set_mode_hcall(void)
2054{
2055    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2056}
2057
2058void kvmppc_set_papr(PowerPCCPU *cpu)
2059{
2060    CPUState *cs = CPU(cpu);
2061    int ret;
2062
2063    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2064    if (ret) {
2065        error_report("This vCPU type or KVM version does not support PAPR");
2066        exit(1);
2067    }
2068
2069    /* Update the capability flag so we sync the right information
2070     * with kvm */
2071    cap_papr = 1;
2072}
2073
2074int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2075{
2076    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2077}
2078
2079void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2080{
2081    CPUState *cs = CPU(cpu);
2082    int ret;
2083
2084    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2085    if (ret && mpic_proxy) {
2086        error_report("This KVM version does not support EPR");
2087        exit(1);
2088    }
2089}
2090
2091int kvmppc_smt_threads(void)
2092{
2093    return cap_ppc_smt ? cap_ppc_smt : 1;
2094}
2095
2096#ifdef TARGET_PPC64
2097off_t kvmppc_alloc_rma(void **rma)
2098{
2099    off_t size;
2100    int fd;
2101    struct kvm_allocate_rma ret;
2102
2103    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2104     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2105     *                      not necessary on this hardware
2106     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2107     *
2108     * FIXME: We should allow the user to force contiguous RMA
2109     * allocation in the cap_ppc_rma==1 case.
2110     */
2111    if (cap_ppc_rma < 2) {
2112        return 0;
2113    }
2114
2115    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2116    if (fd < 0) {
2117        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2118                strerror(errno));
2119        return -1;
2120    }
2121
2122    size = MIN(ret.rma_size, 256ul << 20);
2123
2124    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2125    if (*rma == MAP_FAILED) {
2126        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2127        return -1;
2128    }
2129
2130    return size;
2131}
2132
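/*
 * Limit the RMA to what the hash table can reasonably cover: pick the
 * largest host page size no bigger than the backing page size of guest
 * RAM, then clamp to 1ULL << (page_shift + hash_shift - 7), i.e. roughly
 * one such page per 128-byte PTEG of a 2^hash_shift byte HTAB.
 */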
2133uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2134{
2135    struct kvm_ppc_smmu_info info;
2136    long rampagesize, best_page_shift;
2137    int i;
2138
2139    if (cap_ppc_rma >= 2) {
2140        return current_size;
2141    }
2142
2143    /* Find the largest hardware supported page size that's less than
2144     * or equal to the (logical) backing page size of guest RAM */
2145    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2146    rampagesize = getrampagesize();
2147    best_page_shift = 0;
2148
2149    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2150        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2151
2152        if (!sps->page_shift) {
2153            continue;
2154        }
2155
2156        if ((sps->page_shift > best_page_shift)
2157            && ((1UL << sps->page_shift) <= rampagesize)) {
2158            best_page_shift = sps->page_shift;
2159        }
2160    }
2161
2162    return MIN(current_size,
2163               1ULL << (best_page_shift + hash_shift - 7));
2164}
2165#endif
2166
2167bool kvmppc_spapr_use_multitce(void)
2168{
2169    return cap_spapr_multitce;
2170}
2171
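/*
 * Create an in-kernel TCE (guest DMA translation) table for the given
 * LIOBN and mmap it so QEMU and KVM share the entries.  Returns the
 * mapped table with its fd in *pfd, or NULL (and *pfd = -1) when the
 * capability is missing or the ioctl/mmap fails, so the caller can fall
 * back to a purely userspace table.
 */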
2172void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2173                              bool need_vfio)
2174{
2175    struct kvm_create_spapr_tce args = {
2176        .liobn = liobn,
2177        .window_size = window_size,
2178    };
2179    long len;
2180    int fd;
2181    void *table;
2182
2183    /* Must set fd to -1 so we don't try to munmap when called for
2184     * destroying the table, which the upper layers -will- do
2185     */
2186    *pfd = -1;
2187    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2188        return NULL;
2189    }
2190
2191    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2192    if (fd < 0) {
2193        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2194                liobn);
2195        return NULL;
2196    }
2197
2198    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2199    /* FIXME: round this up to page size */
2200
2201    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2202    if (table == MAP_FAILED) {
2203        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2204                liobn);
2205        close(fd);
2206        return NULL;
2207    }
2208
2209    *pfd = fd;
2210    return table;
2211}
2212
2213int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2214{
2215    long len;
2216
2217    if (fd < 0) {
2218        return -1;
2219    }
2220
2221    len = nb_table * sizeof(uint64_t);
2222    if ((munmap(table, len) < 0) ||
2223        (close(fd) < 0)) {
2224        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2225                strerror(errno));
2226        /* Leak the table */
2227    }
2228
2229    return 0;
2230}
2231
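/*
 * (Re)allocate the guest hash page table.  Return value:
 *   > 0  - the kernel allocated the HTAB; the value is its size as a shift
 *   == 0 - QEMU must allocate the HTAB itself (TCG, PR KVM, old kernels)
 *   < 0  - error returned by the KVM_PPC_ALLOCATE_HTAB ioctl
 */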
2232int kvmppc_reset_htab(int shift_hint)
2233{
2234    uint32_t shift = shift_hint;
2235
2236    if (!kvm_enabled()) {
2237        /* Full emulation, tell caller to allocate htab itself */
2238        return 0;
2239    }
2240    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2241        int ret;
2242        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2243        if (ret == -ENOTTY) {
2244            /* At least some versions of PR KVM advertise the
2245             * capability, but don't implement the ioctl().  Oops.
2246             * Return 0 so that we allocate the htab in qemu, as is
2247             * correct for PR. */
2248            return 0;
2249        } else if (ret < 0) {
2250            return ret;
2251        }
2252        return shift;
2253    }
2254
2255    /* We have a kernel that predates the htab reset calls.  For PR
2256     * KVM we need to allocate the htab ourselves, while an HV KVM of
2257     * this era has already allocated a fixed 16MB hash table.
2258     * Kernels of this era expose the GET_PVINFO capability only on
2259     * PR, so we use that as a hack to determine the right answer.
2260     */
2261    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2262        /* PR - tell caller to allocate htab */
2263        return 0;
2264    } else {
2265        /* HV - assume 16MB kernel allocated htab */
2266        return 24;
2267    }
2268}
2269
2270static inline uint32_t mfpvr(void)
2271{
2272    uint32_t pvr;
2273
2274    asm ("mfpvr %0"
2275         : "=r"(pvr));
2276    return pvr;
2277}
2278
2279static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2280{
2281    if (on) {
2282        *word |= flags;
2283    } else {
2284        *word &= ~flags;
2285    }
2286}
2287
2288static void kvmppc_host_cpu_initfn(Object *obj)
2289{
2290    assert(kvm_enabled());
2291}
2292
2293static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2294{
2295    DeviceClass *dc = DEVICE_CLASS(oc);
2296    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2297    uint32_t vmx = kvmppc_get_vmx();
2298    uint32_t dfp = kvmppc_get_dfp();
2299    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2300    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2301
2302    /* Now fix up the class with information we can query from the host */
2303    pcc->pvr = mfpvr();
2304
2305    if (vmx != -1) {
2306        /* Only override when we know what the host supports */
2307        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2308        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2309    }
2310    if (dfp != -1) {
2311        /* Only override when we know what the host supports */
2312        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2313    }
2314
2315    if (dcache_size != -1) {
2316        pcc->l1_dcache_size = dcache_size;
2317    }
2318
2319    if (icache_size != -1) {
2320        pcc->l1_icache_size = icache_size;
2321    }
2322
2323    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2324    dc->cannot_destroy_with_object_finalize_yet = true;
2325}
2326
2327bool kvmppc_has_cap_epr(void)
2328{
2329    return cap_epr;
2330}
2331
2332bool kvmppc_has_cap_htab_fd(void)
2333{
2334    return cap_htab_fd;
2335}
2336
2337bool kvmppc_has_cap_fixup_hcalls(void)
2338{
2339    return cap_fixup_hcalls;
2340}
2341
2342static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2343{
2344    ObjectClass *oc = OBJECT_CLASS(pcc);
2345
2346    while (oc && !object_class_is_abstract(oc)) {
2347        oc = object_class_get_parent(oc);
2348    }
2349    assert(oc);
2350
2351    return POWERPC_CPU_CLASS(oc);
2352}
2353
2354PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2355{
2356    uint32_t host_pvr = mfpvr();
2357    PowerPCCPUClass *pvr_pcc;
2358
2359    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2360    if (pvr_pcc == NULL) {
2361        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2362    }
2363
2364    return pvr_pcc;
2365}
2366
2367#if defined(TARGET_PPC64)
2368static void spapr_cpu_core_host_initfn(Object *obj)
2369{
2370    sPAPRCPUCore *core = SPAPR_CPU_CORE(obj);
2371    char *name = g_strdup_printf("%s-" TYPE_POWERPC_CPU, "host");
2372    ObjectClass *oc = object_class_by_name(name);
2373
2374    g_assert(oc);
2375    g_free((void *)name);
2376    core->cpu_class = oc;
2377}
2378#endif
2379
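/*
 * Register the "host" CPU class based on the PVR of the machine we are
 * running on, plus a generic class named after the CPU family (and, on
 * ppc64, the matching sPAPR core types), so the host CPU can be requested
 * by either name.
 */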
2380static int kvm_ppc_register_host_cpu_type(void)
2381{
2382    TypeInfo type_info = {
2383        .name = TYPE_HOST_POWERPC_CPU,
2384        .instance_init = kvmppc_host_cpu_initfn,
2385        .class_init = kvmppc_host_cpu_class_init,
2386    };
2387    PowerPCCPUClass *pvr_pcc;
2388    DeviceClass *dc;
2389
2390    pvr_pcc = kvm_ppc_get_host_cpu_class();
2391    if (pvr_pcc == NULL) {
2392        return -1;
2393    }
2394    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2395    type_register(&type_info);
2396
2397    /* Also register a generic CPU class for the whole CPU family */
2398    pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2399    dc = DEVICE_CLASS(pvr_pcc);
2400    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2401    type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2402    type_register(&type_info);
2403
2404#if defined(TARGET_PPC64)
2405    type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2406    type_info.parent = TYPE_SPAPR_CPU_CORE;
2407    type_info.instance_size = sizeof(sPAPRCPUCore);
2408    type_info.instance_init = spapr_cpu_core_host_initfn;
2409    type_info.class_init = NULL;
2410    type_register(&type_info);
2411    g_free((void *)type_info.name);
2412
2413    /* Register generic spapr CPU family class for current host CPU type */
2414    type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2415    type_register(&type_info);
2416    g_free((void *)type_info.name);
2417#endif
2418
2419    return 0;
2420}
2421
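/*
 * Associate an RTAS token with a named RTAS call so the kernel can handle
 * that call in-kernel (used for calls the kernel implements itself, such
 * as the in-kernel XICS interrupt controller ones).  Returns -ENOENT when
 * the KVM_CAP_PPC_RTAS capability is absent.
 */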
2422int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2423{
2424    struct kvm_rtas_token_args args = {
2425        .token = token,
2426    };
2427
2428    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2429        return -ENOENT;
2430    }
2431
2432    strncpy(args.name, function, sizeof(args.name));
2433
2434    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2435}
2436
2437int kvmppc_get_htab_fd(bool write)
2438{
2439    struct kvm_get_htab_fd s = {
2440        .flags = write ? KVM_GET_HTAB_WRITE : 0,
2441        .start_index = 0,
2442    };
2443
2444    if (!cap_htab_fd) {
2445        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2446        return -1;
2447    }
2448
2449    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2450}
2451
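/*
 * Stream hash table state from the kernel's HTAB fd into the migration
 * stream.  Each read() yields a series of kvm_get_htab_header records,
 * each followed by n_valid HPTEs of HASH_PTE_SIZE_64 bytes; these are
 * forwarded as (index, n_valid, n_invalid, HPTE data) chunks.  Returns 1
 * when the fd is drained, 0 if we stopped because max_ns elapsed, or a
 * negative value on read failure.
 */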
2452int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2453{
2454    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2455    uint8_t buf[bufsize];
2456    ssize_t rc;
2457
2458    do {
2459        rc = read(fd, buf, bufsize);
2460        if (rc < 0) {
2461            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2462                    strerror(errno));
2463            return rc;
2464        } else if (rc) {
2465            uint8_t *buffer = buf;
2466            ssize_t n = rc;
2467            while (n) {
2468                struct kvm_get_htab_header *head =
2469                    (struct kvm_get_htab_header *) buffer;
2470                size_t chunksize = sizeof(*head) +
2471                     HASH_PTE_SIZE_64 * head->n_valid;
2472
2473                qemu_put_be32(f, head->index);
2474                qemu_put_be16(f, head->n_valid);
2475                qemu_put_be16(f, head->n_invalid);
2476                qemu_put_buffer(f, (void *)(head + 1),
2477                                HASH_PTE_SIZE_64 * head->n_valid);
2478
2479                buffer += chunksize;
2480                n -= chunksize;
2481            }
2482        }
2483    } while ((rc != 0)
2484             && ((max_ns < 0)
2485                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2486
2487    return (rc == 0) ? 1 : 0;
2488}
2489
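/*
 * Counterpart of kvmppc_save_htab(): rebuild one kvm_get_htab_header
 * chunk from the migration stream and write it to the HTAB fd so the
 * kernel installs the n_valid HPTEs (and invalidates n_invalid ones)
 * starting at 'index'.
 */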
2490int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2491                           uint16_t n_valid, uint16_t n_invalid)
2492{
2493    struct kvm_get_htab_header *buf;
2494    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2495    ssize_t rc;
2496
2497    buf = alloca(chunksize);
2498    buf->index = index;
2499    buf->n_valid = n_valid;
2500    buf->n_invalid = n_invalid;
2501
2502    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2503
2504    rc = write(fd, buf, chunksize);
2505    if (rc < 0) {
2506        fprintf(stderr, "Error writing KVM hash table: %s\n",
2507                strerror(errno));
2508        return rc;
2509    }
2510    if (rc != chunksize) {
2511        /* We should never get a short write on a single chunk */
2512        fprintf(stderr, "Short write, restoring KVM hash table\n");
2513        return -1;
2514    }
2515    return 0;
2516}
2517
2518bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2519{
2520    return true;
2521}
2522
2523int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2524{
2525    return 1;
2526}
2527
2528int kvm_arch_on_sigbus(int code, void *addr)
2529{
2530    return 1;
2531}
2532
2533void kvm_arch_init_irq_routing(KVMState *s)
2534{
2535}
2536
2537struct kvm_get_htab_buf {
2538    struct kvm_get_htab_header header;
2539    /*
2540     * We require one extra entry of slack for the read
2541     */
2542    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2543};
2544
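/*
 * Fetch one HPTE group from the kernel through a temporary HTAB fd opened
 * at pte_index.  The returned "token" is really a pointer to the hpte[]
 * array of a heap-allocated kvm_get_htab_buf, so it must be released with
 * kvmppc_hash64_free_pteg() once the caller is done with it.
 */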
2545uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2546{
2547    int htab_fd;
2548    struct kvm_get_htab_fd ghf;
2549    struct kvm_get_htab_buf  *hpte_buf;
2550
2551    ghf.flags = 0;
2552    ghf.start_index = pte_index;
2553    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2554    if (htab_fd < 0) {
2555        goto error_out;
2556    }
2557
2558    hpte_buf = g_malloc0(sizeof(*hpte_buf));
2559    /*
2560     * Read the hpte group
2561     */
2562    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2563        goto out_close;
2564    }
2565
2566    close(htab_fd);
2567    return (uint64_t)(uintptr_t) hpte_buf->hpte;
2568
2569out_close:
2570    g_free(hpte_buf);
2571    close(htab_fd);
2572error_out:
2573    return 0;
2574}
2575
2576void kvmppc_hash64_free_pteg(uint64_t token)
2577{
2578    struct kvm_get_htab_buf *htab_buf;
2579
2580    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2581                            hpte);
2582    g_free(htab_buf);
2583    return;
2584}
2585
2586void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2587                             target_ulong pte0, target_ulong pte1)
2588{
2589    int htab_fd;
2590    struct kvm_get_htab_fd ghf;
2591    struct kvm_get_htab_buf hpte_buf;
2592
2593    ghf.flags = 0;
2594    ghf.start_index = 0;     /* Ignored */
2595    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2596    if (htab_fd < 0) {
2597        goto error_out;
2598    }
2599
2600    hpte_buf.header.n_valid = 1;
2601    hpte_buf.header.n_invalid = 0;
2602    hpte_buf.header.index = pte_index;
2603    hpte_buf.hpte[0] = pte0;
2604    hpte_buf.hpte[1] = pte1;
2605    /*
2606     * Write the hpte entry.
2607     * CAUTION: write() has the warn_unused_result attribute, so we
2608     * must check the return value even though we do nothing with it.
2609     */
2610    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2611        goto out_close;
2612    }
2613
2614out_close:
2615    close(htab_fd);
2616    return;
2617
2618error_out:
2619    return;
2620}
2621
2622int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2623                             uint64_t address, uint32_t data, PCIDevice *dev)
2624{
2625    return 0;
2626}
2627
2628int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2629                                int vector, PCIDevice *dev)
2630{
2631    return 0;
2632}
2633
2634int kvm_arch_release_virq_post(int virq)
2635{
2636    return 0;
2637}
2638
2639int kvm_arch_msi_data_to_gsi(uint32_t data)
2640{
2641    return data & 0xffff;
2642}
2643
2644int kvmppc_enable_hwrng(void)
2645{
2646    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2647        return -1;
2648    }
2649
2650    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2651}
2652