qemu/target/ppc/kvm.c
   1/*
   2 * PowerPC implementation of KVM hooks
   3 *
   4 * Copyright IBM Corp. 2007
   5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6 *
   7 * Authors:
   8 *  Jerone Young <jyoung5@us.ibm.com>
   9 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10 *  Hollis Blanchard <hollisb@us.ibm.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13 * See the COPYING file in the top-level directory.
  14 *
  15 */
  16
  17#include "qemu/osdep.h"
  18#include <dirent.h>
  19#include <sys/ioctl.h>
  20#include <sys/vfs.h>
  21
  22#include <linux/kvm.h>
  23
  24#include "qemu-common.h"
  25#include "qapi/error.h"
  26#include "qemu/error-report.h"
  27#include "cpu.h"
  28#include "cpu-models.h"
  29#include "qemu/timer.h"
  30#include "sysemu/sysemu.h"
  31#include "sysemu/hw_accel.h"
  32#include "kvm_ppc.h"
  33#include "sysemu/cpus.h"
  34#include "sysemu/device_tree.h"
  35#include "mmu-hash64.h"
  36
  37#include "hw/sysbus.h"
  38#include "hw/ppc/spapr.h"
  39#include "hw/ppc/spapr_cpu_core.h"
  40#include "hw/ppc/ppc.h"
  41#include "sysemu/watchdog.h"
  42#include "trace.h"
  43#include "exec/gdbstub.h"
  44#include "exec/memattrs.h"
  45#include "exec/ram_addr.h"
  46#include "sysemu/hostmem.h"
  47#include "qemu/cutils.h"
  48#include "qemu/mmap-alloc.h"
  49#include "elf.h"
  50#include "sysemu/kvm_int.h"
  51
  52//#define DEBUG_KVM
  53
  54#ifdef DEBUG_KVM
  55#define DPRINTF(fmt, ...) \
  56    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  57#else
  58#define DPRINTF(fmt, ...) \
  59    do { } while (0)
  60#endif
  61
  62#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  63
  64const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  65    KVM_CAP_LAST_INFO
  66};
  67
  68static int cap_interrupt_unset = false;
  69static int cap_interrupt_level = false;
  70static int cap_segstate;
  71static int cap_booke_sregs;
  72static int cap_ppc_smt;
  73static int cap_ppc_smt_possible;
  74static int cap_spapr_tce;
  75static int cap_spapr_tce_64;
  76static int cap_spapr_multitce;
  77static int cap_spapr_vfio;
  78static int cap_hior;
  79static int cap_one_reg;
  80static int cap_epr;
  81static int cap_ppc_watchdog;
  82static int cap_papr;
  83static int cap_htab_fd;
  84static int cap_fixup_hcalls;
  85static int cap_htm;             /* Hardware transactional memory support */
  86static int cap_mmu_radix;
  87static int cap_mmu_hash_v3;
  88static int cap_resize_hpt;
  89static int cap_ppc_pvr_compat;
  90static int cap_ppc_safe_cache;
  91static int cap_ppc_safe_bounds_check;
  92static int cap_ppc_safe_indirect_branch;
  93static int cap_ppc_count_cache_flush_assist;
  94static int cap_ppc_nested_kvm_hv;
  95static int cap_large_decr;
  96
  97static uint32_t debug_inst_opcode;
  98
  99/* XXX We have a race condition where we actually have a level triggered
 100 *     interrupt, but the infrastructure can't expose that yet, so the guest
 101 *     takes but ignores it, goes to sleep and never gets notified that there's
 102 *     still an interrupt pending.
 103 *
 104 *     As a quick workaround, let's just wake up again 20 ms after we injected
  105 *     an interrupt. That way we can ensure that we're always reinjecting
 106 *     interrupts in case the guest swallowed them.
 107 */
 108static QEMUTimer *idle_timer;
 109
 110static void kvm_kick_cpu(void *opaque)
 111{
 112    PowerPCCPU *cpu = opaque;
 113
 114    qemu_cpu_kick(CPU(cpu));
 115}
 116
 117/* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 118 * should only be used for fallback tests - generally we should use
 119 * explicit capabilities for the features we want, rather than
 120 * assuming what is/isn't available depending on the KVM variant. */
 121static bool kvmppc_is_pr(KVMState *ks)
 122{
 123    /* Assume KVM-PR if the GET_PVINFO capability is available */
 124    return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 125}
 126
 127static int kvm_ppc_register_host_cpu_type(MachineState *ms);
 128static void kvmppc_get_cpu_characteristics(KVMState *s);
 129static int kvmppc_get_dec_bits(void);
 130
 131int kvm_arch_init(MachineState *ms, KVMState *s)
 132{
 133    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 134    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 135    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 136    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 137    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
 138    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 139    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 140    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 141    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
 142    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 143    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 144    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 145    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
  146    /* Note: we don't set cap_papr here, because this capability is
  147     * only activated later by kvmppc_set_papr() */
 148    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 149    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 150    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
 151    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 152    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 153    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 154    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
 155    kvmppc_get_cpu_characteristics(s);
 156    cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
 157    cap_large_decr = kvmppc_get_dec_bits();
  158    /*
  159     * Note: set to false because there is no such capability
  160     * in KVM at this moment.
  161     *
  162     * TODO: call kvm_vm_check_extension() with the right capability
  163     * after the kernel starts implementing it. */
 164    cap_ppc_pvr_compat = false;
 165
 166    if (!cap_interrupt_level) {
 167        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 168                        "VM to stall at times!\n");
 169    }
 170
 171    kvm_ppc_register_host_cpu_type(ms);
 172
 173    return 0;
 174}
 175
 176int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 177{
 178    return 0;
 179}
 180
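/*
 * Editor's note (derived from the code below): push the guest's PVR to KVM
 * via the sregs interface so the vCPU reports the intended CPU model. On
 * BookE this is skipped and the host PVR is kept (see the in-function
 * comment); on Book3S it relies on the KVM_CAP_PPC_SEGSTATE capability
 * probed in kvm_arch_init().
 */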
 181static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 182{
 183    CPUPPCState *cenv = &cpu->env;
 184    CPUState *cs = CPU(cpu);
 185    struct kvm_sregs sregs;
 186    int ret;
 187
 188    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
  189        /* What we're really trying to say is "if we're on BookE, we use
  190           the native PVR for now". This is the only sane way to check
  191           it though, so we may mislead users into thinking they can run
  192           BookE guests on BookS. Let's hope nobody dares enough :) */
 193        return 0;
 194    } else {
 195        if (!cap_segstate) {
 196            fprintf(stderr, "kvm error: missing PVR setting capability\n");
 197            return -ENOSYS;
 198        }
 199    }
 200
 201    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 202    if (ret) {
 203        return ret;
 204    }
 205
 206    sregs.pvr = cenv->spr[SPR_PVR];
 207    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 208}
 209
 210/* Set up a shared TLB array with KVM */
 211static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 212{
 213    CPUPPCState *env = &cpu->env;
 214    CPUState *cs = CPU(cpu);
 215    struct kvm_book3e_206_tlb_params params = {};
 216    struct kvm_config_tlb cfg = {};
 217    unsigned int entries = 0;
 218    int ret, i;
 219
 220    if (!kvm_enabled() ||
 221        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 222        return 0;
 223    }
 224
 225    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 226
 227    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 228        params.tlb_sizes[i] = booke206_tlb_size(env, i);
 229        params.tlb_ways[i] = booke206_tlb_ways(env, i);
 230        entries += params.tlb_sizes[i];
 231    }
 232
 233    assert(entries == env->nb_tlb);
 234    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 235
 236    env->tlb_dirty = true;
 237
 238    cfg.array = (uintptr_t)env->tlb.tlbm;
 239    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 240    cfg.params = (uintptr_t)&params;
 241    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 242
 243    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 244    if (ret < 0) {
 245        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 246                __func__, strerror(-ret));
 247        return ret;
 248    }
 249
 250    env->kvm_sw_tlb = true;
 251    return 0;
 252}
 253
 254
 255#if defined(TARGET_PPC64)
 256static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
 257{
 258    int ret;
 259
 260    assert(kvm_state != NULL);
 261
 262    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 263        error_setg(errp, "KVM doesn't expose the MMU features it supports");
 264        error_append_hint(errp, "Consider switching to a newer KVM\n");
 265        return;
 266    }
 267
 268    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 269    if (ret == 0) {
 270        return;
 271    }
 272
 273    error_setg_errno(errp, -ret,
 274                     "KVM failed to provide the MMU features it supports");
 275}
 276
 277struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 278{
 279    KVMState *s = KVM_STATE(current_machine->accelerator);
 280    struct ppc_radix_page_info *radix_page_info;
 281    struct kvm_ppc_rmmu_info rmmu_info;
 282    int i;
 283
 284    if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 285        return NULL;
 286    }
 287    if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 288        return NULL;
 289    }
 290    radix_page_info = g_malloc0(sizeof(*radix_page_info));
 291    radix_page_info->count = 0;
 292    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 293        if (rmmu_info.ap_encodings[i]) {
 294            radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 295            radix_page_info->count++;
 296        }
 297    }
 298    return radix_page_info;
 299}
 300
 301target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 302                                     bool radix, bool gtse,
 303                                     uint64_t proc_tbl)
 304{
 305    CPUState *cs = CPU(cpu);
 306    int ret;
 307    uint64_t flags = 0;
 308    struct kvm_ppc_mmuv3_cfg cfg = {
 309        .process_table = proc_tbl,
 310    };
 311
 312    if (radix) {
 313        flags |= KVM_PPC_MMUV3_RADIX;
 314    }
 315    if (gtse) {
 316        flags |= KVM_PPC_MMUV3_GTSE;
 317    }
 318    cfg.flags = flags;
 319    ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
 320    switch (ret) {
 321    case 0:
 322        return H_SUCCESS;
 323    case -EINVAL:
 324        return H_PARAMETER;
 325    case -ENODEV:
 326        return H_NOT_AVAILABLE;
 327    default:
 328        return H_HARDWARE;
 329    }
 330}
 331
 332bool kvmppc_hpt_needs_host_contiguous_pages(void)
 333{
 334    static struct kvm_ppc_smmu_info smmu_info;
 335
 336    if (!kvm_enabled()) {
 337        return false;
 338    }
 339
 340    kvm_get_smmu_info(&smmu_info, &error_fatal);
 341    return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
 342}
 343
 344void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
 345{
 346    struct kvm_ppc_smmu_info smmu_info;
 347    int iq, ik, jq, jk;
 348    Error *local_err = NULL;
 349
 350    /* For now, we only have anything to check on hash64 MMUs */
 351    if (!cpu->hash64_opts || !kvm_enabled()) {
 352        return;
 353    }
 354
 355    kvm_get_smmu_info(&smmu_info, &local_err);
 356    if (local_err) {
 357        error_propagate(errp, local_err);
 358        return;
 359    }
 360
 361    if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
 362        && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 363        error_setg(errp,
 364                   "KVM does not support 1TiB segments which guest expects");
 365        return;
 366    }
 367
 368    if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
 369        error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
 370                   smmu_info.slb_size, cpu->hash64_opts->slb_size);
 371        return;
 372    }
 373
 374    /*
 375     * Verify that every pagesize supported by the cpu model is
 376     * supported by KVM with the same encodings
 377     */
 378    for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
 379        PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
 380        struct kvm_ppc_one_seg_page_size *ksps;
 381
 382        for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
 383            if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
 384                break;
 385            }
 386        }
 387        if (ik >= ARRAY_SIZE(smmu_info.sps)) {
  388            error_setg(errp, "KVM doesn't support base page shift %u",
 389                       qsps->page_shift);
 390            return;
 391        }
 392
 393        ksps = &smmu_info.sps[ik];
 394        if (ksps->slb_enc != qsps->slb_enc) {
 395            error_setg(errp,
 396"KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
 397                       ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
 398            return;
 399        }
 400
 401        for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
 402            for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
 403                if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
 404                    break;
 405                }
 406            }
 407
 408            if (jk >= ARRAY_SIZE(ksps->enc)) {
 409                error_setg(errp, "KVM doesn't support page shift %u/%u",
 410                           qsps->enc[jq].page_shift, qsps->page_shift);
 411                return;
 412            }
 413            if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
 414                error_setg(errp,
 415"KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
 416                           ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
 417                           qsps->page_shift, qsps->enc[jq].pte_enc);
 418                return;
 419            }
 420        }
 421    }
 422
 423    if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
  424        /* The guest page sizes we can use are mostly determined by the
  425         * host pages used to map guest RAM, which is handled in the
  426         * platform code. Cache-Inhibited largepages (64k), however, are
  427         * used for I/O, so if they're mapped to the host at all it
  428         * will be a normal mapping, not a special hugepage one used
  429         * for RAM. */
 430        if (getpagesize() < 0x10000) {
 431            error_setg(errp,
 432                       "KVM can't supply 64kiB CI pages, which guest expects");
 433        }
 434    }
 435}
  436#endif /* defined(TARGET_PPC64) */
 437
 438unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 439{
 440    return POWERPC_CPU(cpu)->vcpu_id;
 441}
 442
  443/* e500 supports 2 h/w breakpoints and 2 watchpoints.
  444 * book3s supports only 1 watchpoint, so an array size
  445 * of 4 is sufficient for now.
  446 */
 447#define MAX_HW_BKPTS 4
 448
 449static struct HWBreakpoint {
 450    target_ulong addr;
 451    int type;
 452} hw_debug_points[MAX_HW_BKPTS];
 453
 454static CPUWatchpoint hw_watchpoint;
 455
  456/* By default no breakpoints or watchpoints are supported */
 457static int max_hw_breakpoint;
 458static int max_hw_watchpoint;
 459static int nb_hw_breakpoint;
 460static int nb_hw_watchpoint;
 461
 462static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 463{
 464    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 465        max_hw_breakpoint = 2;
 466        max_hw_watchpoint = 2;
 467    }
 468
 469    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 470        fprintf(stderr, "Error initializing h/w breakpoints\n");
 471        return;
 472    }
 473}
 474
 475int kvm_arch_init_vcpu(CPUState *cs)
 476{
 477    PowerPCCPU *cpu = POWERPC_CPU(cs);
 478    CPUPPCState *cenv = &cpu->env;
 479    int ret;
 480
 481    /* Synchronize sregs with kvm */
 482    ret = kvm_arch_sync_sregs(cpu);
 483    if (ret) {
 484        if (ret == -EINVAL) {
 485            error_report("Register sync failed... If you're using kvm-hv.ko,"
 486                         " only \"-cpu host\" is possible");
 487        }
 488        return ret;
 489    }
 490
 491    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 492
 493    switch (cenv->mmu_model) {
 494    case POWERPC_MMU_BOOKE206:
 495        /* This target supports access to KVM's guest TLB */
 496        ret = kvm_booke206_tlb_init(cpu);
 497        break;
 498    case POWERPC_MMU_2_07:
 499        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
  500            /* KVM-HV has transactional memory on POWER8 even without the
  501             * KVM_CAP_PPC_HTM extension, so enable it here instead as
  502             * long as it's available to userspace on the host. */
 503            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 504                cap_htm = true;
 505            }
 506        }
 507        break;
 508    default:
 509        break;
 510    }
 511
 512    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 513    kvmppc_hw_debug_points_init(cenv);
 514
 515    return ret;
 516}
 517
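/*
 * Editor's note (derived from the code below): flush QEMU's software TLB
 * back to KVM. Every entry is marked dirty (the bitmap is filled with 0xFF)
 * so KVM re-reads the whole shared array via the KVM_DIRTY_TLB ioctl.
 */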
 518static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 519{
 520    CPUPPCState *env = &cpu->env;
 521    CPUState *cs = CPU(cpu);
 522    struct kvm_dirty_tlb dirty_tlb;
 523    unsigned char *bitmap;
 524    int ret;
 525
 526    if (!env->kvm_sw_tlb) {
 527        return;
 528    }
 529
 530    bitmap = g_malloc((env->nb_tlb + 7) / 8);
 531    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 532
 533    dirty_tlb.bitmap = (uintptr_t)bitmap;
 534    dirty_tlb.num_dirty = env->nb_tlb;
 535
 536    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 537    if (ret) {
 538        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 539                __func__, strerror(-ret));
 540    }
 541
 542    g_free(bitmap);
 543}
 544
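/*
 * Editor's note: kvm_get_one_spr()/kvm_put_one_spr() move a single SPR
 * between KVM and env->spr[] through the ONE_REG interface; the register
 * width (32 or 64 bit) is encoded in the id via KVM_REG_SIZE_MASK.
 * Typical usage, as done for HIOR later in this file:
 *
 *     kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 */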
 545static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 546{
 547    PowerPCCPU *cpu = POWERPC_CPU(cs);
 548    CPUPPCState *env = &cpu->env;
 549    union {
 550        uint32_t u32;
 551        uint64_t u64;
 552    } val;
 553    struct kvm_one_reg reg = {
 554        .id = id,
 555        .addr = (uintptr_t) &val,
 556    };
 557    int ret;
 558
 559    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 560    if (ret != 0) {
 561        trace_kvm_failed_spr_get(spr, strerror(errno));
 562    } else {
 563        switch (id & KVM_REG_SIZE_MASK) {
 564        case KVM_REG_SIZE_U32:
 565            env->spr[spr] = val.u32;
 566            break;
 567
 568        case KVM_REG_SIZE_U64:
 569            env->spr[spr] = val.u64;
 570            break;
 571
 572        default:
 573            /* Don't handle this size yet */
 574            abort();
 575        }
 576    }
 577}
 578
 579static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 580{
 581    PowerPCCPU *cpu = POWERPC_CPU(cs);
 582    CPUPPCState *env = &cpu->env;
 583    union {
 584        uint32_t u32;
 585        uint64_t u64;
 586    } val;
 587    struct kvm_one_reg reg = {
 588        .id = id,
 589        .addr = (uintptr_t) &val,
 590    };
 591    int ret;
 592
 593    switch (id & KVM_REG_SIZE_MASK) {
 594    case KVM_REG_SIZE_U32:
 595        val.u32 = env->spr[spr];
 596        break;
 597
 598    case KVM_REG_SIZE_U64:
 599        val.u64 = env->spr[spr];
 600        break;
 601
 602    default:
 603        /* Don't handle this size yet */
 604        abort();
 605    }
 606
 607    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 608    if (ret != 0) {
 609        trace_kvm_failed_spr_set(spr, strerror(errno));
 610    }
 611}
 612
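/*
 * Editor's note (derived from the code below): transfer FP/VSX/Altivec
 * state to KVM. QEMU keeps the low doubleword of each VSR separately
 * (cpu_vsrl_ptr()), so the two halves are packed into vsr[0]/vsr[1] in
 * host-endian order before being handed to KVM as one 128-bit VSR.
 */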
 613static int kvm_put_fp(CPUState *cs)
 614{
 615    PowerPCCPU *cpu = POWERPC_CPU(cs);
 616    CPUPPCState *env = &cpu->env;
 617    struct kvm_one_reg reg;
 618    int i;
 619    int ret;
 620
 621    if (env->insns_flags & PPC_FLOAT) {
 622        uint64_t fpscr = env->fpscr;
 623        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 624
 625        reg.id = KVM_REG_PPC_FPSCR;
 626        reg.addr = (uintptr_t)&fpscr;
 627        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 628        if (ret < 0) {
 629            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 630            return ret;
 631        }
 632
 633        for (i = 0; i < 32; i++) {
 634            uint64_t vsr[2];
 635            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
 636            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
 637
 638#ifdef HOST_WORDS_BIGENDIAN
 639            vsr[0] = float64_val(*fpr);
 640            vsr[1] = *vsrl;
 641#else
 642            vsr[0] = *vsrl;
 643            vsr[1] = float64_val(*fpr);
 644#endif
 645            reg.addr = (uintptr_t) &vsr;
 646            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 647
 648            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 649            if (ret < 0) {
 650                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 651                        i, strerror(errno));
 652                return ret;
 653            }
 654        }
 655    }
 656
 657    if (env->insns_flags & PPC_ALTIVEC) {
 658        reg.id = KVM_REG_PPC_VSCR;
 659        reg.addr = (uintptr_t)&env->vscr;
 660        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 661        if (ret < 0) {
 662            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 663            return ret;
 664        }
 665
 666        for (i = 0; i < 32; i++) {
 667            reg.id = KVM_REG_PPC_VR(i);
 668            reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
 669            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 670            if (ret < 0) {
 671                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 672                return ret;
 673            }
 674        }
 675    }
 676
 677    return 0;
 678}
 679
 680static int kvm_get_fp(CPUState *cs)
 681{
 682    PowerPCCPU *cpu = POWERPC_CPU(cs);
 683    CPUPPCState *env = &cpu->env;
 684    struct kvm_one_reg reg;
 685    int i;
 686    int ret;
 687
 688    if (env->insns_flags & PPC_FLOAT) {
 689        uint64_t fpscr;
 690        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 691
 692        reg.id = KVM_REG_PPC_FPSCR;
 693        reg.addr = (uintptr_t)&fpscr;
 694        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 695        if (ret < 0) {
 696            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 697            return ret;
 698        } else {
 699            env->fpscr = fpscr;
 700        }
 701
 702        for (i = 0; i < 32; i++) {
 703            uint64_t vsr[2];
 704            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
 705            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
 706
 707            reg.addr = (uintptr_t) &vsr;
 708            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 709
 710            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 711            if (ret < 0) {
 712                DPRINTF("Unable to get %s%d from KVM: %s\n",
 713                        vsx ? "VSR" : "FPR", i, strerror(errno));
 714                return ret;
 715            } else {
 716#ifdef HOST_WORDS_BIGENDIAN
 717                *fpr = vsr[0];
 718                if (vsx) {
 719                    *vsrl = vsr[1];
 720                }
 721#else
 722                *fpr = vsr[1];
 723                if (vsx) {
 724                    *vsrl = vsr[0];
 725                }
 726#endif
 727            }
 728        }
 729    }
 730
 731    if (env->insns_flags & PPC_ALTIVEC) {
 732        reg.id = KVM_REG_PPC_VSCR;
 733        reg.addr = (uintptr_t)&env->vscr;
 734        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 735        if (ret < 0) {
 736            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 737            return ret;
 738        }
 739
 740        for (i = 0; i < 32; i++) {
 741            reg.id = KVM_REG_PPC_VR(i);
 742            reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
 743            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 744            if (ret < 0) {
 745                DPRINTF("Unable to get VR%d from KVM: %s\n",
 746                        i, strerror(errno));
 747                return ret;
 748            }
 749        }
 750    }
 751
 752    return 0;
 753}
 754
 755#if defined(TARGET_PPC64)
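/*
 * Editor's note: the VPA (Virtual Processor Area), SLB shadow buffer and
 * dispatch trace log are PAPR para-virtualised structures registered by the
 * guest. KVM transfers each address and size pair together through one
 * register, which is why the asserts below require the size field to sit
 * exactly 8 bytes after the corresponding address field.
 */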
 756static int kvm_get_vpa(CPUState *cs)
 757{
 758    PowerPCCPU *cpu = POWERPC_CPU(cs);
 759    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
 760    struct kvm_one_reg reg;
 761    int ret;
 762
 763    reg.id = KVM_REG_PPC_VPA_ADDR;
 764    reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 765    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 766    if (ret < 0) {
 767        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 768        return ret;
 769    }
 770
 771    assert((uintptr_t)&spapr_cpu->slb_shadow_size
 772           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 773    reg.id = KVM_REG_PPC_VPA_SLB;
 774    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 775    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 776    if (ret < 0) {
 777        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 778                strerror(errno));
 779        return ret;
 780    }
 781
 782    assert((uintptr_t)&spapr_cpu->dtl_size
 783           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 784    reg.id = KVM_REG_PPC_VPA_DTL;
 785    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 786    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 787    if (ret < 0) {
 788        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 789                strerror(errno));
 790        return ret;
 791    }
 792
 793    return 0;
 794}
 795
 796static int kvm_put_vpa(CPUState *cs)
 797{
 798    PowerPCCPU *cpu = POWERPC_CPU(cs);
 799    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
 800    struct kvm_one_reg reg;
 801    int ret;
 802
 803    /* SLB shadow or DTL can't be registered unless a master VPA is
 804     * registered.  That means when restoring state, if a VPA *is*
 805     * registered, we need to set that up first.  If not, we need to
 806     * deregister the others before deregistering the master VPA */
 807    assert(spapr_cpu->vpa_addr
 808           || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
 809
 810    if (spapr_cpu->vpa_addr) {
 811        reg.id = KVM_REG_PPC_VPA_ADDR;
 812        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 813        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 814        if (ret < 0) {
 815            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 816            return ret;
 817        }
 818    }
 819
 820    assert((uintptr_t)&spapr_cpu->slb_shadow_size
 821           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 822    reg.id = KVM_REG_PPC_VPA_SLB;
 823    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 824    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 825    if (ret < 0) {
 826        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 827        return ret;
 828    }
 829
 830    assert((uintptr_t)&spapr_cpu->dtl_size
 831           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 832    reg.id = KVM_REG_PPC_VPA_DTL;
 833    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 834    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 835    if (ret < 0) {
 836        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 837                strerror(errno));
 838        return ret;
 839    }
 840
 841    if (!spapr_cpu->vpa_addr) {
 842        reg.id = KVM_REG_PPC_VPA_ADDR;
 843        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 844        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 845        if (ret < 0) {
 846            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 847            return ret;
 848        }
 849    }
 850
 851    return 0;
 852}
 853#endif /* TARGET_PPC64 */
 854
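/*
 * Editor's note (derived from the code below): push the Book3S MMU state
 * (SDR1 or the vhyp-encoded HPT, SLB entries, segment registers and BATs)
 * to KVM via KVM_SET_SREGS.
 */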
 855int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 856{
 857    CPUPPCState *env = &cpu->env;
 858    struct kvm_sregs sregs;
 859    int i;
 860
 861    sregs.pvr = env->spr[SPR_PVR];
 862
 863    if (cpu->vhyp) {
 864        PPCVirtualHypervisorClass *vhc =
 865            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
 866        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
 867    } else {
 868        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 869    }
 870
 871    /* Sync SLB */
 872#ifdef TARGET_PPC64
 873    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 874        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 875        if (env->slb[i].esid & SLB_ESID_V) {
 876            sregs.u.s.ppc64.slb[i].slbe |= i;
 877        }
 878        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 879    }
 880#endif
 881
 882    /* Sync SRs */
 883    for (i = 0; i < 16; i++) {
 884        sregs.u.s.ppc32.sr[i] = env->sr[i];
 885    }
 886
 887    /* Sync BATs */
 888    for (i = 0; i < 8; i++) {
  889        /* Beware. We have to swap the upper and lower 32-bit halves here */
 890        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 891            | env->DBAT[1][i];
 892        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 893            | env->IBAT[1][i];
 894    }
 895
 896    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 897}
 898
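/*
 * Editor's note (derived from the code below): write the register state back
 * to KVM. The more expensive Book3S sregs and HIOR sync is only performed
 * when level >= KVM_PUT_RESET_STATE, as the checks below show.
 */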
 899int kvm_arch_put_registers(CPUState *cs, int level)
 900{
 901    PowerPCCPU *cpu = POWERPC_CPU(cs);
 902    CPUPPCState *env = &cpu->env;
 903    struct kvm_regs regs;
 904    int ret;
 905    int i;
 906
 907    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 908    if (ret < 0) {
 909        return ret;
 910    }
 911
 912    regs.ctr = env->ctr;
 913    regs.lr  = env->lr;
 914    regs.xer = cpu_read_xer(env);
 915    regs.msr = env->msr;
 916    regs.pc = env->nip;
 917
 918    regs.srr0 = env->spr[SPR_SRR0];
 919    regs.srr1 = env->spr[SPR_SRR1];
 920
 921    regs.sprg0 = env->spr[SPR_SPRG0];
 922    regs.sprg1 = env->spr[SPR_SPRG1];
 923    regs.sprg2 = env->spr[SPR_SPRG2];
 924    regs.sprg3 = env->spr[SPR_SPRG3];
 925    regs.sprg4 = env->spr[SPR_SPRG4];
 926    regs.sprg5 = env->spr[SPR_SPRG5];
 927    regs.sprg6 = env->spr[SPR_SPRG6];
 928    regs.sprg7 = env->spr[SPR_SPRG7];
 929
 930    regs.pid = env->spr[SPR_BOOKE_PID];
 931
  932    for (i = 0; i < 32; i++)
 933        regs.gpr[i] = env->gpr[i];
 934
 935    regs.cr = 0;
 936    for (i = 0; i < 8; i++) {
 937        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 938    }
 939
 940    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 941    if (ret < 0)
 942        return ret;
 943
 944    kvm_put_fp(cs);
 945
 946    if (env->tlb_dirty) {
 947        kvm_sw_tlb_put(cpu);
 948        env->tlb_dirty = false;
 949    }
 950
 951    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 952        ret = kvmppc_put_books_sregs(cpu);
 953        if (ret < 0) {
 954            return ret;
 955        }
 956    }
 957
 958    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 959        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 960    }
 961
 962    if (cap_one_reg) {
 963        int i;
 964
  965        /* We deliberately ignore errors here: for kernels which have
  966         * the ONE_REG calls but don't support the specific
  967         * registers, there's a reasonable chance things will still
  968         * work, at least until we try to migrate. */
 969        for (i = 0; i < 1024; i++) {
 970            uint64_t id = env->spr_cb[i].one_reg_id;
 971
 972            if (id != 0) {
 973                kvm_put_one_spr(cs, id, i);
 974            }
 975        }
 976
 977#ifdef TARGET_PPC64
 978        if (msr_ts) {
 979            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 980                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 981            }
 982            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 983                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 984            }
 985            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 986            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
 987            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
 988            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
 989            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
 990            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
 991            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
 992            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
 993            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
 994            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
 995        }
 996
 997        if (cap_papr) {
 998            if (kvm_put_vpa(cs) < 0) {
 999                DPRINTF("Warning: Unable to set VPA information to KVM\n");
1000            }
1001        }
1002
1003        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1004#endif /* TARGET_PPC64 */
1005    }
1006
1007    return ret;
1008}
1009
1010static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1011{
1012     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1013}
1014
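/*
 * Editor's note (derived from the code below): pull the BookE sregs from KVM
 * into env->spr[], updating QEMU's exception vectors from the IVORs via
 * kvm_sync_excp(). Each block is guarded by the feature bits KVM reports in
 * sregs.u.e.features.
 */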
1015static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1016{
1017    CPUPPCState *env = &cpu->env;
1018    struct kvm_sregs sregs;
1019    int ret;
1020
1021    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1022    if (ret < 0) {
1023        return ret;
1024    }
1025
1026    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1027        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1028        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1029        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1030        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1031        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1032        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1033        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1034        env->spr[SPR_DECR] = sregs.u.e.dec;
1035        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1036        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1037        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1038    }
1039
1040    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1041        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1042        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1043        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1044        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1045        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1046    }
1047
1048    if (sregs.u.e.features & KVM_SREGS_E_64) {
1049        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1050    }
1051
1052    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1053        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1054    }
1055
1056    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1057        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1058        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1059        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1060        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1061        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1062        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1063        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1064        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1065        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1066        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1067        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1068        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1069        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1070        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1071        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1072        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1073        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1074        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1075        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1076        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1077        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1078        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1079        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1080        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1081        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1082        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1083        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1084        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1085        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1086        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1087        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1088        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1089
1090        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1091            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1092            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1093            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1094            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1095            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1096            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1097        }
1098
1099        if (sregs.u.e.features & KVM_SREGS_E_PM) {
1100            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1101            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1102        }
1103
1104        if (sregs.u.e.features & KVM_SREGS_E_PC) {
1105            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1106            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1107            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1108            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1109        }
1110    }
1111
1112    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1113        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1114        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1115        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1116        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1117        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1118        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1119        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1120        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1121        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1122        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1123    }
1124
1125    if (sregs.u.e.features & KVM_SREGS_EXP) {
1126        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1127    }
1128
1129    if (sregs.u.e.features & KVM_SREGS_E_PD) {
1130        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1131        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1132    }
1133
1134    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1135        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1136        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1137        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1138
1139        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1140            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1141            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1142        }
1143    }
1144
1145    return 0;
1146}
1147
1148static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1149{
1150    CPUPPCState *env = &cpu->env;
1151    struct kvm_sregs sregs;
1152    int ret;
1153    int i;
1154
1155    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1156    if (ret < 0) {
1157        return ret;
1158    }
1159
1160    if (!cpu->vhyp) {
1161        ppc_store_sdr1(env, sregs.u.s.sdr1);
1162    }
1163
1164    /* Sync SLB */
1165#ifdef TARGET_PPC64
1166    /*
1167     * The packed SLB array we get from KVM_GET_SREGS only contains
1168     * information about valid entries. So we flush our internal copy
1169     * to get rid of stale ones, then put all valid SLB entries back
1170     * in.
1171     */
1172    memset(env->slb, 0, sizeof(env->slb));
1173    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1174        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1175        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1176        /*
1177         * Only restore valid entries
1178         */
1179        if (rb & SLB_ESID_V) {
1180            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1181        }
1182    }
1183#endif
1184
1185    /* Sync SRs */
1186    for (i = 0; i < 16; i++) {
1187        env->sr[i] = sregs.u.s.ppc32.sr[i];
1188    }
1189
1190    /* Sync BATs */
1191    for (i = 0; i < 8; i++) {
1192        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1193        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1194        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1195        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1196    }
1197
1198    return 0;
1199}
1200
1201int kvm_arch_get_registers(CPUState *cs)
1202{
1203    PowerPCCPU *cpu = POWERPC_CPU(cs);
1204    CPUPPCState *env = &cpu->env;
1205    struct kvm_regs regs;
1206    uint32_t cr;
1207    int i, ret;
1208
1209    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1210    if (ret < 0)
1211        return ret;
1212
1213    cr = regs.cr;
1214    for (i = 7; i >= 0; i--) {
1215        env->crf[i] = cr & 15;
1216        cr >>= 4;
1217    }
1218
1219    env->ctr = regs.ctr;
1220    env->lr = regs.lr;
1221    cpu_write_xer(env, regs.xer);
1222    env->msr = regs.msr;
1223    env->nip = regs.pc;
1224
1225    env->spr[SPR_SRR0] = regs.srr0;
1226    env->spr[SPR_SRR1] = regs.srr1;
1227
1228    env->spr[SPR_SPRG0] = regs.sprg0;
1229    env->spr[SPR_SPRG1] = regs.sprg1;
1230    env->spr[SPR_SPRG2] = regs.sprg2;
1231    env->spr[SPR_SPRG3] = regs.sprg3;
1232    env->spr[SPR_SPRG4] = regs.sprg4;
1233    env->spr[SPR_SPRG5] = regs.sprg5;
1234    env->spr[SPR_SPRG6] = regs.sprg6;
1235    env->spr[SPR_SPRG7] = regs.sprg7;
1236
1237    env->spr[SPR_BOOKE_PID] = regs.pid;
1238
 1239    for (i = 0; i < 32; i++)
1240        env->gpr[i] = regs.gpr[i];
1241
1242    kvm_get_fp(cs);
1243
1244    if (cap_booke_sregs) {
1245        ret = kvmppc_get_booke_sregs(cpu);
1246        if (ret < 0) {
1247            return ret;
1248        }
1249    }
1250
1251    if (cap_segstate) {
1252        ret = kvmppc_get_books_sregs(cpu);
1253        if (ret < 0) {
1254            return ret;
1255        }
1256    }
1257
1258    if (cap_hior) {
1259        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1260    }
1261
1262    if (cap_one_reg) {
1263        int i;
1264
 1265        /* We deliberately ignore errors here: for kernels which have
 1266         * the ONE_REG calls but don't support the specific
 1267         * registers, there's a reasonable chance things will still
 1268         * work, at least until we try to migrate. */
1269        for (i = 0; i < 1024; i++) {
1270            uint64_t id = env->spr_cb[i].one_reg_id;
1271
1272            if (id != 0) {
1273                kvm_get_one_spr(cs, id, i);
1274            }
1275        }
1276
1277#ifdef TARGET_PPC64
1278        if (msr_ts) {
1279            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1280                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1281            }
1282            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1283                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1284            }
1285            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1286            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1287            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1288            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1289            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1290            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1291            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1292            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1293            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1294            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1295        }
1296
1297        if (cap_papr) {
1298            if (kvm_get_vpa(cs) < 0) {
1299                DPRINTF("Warning: Unable to get VPA information from KVM\n");
1300            }
1301        }
1302
1303        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1304#endif
1305    }
1306
1307    return 0;
1308}
1309
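/*
 * Editor's note (derived from the code below): raise or lower the guest's
 * external interrupt line. Only PPC_INTERRUPT_EXT is routed to KVM here;
 * the call is a no-op unless the level-irq capabilities were detected in
 * kvm_arch_init().
 */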
1310int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1311{
1312    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1313
1314    if (irq != PPC_INTERRUPT_EXT) {
1315        return 0;
1316    }
1317
1318    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1319        return 0;
1320    }
1321
1322    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1323
1324    return 0;
1325}
1326
1327#if defined(TARGET_PPC64)
1328#define PPC_INPUT_INT PPC970_INPUT_INT
1329#else
1330#define PPC_INPUT_INT PPC6xx_INPUT_INT
1331#endif
1332
1333void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1334{
1335    PowerPCCPU *cpu = POWERPC_CPU(cs);
1336    CPUPPCState *env = &cpu->env;
1337    int r;
1338    unsigned irq;
1339
1340    qemu_mutex_lock_iothread();
1341
1342    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1343     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1344    if (!cap_interrupt_level &&
1345        run->ready_for_interrupt_injection &&
1346        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1347        (env->irq_input_state & (1<<PPC_INPUT_INT)))
1348    {
1349        /* For now KVM disregards the 'irq' argument. However, in the
1350         * future KVM could cache it in-kernel to avoid a heavyweight exit
1351         * when reading the UIC.
1352         */
1353        irq = KVM_INTERRUPT_SET;
1354
1355        DPRINTF("injected interrupt %d\n", irq);
1356        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1357        if (r < 0) {
 1358            printf("cpu %d failed to inject interrupt %x\n", cs->cpu_index, irq);
1359        }
1360
1361        /* Always wake up soon in case the interrupt was level based */
1362        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1363                       (NANOSECONDS_PER_SECOND / 50));
1364    }
1365
 1366    /* We don't know if there are more interrupts pending after this. However,
 1367     * the guest will return to userspace in the course of handling this one
 1368     * anyway, so we will get a chance to deliver the rest. */
1369
1370    qemu_mutex_unlock_iothread();
1371}
1372
1373MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1374{
1375    return MEMTXATTRS_UNSPECIFIED;
1376}
1377
1378int kvm_arch_process_async_events(CPUState *cs)
1379{
1380    return cs->halted;
1381}
1382
1383static int kvmppc_handle_halt(PowerPCCPU *cpu)
1384{
1385    CPUState *cs = CPU(cpu);
1386    CPUPPCState *env = &cpu->env;
1387
1388    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1389        cs->halted = 1;
1390        cs->exception_index = EXCP_HLT;
1391    }
1392
1393    return 0;
1394}
1395
 1396/* Map DCR accesses onto the existing QEMU DCR emulation */
1397static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1398{
1399    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
 1400        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
1401
1402    return 0;
1403}
1404
1405static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1406{
1407    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1408        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1409
1410    return 0;
1411}
1412
1413int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1414{
1415    /* Mixed endian case is not handled */
1416    uint32_t sc = debug_inst_opcode;
1417
1418    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1419                            sizeof(sc), 0) ||
1420        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1421        return -EINVAL;
1422    }
1423
1424    return 0;
1425}
1426
1427int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1428{
1429    uint32_t sc;
1430
1431    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1432        sc != debug_inst_opcode ||
1433        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1434                            sizeof(sc), 1)) {
1435        return -EINVAL;
1436    }
1437
1438    return 0;
1439}
1440
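/*
 * Editor's note (derived from the code below): hardware breakpoints and
 * watchpoints share the hw_debug_points[] array in insertion order; lookups
 * scan the first nb_hw_breakpoint + nb_hw_watchpoint entries and match on
 * both the address and the GDB type.
 */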
1441static int find_hw_breakpoint(target_ulong addr, int type)
1442{
1443    int n;
1444
1445    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1446           <= ARRAY_SIZE(hw_debug_points));
1447
1448    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1449        if (hw_debug_points[n].addr == addr &&
1450             hw_debug_points[n].type == type) {
1451            return n;
1452        }
1453    }
1454
1455    return -1;
1456}
1457
1458static int find_hw_watchpoint(target_ulong addr, int *flag)
1459{
1460    int n;
1461
1462    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1463    if (n >= 0) {
1464        *flag = BP_MEM_ACCESS;
1465        return n;
1466    }
1467
1468    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1469    if (n >= 0) {
1470        *flag = BP_MEM_WRITE;
1471        return n;
1472    }
1473
1474    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1475    if (n >= 0) {
1476        *flag = BP_MEM_READ;
1477        return n;
1478    }
1479
1480    return -1;
1481}
1482
1483int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1484                                  target_ulong len, int type)
1485{
1486    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1487        return -ENOBUFS;
1488    }
1489
1490    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1491    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1492
1493    switch (type) {
1494    case GDB_BREAKPOINT_HW:
1495        if (nb_hw_breakpoint >= max_hw_breakpoint) {
1496            return -ENOBUFS;
1497        }
1498
1499        if (find_hw_breakpoint(addr, type) >= 0) {
1500            return -EEXIST;
1501        }
1502
1503        nb_hw_breakpoint++;
1504        break;
1505
1506    case GDB_WATCHPOINT_WRITE:
1507    case GDB_WATCHPOINT_READ:
1508    case GDB_WATCHPOINT_ACCESS:
1509        if (nb_hw_watchpoint >= max_hw_watchpoint) {
1510            return -ENOBUFS;
1511        }
1512
1513        if (find_hw_breakpoint(addr, type) >= 0) {
1514            return -EEXIST;
1515        }
1516
1517        nb_hw_watchpoint++;
1518        break;
1519
1520    default:
1521        return -ENOSYS;
1522    }
1523
1524    return 0;
1525}
1526
1527int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1528                                  target_ulong len, int type)
1529{
1530    int n;
1531
1532    n = find_hw_breakpoint(addr, type);
1533    if (n < 0) {
1534        return -ENOENT;
1535    }
1536
1537    switch (type) {
1538    case GDB_BREAKPOINT_HW:
1539        nb_hw_breakpoint--;
1540        break;
1541
1542    case GDB_WATCHPOINT_WRITE:
1543    case GDB_WATCHPOINT_READ:
1544    case GDB_WATCHPOINT_ACCESS:
1545        nb_hw_watchpoint--;
1546        break;
1547
1548    default:
1549        return -ENOSYS;
1550    }
1551    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1552
1553    return 0;
1554}
1555
1556void kvm_arch_remove_all_hw_breakpoints(void)
1557{
1558    nb_hw_breakpoint = nb_hw_watchpoint = 0;
1559}
1560
1561void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1562{
1563    int n;
1564
1565    /* Software Breakpoint updates */
1566    if (kvm_sw_breakpoints_active(cs)) {
1567        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1568    }
1569
1570    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1571           <= ARRAY_SIZE(hw_debug_points));
1572    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1573
1574    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1575        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1576        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1577        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1578            switch (hw_debug_points[n].type) {
1579            case GDB_BREAKPOINT_HW:
1580                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1581                break;
1582            case GDB_WATCHPOINT_WRITE:
1583                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1584                break;
1585            case GDB_WATCHPOINT_READ:
1586                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1587                break;
1588            case GDB_WATCHPOINT_ACCESS:
1589                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1590                                        KVMPPC_DEBUG_WATCH_READ;
1591                break;
1592            default:
1593                cpu_abort(cs, "Unsupported breakpoint type\n");
1594            }
1595            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1596        }
1597    }
1598}
1599
1600static int kvm_handle_hw_breakpoint(CPUState *cs,
1601                                    struct kvm_debug_exit_arch *arch_info)
1602{
1603    int handle = 0;
1604    int n;
1605    int flag = 0;
1606
1607    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1608        if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1609            n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1610            if (n >= 0) {
1611                handle = 1;
1612            }
1613        } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1614                                        KVMPPC_DEBUG_WATCH_WRITE)) {
1615            n = find_hw_watchpoint(arch_info->address,  &flag);
1616            if (n >= 0) {
1617                handle = 1;
1618                cs->watchpoint_hit = &hw_watchpoint;
1619                hw_watchpoint.vaddr = hw_debug_points[n].addr;
1620                hw_watchpoint.flags = flag;
1621            }
1622        }
1623    }
1624    return handle;
1625}
1626
1627static int kvm_handle_singlestep(void)
1628{
1629    return 1;
1630}
1631
1632static int kvm_handle_sw_breakpoint(void)
1633{
1634    return 1;
1635}
1636
1637static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1638{
1639    CPUState *cs = CPU(cpu);
1640    CPUPPCState *env = &cpu->env;
1641    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1642
1643    if (cs->singlestep_enabled) {
1644        return kvm_handle_singlestep();
1645    }
1646
1647    if (arch_info->status) {
1648        return kvm_handle_hw_breakpoint(cs, arch_info);
1649    }
1650
1651    if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1652        return kvm_handle_sw_breakpoint();
1653    }
1654
 1655    /*
 1656     * QEMU is not able to handle the debug exception, so inject a
 1657     * program exception into the guest;
 1658     * yes, a program exception, NOT a debug exception!
 1659     * When QEMU is using debug resources, the debug exception must
 1660     * always be enabled. To achieve this we set MSR_DE and also set
 1661     * MSRP_DEP so the guest cannot change MSR_DE.
 1662     * When emulating debug resources for the guest we want the guest
 1663     * to control MSR_DE (enable/disable the debug interrupt as needed).
 1664     * Supporting both configurations at once is NOT possible,
 1665     * so the result is that we cannot share debug resources
 1666     * between QEMU and the guest on the BookE architecture.
 1667     * In the current design QEMU gets priority over the guest:
 1668     * if QEMU is using the debug resources then the guest
 1669     * cannot use them.
 1670     * For software breakpoints QEMU uses a privileged instruction,
 1671     * so there is no way we got here because the guest raised a
 1672     * debug exception; the only possibility is that the guest executed
 1673     * a privileged / illegal instruction, and that is why we are
 1674     * injecting a program interrupt.
 1675     */
1676    cpu_synchronize_state(cs);
1677    /*
1678     * env->nip is the PC, so increment it by 4 before calling
1679     * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1680     */
1681    env->nip += 4;
1682    cs->exception_index = POWERPC_EXCP_PROGRAM;
1683    env->error_code = POWERPC_EXCP_INVAL;
1684    ppc_cpu_do_interrupt(cs);
1685
1686    return 0;
1687}
1688
1689int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1690{
1691    PowerPCCPU *cpu = POWERPC_CPU(cs);
1692    CPUPPCState *env = &cpu->env;
1693    int ret;
1694
1695    qemu_mutex_lock_iothread();
1696
1697    switch (run->exit_reason) {
1698    case KVM_EXIT_DCR:
1699        if (run->dcr.is_write) {
1700            DPRINTF("handle dcr write\n");
1701            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1702        } else {
1703            DPRINTF("handle dcr read\n");
1704            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1705        }
1706        break;
1707    case KVM_EXIT_HLT:
1708        DPRINTF("handle halt\n");
1709        ret = kvmppc_handle_halt(cpu);
1710        break;
1711#if defined(TARGET_PPC64)
1712    case KVM_EXIT_PAPR_HCALL:
1713        DPRINTF("handle PAPR hypercall\n");
1714        run->papr_hcall.ret = spapr_hypercall(cpu,
1715                                              run->papr_hcall.nr,
1716                                              run->papr_hcall.args);
1717        ret = 0;
1718        break;
1719#endif
1720    case KVM_EXIT_EPR:
1721        DPRINTF("handle epr\n");
1722        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1723        ret = 0;
1724        break;
1725    case KVM_EXIT_WATCHDOG:
1726        DPRINTF("handle watchdog expiry\n");
1727        watchdog_perform_action();
1728        ret = 0;
1729        break;
1730
1731    case KVM_EXIT_DEBUG:
1732        DPRINTF("handle debug exception\n");
1733        if (kvm_handle_debug(cpu, run)) {
1734            ret = EXCP_DEBUG;
1735            break;
1736        }
1737        /* re-enter, this exception was guest-internal */
1738        ret = 0;
1739        break;
1740
1741    default:
1742        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1743        ret = -1;
1744        break;
1745    }
1746
1747    qemu_mutex_unlock_iothread();
1748    return ret;
1749}
1750
1751int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1752{
1753    CPUState *cs = CPU(cpu);
1754    uint32_t bits = tsr_bits;
1755    struct kvm_one_reg reg = {
1756        .id = KVM_REG_PPC_OR_TSR,
1757        .addr = (uintptr_t) &bits,
1758    };
1759
1760    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1761}
1762
1763int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1764{
1765
1766    CPUState *cs = CPU(cpu);
1767    uint32_t bits = tsr_bits;
1768    struct kvm_one_reg reg = {
1769        .id = KVM_REG_PPC_CLEAR_TSR,
1770        .addr = (uintptr_t) &bits,
1771    };
1772
1773    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1774}
1775
1776int kvmppc_set_tcr(PowerPCCPU *cpu)
1777{
1778    CPUState *cs = CPU(cpu);
1779    CPUPPCState *env = &cpu->env;
1780    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1781
1782    struct kvm_one_reg reg = {
1783        .id = KVM_REG_PPC_TCR,
1784        .addr = (uintptr_t) &tcr,
1785    };
1786
1787    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1788}
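
/*
 * Illustrative usage, not part of the original file: a BookE timer model
 * that mirrors its software TSR/TCR state into KVM might call, e.g.,
 *
 *     kvmppc_set_tcr(cpu);
 *     kvmppc_or_tsr_bits(cpu, env->spr[SPR_BOOKE_TSR]);
 *
 * and later kvmppc_clear_tsr_bits() with whatever status bits the guest
 * acknowledged.  SPR_BOOKE_TSR is assumed to be the BookE TSR define from
 * cpu.h; the calling context here is hypothetical.
 */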
1789
1790int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1791{
1792    CPUState *cs = CPU(cpu);
1793    int ret;
1794
1795    if (!kvm_enabled()) {
1796        return -1;
1797    }
1798
1799    if (!cap_ppc_watchdog) {
1800        fprintf(stderr, "warning: KVM does not support watchdog\n");
1801        return -1;
1802    }
1803
1804    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1805    if (ret < 0) {
1806        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1807                __func__, strerror(-ret));
1808        return ret;
1809    }
1810
1811    return ret;
1812}
1813
1814static int read_cpuinfo(const char *field, char *value, int len)
1815{
1816    FILE *f;
1817    int ret = -1;
1818    int field_len = strlen(field);
1819    char line[512];
1820
1821    f = fopen("/proc/cpuinfo", "r");
1822    if (!f) {
1823        return -1;
1824    }
1825
1826    do {
1827        if (!fgets(line, sizeof(line), f)) {
1828            break;
1829        }
1830        if (!strncmp(line, field, field_len)) {
1831            pstrcpy(value, len, line);
1832            ret = 0;
1833            break;
1834        }
1835    } while (*line);
1836
1837    fclose(f);
1838
1839    return ret;
1840}
1841
1842uint32_t kvmppc_get_tbfreq(void)
1843{
1844    char line[512];
1845    char *ns;
1846    uint32_t retval = NANOSECONDS_PER_SECOND;
1847
1848    if (read_cpuinfo("timebase", line, sizeof(line))) {
1849        return retval;
1850    }
1851
1852    if (!(ns = strchr(line, ':'))) {
1853        return retval;
1854    }
1855
1856    ns++;
1857
1858    return atoi(ns);
1859}
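
/*
 * For reference (illustrative, not taken from the original file): on a
 * typical ppc64 host the matched /proc/cpuinfo line looks like
 *
 *     timebase        : 512000000
 *
 * so the value returned above would be 512000000 (a 512 MHz timebase).
 */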
1860
1861bool kvmppc_get_host_serial(char **value)
1862{
1863    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1864                               NULL);
1865}
1866
1867bool kvmppc_get_host_model(char **value)
1868{
1869    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1870}
1871
1872/* Try to find a device tree node for a CPU with a clock-frequency property */
1873static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1874{
1875    struct dirent *dirp;
1876    DIR *dp;
1877
1878    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1879        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1880        return -1;
1881    }
1882
1883    buf[0] = '\0';
1884    while ((dirp = readdir(dp)) != NULL) {
1885        FILE *f;
1886        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1887                 dirp->d_name);
1888        f = fopen(buf, "r");
1889        if (f) {
1890            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1891            fclose(f);
1892            break;
1893        }
1894        buf[0] = '\0';
1895    }
1896    closedir(dp);
1897    if (buf[0] == '\0') {
1898        printf("Unknown host!\n");
1899        return -1;
1900    }
1901
1902    return 0;
1903}
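
/*
 * Illustrative example (node name format assumed, not taken from this
 * file): on a POWER9 host the buffer typically ends up as something like
 *
 *     /proc/device-tree/cpus/PowerPC,POWER9@0
 *
 * i.e. the first CPU node that exposes a clock-frequency property.
 */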
1904
1905static uint64_t kvmppc_read_int_dt(const char *filename)
1906{
1907    union {
1908        uint32_t v32;
1909        uint64_t v64;
1910    } u;
1911    FILE *f;
1912    int len;
1913
1914    f = fopen(filename, "rb");
1915    if (!f) {
1916        return -1;
1917    }
1918
1919    len = fread(&u, 1, sizeof(u), f);
1920    fclose(f);
1921    switch (len) {
1922    case 4:
1923        /* property is a 32-bit quantity */
1924        return be32_to_cpu(u.v32);
1925    case 8:
1926        return be64_to_cpu(u.v64);
1927    }
1928
1929    return 0;
1930}
1931
1932/* Read a CPU node property from the host device tree that's a single
1933 * integer (32-bit or 64-bit).  Returns (uint64_t)-1 if the node or
1934 * property can't be found or opened, and 0 if the property format
1935 * isn't understood. */
1936static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1937{
1938    char buf[PATH_MAX], *tmp;
1939    uint64_t val;
1940
1941    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1942        return -1;
1943    }
1944
1945    tmp = g_strdup_printf("%s/%s", buf, propname);
1946    val = kvmppc_read_int_dt(tmp);
1947    g_free(tmp);
1948
1949    return val;
1950}
1951
1952uint64_t kvmppc_get_clockfreq(void)
1953{
1954    return kvmppc_read_int_cpu_dt("clock-frequency");
1955}
1956
1957static int kvmppc_get_dec_bits(void)
1958{
1959    int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits");
1960
1961    if (nr_bits > 0) {
1962        return nr_bits;
1963    }
1964    return 0;
1965}
1966
1967static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1968{
1969    PowerPCCPU *cpu = ppc_env_get_cpu(env);
1970    CPUState *cs = CPU(cpu);
1971
1972    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1973        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1974        return 0;
1975    }
1976
1977    return 1;
1978}
1979
1980int kvmppc_get_hasidle(CPUPPCState *env)
1981{
1982    struct kvm_ppc_pvinfo pvinfo;
1983
1984    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1985        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1986        return 1;
1987    }
1988
1989    return 0;
1990}
1991
1992int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1993{
1994    uint32_t *hc = (uint32_t *)buf;
1995    struct kvm_ppc_pvinfo pvinfo;
1996
1997    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1998        memcpy(buf, pvinfo.hcall, buf_len);
1999        return 0;
2000    }
2001
2002    /*
2003     * Fallback to always fail hypercalls regardless of endianness:
2004     *
2005     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2006     *     li r3, -1
2007     *     b .+8       (becomes nop in wrong endian)
2008     *     bswap32(li r3, -1)
2009     */
2010
2011    hc[0] = cpu_to_be32(0x08000048);
2012    hc[1] = cpu_to_be32(0x3860ffff);
2013    hc[2] = cpu_to_be32(0x48000008);
2014    hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2015
2016    return 1;
2017}
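
/*
 * Note (assumption based on ePAPR paravirt conventions rather than on
 * anything in this file): callers typically copy the 16-byte sequence
 * returned above into the guest-visible "hcall-instructions" property of
 * a /hypervisor device tree node, so the guest can patch its hypercall
 * stubs with it.
 */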
2018
2019static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2020{
2021    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2022}
2023
2024void kvmppc_enable_logical_ci_hcalls(void)
2025{
2026    /*
2027     * FIXME: it would be nice if we could detect the cases where
2028     * we're using a device which requires the in-kernel
2029     * implementation of these hcalls but the kernel lacks it, and
2030     * produce a warning.
2031     */
2032    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2033    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2034}
2035
2036void kvmppc_enable_set_mode_hcall(void)
2037{
2038    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2039}
2040
2041void kvmppc_enable_clear_ref_mod_hcalls(void)
2042{
2043    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2044    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2045}
2046
2047void kvmppc_enable_h_page_init(void)
2048{
2049    kvmppc_enable_hcall(kvm_state, H_PAGE_INIT);
2050}
2051
2052void kvmppc_set_papr(PowerPCCPU *cpu)
2053{
2054    CPUState *cs = CPU(cpu);
2055    int ret;
2056
2057    if (!kvm_enabled()) {
2058        return;
2059    }
2060
2061    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2062    if (ret) {
2063        error_report("This vCPU type or KVM version does not support PAPR");
2064        exit(1);
2065    }
2066
2067    /* Update the capability flag so we sync the right information
2068     * with kvm */
2069    cap_papr = 1;
2070}
2071
2072int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2073{
2074    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2075}
2076
2077void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2078{
2079    CPUState *cs = CPU(cpu);
2080    int ret;
2081
2082    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2083    if (ret && mpic_proxy) {
2084        error_report("This KVM version does not support EPR");
2085        exit(1);
2086    }
2087}
2088
2089int kvmppc_smt_threads(void)
2090{
2091    return cap_ppc_smt ? cap_ppc_smt : 1;
2092}
2093
2094int kvmppc_set_smt_threads(int smt)
2095{
2096    int ret;
2097
2098    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2099    if (!ret) {
2100        cap_ppc_smt = smt;
2101    }
2102    return ret;
2103}
2104
2105void kvmppc_hint_smt_possible(Error **errp)
2106{
2107    int i;
2108    GString *g;
2109    char *s;
2110
2111    assert(kvm_enabled());
2112    if (cap_ppc_smt_possible) {
2113        g = g_string_new("Available VSMT modes:");
2114        for (i = 63; i >= 0; i--) {
2115            if ((1UL << i) & cap_ppc_smt_possible) {
2116                g_string_append_printf(g, " %lu", (1UL << i));
2117            }
2118        }
2119        s = g_string_free(g, false);
2120        error_append_hint(errp, "%s.\n", s);
2121        g_free(s);
2122    } else {
2123        error_append_hint(errp,
2124                          "This KVM seems to be too old to support VSMT.\n");
2125    }
2126}
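
/*
 * Example (assuming the capability value is a bitmask of supported
 * power-of-two SMT modes, as the loop above implies): with
 * cap_ppc_smt_possible == 0x0f the appended hint would read
 *
 *     Available VSMT modes: 8 4 2 1.
 */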
2127
2128
2129#ifdef TARGET_PPC64
2130uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2131{
2132    struct kvm_ppc_smmu_info info;
2133    long rampagesize, best_page_shift;
2134    int i;
2135
2136    /* Find the largest hardware supported page size that's less than
2137     * or equal to the (logical) backing page size of guest RAM */
2138    kvm_get_smmu_info(&info, &error_fatal);
2139    rampagesize = qemu_getrampagesize();
2140    best_page_shift = 0;
2141
2142    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2143        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2144
2145        if (!sps->page_shift) {
2146            continue;
2147        }
2148
2149        if ((sps->page_shift > best_page_shift)
2150            && ((1UL << sps->page_shift) <= rampagesize)) {
2151            best_page_shift = sps->page_shift;
2152        }
2153    }
2154
2155    return MIN(current_size,
2156               1ULL << (best_page_shift + hash_shift - 7));
2157}
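
/*
 * Worked example (numbers chosen for illustration only): with 64 KiB
 * backing pages (best_page_shift == 16) and a 16 MiB HPT
 * (hash_shift == 24), the cap is 1ULL << (16 + 24 - 7) = 8 GiB, so the
 * RMA is limited to min(current_size, 8 GiB).
 */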
2158#endif
2159
2160bool kvmppc_spapr_use_multitce(void)
2161{
2162    return cap_spapr_multitce;
2163}
2164
2165int kvmppc_spapr_enable_inkernel_multitce(void)
2166{
2167    int ret;
2168
2169    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2170                            H_PUT_TCE_INDIRECT, 1);
2171    if (!ret) {
2172        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2173                                H_STUFF_TCE, 1);
2174    }
2175
2176    return ret;
2177}
2178
2179void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2180                              uint64_t bus_offset, uint32_t nb_table,
2181                              int *pfd, bool need_vfio)
2182{
2183    long len;
2184    int fd;
2185    void *table;
2186
2187    /* Must set fd to -1 so we don't try to munmap when called for
2188     * destroying the table, which the upper layers -will- do
2189     */
2190    *pfd = -1;
2191    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2192        return NULL;
2193    }
2194
2195    if (cap_spapr_tce_64) {
2196        struct kvm_create_spapr_tce_64 args = {
2197            .liobn = liobn,
2198            .page_shift = page_shift,
2199            .offset = bus_offset >> page_shift,
2200            .size = nb_table,
2201            .flags = 0
2202        };
2203        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2204        if (fd < 0) {
2205            fprintf(stderr,
2206                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2207                    liobn);
2208            return NULL;
2209        }
2210    } else if (cap_spapr_tce) {
2211        uint64_t window_size = (uint64_t) nb_table << page_shift;
2212        struct kvm_create_spapr_tce args = {
2213            .liobn = liobn,
2214            .window_size = window_size,
2215        };
2216        if ((window_size != args.window_size) || bus_offset) {
2217            return NULL;
2218        }
2219        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2220        if (fd < 0) {
2221            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2222                    liobn);
2223            return NULL;
2224        }
2225    } else {
2226        return NULL;
2227    }
2228
2229    len = nb_table * sizeof(uint64_t);
2230    /* FIXME: round this up to page size */
2231
2232    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2233    if (table == MAP_FAILED) {
2234        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2235                liobn);
2236        close(fd);
2237        return NULL;
2238    }
2239
2240    *pfd = fd;
2241    return table;
2242}
2243
2244int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2245{
2246    long len;
2247
2248    if (fd < 0) {
2249        return -1;
2250    }
2251
2252    len = nb_table * sizeof(uint64_t);
2253    if ((munmap(table, len) < 0) ||
2254        (close(fd) < 0)) {
2255        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2256                strerror(errno));
2257        /* Leak the table */
2258    }
2259
2260    return 0;
2261}
2262
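/*
 * Return value convention (derived from the code below): 0 means the
 * caller should allocate the hash table in QEMU itself, a positive value
 * is the log2 of the HPT size the kernel has set up (e.g. 24 for a
 * 16 MiB table), and a negative value is an error code.
 */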
2263int kvmppc_reset_htab(int shift_hint)
2264{
2265    uint32_t shift = shift_hint;
2266
2267    if (!kvm_enabled()) {
2268        /* Full emulation, tell caller to allocate htab itself */
2269        return 0;
2270    }
2271    if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2272        int ret;
2273        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2274        if (ret == -ENOTTY) {
2275            /* At least some versions of PR KVM advertise the
2276             * capability, but don't implement the ioctl().  Oops.
2277             * Return 0 so that we allocate the htab in qemu, as is
2278             * correct for PR. */
2279            return 0;
2280        } else if (ret < 0) {
2281            return ret;
2282        }
2283        return shift;
2284    }
2285
2286    /* We have a kernel that predates the htab reset calls.  For PR
2287     * KVM we need to allocate the htab ourselves; an HV KVM of this
2288     * era will already have allocated a fixed 16MB hash table. */
2289    if (kvmppc_is_pr(kvm_state)) {
2290        /* PR - tell caller to allocate htab */
2291        return 0;
2292    } else {
2293        /* HV - assume 16MB kernel allocated htab */
2294        return 24;
2295    }
2296}
2297
2298static inline uint32_t mfpvr(void)
2299{
2300    uint32_t pvr;
2301
2302    asm ("mfpvr %0"
2303         : "=r"(pvr));
2304    return pvr;
2305}
2306
2307static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2308{
2309    if (on) {
2310        *word |= flags;
2311    } else {
2312        *word &= ~flags;
2313    }
2314}
2315
2316static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2317{
2318    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2319    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2320    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2321
2322    /* Now fix up the class with information we can query from the host */
2323    pcc->pvr = mfpvr();
2324
2325    alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2326                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2327    alter_insns(&pcc->insns_flags2, PPC2_VSX,
2328                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2329    alter_insns(&pcc->insns_flags2, PPC2_DFP,
2330                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2331
2332    if (dcache_size != -1) {
2333        pcc->l1_dcache_size = dcache_size;
2334    }
2335
2336    if (icache_size != -1) {
2337        pcc->l1_icache_size = icache_size;
2338    }
2339
2340#if defined(TARGET_PPC64)
2341    pcc->radix_page_info = kvm_get_radix_page_info();
2342
2343    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2344        /*
2345         * POWER9 DD1 has some bugs which make it not really ISA 3.00
2346         * compliant.  More importantly, advertising ISA 3.00
2347         * architected mode may prevent guests from activating
2348         * necessary DD1 workarounds.
2349         */
2350        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2351                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2352    }
2353#endif /* defined(TARGET_PPC64) */
2354}
2355
2356bool kvmppc_has_cap_epr(void)
2357{
2358    return cap_epr;
2359}
2360
2361bool kvmppc_has_cap_fixup_hcalls(void)
2362{
2363    return cap_fixup_hcalls;
2364}
2365
2366bool kvmppc_has_cap_htm(void)
2367{
2368    return cap_htm;
2369}
2370
2371bool kvmppc_has_cap_mmu_radix(void)
2372{
2373    return cap_mmu_radix;
2374}
2375
2376bool kvmppc_has_cap_mmu_hash_v3(void)
2377{
2378    return cap_mmu_hash_v3;
2379}
2380
2381static bool kvmppc_power8_host(void)
2382{
2383    bool ret = false;
2384#ifdef TARGET_PPC64
2385    {
2386        uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2387        ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2388              (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2389              (base_pvr == CPU_POWERPC_POWER8_BASE);
2390    }
2391#endif /* TARGET_PPC64 */
2392    return ret;
2393}
2394
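/*
 * Editorial note, not from the original file: parse_cap_ppc_safe_cache()
 * and parse_cap_ppc_safe_bounds_check() below condense the KVM cpu
 * character/behaviour bits into 0 (vulnerable), 1 (workaround available)
 * or 2 (fixed in hardware), which is assumed to match the
 * SPAPR_CAP_BROKEN / SPAPR_CAP_WORKAROUND / SPAPR_CAP_FIXED tristate used
 * by the spapr capability code; parse_cap_ppc_safe_indirect_branch()
 * returns SPAPR_CAP_* values directly.
 */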
2395static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2396{
2397    bool l1d_thread_priv_req = !kvmppc_power8_host();
2398
2399    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2400        return 2;
2401    } else if ((!l1d_thread_priv_req ||
2402                c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2403               (c.character & c.character_mask
2404                & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2405        return 1;
2406    }
2407
2408    return 0;
2409}
2410
2411static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2412{
2413    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2414        return 2;
2415    } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2416        return 1;
2417    }
2418
2419    return 0;
2420}
2421
2422static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2423{
2424    if ((~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) &&
2425        (~c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) &&
2426        (~c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED)) {
2427        return SPAPR_CAP_FIXED_NA;
2428    } else if (c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) {
2429        return SPAPR_CAP_WORKAROUND;
2430    } else if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2431        return SPAPR_CAP_FIXED_CCD;
2432    } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2433        return SPAPR_CAP_FIXED_IBS;
2434    }
2435
2436    return 0;
2437}
2438
2439static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
2440{
2441    if (c.character & c.character_mask & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) {
2442        return 1;
2443    }
2444    return 0;
2445}
2446
2447static void kvmppc_get_cpu_characteristics(KVMState *s)
2448{
2449    struct kvm_ppc_cpu_char c;
2450    int ret;
2451
2452    /* Assume broken */
2453    cap_ppc_safe_cache = 0;
2454    cap_ppc_safe_bounds_check = 0;
2455    cap_ppc_safe_indirect_branch = 0;
2456
2457    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2458    if (!ret) {
2459        return;
2460    }
2461    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2462    if (ret < 0) {
2463        return;
2464    }
2465
2466    cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2467    cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2468    cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2469    cap_ppc_count_cache_flush_assist =
2470        parse_cap_ppc_count_cache_flush_assist(c);
2471}
2472
2473int kvmppc_get_cap_safe_cache(void)
2474{
2475    return cap_ppc_safe_cache;
2476}
2477
2478int kvmppc_get_cap_safe_bounds_check(void)
2479{
2480    return cap_ppc_safe_bounds_check;
2481}
2482
2483int kvmppc_get_cap_safe_indirect_branch(void)
2484{
2485    return cap_ppc_safe_indirect_branch;
2486}
2487
2488int kvmppc_get_cap_count_cache_flush_assist(void)
2489{
2490    return cap_ppc_count_cache_flush_assist;
2491}
2492
2493bool kvmppc_has_cap_nested_kvm_hv(void)
2494{
2495    return !!cap_ppc_nested_kvm_hv;
2496}
2497
2498int kvmppc_set_cap_nested_kvm_hv(int enable)
2499{
2500    return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2501}
2502
2503bool kvmppc_has_cap_spapr_vfio(void)
2504{
2505    return cap_spapr_vfio;
2506}
2507
2508int kvmppc_get_cap_large_decr(void)
2509{
2510    return cap_large_decr;
2511}
2512
2513int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
2514{
2515    CPUState *cs = CPU(cpu);
2516    uint64_t lpcr;
2517
2518    kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2519    /* Do we need to modify the LPCR? */
2520    if (!!(lpcr & LPCR_LD) != !!enable) {
2521        if (enable) {
2522            lpcr |= LPCR_LD;
2523        } else {
2524            lpcr &= ~LPCR_LD;
2525        }
2526        kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2527        kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2528
2529        if (!!(lpcr & LPCR_LD) != !!enable) {
2530            return -1;
2531        }
2532    }
2533
2534    return 0;
2535}
2536
2537PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2538{
2539    uint32_t host_pvr = mfpvr();
2540    PowerPCCPUClass *pvr_pcc;
2541
2542    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2543    if (pvr_pcc == NULL) {
2544        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2545    }
2546
2547    return pvr_pcc;
2548}
2549
2550static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2551{
2552    TypeInfo type_info = {
2553        .name = TYPE_HOST_POWERPC_CPU,
2554        .class_init = kvmppc_host_cpu_class_init,
2555    };
2556    MachineClass *mc = MACHINE_GET_CLASS(ms);
2557    PowerPCCPUClass *pvr_pcc;
2558    ObjectClass *oc;
2559    DeviceClass *dc;
2560    int i;
2561
2562    pvr_pcc = kvm_ppc_get_host_cpu_class();
2563    if (pvr_pcc == NULL) {
2564        return -1;
2565    }
2566    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2567    type_register(&type_info);
2568    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2569        /* override TCG default cpu type with 'host' cpu model */
2570        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2571    }
2572
2573    oc = object_class_by_name(type_info.name);
2574    g_assert(oc);
2575
2576    /*
2577     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2578     * we want "POWER8" to be a "family" alias that points to the current
2579     * host CPU type, too)
2580     */
2581    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2582    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2583        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2584            char *suffix;
2585
2586            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2587            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2588            if (suffix) {
2589                *suffix = 0;
2590            }
2591            break;
2592        }
2593    }
2594
2595    return 0;
2596}
2597
2598int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2599{
2600    struct kvm_rtas_token_args args = {
2601        .token = token,
2602    };
2603
2604    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2605        return -ENOENT;
2606    }
2607
2608    strncpy(args.name, function, sizeof(args.name));
2609
2610    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2611}
2612
2613int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2614{
2615    struct kvm_get_htab_fd s = {
2616        .flags = write ? KVM_GET_HTAB_WRITE : 0,
2617        .start_index = index,
2618    };
2619    int ret;
2620
2621    if (!cap_htab_fd) {
2622        error_setg(errp, "KVM version doesn't support %s the HPT",
2623                   write ? "writing" : "reading");
2624        return -ENOTSUP;
2625    }
2626
2627    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2628    if (ret < 0) {
2629        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2630                   write ? "writing" : "reading", write ? "to" : "from",
2631                   strerror(errno));
2632        return -errno;
2633    }
2634
2635    return ret;
2636}
2637
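/*
 * Stream format used by kvmppc_save_htab()/kvmppc_load_htab_chunk()
 * (description derived from the save/load code below): each chunk is
 *
 *     be32 index      first HPTE index covered by the chunk
 *     be16 n_valid    number of valid HPTEs that follow
 *     be16 n_invalid  number of HPTEs to invalidate after them
 *     n_valid * HASH_PTE_SIZE_64 bytes of HPTE data
 */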
2638int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2639{
2640    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2641    uint8_t buf[bufsize];
2642    ssize_t rc;
2643
2644    do {
2645        rc = read(fd, buf, bufsize);
2646        if (rc < 0) {
2647            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2648                    strerror(errno));
2649            return rc;
2650        } else if (rc) {
2651            uint8_t *buffer = buf;
2652            ssize_t n = rc;
2653            while (n) {
2654                struct kvm_get_htab_header *head =
2655                    (struct kvm_get_htab_header *) buffer;
2656                size_t chunksize = sizeof(*head) +
2657                     HASH_PTE_SIZE_64 * head->n_valid;
2658
2659                qemu_put_be32(f, head->index);
2660                qemu_put_be16(f, head->n_valid);
2661                qemu_put_be16(f, head->n_invalid);
2662                qemu_put_buffer(f, (void *)(head + 1),
2663                                HASH_PTE_SIZE_64 * head->n_valid);
2664
2665                buffer += chunksize;
2666                n -= chunksize;
2667            }
2668        }
2669    } while ((rc != 0)
2670             && ((max_ns < 0)
2671                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2672
2673    return (rc == 0) ? 1 : 0;
2674}
2675
2676int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2677                           uint16_t n_valid, uint16_t n_invalid)
2678{
2679    struct kvm_get_htab_header *buf;
2680    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2681    ssize_t rc;
2682
2683    buf = alloca(chunksize);
2684    buf->index = index;
2685    buf->n_valid = n_valid;
2686    buf->n_invalid = n_invalid;
2687
2688    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2689
2690    rc = write(fd, buf, chunksize);
2691    if (rc < 0) {
2692        fprintf(stderr, "Error writing KVM hash table: %s\n",
2693                strerror(errno));
2694        return rc;
2695    }
2696    if (rc != chunksize) {
2697        /* We should never get a short write on a single chunk */
2698        fprintf(stderr, "Short write, restoring KVM hash table\n");
2699        return -1;
2700    }
2701    return 0;
2702}
2703
2704bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2705{
2706    return true;
2707}
2708
2709void kvm_arch_init_irq_routing(KVMState *s)
2710{
2711}
2712
2713void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2714{
2715    int fd, rc;
2716    int i;
2717
2718    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2719
2720    i = 0;
2721    while (i < n) {
2722        struct kvm_get_htab_header *hdr;
2723        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2724        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2725
2726        rc = read(fd, buf, sizeof(buf));
2727        if (rc < 0) {
2728            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2729        }
2730
2731        hdr = (struct kvm_get_htab_header *)buf;
2732        while ((i < n) && ((char *)hdr < (buf + rc))) {
2733            int invalid = hdr->n_invalid, valid = hdr->n_valid;
2734
2735            if (hdr->index != (ptex + i)) {
2736                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2737                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2738            }
2739
2740            if (n - i < valid) {
2741                valid = n - i;
2742            }
2743            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2744            i += valid;
2745
2746            if ((n - i) < invalid) {
2747                invalid = n - i;
2748            }
2749            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2750            i += invalid;
2751
2752            hdr = (struct kvm_get_htab_header *)
2753                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2754        }
2755    }
2756
2757    close(fd);
2758}
2759
2760void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2761{
2762    int fd, rc;
2763    struct {
2764        struct kvm_get_htab_header hdr;
2765        uint64_t pte0;
2766        uint64_t pte1;
2767    } buf;
2768
2769    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2770
2771    buf.hdr.n_valid = 1;
2772    buf.hdr.n_invalid = 0;
2773    buf.hdr.index = ptex;
2774    buf.pte0 = cpu_to_be64(pte0);
2775    buf.pte1 = cpu_to_be64(pte1);
2776
2777    rc = write(fd, &buf, sizeof(buf));
2778    if (rc != sizeof(buf)) {
2779        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2780    }
2781    close(fd);
2782}
2783
2784int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2785                             uint64_t address, uint32_t data, PCIDevice *dev)
2786{
2787    return 0;
2788}
2789
2790int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2791                                int vector, PCIDevice *dev)
2792{
2793    return 0;
2794}
2795
2796int kvm_arch_release_virq_post(int virq)
2797{
2798    return 0;
2799}
2800
2801int kvm_arch_msi_data_to_gsi(uint32_t data)
2802{
2803    return data & 0xffff;
2804}
2805
2806int kvmppc_enable_hwrng(void)
2807{
2808    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2809        return -1;
2810    }
2811
2812    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2813}
2814
2815void kvmppc_check_papr_resize_hpt(Error **errp)
2816{
2817    if (!kvm_enabled()) {
2818        return; /* No KVM, we're good */
2819    }
2820
2821    if (cap_resize_hpt) {
2822        return; /* Kernel has explicit support, we're good */
2823    }
2824
2825    /* Otherwise fallback on looking for PR KVM */
2826    if (kvmppc_is_pr(kvm_state)) {
2827        return;
2828    }
2829
2830    error_setg(errp,
2831               "Hash page table resizing not available with this KVM version");
2832}
2833
2834int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2835{
2836    CPUState *cs = CPU(cpu);
2837    struct kvm_ppc_resize_hpt rhpt = {
2838        .flags = flags,
2839        .shift = shift,
2840    };
2841
2842    if (!cap_resize_hpt) {
2843        return -ENOSYS;
2844    }
2845
2846    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2847}
2848
2849int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2850{
2851    CPUState *cs = CPU(cpu);
2852    struct kvm_ppc_resize_hpt rhpt = {
2853        .flags = flags,
2854        .shift = shift,
2855    };
2856
2857    if (!cap_resize_hpt) {
2858        return -ENOSYS;
2859    }
2860
2861    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2862}
2863
2864/*
2865 * This is a helper function to detect a post migration scenario
2866 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2867 * the guest kernel can't handle a PVR value other than the actual host
2868 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2869 *
2870 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2871 * (so, we're HV), return true. The workaround itself is done in
2872 * cpu_post_load.
2873 *
2874 * The order here is important: we'll only check for KVM PR as a
2875 * fallback if the guest kernel can't handle the situation itself.
2876 * We need to avoid querying the running KVM type at the QEMU level
2877 * as much as possible.
2878 */
2879bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2880{
2881    CPUState *cs = CPU(cpu);
2882
2883    if (!kvm_enabled()) {
2884        return false;
2885    }
2886
2887    if (cap_ppc_pvr_compat) {
2888        return false;
2889    }
2890
2891    return !kvmppc_is_pr(cs->kvm_state);
2892}
2893
2894void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2895{
2896    CPUState *cs = CPU(cpu);
2897
2898    if (kvm_enabled()) {
2899        kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
2900    }
2901}
2902