qemu/target/ppc/kvm.c
   1/*
   2 * PowerPC implementation of KVM hooks
   3 *
   4 * Copyright IBM Corp. 2007
   5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6 *
   7 * Authors:
   8 *  Jerone Young <jyoung5@us.ibm.com>
   9 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10 *  Hollis Blanchard <hollisb@us.ibm.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13 * See the COPYING file in the top-level directory.
  14 *
  15 */
  16
  17#include "qemu/osdep.h"
  18#include <dirent.h>
  19#include <sys/ioctl.h>
  20#include <sys/vfs.h>
  21
  22#include <linux/kvm.h>
  23
  24#include "qemu-common.h"
  25#include "qapi/error.h"
  26#include "qemu/error-report.h"
  27#include "cpu.h"
  28#include "cpu-models.h"
  29#include "qemu/timer.h"
  30#include "sysemu/sysemu.h"
  31#include "sysemu/hw_accel.h"
  32#include "kvm_ppc.h"
  33#include "sysemu/cpus.h"
  34#include "sysemu/device_tree.h"
  35#include "mmu-hash64.h"
  36
  37#include "hw/sysbus.h"
  38#include "hw/ppc/spapr.h"
  39#include "hw/ppc/spapr_vio.h"
  40#include "hw/ppc/spapr_cpu_core.h"
  41#include "hw/ppc/ppc.h"
  42#include "sysemu/watchdog.h"
  43#include "trace.h"
  44#include "exec/gdbstub.h"
  45#include "exec/memattrs.h"
  46#include "exec/ram_addr.h"
  47#include "sysemu/hostmem.h"
  48#include "qemu/cutils.h"
  49#include "qemu/mmap-alloc.h"
  50#if defined(TARGET_PPC64)
  51#include "hw/ppc/spapr_cpu_core.h"
  52#endif
  53#include "elf.h"
  54#include "sysemu/kvm_int.h"
  55
  56//#define DEBUG_KVM
  57
  58#ifdef DEBUG_KVM
  59#define DPRINTF(fmt, ...) \
  60    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  61#else
  62#define DPRINTF(fmt, ...) \
  63    do { } while (0)
  64#endif
  65
  66#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  67
  68const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  69    KVM_CAP_LAST_INFO
  70};
  71
  72static int cap_interrupt_unset = false;
  73static int cap_interrupt_level = false;
  74static int cap_segstate;
  75static int cap_booke_sregs;
  76static int cap_ppc_smt;
  77static int cap_ppc_smt_possible;
  78static int cap_ppc_rma;
  79static int cap_spapr_tce;
  80static int cap_spapr_tce_64;
  81static int cap_spapr_multitce;
  82static int cap_spapr_vfio;
  83static int cap_hior;
  84static int cap_one_reg;
  85static int cap_epr;
  86static int cap_ppc_watchdog;
  87static int cap_papr;
  88static int cap_htab_fd;
  89static int cap_fixup_hcalls;
  90static int cap_htm;             /* Hardware transactional memory support */
  91static int cap_mmu_radix;
  92static int cap_mmu_hash_v3;
  93static int cap_resize_hpt;
  94static int cap_ppc_pvr_compat;
  95static int cap_ppc_safe_cache;
  96static int cap_ppc_safe_bounds_check;
  97static int cap_ppc_safe_indirect_branch;
  98
  99static uint32_t debug_inst_opcode;
 100
 101/* XXX We have a race condition where we actually have a level triggered
 102 *     interrupt, but the infrastructure can't expose that yet, so the guest
  103 *     takes it but ignores it, goes to sleep and never gets notified that there's
 104 *     still an interrupt pending.
 105 *
 106 *     As a quick workaround, let's just wake up again 20 ms after we injected
  107 *     an interrupt. That way we can ensure that we're always reinjecting
 108 *     interrupts in case the guest swallowed them.
 109 */
 110static QEMUTimer *idle_timer;
 111
 112static void kvm_kick_cpu(void *opaque)
 113{
 114    PowerPCCPU *cpu = opaque;
 115
 116    qemu_cpu_kick(CPU(cpu));
 117}
 118
 119/* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 120 * should only be used for fallback tests - generally we should use
 121 * explicit capabilities for the features we want, rather than
 122 * assuming what is/isn't available depending on the KVM variant. */
 123static bool kvmppc_is_pr(KVMState *ks)
 124{
 125    /* Assume KVM-PR if the GET_PVINFO capability is available */
 126    return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 127}
 128
 129static int kvm_ppc_register_host_cpu_type(MachineState *ms);
 130static void kvmppc_get_cpu_characteristics(KVMState *s);
 131
 132int kvm_arch_init(MachineState *ms, KVMState *s)
 133{
 134    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 135    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 136    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 137    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 138    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
 139    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 140    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 141    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 142    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 143    cap_spapr_vfio = false;
 144    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 145    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 146    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 147    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 148    /* Note: we don't set cap_papr here, because this capability is
  149     * only activated later, by kvmppc_set_papr() */
 150    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 151    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 152    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
 153    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 154    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 155    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 156    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
 157    kvmppc_get_cpu_characteristics(s);
 158    /*
  159     * Note: setting it to false because there is no such capability
  160     * in KVM at the moment.
  161     *
  162     * TODO: call kvm_vm_check_extension() with the right capability
  163     * after the kernel starts implementing it. */
 164    cap_ppc_pvr_compat = false;
 165
 166    if (!cap_interrupt_level) {
 167        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 168                        "VM to stall at times!\n");
 169    }
 170
 171    kvm_ppc_register_host_cpu_type(ms);
 172
 173    return 0;
 174}
 175
 176int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 177{
 178    return 0;
 179}
 180
 181static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 182{
 183    CPUPPCState *cenv = &cpu->env;
 184    CPUState *cs = CPU(cpu);
 185    struct kvm_sregs sregs;
 186    int ret;
 187
 188    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 189        /* What we're really trying to say is "if we're on BookE, we use
 190           the native PVR for now". This is the only sane way to check
  191           it though, so we potentially mislead users into thinking they can run
 192           BookE guests on BookS. Let's hope nobody dares enough :) */
 193        return 0;
 194    } else {
 195        if (!cap_segstate) {
 196            fprintf(stderr, "kvm error: missing PVR setting capability\n");
 197            return -ENOSYS;
 198        }
 199    }
 200
 201    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 202    if (ret) {
 203        return ret;
 204    }
 205
 206    sregs.pvr = cenv->spr[SPR_PVR];
 207    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 208}
 209
 210/* Set up a shared TLB array with KVM */
 211static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 212{
 213    CPUPPCState *env = &cpu->env;
 214    CPUState *cs = CPU(cpu);
 215    struct kvm_book3e_206_tlb_params params = {};
 216    struct kvm_config_tlb cfg = {};
 217    unsigned int entries = 0;
 218    int ret, i;
 219
 220    if (!kvm_enabled() ||
 221        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 222        return 0;
 223    }
 224
 225    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 226
 227    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 228        params.tlb_sizes[i] = booke206_tlb_size(env, i);
 229        params.tlb_ways[i] = booke206_tlb_ways(env, i);
 230        entries += params.tlb_sizes[i];
 231    }
 232
 233    assert(entries == env->nb_tlb);
 234    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 235
 236    env->tlb_dirty = true;
 237
 238    cfg.array = (uintptr_t)env->tlb.tlbm;
 239    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 240    cfg.params = (uintptr_t)&params;
 241    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 242
 243    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 244    if (ret < 0) {
 245        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 246                __func__, strerror(-ret));
 247        return ret;
 248    }
 249
 250    env->kvm_sw_tlb = true;
 251    return 0;
 252}
 253
 254
 255#if defined(TARGET_PPC64)
 256static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 257                                       struct kvm_ppc_smmu_info *info)
 258{
 259    CPUPPCState *env = &cpu->env;
 260    CPUState *cs = CPU(cpu);
 261
 262    memset(info, 0, sizeof(*info));
 263
  264    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
 265     * need to "guess" what the supported page sizes are.
 266     *
 267     * For that to work we make a few assumptions:
 268     *
 269     * - Check whether we are running "PR" KVM which only supports 4K
 270     *   and 16M pages, but supports them regardless of the backing
  271     *   store characteristics. We also don't support 1T segments.
 272     *
  273     *   This is safe because if HV KVM ever supports that capability or
  274     *   PR KVM grows support for more page/segment sizes, those versions
  275     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
  276     *   will not hit this fallback.
 277     *
 278     * - Else we are running HV KVM. This means we only support page
 279     *   sizes that fit in the backing store. Additionally we only
  280     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
 281     *   P7 encodings for the SLB and hash table. Here too, we assume
 282     *   support for any newer processor will mean a kernel that
 283     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 284     *   this fallback.
 285     */
 286    if (kvmppc_is_pr(cs->kvm_state)) {
 287        /* No flags */
 288        info->flags = 0;
 289        info->slb_size = 64;
 290
 291        /* Standard 4k base page size segment */
 292        info->sps[0].page_shift = 12;
 293        info->sps[0].slb_enc = 0;
 294        info->sps[0].enc[0].page_shift = 12;
 295        info->sps[0].enc[0].pte_enc = 0;
 296
 297        /* Standard 16M large page size segment */
 298        info->sps[1].page_shift = 24;
 299        info->sps[1].slb_enc = SLB_VSID_L;
 300        info->sps[1].enc[0].page_shift = 24;
 301        info->sps[1].enc[0].pte_enc = 0;
 302    } else {
 303        int i = 0;
 304
 305        /* HV KVM has backing store size restrictions */
 306        info->flags = KVM_PPC_PAGE_SIZES_REAL;
 307
 308        if (env->mmu_model & POWERPC_MMU_1TSEG) {
 309            info->flags |= KVM_PPC_1T_SEGMENTS;
 310        }
 311
 312        if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 313           POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 314            info->slb_size = 32;
 315        } else {
 316            info->slb_size = 64;
 317        }
 318
 319        /* Standard 4k base page size segment */
 320        info->sps[i].page_shift = 12;
 321        info->sps[i].slb_enc = 0;
 322        info->sps[i].enc[0].page_shift = 12;
 323        info->sps[i].enc[0].pte_enc = 0;
 324        i++;
 325
 326        /* 64K on MMU 2.06 and later */
 327        if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 328            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 329            info->sps[i].page_shift = 16;
 330            info->sps[i].slb_enc = 0x110;
 331            info->sps[i].enc[0].page_shift = 16;
 332            info->sps[i].enc[0].pte_enc = 1;
 333            i++;
 334        }
 335
 336        /* Standard 16M large page size segment */
 337        info->sps[i].page_shift = 24;
 338        info->sps[i].slb_enc = SLB_VSID_L;
 339        info->sps[i].enc[0].page_shift = 24;
 340        info->sps[i].enc[0].pte_enc = 0;
 341    }
 342}
 343
 344static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 345{
 346    CPUState *cs = CPU(cpu);
 347    int ret;
 348
 349    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 350        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 351        if (ret == 0) {
 352            return;
 353        }
 354    }
 355
 356    kvm_get_fallback_smmu_info(cpu, info);
 357}
 358
 359struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 360{
 361    KVMState *s = KVM_STATE(current_machine->accelerator);
 362    struct ppc_radix_page_info *radix_page_info;
 363    struct kvm_ppc_rmmu_info rmmu_info;
 364    int i;
 365
 366    if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 367        return NULL;
 368    }
 369    if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 370        return NULL;
 371    }
 372    radix_page_info = g_malloc0(sizeof(*radix_page_info));
 373    radix_page_info->count = 0;
 374    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 375        if (rmmu_info.ap_encodings[i]) {
 376            radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 377            radix_page_info->count++;
 378        }
 379    }
 380    return radix_page_info;
 381}
 382
 383target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 384                                     bool radix, bool gtse,
 385                                     uint64_t proc_tbl)
 386{
 387    CPUState *cs = CPU(cpu);
 388    int ret;
 389    uint64_t flags = 0;
 390    struct kvm_ppc_mmuv3_cfg cfg = {
 391        .process_table = proc_tbl,
 392    };
 393
 394    if (radix) {
 395        flags |= KVM_PPC_MMUV3_RADIX;
 396    }
 397    if (gtse) {
 398        flags |= KVM_PPC_MMUV3_GTSE;
 399    }
 400    cfg.flags = flags;
 401    ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
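         /* Translate the ioctl result into the PAPR hcall return codes
          * expected by the caller */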
 402    switch (ret) {
 403    case 0:
 404        return H_SUCCESS;
 405    case -EINVAL:
 406        return H_PARAMETER;
 407    case -ENODEV:
 408        return H_NOT_AVAILABLE;
 409    default:
 410        return H_HARDWARE;
 411    }
 412}
 413
 414static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 415{
 416    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 417        return true;
 418    }
 419
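         /* With HV KVM (KVM_PPC_PAGE_SIZES_REAL) a page size is only usable
          * if it does not exceed the backing RAM page size */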
 420    return (1ul << shift) <= rampgsize;
 421}
 422
 423static long max_cpu_page_size;
 424
 425static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 426{
 427    static struct kvm_ppc_smmu_info smmu_info;
 428    static bool has_smmu_info;
 429    CPUPPCState *env = &cpu->env;
 430    int iq, ik, jq, jk;
 431    bool has_64k_pages = false;
 432
 433    /* We only handle page sizes for 64-bit server guests for now */
 434    if (!(env->mmu_model & POWERPC_MMU_64)) {
 435        return;
 436    }
 437
 438    /* Collect MMU info from kernel if not already */
 439    if (!has_smmu_info) {
 440        kvm_get_smmu_info(cpu, &smmu_info);
 441        has_smmu_info = true;
 442    }
 443
 444    if (!max_cpu_page_size) {
 445        max_cpu_page_size = qemu_getrampagesize();
 446    }
 447
 448    /* Convert to QEMU form */
 449    memset(&env->sps, 0, sizeof(env->sps));
 450
 451    /* If we have HV KVM, we need to forbid CI large pages if our
 452     * host page size is smaller than 64K.
 453     */
 454    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 455        env->ci_large_pages = getpagesize() >= 0x10000;
 456    }
 457
 458    /*
 459     * XXX This loop should be an entry wide AND of the capabilities that
 460     *     the selected CPU has with the capabilities that KVM supports.
 461     */
 462    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 463        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 464        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 465
 466        if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 467                                 ksps->page_shift)) {
 468            continue;
 469        }
 470        qsps->page_shift = ksps->page_shift;
 471        qsps->slb_enc = ksps->slb_enc;
 472        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 473            if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 474                                     ksps->enc[jk].page_shift)) {
 475                continue;
 476            }
 477            if (ksps->enc[jk].page_shift == 16) {
 478                has_64k_pages = true;
 479            }
 480            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 481            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 482            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 483                break;
 484            }
 485        }
 486        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 487            break;
 488        }
 489    }
 490    env->slb_nr = smmu_info.slb_size;
 491    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 492        env->mmu_model &= ~POWERPC_MMU_1TSEG;
 493    }
 494    if (!has_64k_pages) {
 495        env->mmu_model &= ~POWERPC_MMU_64K;
 496    }
 497}
 498
 499bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 500{
 501    Object *mem_obj = object_resolve_path(obj_path, NULL);
 502    char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
 503    long pagesize;
 504
 505    if (mempath) {
 506        pagesize = qemu_mempath_getpagesize(mempath);
 507        g_free(mempath);
 508    } else {
 509        pagesize = getpagesize();
 510    }
 511
 512    return pagesize >= max_cpu_page_size;
 513}
 514
 515#else /* defined (TARGET_PPC64) */
 516
 517static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 518{
 519}
 520
 521bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 522{
 523    return true;
 524}
 525
 526#endif /* !defined (TARGET_PPC64) */
 527
 528unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 529{
 530    return POWERPC_CPU(cpu)->vcpu_id;
 531}
 532
  533/* e500 supports 2 h/w breakpoints and 2 watchpoints.
  534 * book3s supports only 1 watchpoint, so an array size
 535 * of 4 is sufficient for now.
 536 */
 537#define MAX_HW_BKPTS 4
 538
 539static struct HWBreakpoint {
 540    target_ulong addr;
 541    int type;
 542} hw_debug_points[MAX_HW_BKPTS];
 543
 544static CPUWatchpoint hw_watchpoint;
 545
  546/* By default no hardware breakpoints or watchpoints are supported */
 547static int max_hw_breakpoint;
 548static int max_hw_watchpoint;
 549static int nb_hw_breakpoint;
 550static int nb_hw_watchpoint;
 551
 552static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 553{
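         /* Only BookE advertises hardware debug registers here; for other
          * exception models the limits stay at their default of zero */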
 554    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 555        max_hw_breakpoint = 2;
 556        max_hw_watchpoint = 2;
 557    }
 558
 559    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 560        fprintf(stderr, "Error initializing h/w breakpoints\n");
 561        return;
 562    }
 563}
 564
 565int kvm_arch_init_vcpu(CPUState *cs)
 566{
 567    PowerPCCPU *cpu = POWERPC_CPU(cs);
 568    CPUPPCState *cenv = &cpu->env;
 569    int ret;
 570
 571    /* Gather server mmu info from KVM and update the CPU state */
 572    kvm_fixup_page_sizes(cpu);
 573
 574    /* Synchronize sregs with kvm */
 575    ret = kvm_arch_sync_sregs(cpu);
 576    if (ret) {
 577        if (ret == -EINVAL) {
 578            error_report("Register sync failed... If you're using kvm-hv.ko,"
 579                         " only \"-cpu host\" is possible");
 580        }
 581        return ret;
 582    }
 583
 584    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 585
 586    switch (cenv->mmu_model) {
 587    case POWERPC_MMU_BOOKE206:
 588        /* This target supports access to KVM's guest TLB */
 589        ret = kvm_booke206_tlb_init(cpu);
 590        break;
 591    case POWERPC_MMU_2_07:
 592        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
  593            /* KVM-HV has transactional memory on POWER8 even without the
  594             * KVM_CAP_PPC_HTM extension, so enable it here instead as
  595             * long as it's available to userspace on the host. */
 596            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 597                cap_htm = true;
 598            }
 599        }
 600        break;
 601    default:
 602        break;
 603    }
 604
 605    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 606    kvmppc_hw_debug_points_init(cenv);
 607
 608    return ret;
 609}
 610
 611static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 612{
 613    CPUPPCState *env = &cpu->env;
 614    CPUState *cs = CPU(cpu);
 615    struct kvm_dirty_tlb dirty_tlb;
 616    unsigned char *bitmap;
 617    int ret;
 618
 619    if (!env->kvm_sw_tlb) {
 620        return;
 621    }
 622
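         /* Flag every TLB entry as dirty so KVM reloads the complete
          * QEMU-side array */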
 623    bitmap = g_malloc((env->nb_tlb + 7) / 8);
 624    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 625
 626    dirty_tlb.bitmap = (uintptr_t)bitmap;
 627    dirty_tlb.num_dirty = env->nb_tlb;
 628
 629    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 630    if (ret) {
 631        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 632                __func__, strerror(-ret));
 633    }
 634
 635    g_free(bitmap);
 636}
 637
 638static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 639{
 640    PowerPCCPU *cpu = POWERPC_CPU(cs);
 641    CPUPPCState *env = &cpu->env;
 642    union {
 643        uint32_t u32;
 644        uint64_t u64;
 645    } val;
 646    struct kvm_one_reg reg = {
 647        .id = id,
 648        .addr = (uintptr_t) &val,
 649    };
 650    int ret;
 651
 652    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 653    if (ret != 0) {
 654        trace_kvm_failed_spr_get(spr, strerror(errno));
 655    } else {
 656        switch (id & KVM_REG_SIZE_MASK) {
 657        case KVM_REG_SIZE_U32:
 658            env->spr[spr] = val.u32;
 659            break;
 660
 661        case KVM_REG_SIZE_U64:
 662            env->spr[spr] = val.u64;
 663            break;
 664
 665        default:
 666            /* Don't handle this size yet */
 667            abort();
 668        }
 669    }
 670}
 671
 672static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 673{
 674    PowerPCCPU *cpu = POWERPC_CPU(cs);
 675    CPUPPCState *env = &cpu->env;
 676    union {
 677        uint32_t u32;
 678        uint64_t u64;
 679    } val;
 680    struct kvm_one_reg reg = {
 681        .id = id,
 682        .addr = (uintptr_t) &val,
 683    };
 684    int ret;
 685
 686    switch (id & KVM_REG_SIZE_MASK) {
 687    case KVM_REG_SIZE_U32:
 688        val.u32 = env->spr[spr];
 689        break;
 690
 691    case KVM_REG_SIZE_U64:
 692        val.u64 = env->spr[spr];
 693        break;
 694
 695    default:
 696        /* Don't handle this size yet */
 697        abort();
 698    }
 699
 700    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 701    if (ret != 0) {
 702        trace_kvm_failed_spr_set(spr, strerror(errno));
 703    }
 704}
 705
 706static int kvm_put_fp(CPUState *cs)
 707{
 708    PowerPCCPU *cpu = POWERPC_CPU(cs);
 709    CPUPPCState *env = &cpu->env;
 710    struct kvm_one_reg reg;
 711    int i;
 712    int ret;
 713
 714    if (env->insns_flags & PPC_FLOAT) {
 715        uint64_t fpscr = env->fpscr;
 716        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 717
 718        reg.id = KVM_REG_PPC_FPSCR;
 719        reg.addr = (uintptr_t)&fpscr;
 720        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 721        if (ret < 0) {
 722            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 723            return ret;
 724        }
 725
 726        for (i = 0; i < 32; i++) {
 727            uint64_t vsr[2];
 728
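                 /* Assemble the two doublewords of the VSR image: the FPR is
                  * architecturally doubleword 0 of the matching VSR, and the
                  * array follows the host's in-memory layout */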
 729#ifdef HOST_WORDS_BIGENDIAN
 730            vsr[0] = float64_val(env->fpr[i]);
 731            vsr[1] = env->vsr[i];
 732#else
 733            vsr[0] = env->vsr[i];
 734            vsr[1] = float64_val(env->fpr[i]);
 735#endif
 736            reg.addr = (uintptr_t) &vsr;
 737            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 738
 739            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 740            if (ret < 0) {
 741                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 742                        i, strerror(errno));
 743                return ret;
 744            }
 745        }
 746    }
 747
 748    if (env->insns_flags & PPC_ALTIVEC) {
 749        reg.id = KVM_REG_PPC_VSCR;
 750        reg.addr = (uintptr_t)&env->vscr;
 751        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 752        if (ret < 0) {
 753            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 754            return ret;
 755        }
 756
 757        for (i = 0; i < 32; i++) {
 758            reg.id = KVM_REG_PPC_VR(i);
 759            reg.addr = (uintptr_t)&env->avr[i];
 760            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 761            if (ret < 0) {
 762                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 763                return ret;
 764            }
 765        }
 766    }
 767
 768    return 0;
 769}
 770
 771static int kvm_get_fp(CPUState *cs)
 772{
 773    PowerPCCPU *cpu = POWERPC_CPU(cs);
 774    CPUPPCState *env = &cpu->env;
 775    struct kvm_one_reg reg;
 776    int i;
 777    int ret;
 778
 779    if (env->insns_flags & PPC_FLOAT) {
 780        uint64_t fpscr;
 781        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 782
 783        reg.id = KVM_REG_PPC_FPSCR;
 784        reg.addr = (uintptr_t)&fpscr;
 785        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 786        if (ret < 0) {
 787            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 788            return ret;
 789        } else {
 790            env->fpscr = fpscr;
 791        }
 792
 793        for (i = 0; i < 32; i++) {
 794            uint64_t vsr[2];
 795
 796            reg.addr = (uintptr_t) &vsr;
 797            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 798
 799            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 800            if (ret < 0) {
 801                DPRINTF("Unable to get %s%d from KVM: %s\n",
 802                        vsx ? "VSR" : "FPR", i, strerror(errno));
 803                return ret;
 804            } else {
 805#ifdef HOST_WORDS_BIGENDIAN
 806                env->fpr[i] = vsr[0];
 807                if (vsx) {
 808                    env->vsr[i] = vsr[1];
 809                }
 810#else
 811                env->fpr[i] = vsr[1];
 812                if (vsx) {
 813                    env->vsr[i] = vsr[0];
 814                }
 815#endif
 816            }
 817        }
 818    }
 819
 820    if (env->insns_flags & PPC_ALTIVEC) {
 821        reg.id = KVM_REG_PPC_VSCR;
 822        reg.addr = (uintptr_t)&env->vscr;
 823        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 824        if (ret < 0) {
 825            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 826            return ret;
 827        }
 828
 829        for (i = 0; i < 32; i++) {
 830            reg.id = KVM_REG_PPC_VR(i);
 831            reg.addr = (uintptr_t)&env->avr[i];
 832            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 833            if (ret < 0) {
 834                DPRINTF("Unable to get VR%d from KVM: %s\n",
 835                        i, strerror(errno));
 836                return ret;
 837            }
 838        }
 839    }
 840
 841    return 0;
 842}
 843
 844#if defined(TARGET_PPC64)
 845static int kvm_get_vpa(CPUState *cs)
 846{
 847    PowerPCCPU *cpu = POWERPC_CPU(cs);
 848    CPUPPCState *env = &cpu->env;
 849    struct kvm_one_reg reg;
 850    int ret;
 851
 852    reg.id = KVM_REG_PPC_VPA_ADDR;
 853    reg.addr = (uintptr_t)&env->vpa_addr;
 854    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 855    if (ret < 0) {
 856        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 857        return ret;
 858    }
 859
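         /* The shadow SLB address and size are transferred together as a
          * single 128-bit register, hence the layout assertion below */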
 860    assert((uintptr_t)&env->slb_shadow_size
 861           == ((uintptr_t)&env->slb_shadow_addr + 8));
 862    reg.id = KVM_REG_PPC_VPA_SLB;
 863    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 864    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 865    if (ret < 0) {
 866        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 867                strerror(errno));
 868        return ret;
 869    }
 870
 871    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 872    reg.id = KVM_REG_PPC_VPA_DTL;
 873    reg.addr = (uintptr_t)&env->dtl_addr;
 874    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 875    if (ret < 0) {
 876        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 877                strerror(errno));
 878        return ret;
 879    }
 880
 881    return 0;
 882}
 883
 884static int kvm_put_vpa(CPUState *cs)
 885{
 886    PowerPCCPU *cpu = POWERPC_CPU(cs);
 887    CPUPPCState *env = &cpu->env;
 888    struct kvm_one_reg reg;
 889    int ret;
 890
 891    /* SLB shadow or DTL can't be registered unless a master VPA is
 892     * registered.  That means when restoring state, if a VPA *is*
 893     * registered, we need to set that up first.  If not, we need to
 894     * deregister the others before deregistering the master VPA */
 895    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 896
 897    if (env->vpa_addr) {
 898        reg.id = KVM_REG_PPC_VPA_ADDR;
 899        reg.addr = (uintptr_t)&env->vpa_addr;
 900        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 901        if (ret < 0) {
 902            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 903            return ret;
 904        }
 905    }
 906
 907    assert((uintptr_t)&env->slb_shadow_size
 908           == ((uintptr_t)&env->slb_shadow_addr + 8));
 909    reg.id = KVM_REG_PPC_VPA_SLB;
 910    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 911    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 912    if (ret < 0) {
 913        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 914        return ret;
 915    }
 916
 917    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 918    reg.id = KVM_REG_PPC_VPA_DTL;
 919    reg.addr = (uintptr_t)&env->dtl_addr;
 920    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 921    if (ret < 0) {
 922        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 923                strerror(errno));
 924        return ret;
 925    }
 926
 927    if (!env->vpa_addr) {
 928        reg.id = KVM_REG_PPC_VPA_ADDR;
 929        reg.addr = (uintptr_t)&env->vpa_addr;
 930        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 931        if (ret < 0) {
 932            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 933            return ret;
 934        }
 935    }
 936
 937    return 0;
 938}
 939#endif /* TARGET_PPC64 */
 940
 941int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 942{
 943    CPUPPCState *env = &cpu->env;
 944    struct kvm_sregs sregs;
 945    int i;
 946
 947    sregs.pvr = env->spr[SPR_PVR];
 948
 949    if (cpu->vhyp) {
 950        PPCVirtualHypervisorClass *vhc =
 951            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
 952        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
 953    } else {
 954        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 955    }
 956
 957    /* Sync SLB */
 958#ifdef TARGET_PPC64
 959    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 960        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 961        if (env->slb[i].esid & SLB_ESID_V) {
 962            sregs.u.s.ppc64.slb[i].slbe |= i;
 963        }
 964        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 965    }
 966#endif
 967
 968    /* Sync SRs */
 969    for (i = 0; i < 16; i++) {
 970        sregs.u.s.ppc32.sr[i] = env->sr[i];
 971    }
 972
 973    /* Sync BATs */
 974    for (i = 0; i < 8; i++) {
 975        /* Beware. We have to swap upper and lower bits here */
 976        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 977            | env->DBAT[1][i];
 978        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 979            | env->IBAT[1][i];
 980    }
 981
 982    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 983}
 984
 985int kvm_arch_put_registers(CPUState *cs, int level)
 986{
 987    PowerPCCPU *cpu = POWERPC_CPU(cs);
 988    CPUPPCState *env = &cpu->env;
 989    struct kvm_regs regs;
 990    int ret;
 991    int i;
 992
 993    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 994    if (ret < 0) {
 995        return ret;
 996    }
 997
 998    regs.ctr = env->ctr;
 999    regs.lr  = env->lr;
1000    regs.xer = cpu_read_xer(env);
1001    regs.msr = env->msr;
1002    regs.pc = env->nip;
1003
1004    regs.srr0 = env->spr[SPR_SRR0];
1005    regs.srr1 = env->spr[SPR_SRR1];
1006
1007    regs.sprg0 = env->spr[SPR_SPRG0];
1008    regs.sprg1 = env->spr[SPR_SPRG1];
1009    regs.sprg2 = env->spr[SPR_SPRG2];
1010    regs.sprg3 = env->spr[SPR_SPRG3];
1011    regs.sprg4 = env->spr[SPR_SPRG4];
1012    regs.sprg5 = env->spr[SPR_SPRG5];
1013    regs.sprg6 = env->spr[SPR_SPRG6];
1014    regs.sprg7 = env->spr[SPR_SPRG7];
1015
1016    regs.pid = env->spr[SPR_BOOKE_PID];
1017
 1018    for (i = 0; i < 32; i++)
1019        regs.gpr[i] = env->gpr[i];
1020
1021    regs.cr = 0;
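         /* Pack the eight 4-bit crf[] fields into the 32-bit CR image,
          * CR0 in the most significant nibble */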
1022    for (i = 0; i < 8; i++) {
1023        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1024    }
1025
1026    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1027    if (ret < 0)
1028        return ret;
1029
1030    kvm_put_fp(cs);
1031
1032    if (env->tlb_dirty) {
1033        kvm_sw_tlb_put(cpu);
1034        env->tlb_dirty = false;
1035    }
1036
1037    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1038        ret = kvmppc_put_books_sregs(cpu);
1039        if (ret < 0) {
1040            return ret;
1041        }
1042    }
1043
1044    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1045        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1046    }
1047
1048    if (cap_one_reg) {
1049        int i;
1050
 1051        /* We deliberately ignore errors here: for kernels which have
 1052         * the ONE_REG calls but don't support the specific
1053         * registers, there's a reasonable chance things will still
1054         * work, at least until we try to migrate. */
1055        for (i = 0; i < 1024; i++) {
1056            uint64_t id = env->spr_cb[i].one_reg_id;
1057
1058            if (id != 0) {
1059                kvm_put_one_spr(cs, id, i);
1060            }
1061        }
1062
1063#ifdef TARGET_PPC64
1064        if (msr_ts) {
1065            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1066                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1067            }
1068            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1069                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1070            }
1071            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1072            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1073            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1074            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1075            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1076            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1077            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1078            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1079            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1080            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1081        }
1082
1083        if (cap_papr) {
1084            if (kvm_put_vpa(cs) < 0) {
1085                DPRINTF("Warning: Unable to set VPA information to KVM\n");
1086            }
1087        }
1088
1089        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1090#endif /* TARGET_PPC64 */
1091    }
1092
1093    return ret;
1094}
1095
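     /* BookE exception vectors are the IVPR base plus a per-vector IVOR offset */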
1096static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1097{
1098     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1099}
1100
1101static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1102{
1103    CPUPPCState *env = &cpu->env;
1104    struct kvm_sregs sregs;
1105    int ret;
1106
1107    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1108    if (ret < 0) {
1109        return ret;
1110    }
1111
1112    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1113        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1114        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1115        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1116        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1117        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1118        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1119        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1120        env->spr[SPR_DECR] = sregs.u.e.dec;
1121        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1122        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1123        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1124    }
1125
1126    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1127        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1128        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1129        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1130        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1131        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1132    }
1133
1134    if (sregs.u.e.features & KVM_SREGS_E_64) {
1135        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1136    }
1137
1138    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1139        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1140    }
1141
1142    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1143        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1144        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1145        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1146        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1147        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1148        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1149        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1150        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1151        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1152        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1153        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1154        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1155        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1156        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1157        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1158        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1159        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1160        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1161        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1162        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1163        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1164        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1165        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1166        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1167        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1168        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1169        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1170        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1171        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1172        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1173        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1174        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1175
1176        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1177            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1178            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1179            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1180            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1181            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1182            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1183        }
1184
1185        if (sregs.u.e.features & KVM_SREGS_E_PM) {
1186            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1187            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1188        }
1189
1190        if (sregs.u.e.features & KVM_SREGS_E_PC) {
1191            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1192            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1193            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1194            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1195        }
1196    }
1197
1198    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1199        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1200        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1201        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1202        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1203        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1204        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1205        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1206        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1207        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1208        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1209    }
1210
1211    if (sregs.u.e.features & KVM_SREGS_EXP) {
1212        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1213    }
1214
1215    if (sregs.u.e.features & KVM_SREGS_E_PD) {
1216        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1217        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1218    }
1219
1220    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1221        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1222        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1223        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1224
1225        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1226            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1227            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1228        }
1229    }
1230
1231    return 0;
1232}
1233
1234static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1235{
1236    CPUPPCState *env = &cpu->env;
1237    struct kvm_sregs sregs;
1238    int ret;
1239    int i;
1240
1241    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1242    if (ret < 0) {
1243        return ret;
1244    }
1245
1246    if (!cpu->vhyp) {
1247        ppc_store_sdr1(env, sregs.u.s.sdr1);
1248    }
1249
1250    /* Sync SLB */
1251#ifdef TARGET_PPC64
1252    /*
1253     * The packed SLB array we get from KVM_GET_SREGS only contains
1254     * information about valid entries. So we flush our internal copy
1255     * to get rid of stale ones, then put all valid SLB entries back
1256     * in.
1257     */
1258    memset(env->slb, 0, sizeof(env->slb));
1259    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1260        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1261        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1262        /*
1263         * Only restore valid entries
1264         */
1265        if (rb & SLB_ESID_V) {
1266            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1267        }
1268    }
1269#endif
1270
1271    /* Sync SRs */
1272    for (i = 0; i < 16; i++) {
1273        env->sr[i] = sregs.u.s.ppc32.sr[i];
1274    }
1275
1276    /* Sync BATs */
1277    for (i = 0; i < 8; i++) {
1278        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1279        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1280        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1281        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1282    }
1283
1284    return 0;
1285}
1286
1287int kvm_arch_get_registers(CPUState *cs)
1288{
1289    PowerPCCPU *cpu = POWERPC_CPU(cs);
1290    CPUPPCState *env = &cpu->env;
1291    struct kvm_regs regs;
1292    uint32_t cr;
1293    int i, ret;
1294
1295    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1296    if (ret < 0)
1297        return ret;
1298
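         /* Unpack the 32-bit CR image into the eight 4-bit crf[] fields,
          * starting with CR7 in the least significant nibble */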
1299    cr = regs.cr;
1300    for (i = 7; i >= 0; i--) {
1301        env->crf[i] = cr & 15;
1302        cr >>= 4;
1303    }
1304
1305    env->ctr = regs.ctr;
1306    env->lr = regs.lr;
1307    cpu_write_xer(env, regs.xer);
1308    env->msr = regs.msr;
1309    env->nip = regs.pc;
1310
1311    env->spr[SPR_SRR0] = regs.srr0;
1312    env->spr[SPR_SRR1] = regs.srr1;
1313
1314    env->spr[SPR_SPRG0] = regs.sprg0;
1315    env->spr[SPR_SPRG1] = regs.sprg1;
1316    env->spr[SPR_SPRG2] = regs.sprg2;
1317    env->spr[SPR_SPRG3] = regs.sprg3;
1318    env->spr[SPR_SPRG4] = regs.sprg4;
1319    env->spr[SPR_SPRG5] = regs.sprg5;
1320    env->spr[SPR_SPRG6] = regs.sprg6;
1321    env->spr[SPR_SPRG7] = regs.sprg7;
1322
1323    env->spr[SPR_BOOKE_PID] = regs.pid;
1324
 1325    for (i = 0; i < 32; i++)
1326        env->gpr[i] = regs.gpr[i];
1327
1328    kvm_get_fp(cs);
1329
1330    if (cap_booke_sregs) {
1331        ret = kvmppc_get_booke_sregs(cpu);
1332        if (ret < 0) {
1333            return ret;
1334        }
1335    }
1336
1337    if (cap_segstate) {
1338        ret = kvmppc_get_books_sregs(cpu);
1339        if (ret < 0) {
1340            return ret;
1341        }
1342    }
1343
1344    if (cap_hior) {
1345        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1346    }
1347
1348    if (cap_one_reg) {
1349        int i;
1350
 1351        /* We deliberately ignore errors here: for kernels which have
 1352         * the ONE_REG calls but don't support the specific
1353         * registers, there's a reasonable chance things will still
1354         * work, at least until we try to migrate. */
1355        for (i = 0; i < 1024; i++) {
1356            uint64_t id = env->spr_cb[i].one_reg_id;
1357
1358            if (id != 0) {
1359                kvm_get_one_spr(cs, id, i);
1360            }
1361        }
1362
1363#ifdef TARGET_PPC64
1364        if (msr_ts) {
1365            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1366                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1367            }
1368            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1369                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1370            }
1371            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1372            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1373            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1374            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1375            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1376            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1377            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1378            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1379            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1380            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1381        }
1382
1383        if (cap_papr) {
1384            if (kvm_get_vpa(cs) < 0) {
1385                DPRINTF("Warning: Unable to get VPA information from KVM\n");
1386            }
1387        }
1388
1389        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1390#endif
1391    }
1392
1393    return 0;
1394}
1395
1396int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1397{
1398    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1399
1400    if (irq != PPC_INTERRUPT_EXT) {
1401        return 0;
1402    }
1403
1404    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1405        return 0;
1406    }
1407
1408    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1409
1410    return 0;
1411}
1412
1413#if defined(TARGET_PPCEMB)
1414#define PPC_INPUT_INT PPC40x_INPUT_INT
1415#elif defined(TARGET_PPC64)
1416#define PPC_INPUT_INT PPC970_INPUT_INT
1417#else
1418#define PPC_INPUT_INT PPC6xx_INPUT_INT
1419#endif
1420
1421void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1422{
1423    PowerPCCPU *cpu = POWERPC_CPU(cs);
1424    CPUPPCState *env = &cpu->env;
1425    int r;
1426    unsigned irq;
1427
1428    qemu_mutex_lock_iothread();
1429
1430    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
 1431     * interrupt, reset, etc.) in PPC-specific env->irq_input_state. */
1432    if (!cap_interrupt_level &&
1433        run->ready_for_interrupt_injection &&
1434        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1435        (env->irq_input_state & (1<<PPC_INPUT_INT)))
1436    {
1437        /* For now KVM disregards the 'irq' argument. However, in the
1438         * future KVM could cache it in-kernel to avoid a heavyweight exit
1439         * when reading the UIC.
1440         */
1441        irq = KVM_INTERRUPT_SET;
1442
1443        DPRINTF("injected interrupt %d\n", irq);
1444        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1445        if (r < 0) {
1446            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1447        }
1448
1449        /* Always wake up soon in case the interrupt was level based */
1450        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1451                       (NANOSECONDS_PER_SECOND / 50));
1452    }
1453
1454    /* We don't know if there are more interrupts pending after this. However,
1455     * the guest will return to userspace in the course of handling this one
 1456     * anyway, so we will get a chance to deliver the rest. */
1457
1458    qemu_mutex_unlock_iothread();
1459}
1460
1461MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1462{
1463    return MEMTXATTRS_UNSPECIFIED;
1464}
1465
1466int kvm_arch_process_async_events(CPUState *cs)
1467{
1468    return cs->halted;
1469}
1470
1471static int kvmppc_handle_halt(PowerPCCPU *cpu)
1472{
1473    CPUState *cs = CPU(cpu);
1474    CPUPPCState *env = &cpu->env;
1475
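         /* Go idle only when no hard interrupt is pending and external
          * interrupts are enabled (MSR_EE); EXCP_HLT tells the main loop
          * to halt this vCPU */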
1476    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1477        cs->halted = 1;
1478        cs->exception_index = EXCP_HLT;
1479    }
1480
1481    return 0;
1482}
1483
1484/* map dcr access to existing qemu dcr emulation */
1485static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1486{
1487    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
 1488        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
1489
1490    return 0;
1491}
1492
1493static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1494{
1495    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1496        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1497
1498    return 0;
1499}
1500
1501int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1502{
1503    /* Mixed endian case is not handled */
1504    uint32_t sc = debug_inst_opcode;
1505
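         /* Save the original instruction and patch in the debug opcode that
          * KVM reported through KVM_REG_PPC_DEBUG_INST */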
1506    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1507                            sizeof(sc), 0) ||
1508        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1509        return -EINVAL;
1510    }
1511
1512    return 0;
1513}
1514
1515int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1516{
1517    uint32_t sc;
1518
1519    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1520        sc != debug_inst_opcode ||
1521        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1522                            sizeof(sc), 1)) {
1523        return -EINVAL;
1524    }
1525
1526    return 0;
1527}
1528
1529static int find_hw_breakpoint(target_ulong addr, int type)
1530{
1531    int n;
1532
1533    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1534           <= ARRAY_SIZE(hw_debug_points));
1535
1536    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1537        if (hw_debug_points[n].addr == addr &&
1538             hw_debug_points[n].type == type) {
1539            return n;
1540        }
1541    }
1542
1543    return -1;
1544}
1545
1546static int find_hw_watchpoint(target_ulong addr, int *flag)
1547{
1548    int n;
1549
1550    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1551    if (n >= 0) {
1552        *flag = BP_MEM_ACCESS;
1553        return n;
1554    }
1555
1556    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1557    if (n >= 0) {
1558        *flag = BP_MEM_WRITE;
1559        return n;
1560    }
1561
1562    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1563    if (n >= 0) {
1564        *flag = BP_MEM_READ;
1565        return n;
1566    }
1567
1568    return -1;
1569}
1570
1571int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1572                                  target_ulong len, int type)
1573{
1574    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1575        return -ENOBUFS;
1576    }
1577
1578    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1579    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1580
1581    switch (type) {
1582    case GDB_BREAKPOINT_HW:
1583        if (nb_hw_breakpoint >= max_hw_breakpoint) {
1584            return -ENOBUFS;
1585        }
1586
1587        if (find_hw_breakpoint(addr, type) >= 0) {
1588            return -EEXIST;
1589        }
1590
1591        nb_hw_breakpoint++;
1592        break;
1593
1594    case GDB_WATCHPOINT_WRITE:
1595    case GDB_WATCHPOINT_READ:
1596    case GDB_WATCHPOINT_ACCESS:
1597        if (nb_hw_watchpoint >= max_hw_watchpoint) {
1598            return -ENOBUFS;
1599        }
1600
1601        if (find_hw_breakpoint(addr, type) >= 0) {
1602            return -EEXIST;
1603        }
1604
1605        nb_hw_watchpoint++;
1606        break;
1607
1608    default:
1609        return -ENOSYS;
1610    }
1611
1612    return 0;
1613}
1614
1615int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1616                                  target_ulong len, int type)
1617{
1618    int n;
1619
1620    n = find_hw_breakpoint(addr, type);
1621    if (n < 0) {
1622        return -ENOENT;
1623    }
1624
1625    switch (type) {
1626    case GDB_BREAKPOINT_HW:
1627        nb_hw_breakpoint--;
1628        break;
1629
1630    case GDB_WATCHPOINT_WRITE:
1631    case GDB_WATCHPOINT_READ:
1632    case GDB_WATCHPOINT_ACCESS:
1633        nb_hw_watchpoint--;
1634        break;
1635
1636    default:
1637        return -ENOSYS;
1638    }
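         /* Keep the array dense: move the last entry into the freed slot */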
1639    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1640
1641    return 0;
1642}
1643
1644void kvm_arch_remove_all_hw_breakpoints(void)
1645{
1646    nb_hw_breakpoint = nb_hw_watchpoint = 0;
1647}
1648
1649void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1650{
1651    int n;
1652
1653    /* Software Breakpoint updates */
1654    if (kvm_sw_breakpoints_active(cs)) {
1655        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1656    }
1657
1658    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1659           <= ARRAY_SIZE(hw_debug_points));
1660    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1661
1662    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1663        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1664        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1665        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1666            switch (hw_debug_points[n].type) {
1667            case GDB_BREAKPOINT_HW:
1668                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1669                break;
1670            case GDB_WATCHPOINT_WRITE:
1671                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1672                break;
1673            case GDB_WATCHPOINT_READ:
1674                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1675                break;
1676            case GDB_WATCHPOINT_ACCESS:
1677                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1678                                        KVMPPC_DEBUG_WATCH_READ;
1679                break;
1680            default:
1681                cpu_abort(cs, "Unsupported breakpoint type\n");
1682            }
1683            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1684        }
1685    }
1686}
1687
1688static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1689{
1690    CPUState *cs = CPU(cpu);
1691    CPUPPCState *env = &cpu->env;
1692    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1693    int handle = 0;
1694    int n;
1695    int flag = 0;
1696
1697    if (cs->singlestep_enabled) {
1698        handle = 1;
1699    } else if (arch_info->status) {
1700        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1701            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1702                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1703                if (n >= 0) {
1704                    handle = 1;
1705                }
1706            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1707                                            KVMPPC_DEBUG_WATCH_WRITE)) {
1708                n = find_hw_watchpoint(arch_info->address,  &flag);
1709                if (n >= 0) {
1710                    handle = 1;
1711                    cs->watchpoint_hit = &hw_watchpoint;
1712                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
1713                    hw_watchpoint.flags = flag;
1714                }
1715            }
1716        }
1717    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1718        handle = 1;
1719    } else {
1720        /* QEMU cannot handle this debug exception, so inject a program
1721         * exception into the guest (yes, a program exception, NOT a
1722         * debug exception).
1723         * While QEMU owns the debug resources, the debug exception must
1724         * always be enabled: we set MSR_DE and also MSRP_DEP so the
1725         * guest cannot change MSR_DE.
1726         * When instead we emulate debug resources for the guest, the
1727         * guest must control MSR_DE (enabling/disabling the debug
1728         * interrupt as needed).
1729         * Supporting both configurations at once is NOT possible, so
1730         * debug resources cannot be shared between QEMU and the guest
1731         * on the BookE architecture.
1732         * In the current design QEMU takes priority over the guest: if
1733         * QEMU is using the debug resources, the guest cannot use them.
1734         * For software breakpoints QEMU uses a privileged instruction,
1735         * so we cannot be here because the guest raised a debug
1736         * exception; the only possibility is that the guest executed a
1737         * privileged or illegal instruction, which is why we inject a
1738         * program interrupt.
1739         */
1740
1741        cpu_synchronize_state(cs);
1742        /* env->nip is PC, so increment this by 4 to use
1743         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1744         */
1745        env->nip += 4;
1746        cs->exception_index = POWERPC_EXCP_PROGRAM;
1747        env->error_code = POWERPC_EXCP_INVAL;
1748        ppc_cpu_do_interrupt(cs);
1749    }
1750
1751    return handle;
1752}
1753
1754int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1755{
1756    PowerPCCPU *cpu = POWERPC_CPU(cs);
1757    CPUPPCState *env = &cpu->env;
1758    int ret;
1759
1760    qemu_mutex_lock_iothread();
1761
1762    switch (run->exit_reason) {
1763    case KVM_EXIT_DCR:
1764        if (run->dcr.is_write) {
1765            DPRINTF("handle dcr write\n");
1766            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1767        } else {
1768            DPRINTF("handle dcr read\n");
1769            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1770        }
1771        break;
1772    case KVM_EXIT_HLT:
1773        DPRINTF("handle halt\n");
1774        ret = kvmppc_handle_halt(cpu);
1775        break;
1776#if defined(TARGET_PPC64)
1777    case KVM_EXIT_PAPR_HCALL:
1778        DPRINTF("handle PAPR hypercall\n");
1779        run->papr_hcall.ret = spapr_hypercall(cpu,
1780                                              run->papr_hcall.nr,
1781                                              run->papr_hcall.args);
1782        ret = 0;
1783        break;
1784#endif
1785    case KVM_EXIT_EPR:
1786        DPRINTF("handle epr\n");
1787        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1788        ret = 0;
1789        break;
1790    case KVM_EXIT_WATCHDOG:
1791        DPRINTF("handle watchdog expiry\n");
1792        watchdog_perform_action();
1793        ret = 0;
1794        break;
1795
1796    case KVM_EXIT_DEBUG:
1797        DPRINTF("handle debug exception\n");
1798        if (kvm_handle_debug(cpu, run)) {
1799            ret = EXCP_DEBUG;
1800            break;
1801        }
1802        /* re-enter, this exception was guest-internal */
1803        ret = 0;
1804        break;
1805
1806    default:
1807        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1808        ret = -1;
1809        break;
1810    }
1811
1812    qemu_mutex_unlock_iothread();
1813    return ret;
1814}
1815
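/* OR the given bits into the guest's BookE Timer Status Register (TSR). */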
1816int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1817{
1818    CPUState *cs = CPU(cpu);
1819    uint32_t bits = tsr_bits;
1820    struct kvm_one_reg reg = {
1821        .id = KVM_REG_PPC_OR_TSR,
1822        .addr = (uintptr_t) &bits,
1823    };
1824
1825    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1826}
1827
1828int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1829{
1831    CPUState *cs = CPU(cpu);
1832    uint32_t bits = tsr_bits;
1833    struct kvm_one_reg reg = {
1834        .id = KVM_REG_PPC_CLEAR_TSR,
1835        .addr = (uintptr_t) &bits,
1836    };
1837
1838    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1839}
1840
1841int kvmppc_set_tcr(PowerPCCPU *cpu)
1842{
1843    CPUState *cs = CPU(cpu);
1844    CPUPPCState *env = &cpu->env;
1845    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1846
1847    struct kvm_one_reg reg = {
1848        .id = KVM_REG_PPC_TCR,
1849        .addr = (uintptr_t) &tcr,
1850    };
1851
1852    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1853}
1854
1855int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1856{
1857    CPUState *cs = CPU(cpu);
1858    int ret;
1859
1860    if (!kvm_enabled()) {
1861        return -1;
1862    }
1863
1864    if (!cap_ppc_watchdog) {
1865        printf("warning: KVM does not support watchdog\n");
1866        return -1;
1867    }
1868
1869    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1870    if (ret < 0) {
1871        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1872                __func__, strerror(-ret));
1873        return ret;
1874    }
1875
1876    return ret;
1877}
1878
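/* Copy the first /proc/cpuinfo line that starts with @field into @value
 * (the whole line, including the field name).  Returns 0 on success,
 * -1 if the file can't be opened or the field isn't present. */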
1879static int read_cpuinfo(const char *field, char *value, int len)
1880{
1881    FILE *f;
1882    int ret = -1;
1883    int field_len = strlen(field);
1884    char line[512];
1885
1886    f = fopen("/proc/cpuinfo", "r");
1887    if (!f) {
1888        return -1;
1889    }
1890
1891    do {
1892        if (!fgets(line, sizeof(line), f)) {
1893            break;
1894        }
1895        if (!strncmp(line, field, field_len)) {
1896            pstrcpy(value, len, line);
1897            ret = 0;
1898            break;
1899        }
1900    } while (*line);
1901
1902    fclose(f);
1903
1904    return ret;
1905}
1906
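/* Host timebase frequency, parsed from the "timebase" line of
 * /proc/cpuinfo; defaults to NANOSECONDS_PER_SECOND if it can't be
 * determined. */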
1907uint32_t kvmppc_get_tbfreq(void)
1908{
1909    char line[512];
1910    char *ns;
1911    uint32_t retval = NANOSECONDS_PER_SECOND;
1912
1913    if (read_cpuinfo("timebase", line, sizeof(line))) {
1914        return retval;
1915    }
1916
1917    if (!(ns = strchr(line, ':'))) {
1918        return retval;
1919    }
1920
1921    ns++;
1922
1923    return atoi(ns);
1924}
1925
1926bool kvmppc_get_host_serial(char **value)
1927{
1928    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1929                               NULL);
1930}
1931
1932bool kvmppc_get_host_model(char **value)
1933{
1934    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1935}
1936
1937/* Try to find a device tree node for a CPU with clock-frequency property */
1938static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1939{
1940    struct dirent *dirp;
1941    DIR *dp;
1942
1943    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1944        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1945        return -1;
1946    }
1947
1948    buf[0] = '\0';
1949    while ((dirp = readdir(dp)) != NULL) {
1950        FILE *f;
1951        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1952                 dirp->d_name);
1953        f = fopen(buf, "r");
1954        if (f) {
1955            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1956            fclose(f);
1957            break;
1958        }
1959        buf[0] = '\0';
1960    }
1961    closedir(dp);
1962    if (buf[0] == '\0') {
1963        printf("Unknown host!\n");
1964        return -1;
1965    }
1966
1967    return 0;
1968}
1969
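/* Read a single big-endian integer (32-bit or 64-bit) from a device tree
 * property file.  Returns -1 if the file can't be opened and 0 if the
 * property length is neither 4 nor 8 bytes. */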
1970static uint64_t kvmppc_read_int_dt(const char *filename)
1971{
1972    union {
1973        uint32_t v32;
1974        uint64_t v64;
1975    } u;
1976    FILE *f;
1977    int len;
1978
1979    f = fopen(filename, "rb");
1980    if (!f) {
1981        return -1;
1982    }
1983
1984    len = fread(&u, 1, sizeof(u), f);
1985    fclose(f);
1986    switch (len) {
1987    case 4:
1988        /* property is a 32-bit quantity */
1989        return be32_to_cpu(u.v32);
1990    case 8:
1991        return be64_to_cpu(u.v64);
1992    }
1993
1994    return 0;
1995}
1996
1997/* Read a CPU node property from the host device tree that's a single
1998 * integer (32-bit or 64-bit).  Returns -1 if the CPU node or the
1999 * property can't be found or opened, and 0 if the property format
2000 * isn't understood. */
2001static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
2002{
2003    char buf[PATH_MAX], *tmp;
2004    uint64_t val;
2005
2006    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
2007        return -1;
2008    }
2009
2010    tmp = g_strdup_printf("%s/%s", buf, propname);
2011    val = kvmppc_read_int_dt(tmp);
2012    g_free(tmp);
2013
2014    return val;
2015}
2016
2017uint64_t kvmppc_get_clockfreq(void)
2018{
2019    return kvmppc_read_int_cpu_dt("clock-frequency");
2020}
2021
2022static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2023{
2024    PowerPCCPU *cpu = ppc_env_get_cpu(env);
2025    CPUState *cs = CPU(cpu);
2026
2027    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2028        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2029        return 0;
2030    }
2031
2032    return 1;
2033}
2034
2035int kvmppc_get_hasidle(CPUPPCState *env)
2036{
2037    struct kvm_ppc_pvinfo pvinfo;
2038
2039    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2040        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2041        return 1;
2042    }
2043
2044    return 0;
2045}
2046
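/* Fill @buf with the hypercall instruction sequence the guest should use.
 * Returns 0 if KVM supplied the sequence via KVM_PPC_GET_PVINFO, or 1 if
 * the always-failing fallback sequence below was used instead. */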
2047int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2048{
2049    uint32_t *hc = (uint32_t *)buf;
2050    struct kvm_ppc_pvinfo pvinfo;
2051
2052    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2053        memcpy(buf, pvinfo.hcall, buf_len);
2054        return 0;
2055    }
2056
2057    /*
2058     * Fall back to always-failing hypercalls regardless of endianness:
2059     *
2060     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2061     *     li r3, -1
2062     *     b .+8       (becomes nop in wrong endian)
2063     *     bswap32(li r3, -1)
2064     */
2065
2066    hc[0] = cpu_to_be32(0x08000048);
2067    hc[1] = cpu_to_be32(0x3860ffff);
2068    hc[2] = cpu_to_be32(0x48000008);
2069    hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2070
2071    return 1;
2072}
2073
2074static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2075{
2076    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2077}
2078
2079void kvmppc_enable_logical_ci_hcalls(void)
2080{
2081    /*
2082     * FIXME: it would be nice if we could detect the cases where
2083     * we're using a device that requires the in-kernel
2084     * implementation of these hcalls but the kernel lacks it, and
2085     * produce a warning.
2086     */
2087    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2088    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2089}
2090
2091void kvmppc_enable_set_mode_hcall(void)
2092{
2093    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2094}
2095
2096void kvmppc_enable_clear_ref_mod_hcalls(void)
2097{
2098    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2099    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2100}
2101
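/* Switch the vCPU into PAPR (pseries guest) mode; failure to enable the
 * capability is fatal. */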
2102void kvmppc_set_papr(PowerPCCPU *cpu)
2103{
2104    CPUState *cs = CPU(cpu);
2105    int ret;
2106
2107    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2108    if (ret) {
2109        error_report("This vCPU type or KVM version does not support PAPR");
2110        exit(1);
2111    }
2112
2113    /* Update the capability flag so we sync the right information
2114     * with KVM */
2115    cap_papr = 1;
2116}
2117
2118int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2119{
2120    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2121}
2122
2123void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2124{
2125    CPUState *cs = CPU(cpu);
2126    int ret;
2127
2128    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2129    if (ret && mpic_proxy) {
2130        error_report("This KVM version does not support EPR");
2131        exit(1);
2132    }
2133}
2134
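/* SMT mode (threads per core) reported by KVM, or 1 if none was reported. */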
2135int kvmppc_smt_threads(void)
2136{
2137    return cap_ppc_smt ? cap_ppc_smt : 1;
2138}
2139
2140int kvmppc_set_smt_threads(int smt)
2141{
2142    int ret;
2143
2144    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2145    if (!ret) {
2146        cap_ppc_smt = smt;
2147    }
2148    return ret;
2149}
2150
2151void kvmppc_hint_smt_possible(Error **errp)
2152{
2153    int i;
2154    GString *g;
2155    char *s;
2156
2157    assert(kvm_enabled());
2158    if (cap_ppc_smt_possible) {
2159        g = g_string_new("Available VSMT modes:");
2160        for (i = 63; i >= 0; i--) {
2161            if ((1UL << i) & cap_ppc_smt_possible) {
2162                g_string_append_printf(g, " %lu", (1UL << i));
2163            }
2164        }
2165        s = g_string_free(g, false);
2166        error_append_hint(errp, "%s.\n", s);
2167        g_free(s);
2168    } else {
2169        error_append_hint(errp,
2170                          "This KVM seems to be too old to support VSMT.\n");
2171    }
2172}
2173
2174
2175#ifdef TARGET_PPC64
2176off_t kvmppc_alloc_rma(void **rma)
2177{
2178    off_t size;
2179    int fd;
2180    struct kvm_allocate_rma ret;
2181
2182    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported;
2183     * if cap_ppc_rma == 1, contiguous RMA allocation is supported but
2184     *                      not necessary on this hardware;
2185     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware.
2186     *
2187     * FIXME: We should allow the user to force contiguous RMA
2188     * allocation in the cap_ppc_rma == 1 case.
2189     */
2190    if (cap_ppc_rma < 2) {
2191        return 0;
2192    }
2193
2194    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2195    if (fd < 0) {
2196        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2197                strerror(errno));
2198        return -1;
2199    }
2200
2201    size = MIN(ret.rma_size, 256ul << 20);
2202
2203    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2204    if (*rma == MAP_FAILED) {
2205        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2206        return -1;
2207    };
2208
2209    return size;
2210}
2211
2212uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2213{
2214    struct kvm_ppc_smmu_info info;
2215    long rampagesize, best_page_shift;
2216    int i;
2217
2218    if (cap_ppc_rma >= 2) {
2219        return current_size;
2220    }
2221
2222    /* Find the largest hardware-supported page size that's less than
2223     * or equal to the (logical) backing page size of guest RAM */
2224    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2225    rampagesize = qemu_getrampagesize();
2226    best_page_shift = 0;
2227
2228    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2229        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2230
2231        if (!sps->page_shift) {
2232            continue;
2233        }
2234
2235        if ((sps->page_shift > best_page_shift)
2236            && ((1UL << sps->page_shift) <= rampagesize)) {
2237            best_page_shift = sps->page_shift;
2238        }
2239    }
2240
2241    return MIN(current_size,
2242               1ULL << (best_page_shift + hash_shift - 7));
2243}
2244#endif
2245
2246bool kvmppc_spapr_use_multitce(void)
2247{
2248    return cap_spapr_multitce;
2249}
2250
2251int kvmppc_spapr_enable_inkernel_multitce(void)
2252{
2253    int ret;
2254
2255    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2256                            H_PUT_TCE_INDIRECT, 1);
2257    if (!ret) {
2258        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2259                                H_STUFF_TCE, 1);
2260    }
2261
2262    return ret;
2263}
2264
2265void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2266                              uint64_t bus_offset, uint32_t nb_table,
2267                              int *pfd, bool need_vfio)
2268{
2269    long len;
2270    int fd;
2271    void *table;
2272
2273    /* Must set fd to -1 so we don't try to munmap when called for
2274     * destroying the table, which the upper layers -will- do
2275     */
2276    *pfd = -1;
2277    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2278        return NULL;
2279    }
2280
2281    if (cap_spapr_tce_64) {
2282        struct kvm_create_spapr_tce_64 args = {
2283            .liobn = liobn,
2284            .page_shift = page_shift,
2285            .offset = bus_offset >> page_shift,
2286            .size = nb_table,
2287            .flags = 0
2288        };
2289        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2290        if (fd < 0) {
2291            fprintf(stderr,
2292                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2293                    liobn);
2294            return NULL;
2295        }
2296    } else if (cap_spapr_tce) {
2297        uint64_t window_size = (uint64_t) nb_table << page_shift;
2298        struct kvm_create_spapr_tce args = {
2299            .liobn = liobn,
2300            .window_size = window_size,
2301        };
2302        if ((window_size != args.window_size) || bus_offset) {
2303            return NULL;
2304        }
2305        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2306        if (fd < 0) {
2307            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2308                    liobn);
2309            return NULL;
2310        }
2311    } else {
2312        return NULL;
2313    }
2314
2315    len = nb_table * sizeof(uint64_t);
2316    /* FIXME: round this up to page size */
2317
2318    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2319    if (table == MAP_FAILED) {
2320        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2321                liobn);
2322        close(fd);
2323        return NULL;
2324    }
2325
2326    *pfd = fd;
2327    return table;
2328}
2329
2330int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2331{
2332    long len;
2333
2334    if (fd < 0) {
2335        return -1;
2336    }
2337
2338    len = nb_table * sizeof(uint64_t);
2339    if ((munmap(table, len) < 0) ||
2340        (close(fd) < 0)) {
2341        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2342                strerror(errno));
2343        /* Leak the table */
2344    }
2345
2346    return 0;
2347}
2348
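/* Allocate or reset the guest hash page table in KVM, using @shift_hint
 * as the requested size.  Returns the HPT size shift actually in effect,
 * 0 if QEMU should allocate the HPT itself, or a negative errno. */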
2349int kvmppc_reset_htab(int shift_hint)
2350{
2351    uint32_t shift = shift_hint;
2352
2353    if (!kvm_enabled()) {
2354        /* Full emulation, tell caller to allocate htab itself */
2355        return 0;
2356    }
2357    if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2358        int ret;
2359        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2360        if (ret == -ENOTTY) {
2361            /* At least some versions of PR KVM advertise the
2362             * capability, but don't implement the ioctl().  Oops.
2363             * Return 0 so that we allocate the htab in QEMU, as is
2364             * correct for PR. */
2365            return 0;
2366        } else if (ret < 0) {
2367            return ret;
2368        }
2369        return shift;
2370    }
2371
2372    /* We have a kernel that predates the htab reset calls.  For PR
2373     * KVM, we need to allocate the htab ourselves; an HV KVM of this
2374     * era has already allocated a fixed 16MB hash table. */
2375    if (kvmppc_is_pr(kvm_state)) {
2376        /* PR - tell caller to allocate htab */
2377        return 0;
2378    } else {
2379        /* HV - assume 16MB kernel allocated htab */
2380        return 24;
2381    }
2382}
2383
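/* Read the host's Processor Version Register. */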
2384static inline uint32_t mfpvr(void)
2385{
2386    uint32_t pvr;
2387
2388    asm ("mfpvr %0"
2389         : "=r"(pvr));
2390    return pvr;
2391}
2392
2393static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2394{
2395    if (on) {
2396        *word |= flags;
2397    } else {
2398        *word &= ~flags;
2399    }
2400}
2401
2402static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2403{
2404    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2405    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2406    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2407
2408    /* Now fix up the class with information we can query from the host */
2409    pcc->pvr = mfpvr();
2410
2411    alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2412                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2413    alter_insns(&pcc->insns_flags2, PPC2_VSX,
2414                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2415    alter_insns(&pcc->insns_flags2, PPC2_DFP,
2416                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2417
2418    if (dcache_size != -1) {
2419        pcc->l1_dcache_size = dcache_size;
2420    }
2421
2422    if (icache_size != -1) {
2423        pcc->l1_icache_size = icache_size;
2424    }
2425
2426#if defined(TARGET_PPC64)
2427    pcc->radix_page_info = kvm_get_radix_page_info();
2428
2429    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2430        /*
2431         * POWER9 DD1 has some bugs which make it not really ISA 3.00
2432         * compliant.  More importantly, advertising ISA 3.00
2433         * architected mode may prevent guests from activating
2434         * necessary DD1 workarounds.
2435         */
2436        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2437                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2438    }
2439#endif /* defined(TARGET_PPC64) */
2440}
2441
2442bool kvmppc_has_cap_epr(void)
2443{
2444    return cap_epr;
2445}
2446
2447bool kvmppc_has_cap_fixup_hcalls(void)
2448{
2449    return cap_fixup_hcalls;
2450}
2451
2452bool kvmppc_has_cap_htm(void)
2453{
2454    return cap_htm;
2455}
2456
2457bool kvmppc_has_cap_mmu_radix(void)
2458{
2459    return cap_mmu_radix;
2460}
2461
2462bool kvmppc_has_cap_mmu_hash_v3(void)
2463{
2464    return cap_mmu_hash_v3;
2465}
2466
2467static void kvmppc_get_cpu_characteristics(KVMState *s)
2468{
2469    struct kvm_ppc_cpu_char c;
2470    int ret;
2471
2472    /* Assume broken */
2473    cap_ppc_safe_cache = 0;
2474    cap_ppc_safe_bounds_check = 0;
2475    cap_ppc_safe_indirect_branch = 0;
2476
2477    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2478    if (!ret) {
2479        return;
2480    }
2481    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2482    if (ret < 0) {
2483        return;
2484    }
2485    /* Parse and set cap_ppc_safe_cache */
2486    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2487        cap_ppc_safe_cache = 2;
2488    } else if ((c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2489               (c.character & c.character_mask
2490                & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2491        cap_ppc_safe_cache = 1;
2492    }
2493    /* Parse and set cap_ppc_safe_bounds_check */
2494    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2495        cap_ppc_safe_bounds_check = 2;
2496    } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2497        cap_ppc_safe_bounds_check = 1;
2498    }
2499    /* Parse and set cap_ppc_safe_indirect_branch */
2500    if (c.character & H_CPU_CHAR_BCCTRL_SERIALISED) {
2501        cap_ppc_safe_indirect_branch = 2;
2502    }
2503}
2504
2505int kvmppc_get_cap_safe_cache(void)
2506{
2507    return cap_ppc_safe_cache;
2508}
2509
2510int kvmppc_get_cap_safe_bounds_check(void)
2511{
2512    return cap_ppc_safe_bounds_check;
2513}
2514
2515int kvmppc_get_cap_safe_indirect_branch(void)
2516{
2517    return cap_ppc_safe_indirect_branch;
2518}
2519
2520PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2521{
2522    uint32_t host_pvr = mfpvr();
2523    PowerPCCPUClass *pvr_pcc;
2524
2525    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2526    if (pvr_pcc == NULL) {
2527        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2528    }
2529
2530    return pvr_pcc;
2531}
2532
2533static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2534{
2535    TypeInfo type_info = {
2536        .name = TYPE_HOST_POWERPC_CPU,
2537        .class_init = kvmppc_host_cpu_class_init,
2538    };
2539    MachineClass *mc = MACHINE_GET_CLASS(ms);
2540    PowerPCCPUClass *pvr_pcc;
2541    ObjectClass *oc;
2542    DeviceClass *dc;
2543    int i;
2544
2545    pvr_pcc = kvm_ppc_get_host_cpu_class();
2546    if (pvr_pcc == NULL) {
2547        return -1;
2548    }
2549    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2550    type_register(&type_info);
2551    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2552        /* override TCG default cpu type with 'host' cpu model */
2553        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2554    }
2555
2556    oc = object_class_by_name(type_info.name);
2557    g_assert(oc);
2558
2559    /*
2560     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2561     * we want "POWER8" to be a "family" alias that points to the current
2562     * host CPU type, too)
2563     */
2564    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2565    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2566        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2567            char *suffix;
2568
2569            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2570            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2571            if (suffix) {
2572                *suffix = 0;
2573            }
2574            break;
2575        }
2576    }
2577
2578    return 0;
2579}
2580
2581int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2582{
2583    struct kvm_rtas_token_args args = {
2584        .token = token,
2585    };
2586
2587    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2588        return -ENOENT;
2589    }
2590
2591    strncpy(args.name, function, sizeof(args.name));
2592
2593    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2594}
2595
2596int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2597{
2598    struct kvm_get_htab_fd s = {
2599        .flags = write ? KVM_GET_HTAB_WRITE : 0,
2600        .start_index = index,
2601    };
2602    int ret;
2603
2604    if (!cap_htab_fd) {
2605        error_setg(errp, "KVM version doesn't support %s the HPT",
2606                   write ? "writing" : "reading");
2607        return -ENOTSUP;
2608    }
2609
2610    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2611    if (ret < 0) {
2612        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2613                   write ? "writing" : "reading", write ? "to" : "from",
2614                   strerror(errno));
2615        return -errno;
2616    }
2617
2618    return ret;
2619}
2620
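/* Stream HPT chunks from the KVM HTAB fd into the migration stream,
 * stopping once the fd is exhausted or more than @max_ns have elapsed.
 * Returns 1 when the whole table has been sent, 0 if more remains, or a
 * negative value on read error. */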
2621int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2622{
2623    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2624    uint8_t buf[bufsize];
2625    ssize_t rc;
2626
2627    do {
2628        rc = read(fd, buf, bufsize);
2629        if (rc < 0) {
2630            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2631                    strerror(errno));
2632            return rc;
2633        } else if (rc) {
2634            uint8_t *buffer = buf;
2635            ssize_t n = rc;
2636            while (n) {
2637                struct kvm_get_htab_header *head =
2638                    (struct kvm_get_htab_header *) buffer;
2639                size_t chunksize = sizeof(*head) +
2640                     HASH_PTE_SIZE_64 * head->n_valid;
2641
2642                qemu_put_be32(f, head->index);
2643                qemu_put_be16(f, head->n_valid);
2644                qemu_put_be16(f, head->n_invalid);
2645                qemu_put_buffer(f, (void *)(head + 1),
2646                                HASH_PTE_SIZE_64 * head->n_valid);
2647
2648                buffer += chunksize;
2649                n -= chunksize;
2650            }
2651        }
2652    } while ((rc != 0)
2653             && ((max_ns < 0)
2654                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2655
2656    return (rc == 0) ? 1 : 0;
2657}
2658
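/* Write one chunk of HPT entries from the incoming migration stream into
 * the KVM HTAB fd. */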
2659int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2660                           uint16_t n_valid, uint16_t n_invalid)
2661{
2662    struct kvm_get_htab_header *buf;
2663    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2664    ssize_t rc;
2665
2666    buf = alloca(chunksize);
2667    buf->index = index;
2668    buf->n_valid = n_valid;
2669    buf->n_invalid = n_invalid;
2670
2671    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2672
2673    rc = write(fd, buf, chunksize);
2674    if (rc < 0) {
2675        fprintf(stderr, "Error writing KVM hash table: %s\n",
2676                strerror(errno));
2677        return rc;
2678    }
2679    if (rc != chunksize) {
2680        /* We should never get a short write on a single chunk */
2681        fprintf(stderr, "Short write, restoring KVM hash table\n");
2682        return -1;
2683    }
2684    return 0;
2685}
2686
2687bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2688{
2689    return true;
2690}
2691
2692void kvm_arch_init_irq_routing(KVMState *s)
2693{
2694}
2695
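/* Read @n HPTEs starting at index @ptex through the KVM HTAB fd into
 * @hptes; ranges the kernel reports as invalid are zero-filled. */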
2696void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2697{
2698    int fd, rc;
2699    int i;
2700
2701    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2702
2703    i = 0;
2704    while (i < n) {
2705        struct kvm_get_htab_header *hdr;
2706        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2707        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2708
2709        rc = read(fd, buf, sizeof(buf));
2710        if (rc < 0) {
2711            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2712        }
2713
2714        hdr = (struct kvm_get_htab_header *)buf;
2715        while ((i < n) && ((char *)hdr < (buf + rc))) {
2716            int invalid = hdr->n_invalid;
2717
2718            if (hdr->index != (ptex + i)) {
2719                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2720                         " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2721            }
2722
2723            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2724            i += hdr->n_valid;
2725
2726            if ((n - i) < invalid) {
2727                invalid = n - i;
2728            }
2729            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2730            i += hdr->n_invalid;
2731
2732            hdr = (struct kvm_get_htab_header *)
2733                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2734        }
2735    }
2736
2737    close(fd);
2738}
2739
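/* Update a single HPTE at index @ptex through the KVM HTAB write fd. */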
2740void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2741{
2742    int fd, rc;
2743    struct {
2744        struct kvm_get_htab_header hdr;
2745        uint64_t pte0;
2746        uint64_t pte1;
2747    } buf;
2748
2749    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2750
2751    buf.hdr.n_valid = 1;
2752    buf.hdr.n_invalid = 0;
2753    buf.hdr.index = ptex;
2754    buf.pte0 = cpu_to_be64(pte0);
2755    buf.pte1 = cpu_to_be64(pte1);
2756
2757    rc = write(fd, &buf, sizeof(buf));
2758    if (rc != sizeof(buf)) {
2759        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2760    }
2761    close(fd);
2762}
2763
2764int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2765                             uint64_t address, uint32_t data, PCIDevice *dev)
2766{
2767    return 0;
2768}
2769
2770int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2771                                int vector, PCIDevice *dev)
2772{
2773    return 0;
2774}
2775
2776int kvm_arch_release_virq_post(int virq)
2777{
2778    return 0;
2779}
2780
2781int kvm_arch_msi_data_to_gsi(uint32_t data)
2782{
2783    return data & 0xffff;
2784}
2785
2786int kvmppc_enable_hwrng(void)
2787{
2788    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2789        return -1;
2790    }
2791
2792    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2793}
2794
2795void kvmppc_check_papr_resize_hpt(Error **errp)
2796{
2797    if (!kvm_enabled()) {
2798        return; /* No KVM, we're good */
2799    }
2800
2801    if (cap_resize_hpt) {
2802        return; /* Kernel has explicit support, we're good */
2803    }
2804
2805    /* Otherwise fallback on looking for PR KVM */
2806    if (kvmppc_is_pr(kvm_state)) {
2807        return;
2808    }
2809
2810    error_setg(errp,
2811               "Hash page table resizing not available with this KVM version");
2812}
2813
2814int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2815{
2816    CPUState *cs = CPU(cpu);
2817    struct kvm_ppc_resize_hpt rhpt = {
2818        .flags = flags,
2819        .shift = shift,
2820    };
2821
2822    if (!cap_resize_hpt) {
2823        return -ENOSYS;
2824    }
2825
2826    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2827}
2828
2829int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2830{
2831    CPUState *cs = CPU(cpu);
2832    struct kvm_ppc_resize_hpt rhpt = {
2833        .flags = flags,
2834        .shift = shift,
2835    };
2836
2837    if (!cap_resize_hpt) {
2838        return -ENOSYS;
2839    }
2840
2841    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2842}
2843
2844/*
2845 * Helper to detect a post-migration scenario in which a guest running
2846 * under KVM-HV freezes in cpu_post_load because the guest kernel can't
2847 * handle a PVR value other than the actual host PVR in KVM_SET_SREGS,
2848 * even if pvr_match() returns true.
2849 *
2850 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2851 * (so, we're HV), return true.  The workaround itself is done in
2852 * cpu_post_load.
2853 *
2854 * The order here is important: we only check for KVM PR as a fallback
2855 * if the guest kernel can't handle the situation itself.  We want to
2856 * avoid querying the running KVM type at the QEMU level as much as
2857 * possible.
2858 */
2859bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2860{
2861    CPUState *cs = CPU(cpu);
2862
2863    if (!kvm_enabled()) {
2864        return false;
2865    }
2866
2867    if (cap_ppc_pvr_compat) {
2868        return false;
2869    }
2870
2871    return !kvmppc_is_pr(cs->kvm_state);
2872}
2873