qemu/target/ppc/kvm.c
   1/*
   2 * PowerPC implementation of KVM hooks
   3 *
   4 * Copyright IBM Corp. 2007
   5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6 *
   7 * Authors:
   8 *  Jerone Young <jyoung5@us.ibm.com>
   9 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10 *  Hollis Blanchard <hollisb@us.ibm.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13 * See the COPYING file in the top-level directory.
  14 *
  15 */
  16
  17#include "qemu/osdep.h"
  18#include <dirent.h>
  19#include <sys/ioctl.h>
  20#include <sys/vfs.h>
  21
  22#include <linux/kvm.h>
  23
  24#include "qemu-common.h"
  25#include "qapi/error.h"
  26#include "qemu/error-report.h"
  27#include "cpu.h"
  28#include "cpu-models.h"
  29#include "qemu/timer.h"
  30#include "sysemu/sysemu.h"
  31#include "sysemu/hw_accel.h"
  32#include "kvm_ppc.h"
  33#include "sysemu/cpus.h"
  34#include "sysemu/device_tree.h"
  35#include "mmu-hash64.h"
  36
  37#include "hw/sysbus.h"
  38#include "hw/ppc/spapr.h"
  39#include "hw/ppc/spapr_vio.h"
  40#include "hw/ppc/spapr_cpu_core.h"
  41#include "hw/ppc/ppc.h"
  42#include "sysemu/watchdog.h"
  43#include "trace.h"
  44#include "exec/gdbstub.h"
  45#include "exec/memattrs.h"
  46#include "exec/ram_addr.h"
  47#include "sysemu/hostmem.h"
  48#include "qemu/cutils.h"
  49#include "qemu/mmap-alloc.h"
  50#include "elf.h"
  51#include "sysemu/kvm_int.h"
  52
  53//#define DEBUG_KVM
  54
  55#ifdef DEBUG_KVM
  56#define DPRINTF(fmt, ...) \
  57    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  58#else
  59#define DPRINTF(fmt, ...) \
  60    do { } while (0)
  61#endif
  62
  63#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  64
  65const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  66    KVM_CAP_LAST_INFO
  67};
  68
  69static int cap_interrupt_unset = false;
  70static int cap_interrupt_level = false;
  71static int cap_segstate;
  72static int cap_booke_sregs;
  73static int cap_ppc_smt;
  74static int cap_ppc_smt_possible;
  75static int cap_ppc_rma;
  76static int cap_spapr_tce;
  77static int cap_spapr_tce_64;
  78static int cap_spapr_multitce;
  79static int cap_spapr_vfio;
  80static int cap_hior;
  81static int cap_one_reg;
  82static int cap_epr;
  83static int cap_ppc_watchdog;
  84static int cap_papr;
  85static int cap_htab_fd;
  86static int cap_fixup_hcalls;
  87static int cap_htm;             /* Hardware transactional memory support */
  88static int cap_mmu_radix;
  89static int cap_mmu_hash_v3;
  90static int cap_resize_hpt;
  91static int cap_ppc_pvr_compat;
  92static int cap_ppc_safe_cache;
  93static int cap_ppc_safe_bounds_check;
  94static int cap_ppc_safe_indirect_branch;
  95
  96static uint32_t debug_inst_opcode;
  97
  98/* XXX We have a race condition where we actually have a level triggered
  99 *     interrupt, but the infrastructure can't expose that yet, so the guest
 100 *     takes but ignores it, goes to sleep and never gets notified that there's
 101 *     still an interrupt pending.
 102 *
 103 *     As a quick workaround, let's just wake up again 20 ms after we injected
  104 *     an interrupt. That way we can ensure that we're always reinjecting
 105 *     interrupts in case the guest swallowed them.
 106 */
 107static QEMUTimer *idle_timer;
 108
 109static void kvm_kick_cpu(void *opaque)
 110{
 111    PowerPCCPU *cpu = opaque;
 112
 113    qemu_cpu_kick(CPU(cpu));
 114}
 115
 116/* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 117 * should only be used for fallback tests - generally we should use
 118 * explicit capabilities for the features we want, rather than
 119 * assuming what is/isn't available depending on the KVM variant. */
 120static bool kvmppc_is_pr(KVMState *ks)
 121{
 122    /* Assume KVM-PR if the GET_PVINFO capability is available */
 123    return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 124}
 125
 126static int kvm_ppc_register_host_cpu_type(MachineState *ms);
 127static void kvmppc_get_cpu_characteristics(KVMState *s);
 128
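/* Probe the KVM capabilities we care about once, at accelerator init time,
 * cache the results in the cap_* globals above, then register the host CPU
 * type. */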
 129int kvm_arch_init(MachineState *ms, KVMState *s)
 130{
 131    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 132    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 133    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 134    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 135    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
 136    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 137    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 138    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 139    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 140    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
 141    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 142    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 143    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 144    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 145    /* Note: we don't set cap_papr here, because this capability is
  146     * only activated later on, by kvmppc_set_papr() */
 147    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 148    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 149    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
 150    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 151    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 152    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 153    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
 154    kvmppc_get_cpu_characteristics(s);
 155    /*
  156     * Note: setting it to false because there is no such capability
  157     * in KVM at this moment.
  158     *
  159     * TODO: call kvm_vm_check_extension() with the right capability
  160     * after the kernel starts implementing it. */
 161    cap_ppc_pvr_compat = false;
 162
 163    if (!cap_interrupt_level) {
 164        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 165                        "VM to stall at times!\n");
 166    }
 167
 168    kvm_ppc_register_host_cpu_type(ms);
 169
 170    return 0;
 171}
 172
 173int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 174{
 175    return 0;
 176}
 177
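/* Tell KVM which PVR the guest CPU uses, via KVM_GET/SET_SREGS. BookE
 * always runs with the host PVR, so this is only done for Book3S. */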
 178static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 179{
 180    CPUPPCState *cenv = &cpu->env;
 181    CPUState *cs = CPU(cpu);
 182    struct kvm_sregs sregs;
 183    int ret;
 184
 185    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
  186        /* What we're really trying to say is "if we're on BookE, we use
  187           the native PVR for now". This is the only sane way to check
  188           it though, so we may mislead users into thinking they can run
  189           BookE guests on BookS. Let's hope nobody dares to try :) */
 190        return 0;
 191    } else {
 192        if (!cap_segstate) {
 193            fprintf(stderr, "kvm error: missing PVR setting capability\n");
 194            return -ENOSYS;
 195        }
 196    }
 197
 198    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 199    if (ret) {
 200        return ret;
 201    }
 202
 203    sregs.pvr = cenv->spr[SPR_PVR];
 204    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 205}
 206
 207/* Set up a shared TLB array with KVM */
 208static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 209{
 210    CPUPPCState *env = &cpu->env;
 211    CPUState *cs = CPU(cpu);
 212    struct kvm_book3e_206_tlb_params params = {};
 213    struct kvm_config_tlb cfg = {};
 214    unsigned int entries = 0;
 215    int ret, i;
 216
 217    if (!kvm_enabled() ||
 218        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 219        return 0;
 220    }
 221
 222    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 223
 224    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 225        params.tlb_sizes[i] = booke206_tlb_size(env, i);
 226        params.tlb_ways[i] = booke206_tlb_ways(env, i);
 227        entries += params.tlb_sizes[i];
 228    }
 229
 230    assert(entries == env->nb_tlb);
 231    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 232
 233    env->tlb_dirty = true;
 234
 235    cfg.array = (uintptr_t)env->tlb.tlbm;
 236    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 237    cfg.params = (uintptr_t)&params;
 238    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 239
 240    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 241    if (ret < 0) {
 242        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 243                __func__, strerror(-ret));
 244        return ret;
 245    }
 246
 247    env->kvm_sw_tlb = true;
 248    return 0;
 249}
 250
 251
 252#if defined(TARGET_PPC64)
 253static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 254                                       struct kvm_ppc_smmu_info *info)
 255{
 256    CPUPPCState *env = &cpu->env;
 257    CPUState *cs = CPU(cpu);
 258
 259    memset(info, 0, sizeof(*info));
 260
 261    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
  262     * we need to "guess" what the supported page sizes are.
 263     *
 264     * For that to work we make a few assumptions:
 265     *
 266     * - Check whether we are running "PR" KVM which only supports 4K
 267     *   and 16M pages, but supports them regardless of the backing
  268     *   store characteristics. We also don't support 1T segments.
 269     *
 270     *   This is safe as if HV KVM ever supports that capability or PR
  271     *   KVM grows support for more page/segment sizes, those versions
 272     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 273     *   will not hit this fallback
 274     *
 275     * - Else we are running HV KVM. This means we only support page
 276     *   sizes that fit in the backing store. Additionally we only
  277     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
 278     *   P7 encodings for the SLB and hash table. Here too, we assume
 279     *   support for any newer processor will mean a kernel that
 280     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 281     *   this fallback.
 282     */
 283    if (kvmppc_is_pr(cs->kvm_state)) {
 284        /* No flags */
 285        info->flags = 0;
 286        info->slb_size = 64;
 287
 288        /* Standard 4k base page size segment */
 289        info->sps[0].page_shift = 12;
 290        info->sps[0].slb_enc = 0;
 291        info->sps[0].enc[0].page_shift = 12;
 292        info->sps[0].enc[0].pte_enc = 0;
 293
 294        /* Standard 16M large page size segment */
 295        info->sps[1].page_shift = 24;
 296        info->sps[1].slb_enc = SLB_VSID_L;
 297        info->sps[1].enc[0].page_shift = 24;
 298        info->sps[1].enc[0].pte_enc = 0;
 299    } else {
 300        int i = 0;
 301
 302        /* HV KVM has backing store size restrictions */
 303        info->flags = KVM_PPC_PAGE_SIZES_REAL;
 304
 305        if (env->mmu_model & POWERPC_MMU_1TSEG) {
 306            info->flags |= KVM_PPC_1T_SEGMENTS;
 307        }
 308
 309        if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 310           POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 311            info->slb_size = 32;
 312        } else {
 313            info->slb_size = 64;
 314        }
 315
 316        /* Standard 4k base page size segment */
 317        info->sps[i].page_shift = 12;
 318        info->sps[i].slb_enc = 0;
 319        info->sps[i].enc[0].page_shift = 12;
 320        info->sps[i].enc[0].pte_enc = 0;
 321        i++;
 322
 323        /* 64K on MMU 2.06 and later */
 324        if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 325            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 326            info->sps[i].page_shift = 16;
 327            info->sps[i].slb_enc = 0x110;
 328            info->sps[i].enc[0].page_shift = 16;
 329            info->sps[i].enc[0].pte_enc = 1;
 330            i++;
 331        }
 332
 333        /* Standard 16M large page size segment */
 334        info->sps[i].page_shift = 24;
 335        info->sps[i].slb_enc = SLB_VSID_L;
 336        info->sps[i].enc[0].page_shift = 24;
 337        info->sps[i].enc[0].pte_enc = 0;
 338    }
 339}
 340
 341static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 342{
 343    CPUState *cs = CPU(cpu);
 344    int ret;
 345
 346    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 347        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 348        if (ret == 0) {
 349            return;
 350        }
 351    }
 352
 353    kvm_get_fallback_smmu_info(cpu, info);
 354}
 355
 356struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 357{
 358    KVMState *s = KVM_STATE(current_machine->accelerator);
 359    struct ppc_radix_page_info *radix_page_info;
 360    struct kvm_ppc_rmmu_info rmmu_info;
 361    int i;
 362
 363    if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 364        return NULL;
 365    }
 366    if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 367        return NULL;
 368    }
 369    radix_page_info = g_malloc0(sizeof(*radix_page_info));
 370    radix_page_info->count = 0;
 371    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 372        if (rmmu_info.ap_encodings[i]) {
 373            radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 374            radix_page_info->count++;
 375        }
 376    }
 377    return radix_page_info;
 378}
 379
 380target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 381                                     bool radix, bool gtse,
 382                                     uint64_t proc_tbl)
 383{
 384    CPUState *cs = CPU(cpu);
 385    int ret;
 386    uint64_t flags = 0;
 387    struct kvm_ppc_mmuv3_cfg cfg = {
 388        .process_table = proc_tbl,
 389    };
 390
 391    if (radix) {
 392        flags |= KVM_PPC_MMUV3_RADIX;
 393    }
 394    if (gtse) {
 395        flags |= KVM_PPC_MMUV3_GTSE;
 396    }
 397    cfg.flags = flags;
 398    ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
 399    switch (ret) {
 400    case 0:
 401        return H_SUCCESS;
 402    case -EINVAL:
 403        return H_PARAMETER;
 404    case -ENODEV:
 405        return H_NOT_AVAILABLE;
 406    default:
 407        return H_HARDWARE;
 408    }
 409}
 410
 411static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 412{
 413    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 414        return true;
 415    }
 416
 417    return (1ul << shift) <= rampgsize;
 418}
 419
 420static long max_cpu_page_size;
 421
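/* Restrict the page and segment sizes advertised in env->sps to those the
 * host kernel (and the RAM backend page size) can actually support. */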
 422static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 423{
 424    static struct kvm_ppc_smmu_info smmu_info;
 425    static bool has_smmu_info;
 426    CPUPPCState *env = &cpu->env;
 427    int iq, ik, jq, jk;
 428    bool has_64k_pages = false;
 429
 430    /* We only handle page sizes for 64-bit server guests for now */
 431    if (!(env->mmu_model & POWERPC_MMU_64)) {
 432        return;
 433    }
 434
  435    /* Collect MMU info from the kernel if we haven't already */
 436    if (!has_smmu_info) {
 437        kvm_get_smmu_info(cpu, &smmu_info);
 438        has_smmu_info = true;
 439    }
 440
 441    if (!max_cpu_page_size) {
 442        max_cpu_page_size = qemu_getrampagesize();
 443    }
 444
 445    /* Convert to QEMU form */
 446    memset(&env->sps, 0, sizeof(env->sps));
 447
 448    /* If we have HV KVM, we need to forbid CI large pages if our
 449     * host page size is smaller than 64K.
 450     */
 451    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 452        env->ci_large_pages = getpagesize() >= 0x10000;
 453    }
 454
 455    /*
 456     * XXX This loop should be an entry wide AND of the capabilities that
 457     *     the selected CPU has with the capabilities that KVM supports.
 458     */
 459    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 460        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 461        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 462
 463        if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 464                                 ksps->page_shift)) {
 465            continue;
 466        }
 467        qsps->page_shift = ksps->page_shift;
 468        qsps->slb_enc = ksps->slb_enc;
 469        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 470            if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 471                                     ksps->enc[jk].page_shift)) {
 472                continue;
 473            }
 474            if (ksps->enc[jk].page_shift == 16) {
 475                has_64k_pages = true;
 476            }
 477            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 478            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 479            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 480                break;
 481            }
 482        }
 483        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 484            break;
 485        }
 486    }
 487    env->slb_nr = smmu_info.slb_size;
 488    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 489        env->mmu_model &= ~POWERPC_MMU_1TSEG;
 490    }
 491    if (!has_64k_pages) {
 492        env->mmu_model &= ~POWERPC_MMU_64K;
 493    }
 494}
 495
 496bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 497{
 498    Object *mem_obj = object_resolve_path(obj_path, NULL);
 499    char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
 500    long pagesize;
 501
 502    if (mempath) {
 503        pagesize = qemu_mempath_getpagesize(mempath);
 504        g_free(mempath);
 505    } else {
 506        pagesize = getpagesize();
 507    }
 508
 509    return pagesize >= max_cpu_page_size;
 510}
 511
 512#else /* defined (TARGET_PPC64) */
 513
 514static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 515{
 516}
 517
 518bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 519{
 520    return true;
 521}
 522
 523#endif /* !defined (TARGET_PPC64) */
 524
 525unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 526{
 527    return POWERPC_CPU(cpu)->vcpu_id;
 528}
 529
  530/* e500 supports 2 h/w breakpoints and 2 watchpoints.
  531 * book3s supports only 1 watchpoint, so an array size
  532 * of 4 is sufficient for now.
 533 */
 534#define MAX_HW_BKPTS 4
 535
 536static struct HWBreakpoint {
 537    target_ulong addr;
 538    int type;
 539} hw_debug_points[MAX_HW_BKPTS];
 540
 541static CPUWatchpoint hw_watchpoint;
 542
  543/* By default no breakpoints or watchpoints are supported */
 544static int max_hw_breakpoint;
 545static int max_hw_watchpoint;
 546static int nb_hw_breakpoint;
 547static int nb_hw_watchpoint;
 548
 549static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 550{
 551    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 552        max_hw_breakpoint = 2;
 553        max_hw_watchpoint = 2;
 554    }
 555
 556    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 557        fprintf(stderr, "Error initializing h/w breakpoints\n");
 558        return;
 559    }
 560}
 561
 562int kvm_arch_init_vcpu(CPUState *cs)
 563{
 564    PowerPCCPU *cpu = POWERPC_CPU(cs);
 565    CPUPPCState *cenv = &cpu->env;
 566    int ret;
 567
 568    /* Gather server mmu info from KVM and update the CPU state */
 569    kvm_fixup_page_sizes(cpu);
 570
 571    /* Synchronize sregs with kvm */
 572    ret = kvm_arch_sync_sregs(cpu);
 573    if (ret) {
 574        if (ret == -EINVAL) {
 575            error_report("Register sync failed... If you're using kvm-hv.ko,"
 576                         " only \"-cpu host\" is possible");
 577        }
 578        return ret;
 579    }
 580
 581    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 582
 583    switch (cenv->mmu_model) {
 584    case POWERPC_MMU_BOOKE206:
 585        /* This target supports access to KVM's guest TLB */
 586        ret = kvm_booke206_tlb_init(cpu);
 587        break;
 588    case POWERPC_MMU_2_07:
 589        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
  590            /* KVM-HV has transactional memory on POWER8 even without the
  591             * KVM_CAP_PPC_HTM extension, so enable it here instead, as
  592             * long as it's available to userspace on the host. */
 593            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 594                cap_htm = true;
 595            }
 596        }
 597        break;
 598    default:
 599        break;
 600    }
 601
 602    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 603    kvmppc_hw_debug_points_init(cenv);
 604
 605    return ret;
 606}
 607
 608static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 609{
 610    CPUPPCState *env = &cpu->env;
 611    CPUState *cs = CPU(cpu);
 612    struct kvm_dirty_tlb dirty_tlb;
 613    unsigned char *bitmap;
 614    int ret;
 615
 616    if (!env->kvm_sw_tlb) {
 617        return;
 618    }
 619
 620    bitmap = g_malloc((env->nb_tlb + 7) / 8);
 621    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 622
 623    dirty_tlb.bitmap = (uintptr_t)bitmap;
 624    dirty_tlb.num_dirty = env->nb_tlb;
 625
 626    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 627    if (ret) {
 628        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 629                __func__, strerror(-ret));
 630    }
 631
 632    g_free(bitmap);
 633}
 634
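/* Read one SPR from KVM through the ONE_REG interface into env->spr[];
 * only 32-bit and 64-bit register sizes are handled. */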
 635static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 636{
 637    PowerPCCPU *cpu = POWERPC_CPU(cs);
 638    CPUPPCState *env = &cpu->env;
 639    union {
 640        uint32_t u32;
 641        uint64_t u64;
 642    } val;
 643    struct kvm_one_reg reg = {
 644        .id = id,
 645        .addr = (uintptr_t) &val,
 646    };
 647    int ret;
 648
 649    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 650    if (ret != 0) {
 651        trace_kvm_failed_spr_get(spr, strerror(errno));
 652    } else {
 653        switch (id & KVM_REG_SIZE_MASK) {
 654        case KVM_REG_SIZE_U32:
 655            env->spr[spr] = val.u32;
 656            break;
 657
 658        case KVM_REG_SIZE_U64:
 659            env->spr[spr] = val.u64;
 660            break;
 661
 662        default:
 663            /* Don't handle this size yet */
 664            abort();
 665        }
 666    }
 667}
 668
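/* Counterpart of kvm_get_one_spr(): write env->spr[spr] back to KVM. */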
 669static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 670{
 671    PowerPCCPU *cpu = POWERPC_CPU(cs);
 672    CPUPPCState *env = &cpu->env;
 673    union {
 674        uint32_t u32;
 675        uint64_t u64;
 676    } val;
 677    struct kvm_one_reg reg = {
 678        .id = id,
 679        .addr = (uintptr_t) &val,
 680    };
 681    int ret;
 682
 683    switch (id & KVM_REG_SIZE_MASK) {
 684    case KVM_REG_SIZE_U32:
 685        val.u32 = env->spr[spr];
 686        break;
 687
 688    case KVM_REG_SIZE_U64:
 689        val.u64 = env->spr[spr];
 690        break;
 691
 692    default:
 693        /* Don't handle this size yet */
 694        abort();
 695    }
 696
 697    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 698    if (ret != 0) {
 699        trace_kvm_failed_spr_set(spr, strerror(errno));
 700    }
 701}
 702
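/* Upload FPSCR, the FP/VSX registers and (if present) the Altivec state to
 * KVM via ONE_REG. The two 64-bit halves of each VSR are swapped depending
 * on host endianness (see the HOST_WORDS_BIGENDIAN block below). */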
 703static int kvm_put_fp(CPUState *cs)
 704{
 705    PowerPCCPU *cpu = POWERPC_CPU(cs);
 706    CPUPPCState *env = &cpu->env;
 707    struct kvm_one_reg reg;
 708    int i;
 709    int ret;
 710
 711    if (env->insns_flags & PPC_FLOAT) {
 712        uint64_t fpscr = env->fpscr;
 713        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 714
 715        reg.id = KVM_REG_PPC_FPSCR;
 716        reg.addr = (uintptr_t)&fpscr;
 717        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 718        if (ret < 0) {
 719            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 720            return ret;
 721        }
 722
 723        for (i = 0; i < 32; i++) {
 724            uint64_t vsr[2];
 725
 726#ifdef HOST_WORDS_BIGENDIAN
 727            vsr[0] = float64_val(env->fpr[i]);
 728            vsr[1] = env->vsr[i];
 729#else
 730            vsr[0] = env->vsr[i];
 731            vsr[1] = float64_val(env->fpr[i]);
 732#endif
 733            reg.addr = (uintptr_t) &vsr;
 734            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 735
 736            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 737            if (ret < 0) {
 738                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 739                        i, strerror(errno));
 740                return ret;
 741            }
 742        }
 743    }
 744
 745    if (env->insns_flags & PPC_ALTIVEC) {
 746        reg.id = KVM_REG_PPC_VSCR;
 747        reg.addr = (uintptr_t)&env->vscr;
 748        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 749        if (ret < 0) {
 750            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 751            return ret;
 752        }
 753
 754        for (i = 0; i < 32; i++) {
 755            reg.id = KVM_REG_PPC_VR(i);
 756            reg.addr = (uintptr_t)&env->avr[i];
 757            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 758            if (ret < 0) {
 759                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 760                return ret;
 761            }
 762        }
 763    }
 764
 765    return 0;
 766}
 767
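/* Counterpart of kvm_put_fp(): fetch FPSCR, FP/VSX and Altivec state from
 * KVM back into env. */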
 768static int kvm_get_fp(CPUState *cs)
 769{
 770    PowerPCCPU *cpu = POWERPC_CPU(cs);
 771    CPUPPCState *env = &cpu->env;
 772    struct kvm_one_reg reg;
 773    int i;
 774    int ret;
 775
 776    if (env->insns_flags & PPC_FLOAT) {
 777        uint64_t fpscr;
 778        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 779
 780        reg.id = KVM_REG_PPC_FPSCR;
 781        reg.addr = (uintptr_t)&fpscr;
 782        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 783        if (ret < 0) {
 784            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 785            return ret;
 786        } else {
 787            env->fpscr = fpscr;
 788        }
 789
 790        for (i = 0; i < 32; i++) {
 791            uint64_t vsr[2];
 792
 793            reg.addr = (uintptr_t) &vsr;
 794            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 795
 796            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 797            if (ret < 0) {
 798                DPRINTF("Unable to get %s%d from KVM: %s\n",
 799                        vsx ? "VSR" : "FPR", i, strerror(errno));
 800                return ret;
 801            } else {
 802#ifdef HOST_WORDS_BIGENDIAN
 803                env->fpr[i] = vsr[0];
 804                if (vsx) {
 805                    env->vsr[i] = vsr[1];
 806                }
 807#else
 808                env->fpr[i] = vsr[1];
 809                if (vsx) {
 810                    env->vsr[i] = vsr[0];
 811                }
 812#endif
 813            }
 814        }
 815    }
 816
 817    if (env->insns_flags & PPC_ALTIVEC) {
 818        reg.id = KVM_REG_PPC_VSCR;
 819        reg.addr = (uintptr_t)&env->vscr;
 820        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 821        if (ret < 0) {
 822            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 823            return ret;
 824        }
 825
 826        for (i = 0; i < 32; i++) {
 827            reg.id = KVM_REG_PPC_VR(i);
 828            reg.addr = (uintptr_t)&env->avr[i];
 829            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 830            if (ret < 0) {
 831                DPRINTF("Unable to get VR%d from KVM: %s\n",
 832                        i, strerror(errno));
 833                return ret;
 834            }
 835        }
 836    }
 837
 838    return 0;
 839}
 840
 841#if defined(TARGET_PPC64)
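/* Fetch the PAPR per-vCPU areas (VPA, SLB shadow buffer, dispatch trace
 * log) from KVM. The asserts check that each size field immediately
 * follows its address field, since KVM accesses the pair as one value. */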
 842static int kvm_get_vpa(CPUState *cs)
 843{
 844    PowerPCCPU *cpu = POWERPC_CPU(cs);
 845    CPUPPCState *env = &cpu->env;
 846    struct kvm_one_reg reg;
 847    int ret;
 848
 849    reg.id = KVM_REG_PPC_VPA_ADDR;
 850    reg.addr = (uintptr_t)&env->vpa_addr;
 851    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 852    if (ret < 0) {
 853        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 854        return ret;
 855    }
 856
 857    assert((uintptr_t)&env->slb_shadow_size
 858           == ((uintptr_t)&env->slb_shadow_addr + 8));
 859    reg.id = KVM_REG_PPC_VPA_SLB;
 860    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 861    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 862    if (ret < 0) {
 863        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 864                strerror(errno));
 865        return ret;
 866    }
 867
 868    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 869    reg.id = KVM_REG_PPC_VPA_DTL;
 870    reg.addr = (uintptr_t)&env->dtl_addr;
 871    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 872    if (ret < 0) {
 873        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 874                strerror(errno));
 875        return ret;
 876    }
 877
 878    return 0;
 879}
 880
 881static int kvm_put_vpa(CPUState *cs)
 882{
 883    PowerPCCPU *cpu = POWERPC_CPU(cs);
 884    CPUPPCState *env = &cpu->env;
 885    struct kvm_one_reg reg;
 886    int ret;
 887
 888    /* SLB shadow or DTL can't be registered unless a master VPA is
 889     * registered.  That means when restoring state, if a VPA *is*
 890     * registered, we need to set that up first.  If not, we need to
 891     * deregister the others before deregistering the master VPA */
 892    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 893
 894    if (env->vpa_addr) {
 895        reg.id = KVM_REG_PPC_VPA_ADDR;
 896        reg.addr = (uintptr_t)&env->vpa_addr;
 897        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 898        if (ret < 0) {
 899            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 900            return ret;
 901        }
 902    }
 903
 904    assert((uintptr_t)&env->slb_shadow_size
 905           == ((uintptr_t)&env->slb_shadow_addr + 8));
 906    reg.id = KVM_REG_PPC_VPA_SLB;
 907    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 908    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 909    if (ret < 0) {
 910        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 911        return ret;
 912    }
 913
 914    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 915    reg.id = KVM_REG_PPC_VPA_DTL;
 916    reg.addr = (uintptr_t)&env->dtl_addr;
 917    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 918    if (ret < 0) {
 919        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 920                strerror(errno));
 921        return ret;
 922    }
 923
 924    if (!env->vpa_addr) {
 925        reg.id = KVM_REG_PPC_VPA_ADDR;
 926        reg.addr = (uintptr_t)&env->vpa_addr;
 927        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 928        if (ret < 0) {
 929            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 930            return ret;
 931        }
 932    }
 933
 934    return 0;
 935}
 936#endif /* TARGET_PPC64 */
 937
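/* Write the Book3S segment state (SDR1, SLB, segment registers and BATs)
 * from env into KVM with KVM_SET_SREGS. */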
 938int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 939{
 940    CPUPPCState *env = &cpu->env;
 941    struct kvm_sregs sregs;
 942    int i;
 943
 944    sregs.pvr = env->spr[SPR_PVR];
 945
 946    if (cpu->vhyp) {
 947        PPCVirtualHypervisorClass *vhc =
 948            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
 949        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
 950    } else {
 951        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 952    }
 953
 954    /* Sync SLB */
 955#ifdef TARGET_PPC64
 956    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 957        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 958        if (env->slb[i].esid & SLB_ESID_V) {
 959            sregs.u.s.ppc64.slb[i].slbe |= i;
 960        }
 961        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 962    }
 963#endif
 964
 965    /* Sync SRs */
 966    for (i = 0; i < 16; i++) {
 967        sregs.u.s.ppc32.sr[i] = env->sr[i];
 968    }
 969
 970    /* Sync BATs */
 971    for (i = 0; i < 8; i++) {
  972        /* Beware. We have to pack the upper and lower BAT words into one value here */
 973        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 974            | env->DBAT[1][i];
 975        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 976            | env->IBAT[1][i];
 977    }
 978
 979    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 980}
 981
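/* Push QEMU's CPU state into KVM: GPRs and core SPRs via KVM_SET_REGS,
 * then FP/Altivec, the shared software TLB, sregs and ONE_REG backed SPRs,
 * depending on 'level' and on the capabilities probed at init time. */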
 982int kvm_arch_put_registers(CPUState *cs, int level)
 983{
 984    PowerPCCPU *cpu = POWERPC_CPU(cs);
 985    CPUPPCState *env = &cpu->env;
 986    struct kvm_regs regs;
 987    int ret;
 988    int i;
 989
 990    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 991    if (ret < 0) {
 992        return ret;
 993    }
 994
 995    regs.ctr = env->ctr;
 996    regs.lr  = env->lr;
 997    regs.xer = cpu_read_xer(env);
 998    regs.msr = env->msr;
 999    regs.pc = env->nip;
1000
1001    regs.srr0 = env->spr[SPR_SRR0];
1002    regs.srr1 = env->spr[SPR_SRR1];
1003
1004    regs.sprg0 = env->spr[SPR_SPRG0];
1005    regs.sprg1 = env->spr[SPR_SPRG1];
1006    regs.sprg2 = env->spr[SPR_SPRG2];
1007    regs.sprg3 = env->spr[SPR_SPRG3];
1008    regs.sprg4 = env->spr[SPR_SPRG4];
1009    regs.sprg5 = env->spr[SPR_SPRG5];
1010    regs.sprg6 = env->spr[SPR_SPRG6];
1011    regs.sprg7 = env->spr[SPR_SPRG7];
1012
1013    regs.pid = env->spr[SPR_BOOKE_PID];
1014
 1015    for (i = 0; i < 32; i++)
1016        regs.gpr[i] = env->gpr[i];
1017
1018    regs.cr = 0;
1019    for (i = 0; i < 8; i++) {
1020        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1021    }
1022
1023    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1024    if (ret < 0)
1025        return ret;
1026
1027    kvm_put_fp(cs);
1028
1029    if (env->tlb_dirty) {
1030        kvm_sw_tlb_put(cpu);
1031        env->tlb_dirty = false;
1032    }
1033
1034    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1035        ret = kvmppc_put_books_sregs(cpu);
1036        if (ret < 0) {
1037            return ret;
1038        }
1039    }
1040
1041    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1042        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1043    }
1044
1045    if (cap_one_reg) {
1046        int i;
1047
 1048        /* We deliberately ignore errors here: for kernels which have
 1049         * the ONE_REG calls but don't support the specific
1050         * registers, there's a reasonable chance things will still
1051         * work, at least until we try to migrate. */
1052        for (i = 0; i < 1024; i++) {
1053            uint64_t id = env->spr_cb[i].one_reg_id;
1054
1055            if (id != 0) {
1056                kvm_put_one_spr(cs, id, i);
1057            }
1058        }
1059
1060#ifdef TARGET_PPC64
1061        if (msr_ts) {
1062            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1063                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1064            }
1065            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1066                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1067            }
1068            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1069            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1070            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1071            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1072            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1073            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1074            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1075            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1076            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1077            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1078        }
1079
1080        if (cap_papr) {
1081            if (kvm_put_vpa(cs) < 0) {
1082                DPRINTF("Warning: Unable to set VPA information to KVM\n");
1083            }
1084        }
1085
1086        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1087#endif /* TARGET_PPC64 */
1088    }
1089
1090    return ret;
1091}
1092
1093static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1094{
1095     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1096}
1097
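/* Read the BookE sregs from KVM and scatter them into env->spr[],
 * recomputing the exception vectors for any IVORs the kernel reports. */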
1098static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1099{
1100    CPUPPCState *env = &cpu->env;
1101    struct kvm_sregs sregs;
1102    int ret;
1103
1104    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1105    if (ret < 0) {
1106        return ret;
1107    }
1108
1109    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1110        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1111        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1112        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1113        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1114        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1115        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1116        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1117        env->spr[SPR_DECR] = sregs.u.e.dec;
1118        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1119        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1120        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1121    }
1122
1123    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1124        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1125        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1126        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1127        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1128        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1129    }
1130
1131    if (sregs.u.e.features & KVM_SREGS_E_64) {
1132        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1133    }
1134
1135    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1136        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1137    }
1138
1139    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1140        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1141        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1142        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1143        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1144        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1145        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1146        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1147        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1148        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1149        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1150        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1151        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1152        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1153        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1154        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1155        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1156        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1157        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1158        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1159        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1160        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1161        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1162        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1163        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1164        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1165        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1166        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1167        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1168        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1169        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1170        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1171        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1172
1173        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1174            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1175            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1176            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1177            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1178            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1179            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1180        }
1181
1182        if (sregs.u.e.features & KVM_SREGS_E_PM) {
1183            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1184            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1185        }
1186
1187        if (sregs.u.e.features & KVM_SREGS_E_PC) {
1188            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1189            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1190            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1191            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1192        }
1193    }
1194
1195    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1196        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1197        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1198        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1199        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1200        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1201        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1202        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1203        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1204        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1205        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1206    }
1207
1208    if (sregs.u.e.features & KVM_SREGS_EXP) {
1209        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1210    }
1211
1212    if (sregs.u.e.features & KVM_SREGS_E_PD) {
1213        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1214        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1215    }
1216
1217    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1218        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1219        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1220        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1221
1222        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1223            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1224            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1225        }
1226    }
1227
1228    return 0;
1229}
1230
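/* Read the Book3S segment state (SDR1, SLB, segment registers and BATs)
 * from KVM back into env. */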
1231static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1232{
1233    CPUPPCState *env = &cpu->env;
1234    struct kvm_sregs sregs;
1235    int ret;
1236    int i;
1237
1238    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1239    if (ret < 0) {
1240        return ret;
1241    }
1242
1243    if (!cpu->vhyp) {
1244        ppc_store_sdr1(env, sregs.u.s.sdr1);
1245    }
1246
1247    /* Sync SLB */
1248#ifdef TARGET_PPC64
1249    /*
1250     * The packed SLB array we get from KVM_GET_SREGS only contains
1251     * information about valid entries. So we flush our internal copy
1252     * to get rid of stale ones, then put all valid SLB entries back
1253     * in.
1254     */
1255    memset(env->slb, 0, sizeof(env->slb));
1256    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1257        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1258        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1259        /*
1260         * Only restore valid entries
1261         */
1262        if (rb & SLB_ESID_V) {
1263            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1264        }
1265    }
1266#endif
1267
1268    /* Sync SRs */
1269    for (i = 0; i < 16; i++) {
1270        env->sr[i] = sregs.u.s.ppc32.sr[i];
1271    }
1272
1273    /* Sync BATs */
1274    for (i = 0; i < 8; i++) {
1275        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1276        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1277        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1278        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1279    }
1280
1281    return 0;
1282}
1283
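/* Inverse of kvm_arch_put_registers(): pull the full register state out of
 * KVM into env. */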
1284int kvm_arch_get_registers(CPUState *cs)
1285{
1286    PowerPCCPU *cpu = POWERPC_CPU(cs);
1287    CPUPPCState *env = &cpu->env;
1288    struct kvm_regs regs;
1289    uint32_t cr;
1290    int i, ret;
1291
1292    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1293    if (ret < 0)
1294        return ret;
1295
1296    cr = regs.cr;
1297    for (i = 7; i >= 0; i--) {
1298        env->crf[i] = cr & 15;
1299        cr >>= 4;
1300    }
1301
1302    env->ctr = regs.ctr;
1303    env->lr = regs.lr;
1304    cpu_write_xer(env, regs.xer);
1305    env->msr = regs.msr;
1306    env->nip = regs.pc;
1307
1308    env->spr[SPR_SRR0] = regs.srr0;
1309    env->spr[SPR_SRR1] = regs.srr1;
1310
1311    env->spr[SPR_SPRG0] = regs.sprg0;
1312    env->spr[SPR_SPRG1] = regs.sprg1;
1313    env->spr[SPR_SPRG2] = regs.sprg2;
1314    env->spr[SPR_SPRG3] = regs.sprg3;
1315    env->spr[SPR_SPRG4] = regs.sprg4;
1316    env->spr[SPR_SPRG5] = regs.sprg5;
1317    env->spr[SPR_SPRG6] = regs.sprg6;
1318    env->spr[SPR_SPRG7] = regs.sprg7;
1319
1320    env->spr[SPR_BOOKE_PID] = regs.pid;
1321
 1322    for (i = 0; i < 32; i++)
1323        env->gpr[i] = regs.gpr[i];
1324
1325    kvm_get_fp(cs);
1326
1327    if (cap_booke_sregs) {
1328        ret = kvmppc_get_booke_sregs(cpu);
1329        if (ret < 0) {
1330            return ret;
1331        }
1332    }
1333
1334    if (cap_segstate) {
1335        ret = kvmppc_get_books_sregs(cpu);
1336        if (ret < 0) {
1337            return ret;
1338        }
1339    }
1340
1341    if (cap_hior) {
1342        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1343    }
1344
1345    if (cap_one_reg) {
1346        int i;
1347
 1348        /* We deliberately ignore errors here: for kernels which have
 1349         * the ONE_REG calls but don't support the specific
1350         * registers, there's a reasonable chance things will still
1351         * work, at least until we try to migrate. */
1352        for (i = 0; i < 1024; i++) {
1353            uint64_t id = env->spr_cb[i].one_reg_id;
1354
1355            if (id != 0) {
1356                kvm_get_one_spr(cs, id, i);
1357            }
1358        }
1359
1360#ifdef TARGET_PPC64
1361        if (msr_ts) {
1362            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1363                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1364            }
1365            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1366                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1367            }
1368            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1369            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1370            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1371            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1372            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1373            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1374            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1375            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1376            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1377            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1378        }
1379
1380        if (cap_papr) {
1381            if (kvm_get_vpa(cs) < 0) {
1382                DPRINTF("Warning: Unable to get VPA information from KVM\n");
1383            }
1384        }
1385
1386        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1387#endif
1388    }
1389
1390    return 0;
1391}
1392
1393int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1394{
1395    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1396
1397    if (irq != PPC_INTERRUPT_EXT) {
1398        return 0;
1399    }
1400
1401    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1402        return 0;
1403    }
1404
1405    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1406
1407    return 0;
1408}
1409
1410#if defined(TARGET_PPCEMB)
1411#define PPC_INPUT_INT PPC40x_INPUT_INT
1412#elif defined(TARGET_PPC64)
1413#define PPC_INPUT_INT PPC970_INPUT_INT
1414#else
1415#define PPC_INPUT_INT PPC6xx_INPUT_INT
1416#endif
1417
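/* If the kernel lacks KVM_CAP_PPC_IRQ_LEVEL, inject a pending external
 * interrupt here before entering the guest, and re-arm the idle timer to
 * work around the level-triggered interrupt race described above. */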
1418void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1419{
1420    PowerPCCPU *cpu = POWERPC_CPU(cs);
1421    CPUPPCState *env = &cpu->env;
1422    int r;
1423    unsigned irq;
1424
1425    qemu_mutex_lock_iothread();
1426
1427    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1428     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1429    if (!cap_interrupt_level &&
1430        run->ready_for_interrupt_injection &&
1431        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1432        (env->irq_input_state & (1<<PPC_INPUT_INT)))
1433    {
1434        /* For now KVM disregards the 'irq' argument. However, in the
1435         * future KVM could cache it in-kernel to avoid a heavyweight exit
1436         * when reading the UIC.
1437         */
1438        irq = KVM_INTERRUPT_SET;
1439
1440        DPRINTF("injected interrupt %d\n", irq);
1441        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1442        if (r < 0) {
 1443            printf("cpu %d failed to inject %x\n", cs->cpu_index, irq);
1444        }
1445
1446        /* Always wake up soon in case the interrupt was level based */
1447        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1448                       (NANOSECONDS_PER_SECOND / 50));
1449    }
1450
1451    /* We don't know if there are more interrupts pending after this. However,
1452     * the guest will return to userspace in the course of handling this one
 1453     * anyway, so we will get a chance to deliver the rest. */
1454
1455    qemu_mutex_unlock_iothread();
1456}
1457
1458MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1459{
1460    return MEMTXATTRS_UNSPECIFIED;
1461}
1462
1463int kvm_arch_process_async_events(CPUState *cs)
1464{
1465    return cs->halted;
1466}
1467
1468static int kvmppc_handle_halt(PowerPCCPU *cpu)
1469{
1470    CPUState *cs = CPU(cpu);
1471    CPUPPCState *env = &cpu->env;
1472
1473    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1474        cs->halted = 1;
1475        cs->exception_index = EXCP_HLT;
1476    }
1477
1478    return 0;
1479}
1480
 1481/* Map DCR accesses to the existing QEMU DCR emulation */
1482static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1483{
1484    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
 1485        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
1486
1487    return 0;
1488}
1489
1490static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1491{
1492    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1493        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1494
1495    return 0;
1496}
1497
1498int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1499{
1500    /* Mixed endian case is not handled */
1501    uint32_t sc = debug_inst_opcode;
1502
1503    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1504                            sizeof(sc), 0) ||
1505        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1506        return -EINVAL;
1507    }
1508
1509    return 0;
1510}
1511
1512int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1513{
1514    uint32_t sc;
1515
1516    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1517        sc != debug_inst_opcode ||
1518        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1519                            sizeof(sc), 1)) {
1520        return -EINVAL;
1521    }
1522
1523    return 0;
1524}
1525
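/* Look up a hardware breakpoint/watchpoint by address and GDB type;
 * returns its index in hw_debug_points[] or -1 if it isn't registered. */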
1526static int find_hw_breakpoint(target_ulong addr, int type)
1527{
1528    int n;
1529
1530    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1531           <= ARRAY_SIZE(hw_debug_points));
1532
1533    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1534        if (hw_debug_points[n].addr == addr &&
1535             hw_debug_points[n].type == type) {
1536            return n;
1537        }
1538    }
1539
1540    return -1;
1541}
1542
1543static int find_hw_watchpoint(target_ulong addr, int *flag)
1544{
1545    int n;
1546
1547    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1548    if (n >= 0) {
1549        *flag = BP_MEM_ACCESS;
1550        return n;
1551    }
1552
1553    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1554    if (n >= 0) {
1555        *flag = BP_MEM_WRITE;
1556        return n;
1557    }
1558
1559    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1560    if (n >= 0) {
1561        *flag = BP_MEM_READ;
1562        return n;
1563    }
1564
1565    return -1;
1566}
1567
1568int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1569                                  target_ulong len, int type)
1570{
1571    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1572        return -ENOBUFS;
1573    }
1574
1575    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1576    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1577
1578    switch (type) {
1579    case GDB_BREAKPOINT_HW:
1580        if (nb_hw_breakpoint >= max_hw_breakpoint) {
1581            return -ENOBUFS;
1582        }
1583
1584        if (find_hw_breakpoint(addr, type) >= 0) {
1585            return -EEXIST;
1586        }
1587
1588        nb_hw_breakpoint++;
1589        break;
1590
1591    case GDB_WATCHPOINT_WRITE:
1592    case GDB_WATCHPOINT_READ:
1593    case GDB_WATCHPOINT_ACCESS:
1594        if (nb_hw_watchpoint >= max_hw_watchpoint) {
1595            return -ENOBUFS;
1596        }
1597
1598        if (find_hw_breakpoint(addr, type) >= 0) {
1599            return -EEXIST;
1600        }
1601
1602        nb_hw_watchpoint++;
1603        break;
1604
1605    default:
1606        return -ENOSYS;
1607    }
1608
1609    return 0;
1610}
1611
1612int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1613                                  target_ulong len, int type)
1614{
1615    int n;
1616
1617    n = find_hw_breakpoint(addr, type);
1618    if (n < 0) {
1619        return -ENOENT;
1620    }
1621
1622    switch (type) {
1623    case GDB_BREAKPOINT_HW:
1624        nb_hw_breakpoint--;
1625        break;
1626
1627    case GDB_WATCHPOINT_WRITE:
1628    case GDB_WATCHPOINT_READ:
1629    case GDB_WATCHPOINT_ACCESS:
1630        nb_hw_watchpoint--;
1631        break;
1632
1633    default:
1634        return -ENOSYS;
1635    }
1636    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1637
1638    return 0;
1639}
1640
1641void kvm_arch_remove_all_hw_breakpoints(void)
1642{
1643    nb_hw_breakpoint = nb_hw_watchpoint = 0;
1644}
1645
1646void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1647{
1648    int n;
1649
1650    /* Software Breakpoint updates */
1651    if (kvm_sw_breakpoints_active(cs)) {
1652        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1653    }
1654
1655    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1656           <= ARRAY_SIZE(hw_debug_points));
1657    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1658
1659    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1660        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1661        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1662        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1663            switch (hw_debug_points[n].type) {
1664            case GDB_BREAKPOINT_HW:
1665                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1666                break;
1667            case GDB_WATCHPOINT_WRITE:
1668                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1669                break;
1670            case GDB_WATCHPOINT_READ:
1671                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1672                break;
1673            case GDB_WATCHPOINT_ACCESS:
1674                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1675                                        KVMPPC_DEBUG_WATCH_READ;
1676                break;
1677            default:
1678                cpu_abort(cs, "Unsupported breakpoint type\n");
1679            }
1680            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1681        }
1682    }
1683}
1684
1685static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1686{
1687    CPUState *cs = CPU(cpu);
1688    CPUPPCState *env = &cpu->env;
1689    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1690    int handle = 0;
1691    int n;
1692    int flag = 0;
1693
1694    if (cs->singlestep_enabled) {
1695        handle = 1;
1696    } else if (arch_info->status) {
1697        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1698            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1699                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1700                if (n >= 0) {
1701                    handle = 1;
1702                }
1703            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1704                                            KVMPPC_DEBUG_WATCH_WRITE)) {
1705                n = find_hw_watchpoint(arch_info->address, &flag);
1706                if (n >= 0) {
1707                    handle = 1;
1708                    cs->watchpoint_hit = &hw_watchpoint;
1709                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
1710                    hw_watchpoint.flags = flag;
1711                }
1712            }
1713        }
1714    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1715        handle = 1;
1716    } else {
1717        /* QEMU is not able to handle this debug exception, so inject
1718         * a program exception into the guest instead;
1719         * yes, a program exception, NOT a debug exception !!
1720         * While QEMU owns the debug resources, debug exceptions must
1721         * always be routed to QEMU.  To achieve this we set MSR_DE and
1722         * also set MSRP_DEP so the guest cannot change MSR_DE.
1723         * When emulating debug resources for the guest, the guest has
1724         * to control MSR_DE (enable/disable debug interrupts on demand).
1725         * Supporting both configurations at the same time is NOT
1726         * possible, so debug resources cannot be shared between QEMU
1727         * and the guest on the BookE architecture.
1728         * In the current design QEMU gets priority over the guest:
1729         * if QEMU is using the debug resources then the guest cannot
1730         * use them.
1731         * For software breakpoints QEMU uses a privileged instruction,
1732         * so we cannot be here because the guest programmed a debug
1733         * exception; the only possibility is that the guest executed
1734         * a privileged / illegal instruction, which is why we inject
1735         * a program interrupt.
1736         */
1737
1738        cpu_synchronize_state(cs);
1739        /* env->nip is PC, so increment this by 4 to use
1740         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1741         */
1742        env->nip += 4;
1743        cs->exception_index = POWERPC_EXCP_PROGRAM;
1744        env->error_code = POWERPC_EXCP_INVAL;
1745        ppc_cpu_do_interrupt(cs);
1746    }
1747
1748    return handle;
1749}
1750
1751int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1752{
1753    PowerPCCPU *cpu = POWERPC_CPU(cs);
1754    CPUPPCState *env = &cpu->env;
1755    int ret;
1756
1757    qemu_mutex_lock_iothread();
1758
1759    switch (run->exit_reason) {
1760    case KVM_EXIT_DCR:
1761        if (run->dcr.is_write) {
1762            DPRINTF("handle dcr write\n");
1763            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1764        } else {
1765            DPRINTF("handle dcr read\n");
1766            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1767        }
1768        break;
1769    case KVM_EXIT_HLT:
1770        DPRINTF("handle halt\n");
1771        ret = kvmppc_handle_halt(cpu);
1772        break;
1773#if defined(TARGET_PPC64)
1774    case KVM_EXIT_PAPR_HCALL:
1775        DPRINTF("handle PAPR hypercall\n");
1776        run->papr_hcall.ret = spapr_hypercall(cpu,
1777                                              run->papr_hcall.nr,
1778                                              run->papr_hcall.args);
1779        ret = 0;
1780        break;
1781#endif
1782    case KVM_EXIT_EPR:
1783        DPRINTF("handle epr\n");
1784        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1785        ret = 0;
1786        break;
1787    case KVM_EXIT_WATCHDOG:
1788        DPRINTF("handle watchdog expiry\n");
1789        watchdog_perform_action();
1790        ret = 0;
1791        break;
1792
1793    case KVM_EXIT_DEBUG:
1794        DPRINTF("handle debug exception\n");
1795        if (kvm_handle_debug(cpu, run)) {
1796            ret = EXCP_DEBUG;
1797            break;
1798        }
1799        /* re-enter, this exception was guest-internal */
1800        ret = 0;
1801        break;
1802
1803    default:
1804        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1805        ret = -1;
1806        break;
1807    }
1808
1809    qemu_mutex_unlock_iothread();
1810    return ret;
1811}
1812
1813int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1814{
1815    CPUState *cs = CPU(cpu);
1816    uint32_t bits = tsr_bits;
1817    struct kvm_one_reg reg = {
1818        .id = KVM_REG_PPC_OR_TSR,
1819        .addr = (uintptr_t) &bits,
1820    };
1821
1822    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1823}
1824
1825int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1826{
1827
1828    CPUState *cs = CPU(cpu);
1829    uint32_t bits = tsr_bits;
1830    struct kvm_one_reg reg = {
1831        .id = KVM_REG_PPC_CLEAR_TSR,
1832        .addr = (uintptr_t) &bits,
1833    };
1834
1835    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1836}
1837
1838int kvmppc_set_tcr(PowerPCCPU *cpu)
1839{
1840    CPUState *cs = CPU(cpu);
1841    CPUPPCState *env = &cpu->env;
1842    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1843
1844    struct kvm_one_reg reg = {
1845        .id = KVM_REG_PPC_TCR,
1846        .addr = (uintptr_t) &tcr,
1847    };
1848
1849    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1850}
1851
1852int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1853{
1854    CPUState *cs = CPU(cpu);
1855    int ret;
1856
1857    if (!kvm_enabled()) {
1858        return -1;
1859    }
1860
1861    if (!cap_ppc_watchdog) {
1862        fprintf(stderr, "warning: KVM does not support watchdog\n");
1863        return -1;
1864    }
1865
1866    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1867    if (ret < 0) {
1868        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1869                __func__, strerror(-ret));
1870        return ret;
1871    }
1872
1873    return ret;
1874}
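/*
 * Hedged usage sketch: a BookE watchdog model would typically enable the
 * in-kernel watchdog once per vCPU and then mirror TCR/TSR updates
 * through the helpers above.  The TSR bit macro below is an illustrative
 * assumption, not necessarily the name the watchdog model uses:
 *
 *     if (kvmppc_booke_watchdog_enable(cpu) == 0) {
 *         kvmppc_set_tcr(cpu);                    copy SPR_BOOKE_TCR to KVM
 *         kvmppc_or_tsr_bits(cpu, TSR_WIS);       set a status bit ...
 *         kvmppc_clear_tsr_bits(cpu, TSR_WIS);    ... or clear it again
 *     }
 */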
1875
1876static int read_cpuinfo(const char *field, char *value, int len)
1877{
1878    FILE *f;
1879    int ret = -1;
1880    int field_len = strlen(field);
1881    char line[512];
1882
1883    f = fopen("/proc/cpuinfo", "r");
1884    if (!f) {
1885        return -1;
1886    }
1887
1888    do {
1889        if (!fgets(line, sizeof(line), f)) {
1890            break;
1891        }
1892        if (!strncmp(line, field, field_len)) {
1893            pstrcpy(value, len, line);
1894            ret = 0;
1895            break;
1896        }
1897    } while (*line);
1898
1899    fclose(f);
1900
1901    return ret;
1902}
1903
1904uint32_t kvmppc_get_tbfreq(void)
1905{
1906    char line[512];
1907    char *ns;
1908    uint32_t retval = NANOSECONDS_PER_SECOND;
1909
1910    if (read_cpuinfo("timebase", line, sizeof(line))) {
1911        return retval;
1912    }
1913
1914    if (!(ns = strchr(line, ':'))) {
1915        return retval;
1916    }
1917
1918    ns++;
1919
1920    return atoi(ns);
1921}
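/*
 * Illustration: on a POWER host /proc/cpuinfo contains a line such as
 *
 *     timebase        : 512000000
 *
 * read_cpuinfo() copies that whole line, strchr() finds the colon, and
 * atoi() on the remainder yields 512000000.  The value is host
 * dependent; 512 MHz is merely a common example, and the
 * NANOSECONDS_PER_SECOND fallback is returned if the line is missing.
 */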
1922
1923bool kvmppc_get_host_serial(char **value)
1924{
1925    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1926                               NULL);
1927}
1928
1929bool kvmppc_get_host_model(char **value)
1930{
1931    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1932}
1933
1934/* Try to find a device tree node for a CPU with clock-frequency property */
1935static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1936{
1937    struct dirent *dirp;
1938    DIR *dp;
1939
1940    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1941        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1942        return -1;
1943    }
1944
1945    buf[0] = '\0';
1946    while ((dirp = readdir(dp)) != NULL) {
1947        FILE *f;
1948        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1949                 dirp->d_name);
1950        f = fopen(buf, "r");
1951        if (f) {
1952            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1953            fclose(f);
1954            break;
1955        }
1956        buf[0] = '\0';
1957    }
1958    closedir(dp);
1959    if (buf[0] == '\0') {
1960        printf("Unknown host!\n");
1961        return -1;
1962    }
1963
1964    return 0;
1965}
1966
1967static uint64_t kvmppc_read_int_dt(const char *filename)
1968{
1969    union {
1970        uint32_t v32;
1971        uint64_t v64;
1972    } u;
1973    FILE *f;
1974    int len;
1975
1976    f = fopen(filename, "rb");
1977    if (!f) {
1978        return -1;
1979    }
1980
1981    len = fread(&u, 1, sizeof(u), f);
1982    fclose(f);
1983    switch (len) {
1984    case 4:
1985        /* property is a 32-bit quantity */
1986        return be32_to_cpu(u.v32);
1987    case 8:
1988        return be64_to_cpu(u.v64);
1989    }
1990
1991    return 0;
1992}
1993
1994/* Read a CPU node property from the host device tree that's a single
1995 * integer (32-bit or 64-bit).  Returns 0 or -1 if anything goes
1996 * wrong (can't find or open the property, or doesn't understand
1997 * the format) */
1998static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1999{
2000    char buf[PATH_MAX], *tmp;
2001    uint64_t val;
2002
2003    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
2004        return -1;
2005    }
2006
2007    tmp = g_strdup_printf("%s/%s", buf, propname);
2008    val = kvmppc_read_int_dt(tmp);
2009    g_free(tmp);
2010
2011    return val;
2012}
2013
2014uint64_t kvmppc_get_clockfreq(void)
2015{
2016    return kvmppc_read_int_cpu_dt("clock-frequency");
2017}
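/*
 * Illustration: device tree integer properties are stored big-endian and
 * are either 32 or 64 bits wide, so a call like
 *
 *     uint64_t freq = kvmppc_get_clockfreq();
 *
 * ends up opening something like
 * /proc/device-tree/cpus/<cpu-node>/clock-frequency (the node name is
 * host dependent), reading the raw bytes and converting them with
 * be32_to_cpu() or be64_to_cpu() as appropriate.
 */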
2018
2019static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2020{
2021    PowerPCCPU *cpu = ppc_env_get_cpu(env);
2022    CPUState *cs = CPU(cpu);
2023
2024    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2025        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2026        return 0;
2027    }
2028
2029    return 1;
2030}
2031
2032int kvmppc_get_hasidle(CPUPPCState *env)
2033{
2034    struct kvm_ppc_pvinfo pvinfo;
2035
2036    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2037        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2038        return 1;
2039    }
2040
2041    return 0;
2042}
2043
2044int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2045{
2046    uint32_t *hc = (uint32_t *)buf;
2047    struct kvm_ppc_pvinfo pvinfo;
2048
2049    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2050        memcpy(buf, pvinfo.hcall, buf_len);
2051        return 0;
2052    }
2053
2054    /*
2055     * Fallback to always fail hypercalls regardless of endianness:
2056     *
2057     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2058     *     li r3, -1
2059     *     b .+8       (becomes nop in wrong endian)
2060     *     bswap32(li r3, -1)
2061     */
2062
2063    hc[0] = cpu_to_be32(0x08000048);
2064    hc[1] = cpu_to_be32(0x3860ffff);
2065    hc[2] = cpu_to_be32(0x48000008);
2066    hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2067
2068    return 1;
2069}
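/*
 * Hedged usage sketch: callers pass a buffer of at least 16 bytes (four
 * instructions); the sequence can then be exposed to the guest, for
 * instance through an ePAPR-style "hcall-instructions" device tree
 * property (the property name is an assumption here, not something this
 * file defines):
 *
 *     uint32_t hc[4];
 *
 *     if (kvmppc_get_hypercall(env, (uint8_t *)hc, sizeof(hc))) {
 *         ... non-zero return: the always-failing fallback above is in
 *             use because KVM did not provide pvinfo ...
 *     }
 */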
2070
2071static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2072{
2073    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2074}
2075
2076void kvmppc_enable_logical_ci_hcalls(void)
2077{
2078    /*
2079     * FIXME: it would be nice to detect the cases where we're using
2080     * a device which requires the in-kernel implementation of these
2081     * hcalls but the kernel lacks it, and to produce a warning in
2082     * that case.
2083     */
2084    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2085    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2086}
2087
2088void kvmppc_enable_set_mode_hcall(void)
2089{
2090    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2091}
2092
2093void kvmppc_enable_clear_ref_mod_hcalls(void)
2094{
2095    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2096    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2097}
2098
2099void kvmppc_set_papr(PowerPCCPU *cpu)
2100{
2101    CPUState *cs = CPU(cpu);
2102    int ret;
2103
2104    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2105    if (ret) {
2106        error_report("This vCPU type or KVM version does not support PAPR");
2107        exit(1);
2108    }
2109
2110    /* Update the capability flag so we sync the right information
2111     * with kvm */
2112    cap_papr = 1;
2113}
2114
2115int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2116{
2117    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2118}
2119
2120void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2121{
2122    CPUState *cs = CPU(cpu);
2123    int ret;
2124
2125    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2126    if (ret && mpic_proxy) {
2127        error_report("This KVM version does not support EPR");
2128        exit(1);
2129    }
2130}
2131
2132int kvmppc_smt_threads(void)
2133{
2134    return cap_ppc_smt ? cap_ppc_smt : 1;
2135}
2136
2137int kvmppc_set_smt_threads(int smt)
2138{
2139    int ret;
2140
2141    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2142    if (!ret) {
2143        cap_ppc_smt = smt;
2144    }
2145    return ret;
2146}
2147
2148void kvmppc_hint_smt_possible(Error **errp)
2149{
2150    int i;
2151    GString *g;
2152    char *s;
2153
2154    assert(kvm_enabled());
2155    if (cap_ppc_smt_possible) {
2156        g = g_string_new("Available VSMT modes:");
2157        for (i = 63; i >= 0; i--) {
2158            if ((1UL << i) & cap_ppc_smt_possible) {
2159                g_string_append_printf(g, " %lu", (1UL << i));
2160            }
2161        }
2162        s = g_string_free(g, false);
2163        error_append_hint(errp, "%s.\n", s);
2164        g_free(s);
2165    } else {
2166        error_append_hint(errp,
2167                          "This KVM seems to be too old to support VSMT.\n");
2168    }
2169}
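/*
 * Illustration of the bitmask decoded above: bit i set in
 * cap_ppc_smt_possible means a VSMT mode of (1 << i) threads is
 * accepted, so a value of 0x0f decodes to modes 8, 4, 2 and 1 and the
 * hint reads "Available VSMT modes: 8 4 2 1".
 */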
2170
2171
2172#ifdef TARGET_PPC64
2173off_t kvmppc_alloc_rma(void **rma)
2174{
2175    off_t size;
2176    int fd;
2177    struct kvm_allocate_rma ret;
2178
2179    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported;
2180     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2181     *                      not necessary on this hardware;
2182     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2183     *
2184     * FIXME: We should allow the user to force contiguous RMA
2185     * allocation in the cap_ppc_rma==1 case.
2186     */
2187    if (cap_ppc_rma < 2) {
2188        return 0;
2189    }
2190
2191    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2192    if (fd < 0) {
2193        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2194                strerror(errno));
2195        return -1;
2196    }
2197
2198    size = MIN(ret.rma_size, 256ul << 20);
2199
2200    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2201    if (*rma == MAP_FAILED) {
2202        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2203        return -1;
2204    };
2205
2206    return size;
2207}
2208
2209uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2210{
2211    struct kvm_ppc_smmu_info info;
2212    long rampagesize, best_page_shift;
2213    int i;
2214
2215    if (cap_ppc_rma >= 2) {
2216        return current_size;
2217    }
2218
2219    /* Find the largest hardware supported page size that's less than
2220     * or equal to the (logical) backing page size of guest RAM */
2221    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2222    rampagesize = qemu_getrampagesize();
2223    best_page_shift = 0;
2224
2225    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2226        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2227
2228        if (!sps->page_shift) {
2229            continue;
2230        }
2231
2232        if ((sps->page_shift > best_page_shift)
2233            && ((1UL << sps->page_shift) <= rampagesize)) {
2234            best_page_shift = sps->page_shift;
2235        }
2236    }
2237
2238    return MIN(current_size,
2239               1ULL << (best_page_shift + hash_shift - 7));
2240}
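/*
 * Worked example of the clamp above: with 4 KiB backing pages
 * (best_page_shift = 12) and a 16 MiB HPT (hash_shift = 24) the limit
 * is 1ULL << (12 + 24 - 7) = 512 MiB, so a larger requested RMA is cut
 * down to that; with 64 KiB pages the same HPT allows up to 8 GiB and
 * the requested size is normally returned unchanged.
 */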
2241#endif
2242
2243bool kvmppc_spapr_use_multitce(void)
2244{
2245    return cap_spapr_multitce;
2246}
2247
2248int kvmppc_spapr_enable_inkernel_multitce(void)
2249{
2250    int ret;
2251
2252    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2253                            H_PUT_TCE_INDIRECT, 1);
2254    if (!ret) {
2255        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2256                                H_STUFF_TCE, 1);
2257    }
2258
2259    return ret;
2260}
2261
2262void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2263                              uint64_t bus_offset, uint32_t nb_table,
2264                              int *pfd, bool need_vfio)
2265{
2266    long len;
2267    int fd;
2268    void *table;
2269
2270    /* Must set fd to -1 so we don't try to munmap when called for
2271     * destroying the table, which the upper layers -will- do
2272     */
2273    *pfd = -1;
2274    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2275        return NULL;
2276    }
2277
2278    if (cap_spapr_tce_64) {
2279        struct kvm_create_spapr_tce_64 args = {
2280            .liobn = liobn,
2281            .page_shift = page_shift,
2282            .offset = bus_offset >> page_shift,
2283            .size = nb_table,
2284            .flags = 0
2285        };
2286        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2287        if (fd < 0) {
2288            fprintf(stderr,
2289                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2290                    liobn);
2291            return NULL;
2292        }
2293    } else if (cap_spapr_tce) {
2294        uint64_t window_size = (uint64_t) nb_table << page_shift;
2295        struct kvm_create_spapr_tce args = {
2296            .liobn = liobn,
2297            .window_size = window_size,
2298        };
2299        if ((window_size != args.window_size) || bus_offset) {
2300            return NULL;
2301        }
2302        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2303        if (fd < 0) {
2304            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2305                    liobn);
2306            return NULL;
2307        }
2308    } else {
2309        return NULL;
2310    }
2311
2312    len = nb_table * sizeof(uint64_t);
2313    /* FIXME: round this up to page size */
2314
2315    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2316    if (table == MAP_FAILED) {
2317        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2318                liobn);
2319        close(fd);
2320        return NULL;
2321    }
2322
2323    *pfd = fd;
2324    return table;
2325}
2326
2327int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2328{
2329    long len;
2330
2331    if (fd < 0) {
2332        return -1;
2333    }
2334
2335    len = nb_table * sizeof(uint64_t);
2336    if ((munmap(table, len) < 0) ||
2337        (close(fd) < 0)) {
2338        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2339                strerror(errno));
2340        /* Leak the table */
2341    }
2342
2343    return 0;
2344}
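/*
 * Hedged usage sketch: callers are expected to pair the two helpers
 * above, keeping the fd returned through *pfd for the lifetime of the
 * mapping.  The page_shift of 12 (4 KiB IOMMU pages) is only an example
 * value:
 *
 *     int fd;
 *     uint64_t *tbl = kvmppc_create_spapr_tce(liobn, 12, 0, nb_table,
 *                                             &fd, false);
 *
 *     if (tbl) {
 *         ... TCE updates go through this shared mapping ...
 *         kvmppc_remove_spapr_tce(tbl, fd, nb_table);
 *     } else {
 *         ... fall back to a userspace-allocated table; *pfd is -1 ...
 *     }
 */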
2345
2346int kvmppc_reset_htab(int shift_hint)
2347{
2348    uint32_t shift = shift_hint;
2349
2350    if (!kvm_enabled()) {
2351        /* Full emulation, tell caller to allocate htab itself */
2352        return 0;
2353    }
2354    if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2355        int ret;
2356        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2357        if (ret == -ENOTTY) {
2358            /* At least some versions of PR KVM advertise the
2359             * capability, but don't implement the ioctl().  Oops.
2360             * Return 0 so that we allocate the htab in qemu, as is
2361             * correct for PR. */
2362            return 0;
2363        } else if (ret < 0) {
2364            return ret;
2365        }
2366        return shift;
2367    }
2368
2369    /* We have a kernel that predates the htab reset calls.  For PR
2370     * KVM we need to allocate the htab ourselves; an HV KVM of this
2371     * era has already allocated a 16MB fixed-size hash table. */
2372    if (kvmppc_is_pr(kvm_state)) {
2373        /* PR - tell caller to allocate htab */
2374        return 0;
2375    } else {
2376        /* HV - assume 16MB kernel allocated htab */
2377        return 24;
2378    }
2379}
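/*
 * Return value convention, as used by the caller: 0 means "allocate the
 * hash table in QEMU" (full emulation or PR KVM), a positive value is
 * the log2 size of the kernel-managed HPT (24 => 16 MiB), and a
 * negative value is an error from the ioctl.
 */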
2380
2381static inline uint32_t mfpvr(void)
2382{
2383    uint32_t pvr;
2384
2385    asm ("mfpvr %0"
2386         : "=r"(pvr));
2387    return pvr;
2388}
2389
2390static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2391{
2392    if (on) {
2393        *word |= flags;
2394    } else {
2395        *word &= ~flags;
2396    }
2397}
2398
2399static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2400{
2401    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2402    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2403    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2404
2405    /* Now fix up the class with information we can query from the host */
2406    pcc->pvr = mfpvr();
2407
2408    alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2409                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2410    alter_insns(&pcc->insns_flags2, PPC2_VSX,
2411                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2412    alter_insns(&pcc->insns_flags2, PPC2_DFP,
2413                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2414
2415    if (dcache_size != -1) {
2416        pcc->l1_dcache_size = dcache_size;
2417    }
2418
2419    if (icache_size != -1) {
2420        pcc->l1_icache_size = icache_size;
2421    }
2422
2423#if defined(TARGET_PPC64)
2424    pcc->radix_page_info = kvm_get_radix_page_info();
2425
2426    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2427        /*
2428         * POWER9 DD1 has some bugs which make it not really ISA 3.00
2429         * compliant.  More importantly, advertising ISA 3.00
2430         * architected mode may prevent guests from activating
2431         * necessary DD1 workarounds.
2432         */
2433        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2434                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2435    }
2436#endif /* defined(TARGET_PPC64) */
2437}
2438
2439bool kvmppc_has_cap_epr(void)
2440{
2441    return cap_epr;
2442}
2443
2444bool kvmppc_has_cap_fixup_hcalls(void)
2445{
2446    return cap_fixup_hcalls;
2447}
2448
2449bool kvmppc_has_cap_htm(void)
2450{
2451    return cap_htm;
2452}
2453
2454bool kvmppc_has_cap_mmu_radix(void)
2455{
2456    return cap_mmu_radix;
2457}
2458
2459bool kvmppc_has_cap_mmu_hash_v3(void)
2460{
2461    return cap_mmu_hash_v3;
2462}
2463
2464static void kvmppc_get_cpu_characteristics(KVMState *s)
2465{
2466    struct kvm_ppc_cpu_char c;
2467    int ret;
2468
2469    /* Assume broken */
2470    cap_ppc_safe_cache = 0;
2471    cap_ppc_safe_bounds_check = 0;
2472    cap_ppc_safe_indirect_branch = 0;
2473
2474    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2475    if (!ret) {
2476        return;
2477    }
2478    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2479    if (ret < 0) {
2480        return;
2481    }
2482    /* Parse and set cap_ppc_safe_cache */
2483    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2484        cap_ppc_safe_cache = 2;
2485    } else if ((c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2486               (c.character & c.character_mask
2487                & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2488        cap_ppc_safe_cache = 1;
2489    }
2490    /* Parse and set cap_ppc_safe_bounds_check */
2491    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2492        cap_ppc_safe_bounds_check = 2;
2493    } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2494        cap_ppc_safe_bounds_check = 1;
2495    }
2496    /* Parse and set cap_ppc_safe_indirect_branch */
2497    if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2498        cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_CCD;
2499    } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2500        cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_IBS;
2501    }
2502}
2503
2504int kvmppc_get_cap_safe_cache(void)
2505{
2506    return cap_ppc_safe_cache;
2507}
2508
2509int kvmppc_get_cap_safe_bounds_check(void)
2510{
2511    return cap_ppc_safe_bounds_check;
2512}
2513
2514int kvmppc_get_cap_safe_indirect_branch(void)
2515{
2516    return cap_ppc_safe_indirect_branch;
2517}
2518
2519bool kvmppc_has_cap_spapr_vfio(void)
2520{
2521    return cap_spapr_vfio;
2522}
2523
2524PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2525{
2526    uint32_t host_pvr = mfpvr();
2527    PowerPCCPUClass *pvr_pcc;
2528
2529    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2530    if (pvr_pcc == NULL) {
2531        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2532    }
2533
2534    return pvr_pcc;
2535}
2536
2537static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2538{
2539    TypeInfo type_info = {
2540        .name = TYPE_HOST_POWERPC_CPU,
2541        .class_init = kvmppc_host_cpu_class_init,
2542    };
2543    MachineClass *mc = MACHINE_GET_CLASS(ms);
2544    PowerPCCPUClass *pvr_pcc;
2545    ObjectClass *oc;
2546    DeviceClass *dc;
2547    int i;
2548
2549    pvr_pcc = kvm_ppc_get_host_cpu_class();
2550    if (pvr_pcc == NULL) {
2551        return -1;
2552    }
2553    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2554    type_register(&type_info);
2555    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2556        /* override TCG default cpu type with 'host' cpu model */
2557        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2558    }
2559
2560    oc = object_class_by_name(type_info.name);
2561    g_assert(oc);
2562
2563    /*
2564     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2565     * we want "POWER8" to be a "family" alias that points to the current
2566     * host CPU type, too)
2567     */
2568    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2569    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2570        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2571            char *suffix;
2572
2573            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2574            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2575            if (suffix) {
2576                *suffix = 0;
2577            }
2578            break;
2579        }
2580    }
2581
2582    return 0;
2583}
2584
2585int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2586{
2587    struct kvm_rtas_token_args args = {
2588        .token = token,
2589    };
2590
2591    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2592        return -ENOENT;
2593    }
2594
2595    strncpy(args.name, function, sizeof(args.name));
2596
2597    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2598}
2599
2600int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2601{
2602    struct kvm_get_htab_fd s = {
2603        .flags = write ? KVM_GET_HTAB_WRITE : 0,
2604        .start_index = index,
2605    };
2606    int ret;
2607
2608    if (!cap_htab_fd) {
2609        error_setg(errp, "KVM version doesn't support %s the HPT",
2610                   write ? "writing" : "reading");
2611        return -ENOTSUP;
2612    }
2613
2614    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2615    if (ret < 0) {
2616        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2617                   write ? "writing" : "reading", write ? "to" : "from",
2618                   strerror(errno));
2619        return -errno;
2620    }
2621
2622    return ret;
2623}
2624
2625int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2626{
2627    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2628    uint8_t buf[bufsize];
2629    ssize_t rc;
2630
2631    do {
2632        rc = read(fd, buf, bufsize);
2633        if (rc < 0) {
2634            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2635                    strerror(errno));
2636            return rc;
2637        } else if (rc) {
2638            uint8_t *buffer = buf;
2639            ssize_t n = rc;
2640            while (n) {
2641                struct kvm_get_htab_header *head =
2642                    (struct kvm_get_htab_header *) buffer;
2643                size_t chunksize = sizeof(*head) +
2644                     HASH_PTE_SIZE_64 * head->n_valid;
2645
2646                qemu_put_be32(f, head->index);
2647                qemu_put_be16(f, head->n_valid);
2648                qemu_put_be16(f, head->n_invalid);
2649                qemu_put_buffer(f, (void *)(head + 1),
2650                                HASH_PTE_SIZE_64 * head->n_valid);
2651
2652                buffer += chunksize;
2653                n -= chunksize;
2654            }
2655        }
2656    } while ((rc != 0)
2657             && ((max_ns < 0)
2658                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2659
2660    return (rc == 0) ? 1 : 0;
2661}
2662
2663int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2664                           uint16_t n_valid, uint16_t n_invalid)
2665{
2666    struct kvm_get_htab_header *buf;
2667    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2668    ssize_t rc;
2669
2670    buf = alloca(chunksize);
2671    buf->index = index;
2672    buf->n_valid = n_valid;
2673    buf->n_invalid = n_invalid;
2674
2675    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2676
2677    rc = write(fd, buf, chunksize);
2678    if (rc < 0) {
2679        fprintf(stderr, "Error writing KVM hash table: %s\n",
2680                strerror(errno));
2681        return rc;
2682    }
2683    if (rc != chunksize) {
2684        /* We should never get a short write on a single chunk */
2685        fprintf(stderr, "Short write, restoring KVM hash table\n");
2686        return -1;
2687    }
2688    return 0;
2689}
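/*
 * The stream written by kvmppc_save_htab() and consumed by
 * kvmppc_load_htab_chunk() is a sequence of records mirroring
 * struct kvm_get_htab_header plus its payload:
 *
 *     be32  index       first HPTE index covered by the record
 *     be16  n_valid     number of valid HPTEs whose contents follow
 *     be16  n_invalid   number of HPTEs to invalidate after those
 *     n_valid * HASH_PTE_SIZE_64 bytes of raw HPTE data
 *
 * How the records are framed within the wider migration stream is up to
 * the sPAPR migration code, not this file.
 */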
2690
2691bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2692{
2693    return true;
2694}
2695
2696void kvm_arch_init_irq_routing(KVMState *s)
2697{
2698}
2699
2700void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2701{
2702    int fd, rc;
2703    int i;
2704
2705    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2706
2707    i = 0;
2708    while (i < n) {
2709        struct kvm_get_htab_header *hdr;
2710        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2711        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2712
2713        rc = read(fd, buf, sizeof(buf));
2714        if (rc < 0) {
2715            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2716        }
2717
2718        hdr = (struct kvm_get_htab_header *)buf;
2719        while ((i < n) && ((char *)hdr < (buf + rc))) {
2720            int invalid = hdr->n_invalid, valid = hdr->n_valid;
2721
2722            if (hdr->index != (ptex + i)) {
2723                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2724                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2725            }
2726
2727            if (n - i < valid) {
2728                valid = n - i;
2729            }
2730            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2731            i += valid;
2732
2733            if ((n - i) < invalid) {
2734                invalid = n - i;
2735            }
2736            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2737            i += invalid;
2738
2739            hdr = (struct kvm_get_htab_header *)
2740                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2741        }
2742    }
2743
2744    close(fd);
2745}
2746
2747void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2748{
2749    int fd, rc;
2750    struct {
2751        struct kvm_get_htab_header hdr;
2752        uint64_t pte0;
2753        uint64_t pte1;
2754    } buf;
2755
2756    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2757
2758    buf.hdr.n_valid = 1;
2759    buf.hdr.n_invalid = 0;
2760    buf.hdr.index = ptex;
2761    buf.pte0 = cpu_to_be64(pte0);
2762    buf.pte1 = cpu_to_be64(pte1);
2763
2764    rc = write(fd, &buf, sizeof(buf));
2765    if (rc != sizeof(buf)) {
2766        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2767    }
2768    close(fd);
2769}
2770
2771int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2772                             uint64_t address, uint32_t data, PCIDevice *dev)
2773{
2774    return 0;
2775}
2776
2777int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2778                                int vector, PCIDevice *dev)
2779{
2780    return 0;
2781}
2782
2783int kvm_arch_release_virq_post(int virq)
2784{
2785    return 0;
2786}
2787
2788int kvm_arch_msi_data_to_gsi(uint32_t data)
2789{
2790    return data & 0xffff;
2791}
2792
2793int kvmppc_enable_hwrng(void)
2794{
2795    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2796        return -1;
2797    }
2798
2799    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2800}
2801
2802void kvmppc_check_papr_resize_hpt(Error **errp)
2803{
2804    if (!kvm_enabled()) {
2805        return; /* No KVM, we're good */
2806    }
2807
2808    if (cap_resize_hpt) {
2809        return; /* Kernel has explicit support, we're good */
2810    }
2811
2812    /* Otherwise fallback on looking for PR KVM */
2813    if (kvmppc_is_pr(kvm_state)) {
2814        return;
2815    }
2816
2817    error_setg(errp,
2818               "Hash page table resizing not available with this KVM version");
2819}
2820
2821int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2822{
2823    CPUState *cs = CPU(cpu);
2824    struct kvm_ppc_resize_hpt rhpt = {
2825        .flags = flags,
2826        .shift = shift,
2827    };
2828
2829    if (!cap_resize_hpt) {
2830        return -ENOSYS;
2831    }
2832
2833    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2834}
2835
2836int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2837{
2838    CPUState *cs = CPU(cpu);
2839    struct kvm_ppc_resize_hpt rhpt = {
2840        .flags = flags,
2841        .shift = shift,
2842    };
2843
2844    if (!cap_resize_hpt) {
2845        return -ENOSYS;
2846    }
2847
2848    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2849}
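/*
 * Hedged usage sketch of the two-phase resize: the H_RESIZE_HPT
 * hypercall handling is expected to call prepare first (repeating it
 * while the kernel reports that the new table is still being built) and
 * only then commit with the same flags/shift:
 *
 *     ret = kvmppc_resize_hpt_prepare(cpu, flags, shift);
 *     ... once prepare succeeds ...
 *     ret = kvmppc_resize_hpt_commit(cpu, flags, shift);
 *
 * Both return -ENOSYS when the kernel does not advertise the resize
 * capability (cap_resize_hpt == 0).
 */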
2850
2851/*
2852 * This is a helper function to detect a post-migration scenario
2853 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2854 * the guest kernel can't handle a PVR value other than the actual host
2855 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2856 *
2857 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2858 * (so, we're HV), return true. The workaround itself is done in
2859 * cpu_post_load.
2860 *
2861 * The order here is important: we'll only check for KVM PR as a
2862 * fallback if the guest kernel can't handle the situation itself.
2863 * We want to avoid querying the running KVM type at the QEMU level
2864 * as much as possible.
2865 */
2866bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2867{
2868    CPUState *cs = CPU(cpu);
2869
2870    if (!kvm_enabled()) {
2871        return false;
2872    }
2873
2874    if (cap_ppc_pvr_compat) {
2875        return false;
2876    }
2877
2878    return !kvmppc_is_pr(cs->kvm_state);
2879}
2880