qemu/target/ppc/kvm.c
   1/*
   2 * PowerPC implementation of KVM hooks
   3 *
   4 * Copyright IBM Corp. 2007
   5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6 *
   7 * Authors:
   8 *  Jerone Young <jyoung5@us.ibm.com>
   9 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10 *  Hollis Blanchard <hollisb@us.ibm.com>
  11 *
  12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13 * See the COPYING file in the top-level directory.
  14 *
  15 */
  16
  17#include "qemu/osdep.h"
  18#include <dirent.h>
  19#include <sys/ioctl.h>
  20#include <sys/vfs.h>
  21
  22#include <linux/kvm.h>
  23
  24#include "qemu-common.h"
  25#include "qapi/error.h"
  26#include "qemu/error-report.h"
  27#include "cpu.h"
  28#include "cpu-models.h"
  29#include "qemu/timer.h"
  30#include "sysemu/sysemu.h"
  31#include "sysemu/hw_accel.h"
  32#include "kvm_ppc.h"
  33#include "sysemu/cpus.h"
  34#include "sysemu/device_tree.h"
  35#include "mmu-hash64.h"
  36
  37#include "hw/sysbus.h"
  38#include "hw/ppc/spapr.h"
  39#include "hw/ppc/spapr_vio.h"
  40#include "hw/ppc/spapr_cpu_core.h"
  41#include "hw/ppc/ppc.h"
  42#include "sysemu/watchdog.h"
  43#include "trace.h"
  44#include "exec/gdbstub.h"
  45#include "exec/memattrs.h"
  46#include "exec/ram_addr.h"
  47#include "sysemu/hostmem.h"
  48#include "qemu/cutils.h"
  49#include "qemu/mmap-alloc.h"
  50#if defined(TARGET_PPC64)
  51#include "hw/ppc/spapr_cpu_core.h"
  52#endif
  53#include "elf.h"
  54#include "sysemu/kvm_int.h"
  55
  56//#define DEBUG_KVM
  57
  58#ifdef DEBUG_KVM
  59#define DPRINTF(fmt, ...) \
  60    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  61#else
  62#define DPRINTF(fmt, ...) \
  63    do { } while (0)
  64#endif
  65
  66#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  67
  68const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  69    KVM_CAP_LAST_INFO
  70};
  71
  72static int cap_interrupt_unset = false;
  73static int cap_interrupt_level = false;
  74static int cap_segstate;
  75static int cap_booke_sregs;
  76static int cap_ppc_smt;
  77static int cap_ppc_rma;
  78static int cap_spapr_tce;
  79static int cap_spapr_tce_64;
  80static int cap_spapr_multitce;
  81static int cap_spapr_vfio;
  82static int cap_hior;
  83static int cap_one_reg;
  84static int cap_epr;
  85static int cap_ppc_watchdog;
  86static int cap_papr;
  87static int cap_htab_fd;
  88static int cap_fixup_hcalls;
  89static int cap_htm;             /* Hardware transactional memory support */
  90static int cap_mmu_radix;
  91static int cap_mmu_hash_v3;
  92static int cap_resize_hpt;
  93static int cap_ppc_pvr_compat;
  94
  95static uint32_t debug_inst_opcode;
  96
  97/* XXX We have a race condition where we actually have a level triggered
  98 *     interrupt, but the infrastructure can't expose that yet, so the guest
  99 *     takes but ignores it, goes to sleep and never gets notified that there's
 100 *     still an interrupt pending.
 101 *
 102 *     As a quick workaround, let's just wake up again 20 ms after we injected
 103 *     an interrupt. That way we can assure that we're always reinjecting
 104 *     interrupts in case the guest swallowed them.
 105 */
 106static QEMUTimer *idle_timer;
 107
 108static void kvm_kick_cpu(void *opaque)
 109{
 110    PowerPCCPU *cpu = opaque;
 111
 112    qemu_cpu_kick(CPU(cpu));
 113}
 114
 115/* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 116 * should only be used for fallback tests - generally we should use
 117 * explicit capabilities for the features we want, rather than
 118 * assuming what is/isn't available depending on the KVM variant. */
 119static bool kvmppc_is_pr(KVMState *ks)
 120{
 121    /* Assume KVM-PR if the GET_PVINFO capability is available */
 122    return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 123}
 124
 125static int kvm_ppc_register_host_cpu_type(void);
 126
 127int kvm_arch_init(MachineState *ms, KVMState *s)
 128{
 129    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 130    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 131    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 132    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 133    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 134    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 135    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 136    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 137    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 138    cap_spapr_vfio = false;
 139    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 140    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 141    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 142    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 143    /* Note: we don't set cap_papr here, because this capability is
 144     * only activated after this by kvmppc_set_papr() */
 145    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 146    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 147    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 148    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 149    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 150    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
 151    /*
 152     * Note: setting it to false because there is no such capability
 153     * in KVM at this moment.
 154     *
 155     * TODO: call kvm_vm_check_extension() with the right capability
 156     * after the kernel starts implementing it. */
 157    cap_ppc_pvr_compat = false;
 158
 159    if (!cap_interrupt_level) {
 160        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 161                        "VM to stall at times!\n");
 162    }
 163
 164    kvm_ppc_register_host_cpu_type();
 165
 166    return 0;
 167}
 168
 169int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 170{
 171    return 0;
 172}
 173
 174static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 175{
 176    CPUPPCState *cenv = &cpu->env;
 177    CPUState *cs = CPU(cpu);
 178    struct kvm_sregs sregs;
 179    int ret;
 180
 181    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 182        /* What we're really trying to say is "if we're on BookE, we use
 183           the native PVR for now". This is the only sane way to check
 184           it though, so we may mislead users into thinking they can run
 185           BookE guests on BookS. Let's hope nobody dares enough :) */
 186        return 0;
 187    } else {
 188        if (!cap_segstate) {
 189            fprintf(stderr, "kvm error: missing PVR setting capability\n");
 190            return -ENOSYS;
 191        }
 192    }
 193
 194    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 195    if (ret) {
 196        return ret;
 197    }
 198
 199    sregs.pvr = cenv->spr[SPR_PVR];
 200    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 201}
 202
 203/* Set up a shared TLB array with KVM */
 204static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 205{
 206    CPUPPCState *env = &cpu->env;
 207    CPUState *cs = CPU(cpu);
 208    struct kvm_book3e_206_tlb_params params = {};
 209    struct kvm_config_tlb cfg = {};
 210    unsigned int entries = 0;
 211    int ret, i;
 212
 213    if (!kvm_enabled() ||
 214        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 215        return 0;
 216    }
 217
 218    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 219
 220    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 221        params.tlb_sizes[i] = booke206_tlb_size(env, i);
 222        params.tlb_ways[i] = booke206_tlb_ways(env, i);
 223        entries += params.tlb_sizes[i];
 224    }
 225
 226    assert(entries == env->nb_tlb);
 227    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 228
 229    env->tlb_dirty = true;
 230
 231    cfg.array = (uintptr_t)env->tlb.tlbm;
 232    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 233    cfg.params = (uintptr_t)&params;
 234    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 235
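        /* Point KVM at QEMU's TLB array (cfg describes its layout and the
         * geometry in params) so guest TLB state is shared with the kernel
         * rather than copied on every access. */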
 236    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 237    if (ret < 0) {
 238        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 239                __func__, strerror(-ret));
 240        return ret;
 241    }
 242
 243    env->kvm_sw_tlb = true;
 244    return 0;
 245}
 246
 247
 248#if defined(TARGET_PPC64)
 249static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 250                                       struct kvm_ppc_smmu_info *info)
 251{
 252    CPUPPCState *env = &cpu->env;
 253    CPUState *cs = CPU(cpu);
 254
 255    memset(info, 0, sizeof(*info));
 256
 257    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 258     * we need to "guess" what the supported page sizes are.
 259     *
 260     * For that to work we make a few assumptions:
 261     *
 262     * - Check whether we are running "PR" KVM which only supports 4K
 263     *   and 16M pages, but supports them regardless of the backing
 264     *   store characteristics. We also don't support 1T segments.
 265     *
 266     *   This is safe as if HV KVM ever supports that capability or PR
 267     *   KVM grows support for more page/segment sizes, those versions
 268     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 269     *   will not hit this fallback
 270     *
 271     * - Else we are running HV KVM. This means we only support page
 272     *   sizes that fit in the backing store. Additionally we only
 273     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
 274     *   P7 encodings for the SLB and hash table. Here too, we assume
 275     *   support for any newer processor will mean a kernel that
 276     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 277     *   this fallback.
 278     */
 279    if (kvmppc_is_pr(cs->kvm_state)) {
 280        /* No flags */
 281        info->flags = 0;
 282        info->slb_size = 64;
 283
 284        /* Standard 4k base page size segment */
 285        info->sps[0].page_shift = 12;
 286        info->sps[0].slb_enc = 0;
 287        info->sps[0].enc[0].page_shift = 12;
 288        info->sps[0].enc[0].pte_enc = 0;
 289
 290        /* Standard 16M large page size segment */
 291        info->sps[1].page_shift = 24;
 292        info->sps[1].slb_enc = SLB_VSID_L;
 293        info->sps[1].enc[0].page_shift = 24;
 294        info->sps[1].enc[0].pte_enc = 0;
 295    } else {
 296        int i = 0;
 297
 298        /* HV KVM has backing store size restrictions */
 299        info->flags = KVM_PPC_PAGE_SIZES_REAL;
 300
 301        if (env->mmu_model & POWERPC_MMU_1TSEG) {
 302            info->flags |= KVM_PPC_1T_SEGMENTS;
 303        }
 304
 305        if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 306           POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 307            info->slb_size = 32;
 308        } else {
 309            info->slb_size = 64;
 310        }
 311
 312        /* Standard 4k base page size segment */
 313        info->sps[i].page_shift = 12;
 314        info->sps[i].slb_enc = 0;
 315        info->sps[i].enc[0].page_shift = 12;
 316        info->sps[i].enc[0].pte_enc = 0;
 317        i++;
 318
 319        /* 64K on MMU 2.06 and later */
 320        if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 321            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 322            info->sps[i].page_shift = 16;
 323            info->sps[i].slb_enc = 0x110;
 324            info->sps[i].enc[0].page_shift = 16;
 325            info->sps[i].enc[0].pte_enc = 1;
 326            i++;
 327        }
 328
 329        /* Standard 16M large page size segment */
 330        info->sps[i].page_shift = 24;
 331        info->sps[i].slb_enc = SLB_VSID_L;
 332        info->sps[i].enc[0].page_shift = 24;
 333        info->sps[i].enc[0].pte_enc = 0;
 334    }
 335}
 336
 337static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 338{
 339    CPUState *cs = CPU(cpu);
 340    int ret;
 341
 342    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 343        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 344        if (ret == 0) {
 345            return;
 346        }
 347    }
 348
 349    kvm_get_fallback_smmu_info(cpu, info);
 350}
 351
 352struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 353{
 354    KVMState *s = KVM_STATE(current_machine->accelerator);
 355    struct ppc_radix_page_info *radix_page_info;
 356    struct kvm_ppc_rmmu_info rmmu_info;
 357    int i;
 358
 359    if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 360        return NULL;
 361    }
 362    if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 363        return NULL;
 364    }
 365    radix_page_info = g_malloc0(sizeof(*radix_page_info));
 366    radix_page_info->count = 0;
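        /* Keep only the non-zero encodings, i.e. the radix page sizes the
         * host kernel reports as supported. */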
 367    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 368        if (rmmu_info.ap_encodings[i]) {
 369            radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 370            radix_page_info->count++;
 371        }
 372    }
 373    return radix_page_info;
 374}
 375
 376target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 377                                     bool radix, bool gtse,
 378                                     uint64_t proc_tbl)
 379{
 380    CPUState *cs = CPU(cpu);
 381    int ret;
 382    uint64_t flags = 0;
 383    struct kvm_ppc_mmuv3_cfg cfg = {
 384        .process_table = proc_tbl,
 385    };
 386
 387    if (radix) {
 388        flags |= KVM_PPC_MMUV3_RADIX;
 389    }
 390    if (gtse) {
 391        flags |= KVM_PPC_MMUV3_GTSE;
 392    }
 393    cfg.flags = flags;
 394    ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
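        /* Map the ioctl result onto the PAPR return codes expected by the
         * hcall caller. */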
 395    switch (ret) {
 396    case 0:
 397        return H_SUCCESS;
 398    case -EINVAL:
 399        return H_PARAMETER;
 400    case -ENODEV:
 401        return H_NOT_AVAILABLE;
 402    default:
 403        return H_HARDWARE;
 404    }
 405}
 406
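    /* Without KVM_PPC_PAGE_SIZES_REAL (HV KVM sets it) any page size is
     * accepted; with it, a page size is only usable if it fits within the
     * backing RAM page size (rampgsize). */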
 407static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 408{
 409    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 410        return true;
 411    }
 412
 413    return (1ul << shift) <= rampgsize;
 414}
 415
 416static long max_cpu_page_size;
 417
 418static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 419{
 420    static struct kvm_ppc_smmu_info smmu_info;
 421    static bool has_smmu_info;
 422    CPUPPCState *env = &cpu->env;
 423    int iq, ik, jq, jk;
 424    bool has_64k_pages = false;
 425
 426    /* We only handle page sizes for 64-bit server guests for now */
 427    if (!(env->mmu_model & POWERPC_MMU_64)) {
 428        return;
 429    }
 430
 431    /* Collect MMU info from kernel if not already */
 432    if (!has_smmu_info) {
 433        kvm_get_smmu_info(cpu, &smmu_info);
 434        has_smmu_info = true;
 435    }
 436
 437    if (!max_cpu_page_size) {
 438        max_cpu_page_size = qemu_getrampagesize();
 439    }
 440
 441    /* Convert to QEMU form */
 442    memset(&env->sps, 0, sizeof(env->sps));
 443
 444    /* If we have HV KVM, we need to forbid CI large pages if our
 445     * host page size is smaller than 64K.
 446     */
 447    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 448        env->ci_large_pages = getpagesize() >= 0x10000;
 449    }
 450
 451    /*
 452     * XXX This loop should be an entry wide AND of the capabilities that
 453     *     the selected CPU has with the capabilities that KVM supports.
 454     */
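        /* ik/jk walk the KVM-supplied arrays, iq/jq fill QEMU's filtered copy. */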
 455    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 456        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 457        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 458
 459        if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 460                                 ksps->page_shift)) {
 461            continue;
 462        }
 463        qsps->page_shift = ksps->page_shift;
 464        qsps->slb_enc = ksps->slb_enc;
 465        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 466            if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 467                                     ksps->enc[jk].page_shift)) {
 468                continue;
 469            }
 470            if (ksps->enc[jk].page_shift == 16) {
 471                has_64k_pages = true;
 472            }
 473            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 474            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 475            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 476                break;
 477            }
 478        }
 479        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 480            break;
 481        }
 482    }
 483    env->slb_nr = smmu_info.slb_size;
 484    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 485        env->mmu_model &= ~POWERPC_MMU_1TSEG;
 486    }
 487    if (!has_64k_pages) {
 488        env->mmu_model &= ~POWERPC_MMU_64K;
 489    }
 490}
 491
 492bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 493{
 494    Object *mem_obj = object_resolve_path(obj_path, NULL);
 495    char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
 496    long pagesize;
 497
 498    if (mempath) {
 499        pagesize = qemu_mempath_getpagesize(mempath);
 500        g_free(mempath);
 501    } else {
 502        pagesize = getpagesize();
 503    }
 504
 505    return pagesize >= max_cpu_page_size;
 506}
 507
 508#else /* defined (TARGET_PPC64) */
 509
 510static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 511{
 512}
 513
 514bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 515{
 516    return true;
 517}
 518
 519#endif /* !defined (TARGET_PPC64) */
 520
 521unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 522{
 523    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 524}
 525
 526/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 527 * book3s supports only 1 watchpoint, so array size
 528 * of 4 is sufficient for now.
 529 */
 530#define MAX_HW_BKPTS 4
 531
 532static struct HWBreakpoint {
 533    target_ulong addr;
 534    int type;
 535} hw_debug_points[MAX_HW_BKPTS];
 536
 537static CPUWatchpoint hw_watchpoint;
 538
 539/* By default no breakpoints or watchpoints are supported */
 540static int max_hw_breakpoint;
 541static int max_hw_watchpoint;
 542static int nb_hw_breakpoint;
 543static int nb_hw_watchpoint;
 544
 545static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 546{
 547    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 548        max_hw_breakpoint = 2;
 549        max_hw_watchpoint = 2;
 550    }
 551
 552    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 553        fprintf(stderr, "Error initializing h/w breakpoints\n");
 554        return;
 555    }
 556}
 557
 558int kvm_arch_init_vcpu(CPUState *cs)
 559{
 560    PowerPCCPU *cpu = POWERPC_CPU(cs);
 561    CPUPPCState *cenv = &cpu->env;
 562    int ret;
 563
 564    /* Gather server mmu info from KVM and update the CPU state */
 565    kvm_fixup_page_sizes(cpu);
 566
 567    /* Synchronize sregs with kvm */
 568    ret = kvm_arch_sync_sregs(cpu);
 569    if (ret) {
 570        if (ret == -EINVAL) {
 571            error_report("Register sync failed... If you're using kvm-hv.ko,"
 572                         " only \"-cpu host\" is possible");
 573        }
 574        return ret;
 575    }
 576
 577    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 578
 579    switch (cenv->mmu_model) {
 580    case POWERPC_MMU_BOOKE206:
 581        /* This target supports access to KVM's guest TLB */
 582        ret = kvm_booke206_tlb_init(cpu);
 583        break;
 584    case POWERPC_MMU_2_07:
 585        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 586            /* KVM-HV has transactional memory on POWER8 also without the
 587             * KVM_CAP_PPC_HTM extension, so enable it here instead as
 588             * long as it's available to userspace on the host. */
 589            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 590                cap_htm = true;
 591            }
 592        }
 593        break;
 594    default:
 595        break;
 596    }
 597
 598    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 599    kvmppc_hw_debug_points_init(cenv);
 600
 601    return ret;
 602}
 603
 604static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 605{
 606    CPUPPCState *env = &cpu->env;
 607    CPUState *cs = CPU(cpu);
 608    struct kvm_dirty_tlb dirty_tlb;
 609    unsigned char *bitmap;
 610    int ret;
 611
 612    if (!env->kvm_sw_tlb) {
 613        return;
 614    }
 615
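        /* Mark every TLB entry dirty: one bit per entry, rounded up to whole bytes. */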
 616    bitmap = g_malloc((env->nb_tlb + 7) / 8);
 617    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 618
 619    dirty_tlb.bitmap = (uintptr_t)bitmap;
 620    dirty_tlb.num_dirty = env->nb_tlb;
 621
 622    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 623    if (ret) {
 624        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 625                __func__, strerror(-ret));
 626    }
 627
 628    g_free(bitmap);
 629}
 630
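    /* Read a single SPR from KVM via the ONE_REG interface and mirror it into
     * env->spr[]; any register size other than 32 or 64 bits aborts. */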
 631static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 632{
 633    PowerPCCPU *cpu = POWERPC_CPU(cs);
 634    CPUPPCState *env = &cpu->env;
 635    union {
 636        uint32_t u32;
 637        uint64_t u64;
 638    } val;
 639    struct kvm_one_reg reg = {
 640        .id = id,
 641        .addr = (uintptr_t) &val,
 642    };
 643    int ret;
 644
 645    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 646    if (ret != 0) {
 647        trace_kvm_failed_spr_get(spr, strerror(errno));
 648    } else {
 649        switch (id & KVM_REG_SIZE_MASK) {
 650        case KVM_REG_SIZE_U32:
 651            env->spr[spr] = val.u32;
 652            break;
 653
 654        case KVM_REG_SIZE_U64:
 655            env->spr[spr] = val.u64;
 656            break;
 657
 658        default:
 659            /* Don't handle this size yet */
 660            abort();
 661        }
 662    }
 663}
 664
 665static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 666{
 667    PowerPCCPU *cpu = POWERPC_CPU(cs);
 668    CPUPPCState *env = &cpu->env;
 669    union {
 670        uint32_t u32;
 671        uint64_t u64;
 672    } val;
 673    struct kvm_one_reg reg = {
 674        .id = id,
 675        .addr = (uintptr_t) &val,
 676    };
 677    int ret;
 678
 679    switch (id & KVM_REG_SIZE_MASK) {
 680    case KVM_REG_SIZE_U32:
 681        val.u32 = env->spr[spr];
 682        break;
 683
 684    case KVM_REG_SIZE_U64:
 685        val.u64 = env->spr[spr];
 686        break;
 687
 688    default:
 689        /* Don't handle this size yet */
 690        abort();
 691    }
 692
 693    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 694    if (ret != 0) {
 695        trace_kvm_failed_spr_set(spr, strerror(errno));
 696    }
 697}
 698
 699static int kvm_put_fp(CPUState *cs)
 700{
 701    PowerPCCPU *cpu = POWERPC_CPU(cs);
 702    CPUPPCState *env = &cpu->env;
 703    struct kvm_one_reg reg;
 704    int i;
 705    int ret;
 706
 707    if (env->insns_flags & PPC_FLOAT) {
 708        uint64_t fpscr = env->fpscr;
 709        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 710
 711        reg.id = KVM_REG_PPC_FPSCR;
 712        reg.addr = (uintptr_t)&fpscr;
 713        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 714        if (ret < 0) {
 715            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 716            return ret;
 717        }
 718
 719        for (i = 0; i < 32; i++) {
 720            uint64_t vsr[2];
 721
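                /* Build the 128-bit VSR image KVM expects: the FPR provides one
                 * doubleword and env->vsr[] the other, ordered by host
                 * endianness (see the #ifdef below). */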
 722#ifdef HOST_WORDS_BIGENDIAN
 723            vsr[0] = float64_val(env->fpr[i]);
 724            vsr[1] = env->vsr[i];
 725#else
 726            vsr[0] = env->vsr[i];
 727            vsr[1] = float64_val(env->fpr[i]);
 728#endif
 729            reg.addr = (uintptr_t) &vsr;
 730            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 731
 732            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 733            if (ret < 0) {
 734                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 735                        i, strerror(errno));
 736                return ret;
 737            }
 738        }
 739    }
 740
 741    if (env->insns_flags & PPC_ALTIVEC) {
 742        reg.id = KVM_REG_PPC_VSCR;
 743        reg.addr = (uintptr_t)&env->vscr;
 744        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 745        if (ret < 0) {
 746            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 747            return ret;
 748        }
 749
 750        for (i = 0; i < 32; i++) {
 751            reg.id = KVM_REG_PPC_VR(i);
 752            reg.addr = (uintptr_t)&env->avr[i];
 753            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 754            if (ret < 0) {
 755                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 756                return ret;
 757            }
 758        }
 759    }
 760
 761    return 0;
 762}
 763
 764static int kvm_get_fp(CPUState *cs)
 765{
 766    PowerPCCPU *cpu = POWERPC_CPU(cs);
 767    CPUPPCState *env = &cpu->env;
 768    struct kvm_one_reg reg;
 769    int i;
 770    int ret;
 771
 772    if (env->insns_flags & PPC_FLOAT) {
 773        uint64_t fpscr;
 774        bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 775
 776        reg.id = KVM_REG_PPC_FPSCR;
 777        reg.addr = (uintptr_t)&fpscr;
 778        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 779        if (ret < 0) {
 780            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 781            return ret;
 782        } else {
 783            env->fpscr = fpscr;
 784        }
 785
 786        for (i = 0; i < 32; i++) {
 787            uint64_t vsr[2];
 788
 789            reg.addr = (uintptr_t) &vsr;
 790            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 791
 792            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 793            if (ret < 0) {
 794                DPRINTF("Unable to get %s%d from KVM: %s\n",
 795                        vsx ? "VSR" : "FPR", i, strerror(errno));
 796                return ret;
 797            } else {
 798#ifdef HOST_WORDS_BIGENDIAN
 799                env->fpr[i] = vsr[0];
 800                if (vsx) {
 801                    env->vsr[i] = vsr[1];
 802                }
 803#else
 804                env->fpr[i] = vsr[1];
 805                if (vsx) {
 806                    env->vsr[i] = vsr[0];
 807                }
 808#endif
 809            }
 810        }
 811    }
 812
 813    if (env->insns_flags & PPC_ALTIVEC) {
 814        reg.id = KVM_REG_PPC_VSCR;
 815        reg.addr = (uintptr_t)&env->vscr;
 816        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 817        if (ret < 0) {
 818            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 819            return ret;
 820        }
 821
 822        for (i = 0; i < 32; i++) {
 823            reg.id = KVM_REG_PPC_VR(i);
 824            reg.addr = (uintptr_t)&env->avr[i];
 825            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 826            if (ret < 0) {
 827                DPRINTF("Unable to get VR%d from KVM: %s\n",
 828                        i, strerror(errno));
 829                return ret;
 830            }
 831        }
 832    }
 833
 834    return 0;
 835}
 836
 837#if defined(TARGET_PPC64)
 838static int kvm_get_vpa(CPUState *cs)
 839{
 840    PowerPCCPU *cpu = POWERPC_CPU(cs);
 841    CPUPPCState *env = &cpu->env;
 842    struct kvm_one_reg reg;
 843    int ret;
 844
 845    reg.id = KVM_REG_PPC_VPA_ADDR;
 846    reg.addr = (uintptr_t)&env->vpa_addr;
 847    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 848    if (ret < 0) {
 849        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 850        return ret;
 851    }
 852
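        /* The SLB shadow (and, below, the DTL) is transferred as a single
         * 128-bit register holding address and size, so the two env fields
         * must be adjacent, hence the asserts. */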
 853    assert((uintptr_t)&env->slb_shadow_size
 854           == ((uintptr_t)&env->slb_shadow_addr + 8));
 855    reg.id = KVM_REG_PPC_VPA_SLB;
 856    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 857    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 858    if (ret < 0) {
 859        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 860                strerror(errno));
 861        return ret;
 862    }
 863
 864    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 865    reg.id = KVM_REG_PPC_VPA_DTL;
 866    reg.addr = (uintptr_t)&env->dtl_addr;
 867    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 868    if (ret < 0) {
 869        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 870                strerror(errno));
 871        return ret;
 872    }
 873
 874    return 0;
 875}
 876
 877static int kvm_put_vpa(CPUState *cs)
 878{
 879    PowerPCCPU *cpu = POWERPC_CPU(cs);
 880    CPUPPCState *env = &cpu->env;
 881    struct kvm_one_reg reg;
 882    int ret;
 883
 884    /* SLB shadow or DTL can't be registered unless a master VPA is
 885     * registered.  That means when restoring state, if a VPA *is*
 886     * registered, we need to set that up first.  If not, we need to
 887     * deregister the others before deregistering the master VPA */
 888    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 889
 890    if (env->vpa_addr) {
 891        reg.id = KVM_REG_PPC_VPA_ADDR;
 892        reg.addr = (uintptr_t)&env->vpa_addr;
 893        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 894        if (ret < 0) {
 895            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 896            return ret;
 897        }
 898    }
 899
 900    assert((uintptr_t)&env->slb_shadow_size
 901           == ((uintptr_t)&env->slb_shadow_addr + 8));
 902    reg.id = KVM_REG_PPC_VPA_SLB;
 903    reg.addr = (uintptr_t)&env->slb_shadow_addr;
 904    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 905    if (ret < 0) {
 906        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 907        return ret;
 908    }
 909
 910    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 911    reg.id = KVM_REG_PPC_VPA_DTL;
 912    reg.addr = (uintptr_t)&env->dtl_addr;
 913    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 914    if (ret < 0) {
 915        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 916                strerror(errno));
 917        return ret;
 918    }
 919
 920    if (!env->vpa_addr) {
 921        reg.id = KVM_REG_PPC_VPA_ADDR;
 922        reg.addr = (uintptr_t)&env->vpa_addr;
 923        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 924        if (ret < 0) {
 925            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 926            return ret;
 927        }
 928    }
 929
 930    return 0;
 931}
 932#endif /* TARGET_PPC64 */
 933
 934int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 935{
 936    CPUPPCState *env = &cpu->env;
 937    struct kvm_sregs sregs;
 938    int i;
 939
 940    sregs.pvr = env->spr[SPR_PVR];
 941
 942    sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 943
 944    /* Sync SLB */
 945#ifdef TARGET_PPC64
 946    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 947        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 948        if (env->slb[i].esid & SLB_ESID_V) {
 949            sregs.u.s.ppc64.slb[i].slbe |= i;
 950        }
 951        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 952    }
 953#endif
 954
 955    /* Sync SRs */
 956    for (i = 0; i < 16; i++) {
 957        sregs.u.s.ppc32.sr[i] = env->sr[i];
 958    }
 959
 960    /* Sync BATs */
 961    for (i = 0; i < 8; i++) {
 962        /* Beware. We have to swap upper and lower bits here */
 963        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 964            | env->DBAT[1][i];
 965        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 966            | env->IBAT[1][i];
 967    }
 968
 969    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 970}
 971
 972int kvm_arch_put_registers(CPUState *cs, int level)
 973{
 974    PowerPCCPU *cpu = POWERPC_CPU(cs);
 975    CPUPPCState *env = &cpu->env;
 976    struct kvm_regs regs;
 977    int ret;
 978    int i;
 979
 980    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 981    if (ret < 0) {
 982        return ret;
 983    }
 984
 985    regs.ctr = env->ctr;
 986    regs.lr  = env->lr;
 987    regs.xer = cpu_read_xer(env);
 988    regs.msr = env->msr;
 989    regs.pc = env->nip;
 990
 991    regs.srr0 = env->spr[SPR_SRR0];
 992    regs.srr1 = env->spr[SPR_SRR1];
 993
 994    regs.sprg0 = env->spr[SPR_SPRG0];
 995    regs.sprg1 = env->spr[SPR_SPRG1];
 996    regs.sprg2 = env->spr[SPR_SPRG2];
 997    regs.sprg3 = env->spr[SPR_SPRG3];
 998    regs.sprg4 = env->spr[SPR_SPRG4];
 999    regs.sprg5 = env->spr[SPR_SPRG5];
1000    regs.sprg6 = env->spr[SPR_SPRG6];
1001    regs.sprg7 = env->spr[SPR_SPRG7];
1002
1003    regs.pid = env->spr[SPR_BOOKE_PID];
1004
1005    for (i = 0; i < 32; i++)
1006        regs.gpr[i] = env->gpr[i];
1007
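        /* Pack the eight 4-bit CR fields into the single 32-bit image KVM
         * expects; crf[0] (CR field 0) ends up in the most significant nibble. */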
1008    regs.cr = 0;
1009    for (i = 0; i < 8; i++) {
1010        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1011    }
1012
1013    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1014    if (ret < 0)
1015        return ret;
1016
1017    kvm_put_fp(cs);
1018
1019    if (env->tlb_dirty) {
1020        kvm_sw_tlb_put(cpu);
1021        env->tlb_dirty = false;
1022    }
1023
1024    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1025        ret = kvmppc_put_books_sregs(cpu);
1026        if (ret < 0) {
1027            return ret;
1028        }
1029    }
1030
1031    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1032        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1033    }
1034
1035    if (cap_one_reg) {
1036        int i;
1037
1038        /* We deliberately ignore errors here: for kernels which have
1039         * the ONE_REG calls but don't support the specific
1040         * registers, there's a reasonable chance things will still
1041         * work, at least until we try to migrate. */
1042        for (i = 0; i < 1024; i++) {
1043            uint64_t id = env->spr_cb[i].one_reg_id;
1044
1045            if (id != 0) {
1046                kvm_put_one_spr(cs, id, i);
1047            }
1048        }
1049
1050#ifdef TARGET_PPC64
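            /* If a transaction is active or suspended (MSR[TS] non-zero), also
             * transfer the checkpointed transactional-memory state. */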
1051        if (msr_ts) {
1052            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1053                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1054            }
1055            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1056                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1057            }
1058            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1059            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1060            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1061            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1062            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1063            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1064            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1065            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1066            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1067            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1068        }
1069
1070        if (cap_papr) {
1071            if (kvm_put_vpa(cs) < 0) {
1072                DPRINTF("Warning: Unable to set VPA information to KVM\n");
1073            }
1074        }
1075
1076        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1077#endif /* TARGET_PPC64 */
1078    }
1079
1080    return ret;
1081}
1082
1083static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1084{
1085     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1086}
1087
1088static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1089{
1090    CPUPPCState *env = &cpu->env;
1091    struct kvm_sregs sregs;
1092    int ret;
1093
1094    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1095    if (ret < 0) {
1096        return ret;
1097    }
1098
1099    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1100        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1101        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1102        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1103        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1104        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1105        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1106        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1107        env->spr[SPR_DECR] = sregs.u.e.dec;
1108        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1109        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1110        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1111    }
1112
1113    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1114        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1115        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1116        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1117        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1118        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1119    }
1120
1121    if (sregs.u.e.features & KVM_SREGS_E_64) {
1122        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1123    }
1124
1125    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1126        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1127    }
1128
1129    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1130        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1131        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1132        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1133        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1134        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1135        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1136        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1137        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1138        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1139        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1140        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1141        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1142        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1143        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1144        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1145        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1146        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1147        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1148        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1149        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1150        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1151        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1152        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1153        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1154        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1155        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1156        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1157        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1158        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1159        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1160        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1161        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1162
1163        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1164            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1165            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1166            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1167            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1168            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1169            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1170        }
1171
1172        if (sregs.u.e.features & KVM_SREGS_E_PM) {
1173            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1174            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1175        }
1176
1177        if (sregs.u.e.features & KVM_SREGS_E_PC) {
1178            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1179            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1180            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1181            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1182        }
1183    }
1184
1185    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1186        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1187        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1188        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1189        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1190        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1191        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1192        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1193        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1194        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1195        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1196    }
1197
1198    if (sregs.u.e.features & KVM_SREGS_EXP) {
1199        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1200    }
1201
1202    if (sregs.u.e.features & KVM_SREGS_E_PD) {
1203        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1204        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1205    }
1206
1207    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1208        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1209        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1210        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1211
1212        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1213            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1214            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1215        }
1216    }
1217
1218    return 0;
1219}
1220
1221static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1222{
1223    CPUPPCState *env = &cpu->env;
1224    struct kvm_sregs sregs;
1225    int ret;
1226    int i;
1227
1228    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1229    if (ret < 0) {
1230        return ret;
1231    }
1232
1233    if (!cpu->vhyp) {
1234        ppc_store_sdr1(env, sregs.u.s.sdr1);
1235    }
1236
1237    /* Sync SLB */
1238#ifdef TARGET_PPC64
1239    /*
1240     * The packed SLB array we get from KVM_GET_SREGS only contains
1241     * information about valid entries. So we flush our internal copy
1242     * to get rid of stale ones, then put all valid SLB entries back
1243     * in.
1244     */
1245    memset(env->slb, 0, sizeof(env->slb));
1246    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1247        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1248        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1249        /*
1250         * Only restore valid entries
1251         */
1252        if (rb & SLB_ESID_V) {
1253            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1254        }
1255    }
1256#endif
1257
1258    /* Sync SRs */
1259    for (i = 0; i < 16; i++) {
1260        env->sr[i] = sregs.u.s.ppc32.sr[i];
1261    }
1262
1263    /* Sync BATs */
1264    for (i = 0; i < 8; i++) {
1265        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1266        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1267        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1268        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1269    }
1270
1271    return 0;
1272}
1273
1274int kvm_arch_get_registers(CPUState *cs)
1275{
1276    PowerPCCPU *cpu = POWERPC_CPU(cs);
1277    CPUPPCState *env = &cpu->env;
1278    struct kvm_regs regs;
1279    uint32_t cr;
1280    int i, ret;
1281
1282    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1283    if (ret < 0)
1284        return ret;
1285
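        /* Unpack the 32-bit CR image into the eight 4-bit crf[] fields,
         * starting from the least significant nibble (CR field 7). */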
1286    cr = regs.cr;
1287    for (i = 7; i >= 0; i--) {
1288        env->crf[i] = cr & 15;
1289        cr >>= 4;
1290    }
1291
1292    env->ctr = regs.ctr;
1293    env->lr = regs.lr;
1294    cpu_write_xer(env, regs.xer);
1295    env->msr = regs.msr;
1296    env->nip = regs.pc;
1297
1298    env->spr[SPR_SRR0] = regs.srr0;
1299    env->spr[SPR_SRR1] = regs.srr1;
1300
1301    env->spr[SPR_SPRG0] = regs.sprg0;
1302    env->spr[SPR_SPRG1] = regs.sprg1;
1303    env->spr[SPR_SPRG2] = regs.sprg2;
1304    env->spr[SPR_SPRG3] = regs.sprg3;
1305    env->spr[SPR_SPRG4] = regs.sprg4;
1306    env->spr[SPR_SPRG5] = regs.sprg5;
1307    env->spr[SPR_SPRG6] = regs.sprg6;
1308    env->spr[SPR_SPRG7] = regs.sprg7;
1309
1310    env->spr[SPR_BOOKE_PID] = regs.pid;
1311
1312    for (i = 0; i < 32; i++)
1313        env->gpr[i] = regs.gpr[i];
1314
1315    kvm_get_fp(cs);
1316
1317    if (cap_booke_sregs) {
1318        ret = kvmppc_get_booke_sregs(cpu);
1319        if (ret < 0) {
1320            return ret;
1321        }
1322    }
1323
1324    if (cap_segstate) {
1325        ret = kvmppc_get_books_sregs(cpu);
1326        if (ret < 0) {
1327            return ret;
1328        }
1329    }
1330
1331    if (cap_hior) {
1332        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1333    }
1334
1335    if (cap_one_reg) {
1336        int i;
1337
1338        /* We deliberately ignore errors here: for kernels which have
1339         * the ONE_REG calls but don't support the specific
1340         * registers, there's a reasonable chance things will still
1341         * work, at least until we try to migrate. */
1342        for (i = 0; i < 1024; i++) {
1343            uint64_t id = env->spr_cb[i].one_reg_id;
1344
1345            if (id != 0) {
1346                kvm_get_one_spr(cs, id, i);
1347            }
1348        }
1349
1350#ifdef TARGET_PPC64
1351        if (msr_ts) {
1352            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1353                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1354            }
1355            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1356                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1357            }
1358            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1359            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1360            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1361            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1362            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1363            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1364            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1365            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1366            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1367            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1368        }
1369
1370        if (cap_papr) {
1371            if (kvm_get_vpa(cs) < 0) {
1372                DPRINTF("Warning: Unable to get VPA information from KVM\n");
1373            }
1374        }
1375
1376        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1377#endif
1378    }
1379
1380    return 0;
1381}
1382
1383int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1384{
1385    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1386
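        /* Only the external interrupt pin is forwarded to KVM; all other
         * interrupt inputs are ignored here. */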
1387    if (irq != PPC_INTERRUPT_EXT) {
1388        return 0;
1389    }
1390
1391    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1392        return 0;
1393    }
1394
1395    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1396
1397    return 0;
1398}
1399
1400#if defined(TARGET_PPCEMB)
1401#define PPC_INPUT_INT PPC40x_INPUT_INT
1402#elif defined(TARGET_PPC64)
1403#define PPC_INPUT_INT PPC970_INPUT_INT
1404#else
1405#define PPC_INPUT_INT PPC6xx_INPUT_INT
1406#endif
1407
1408void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1409{
1410    PowerPCCPU *cpu = POWERPC_CPU(cs);
1411    CPUPPCState *env = &cpu->env;
1412    int r;
1413    unsigned irq;
1414
1415    qemu_mutex_lock_iothread();
1416
1417    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1418     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1419    if (!cap_interrupt_level &&
1420        run->ready_for_interrupt_injection &&
1421        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1422        (env->irq_input_state & (1<<PPC_INPUT_INT)))
1423    {
1424        /* For now KVM disregards the 'irq' argument. However, in the
1425         * future KVM could cache it in-kernel to avoid a heavyweight exit
1426         * when reading the UIC.
1427         */
1428        irq = KVM_INTERRUPT_SET;
1429
1430        DPRINTF("injected interrupt %d\n", irq);
1431        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1432        if (r < 0) {
1433            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1434        }
1435
1436        /* Always wake up soon in case the interrupt was level based */
1437        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1438                       (NANOSECONDS_PER_SECOND / 50));
1439    }
1440
1441    /* We don't know if there are more interrupts pending after this. However,
1442     * the guest will return to userspace in the course of handling this one
1443     * anyway, so we will get a chance to deliver the rest. */
1444
1445    qemu_mutex_unlock_iothread();
1446}
1447
1448MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1449{
1450    return MEMTXATTRS_UNSPECIFIED;
1451}
1452
1453int kvm_arch_process_async_events(CPUState *cs)
1454{
1455    return cs->halted;
1456}
1457
1458static int kvmppc_handle_halt(PowerPCCPU *cpu)
1459{
1460    CPUState *cs = CPU(cpu);
1461    CPUPPCState *env = &cpu->env;
1462
1463    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1464        cs->halted = 1;
1465        cs->exception_index = EXCP_HLT;
1466    }
1467
1468    return 0;
1469}
1470
1471/* map dcr access to existing qemu dcr emulation */
1472static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1473{
1474    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1475        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
1476
1477    return 0;
1478}
1479
1480static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1481{
1482    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1483        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1484
1485    return 0;
1486}
1487
1488int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1489{
1490    /* Mixed endian case is not handled */
1491    uint32_t sc = debug_inst_opcode;
1492
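        /* Save the original instruction, then patch in the trap opcode that
         * KVM reported via KVM_REG_PPC_DEBUG_INST at vcpu init. */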
1493    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1494                            sizeof(sc), 0) ||
1495        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1496        return -EINVAL;
1497    }
1498
1499    return 0;
1500}
1501
1502int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1503{
1504    uint32_t sc;
1505
1506    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1507        sc != debug_inst_opcode ||
1508        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1509                            sizeof(sc), 1)) {
1510        return -EINVAL;
1511    }
1512
1513    return 0;
1514}
1515
1516static int find_hw_breakpoint(target_ulong addr, int type)
1517{
1518    int n;
1519
1520    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1521           <= ARRAY_SIZE(hw_debug_points));
1522
1523    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1524        if (hw_debug_points[n].addr == addr &&
1525             hw_debug_points[n].type == type) {
1526            return n;
1527        }
1528    }
1529
1530    return -1;
1531}
1532
1533static int find_hw_watchpoint(target_ulong addr, int *flag)
1534{
1535    int n;
1536
1537    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1538    if (n >= 0) {
1539        *flag = BP_MEM_ACCESS;
1540        return n;
1541    }
1542
1543    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1544    if (n >= 0) {
1545        *flag = BP_MEM_WRITE;
1546        return n;
1547    }
1548
1549    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1550    if (n >= 0) {
1551        *flag = BP_MEM_READ;
1552        return n;
1553    }
1554
1555    return -1;
1556}
1557
1558int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1559                                  target_ulong len, int type)
1560{
1561    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1562        return -ENOBUFS;
1563    }
1564
1565    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1566    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1567
1568    switch (type) {
1569    case GDB_BREAKPOINT_HW:
1570        if (nb_hw_breakpoint >= max_hw_breakpoint) {
1571            return -ENOBUFS;
1572        }
1573
1574        if (find_hw_breakpoint(addr, type) >= 0) {
1575            return -EEXIST;
1576        }
1577
1578        nb_hw_breakpoint++;
1579        break;
1580
1581    case GDB_WATCHPOINT_WRITE:
1582    case GDB_WATCHPOINT_READ:
1583    case GDB_WATCHPOINT_ACCESS:
1584        if (nb_hw_watchpoint >= max_hw_watchpoint) {
1585            return -ENOBUFS;
1586        }
1587
1588        if (find_hw_breakpoint(addr, type) >= 0) {
1589            return -EEXIST;
1590        }
1591
1592        nb_hw_watchpoint++;
1593        break;
1594
1595    default:
1596        return -ENOSYS;
1597    }
1598
1599    return 0;
1600}
1601
1602int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1603                                  target_ulong len, int type)
1604{
1605    int n;
1606
1607    n = find_hw_breakpoint(addr, type);
1608    if (n < 0) {
1609        return -ENOENT;
1610    }
1611
1612    switch (type) {
1613    case GDB_BREAKPOINT_HW:
1614        nb_hw_breakpoint--;
1615        break;
1616
1617    case GDB_WATCHPOINT_WRITE:
1618    case GDB_WATCHPOINT_READ:
1619    case GDB_WATCHPOINT_ACCESS:
1620        nb_hw_watchpoint--;
1621        break;
1622
1623    default:
1624        return -ENOSYS;
1625    }
1626    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1627
1628    return 0;
1629}
1630
1631void kvm_arch_remove_all_hw_breakpoints(void)
1632{
1633    nb_hw_breakpoint = nb_hw_watchpoint = 0;
1634}
1635
1636void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1637{
1638    int n;
1639
1640    /* Software Breakpoint updates */
1641    if (kvm_sw_breakpoints_active(cs)) {
1642        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1643    }
1644
1645    assert((nb_hw_breakpoint + nb_hw_watchpoint)
1646           <= ARRAY_SIZE(hw_debug_points));
1647    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1648
1649    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1650        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1651        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1652        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1653            switch (hw_debug_points[n].type) {
1654            case GDB_BREAKPOINT_HW:
1655                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1656                break;
1657            case GDB_WATCHPOINT_WRITE:
1658                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1659                break;
1660            case GDB_WATCHPOINT_READ:
1661                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1662                break;
1663            case GDB_WATCHPOINT_ACCESS:
1664                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1665                                        KVMPPC_DEBUG_WATCH_READ;
1666                break;
1667            default:
1668                cpu_abort(cs, "Unsupported breakpoint type\n");
1669            }
1670            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1671        }
1672    }
1673}
1674
1675static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1676{
1677    CPUState *cs = CPU(cpu);
1678    CPUPPCState *env = &cpu->env;
1679    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1680    int handle = 0;
1681    int n;
1682    int flag = 0;
1683
1684    if (cs->singlestep_enabled) {
1685        handle = 1;
1686    } else if (arch_info->status) {
1687        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1688            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1689                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1690                if (n >= 0) {
1691                    handle = 1;
1692                }
1693            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1694                                            KVMPPC_DEBUG_WATCH_WRITE)) {
1695                n = find_hw_watchpoint(arch_info->address, &flag);
1696                if (n >= 0) {
1697                    handle = 1;
1698                    cs->watchpoint_hit = &hw_watchpoint;
1699                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
1700                    hw_watchpoint.flags = flag;
1701                }
1702            }
1703        }
1704    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1705        handle = 1;
1706    } else {
1707        /* QEMU cannot handle this debug exception, so inject a
1708         * program exception into the guest instead;
1709         * yes, a program exception, NOT a debug exception.
1710         * While QEMU owns the debug resources, debug exceptions must
1711         * always reach it; to guarantee this we set MSR_DE and also
1712         * MSRP_DEP so the guest cannot change MSR_DE.
1713         * When the debug resources are instead emulated for the guest,
1714         * the guest must control MSR_DE, enabling and disabling the
1715         * debug interrupt as it needs.
1716         * Supporting both configurations at once is not possible, so
1717         * debug resources cannot be shared between QEMU and the guest
1718         * on the BookE architecture.
1719         * In the current design QEMU takes priority: if QEMU is using
1720         * the debug resources, the guest cannot use them.
1721         * For software breakpoints QEMU uses a privileged instruction,
1722         * so we cannot be here because of a guest-set debug exception;
1723         * the only possibility is that the guest executed a privileged
1724         * or illegal instruction, which is why we inject a program
1725         * interrupt.
1726         */
1727
1728        cpu_synchronize_state(cs);
1729        /* env->nip is the PC, so increment it by 4 to use
1730         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1731         */
1732        env->nip += 4;
1733        cs->exception_index = POWERPC_EXCP_PROGRAM;
1734        env->error_code = POWERPC_EXCP_INVAL;
1735        ppc_cpu_do_interrupt(cs);
1736    }
1737
1738    return handle;
1739}
1740
1741int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1742{
1743    PowerPCCPU *cpu = POWERPC_CPU(cs);
1744    CPUPPCState *env = &cpu->env;
1745    int ret;
1746
1747    qemu_mutex_lock_iothread();
1748
1749    switch (run->exit_reason) {
1750    case KVM_EXIT_DCR:
1751        if (run->dcr.is_write) {
1752            DPRINTF("handle dcr write\n");
1753            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1754        } else {
1755            DPRINTF("handle dcr read\n");
1756            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1757        }
1758        break;
1759    case KVM_EXIT_HLT:
1760        DPRINTF("handle halt\n");
1761        ret = kvmppc_handle_halt(cpu);
1762        break;
1763#if defined(TARGET_PPC64)
1764    case KVM_EXIT_PAPR_HCALL:
1765        DPRINTF("handle PAPR hypercall\n");
1766        run->papr_hcall.ret = spapr_hypercall(cpu,
1767                                              run->papr_hcall.nr,
1768                                              run->papr_hcall.args);
1769        ret = 0;
1770        break;
1771#endif
1772    case KVM_EXIT_EPR:
1773        DPRINTF("handle epr\n");
1774        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1775        ret = 0;
1776        break;
1777    case KVM_EXIT_WATCHDOG:
1778        DPRINTF("handle watchdog expiry\n");
1779        watchdog_perform_action();
1780        ret = 0;
1781        break;
1782
1783    case KVM_EXIT_DEBUG:
1784        DPRINTF("handle debug exception\n");
1785        if (kvm_handle_debug(cpu, run)) {
1786            ret = EXCP_DEBUG;
1787            break;
1788        }
1789        /* re-enter, this exception was guest-internal */
1790        ret = 0;
1791        break;
1792
1793    default:
1794        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1795        ret = -1;
1796        break;
1797    }
1798
1799    qemu_mutex_unlock_iothread();
1800    return ret;
1801}
1802
1803int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1804{
1805    CPUState *cs = CPU(cpu);
1806    uint32_t bits = tsr_bits;
1807    struct kvm_one_reg reg = {
1808        .id = KVM_REG_PPC_OR_TSR,
1809        .addr = (uintptr_t) &bits,
1810    };
1811
1812    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1813}
1814
1815int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1816{
1817
1818    CPUState *cs = CPU(cpu);
1819    uint32_t bits = tsr_bits;
1820    struct kvm_one_reg reg = {
1821        .id = KVM_REG_PPC_CLEAR_TSR,
1822        .addr = (uintptr_t) &bits,
1823    };
1824
1825    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1826}
1827
1828int kvmppc_set_tcr(PowerPCCPU *cpu)
1829{
1830    CPUState *cs = CPU(cpu);
1831    CPUPPCState *env = &cpu->env;
1832    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1833
1834    struct kvm_one_reg reg = {
1835        .id = KVM_REG_PPC_TCR,
1836        .addr = (uintptr_t) &tcr,
1837    };
1838
1839    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1840}
1841
1842int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1843{
1844    CPUState *cs = CPU(cpu);
1845    int ret;
1846
1847    if (!kvm_enabled()) {
1848        return -1;
1849    }
1850
1851    if (!cap_ppc_watchdog) {
1852        printf("warning: KVM does not support watchdog\n");
1853        return -1;
1854    }
1855
1856    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1857    if (ret < 0) {
1858        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1859                __func__, strerror(-ret));
1860        return ret;
1861    }
1862
1863    return ret;
1864}
1865
1866static int read_cpuinfo(const char *field, char *value, int len)
1867{
1868    FILE *f;
1869    int ret = -1;
1870    int field_len = strlen(field);
1871    char line[512];
1872
1873    f = fopen("/proc/cpuinfo", "r");
1874    if (!f) {
1875        return -1;
1876    }
1877
1878    do {
1879        if (!fgets(line, sizeof(line), f)) {
1880            break;
1881        }
1882        if (!strncmp(line, field, field_len)) {
1883            pstrcpy(value, len, line);
1884            ret = 0;
1885            break;
1886        }
1887    } while (*line);
1888
1889    fclose(f);
1890
1891    return ret;
1892}
1893
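/*
 * Illustrative example (values are typical, not taken from any particular
 * host): on a POWER host, /proc/cpuinfo contains a line such as
 *
 *     timebase        : 512000000
 *
 * read_cpuinfo("timebase", ...) above returns that whole line, and
 * kvmppc_get_tbfreq() below extracts the value after the ':' with atoi(),
 * falling back to NANOSECONDS_PER_SECOND when the field is missing.
 */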
1894uint32_t kvmppc_get_tbfreq(void)
1895{
1896    char line[512];
1897    char *ns;
1898    uint32_t retval = NANOSECONDS_PER_SECOND;
1899
1900    if (read_cpuinfo("timebase", line, sizeof(line))) {
1901        return retval;
1902    }
1903
1904    if (!(ns = strchr(line, ':'))) {
1905        return retval;
1906    }
1907
1908    ns++;
1909
1910    return atoi(ns);
1911}
1912
1913bool kvmppc_get_host_serial(char **value)
1914{
1915    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1916                               NULL);
1917}
1918
1919bool kvmppc_get_host_model(char **value)
1920{
1921    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1922}
1923
1924/* Try to find a device tree node for a CPU with clock-frequency property */
1925static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1926{
1927    struct dirent *dirp;
1928    DIR *dp;
1929
1930    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1931        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1932        return -1;
1933    }
1934
1935    buf[0] = '\0';
1936    while ((dirp = readdir(dp)) != NULL) {
1937        FILE *f;
1938        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1939                 dirp->d_name);
1940        f = fopen(buf, "r");
1941        if (f) {
1942            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1943            fclose(f);
1944            break;
1945        }
1946        buf[0] = '\0';
1947    }
1948    closedir(dp);
1949    if (buf[0] == '\0') {
1950        printf("Unknown host!\n");
1951        return -1;
1952    }
1953
1954    return 0;
1955}
1956
1957static uint64_t kvmppc_read_int_dt(const char *filename)
1958{
1959    union {
1960        uint32_t v32;
1961        uint64_t v64;
1962    } u;
1963    FILE *f;
1964    int len;
1965
1966    f = fopen(filename, "rb");
1967    if (!f) {
1968        return -1;
1969    }
1970
1971    len = fread(&u, 1, sizeof(u), f);
1972    fclose(f);
1973    switch (len) {
1974    case 4:
1975        /* property is a 32-bit quantity */
1976        return be32_to_cpu(u.v32);
1977    case 8:
1978        return be64_to_cpu(u.v64);
1979    }
1980
1981    return 0;
1982}
1983
1984/* Read a CPU node property from the host device tree that's a single
1985 * integer (32-bit or 64-bit).  Returns -1 if the property can't be
1986 * found or opened, and 0 if the format of its contents isn't
1987 * understood. */
1988static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1989{
1990    char buf[PATH_MAX], *tmp;
1991    uint64_t val;
1992
1993    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1994        return -1;
1995    }
1996
1997    tmp = g_strdup_printf("%s/%s", buf, propname);
1998    val = kvmppc_read_int_dt(tmp);
1999    g_free(tmp);
2000
2001    return val;
2002}
2003
2004uint64_t kvmppc_get_clockfreq(void)
2005{
2006    return kvmppc_read_int_cpu_dt("clock-frequency");
2007}
2008
2009uint32_t kvmppc_get_vmx(void)
2010{
2011    return kvmppc_read_int_cpu_dt("ibm,vmx");
2012}
2013
2014uint32_t kvmppc_get_dfp(void)
2015{
2016    return kvmppc_read_int_cpu_dt("ibm,dfp");
2017}
2018
2019static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2020{
2021    PowerPCCPU *cpu = ppc_env_get_cpu(env);
2022    CPUState *cs = CPU(cpu);
2023
2024    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2025        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2026        return 0;
2027    }
2028
2029    return 1;
2030}
2031
2032int kvmppc_get_hasidle(CPUPPCState *env)
2033{
2034    struct kvm_ppc_pvinfo pvinfo;
2035
2036    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2037        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2038        return 1;
2039    }
2040
2041    return 0;
2042}
2043
2044int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2045{
2046    uint32_t *hc = (uint32_t *)buf;
2047    struct kvm_ppc_pvinfo pvinfo;
2048
2049    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2050        memcpy(buf, pvinfo.hcall, buf_len);
2051        return 0;
2052    }
2053
2054    /*
2055     * Fall back to hypercalls that always fail, regardless of endianness:
2056     *
2057     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2058     *     li r3, -1
2059     *     b .+8       (becomes nop in wrong endian)
2060     *     bswap32(li r3, -1)
2061     */
2062
2063    hc[0] = cpu_to_be32(0x08000048);
2064    hc[1] = cpu_to_be32(0x3860ffff);
2065    hc[2] = cpu_to_be32(0x48000008);
2066    hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2067
2068    return 1;
2069}
2070
2071static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2072{
2073    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2074}
2075
2076void kvmppc_enable_logical_ci_hcalls(void)
2077{
2078    /*
2079     * FIXME: it would be nice to detect the cases where a device in
2080     * use requires the in-kernel implementation of these hcalls but
2081     * the kernel lacks them, and to produce a warning in that
2082     * case.
2083     */
2084    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2085    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2086}
2087
2088void kvmppc_enable_set_mode_hcall(void)
2089{
2090    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2091}
2092
2093void kvmppc_enable_clear_ref_mod_hcalls(void)
2094{
2095    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2096    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2097}
2098
2099void kvmppc_set_papr(PowerPCCPU *cpu)
2100{
2101    CPUState *cs = CPU(cpu);
2102    int ret;
2103
2104    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2105    if (ret) {
2106        error_report("This vCPU type or KVM version does not support PAPR");
2107        exit(1);
2108    }
2109
2110    /* Update the capability flag so we sync the right information
2111     * with kvm */
2112    cap_papr = 1;
2113}
2114
2115int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2116{
2117    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2118}
2119
2120void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2121{
2122    CPUState *cs = CPU(cpu);
2123    int ret;
2124
2125    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2126    if (ret && mpic_proxy) {
2127        error_report("This KVM version does not support EPR");
2128        exit(1);
2129    }
2130}
2131
2132int kvmppc_smt_threads(void)
2133{
2134    return cap_ppc_smt ? cap_ppc_smt : 1;
2135}
2136
2137#ifdef TARGET_PPC64
2138off_t kvmppc_alloc_rma(void **rma)
2139{
2140    off_t size;
2141    int fd;
2142    struct kvm_allocate_rma ret;
2143
2144    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2145     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2146     *                      not necessary on this hardware
2147     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2148     *
2149     * FIXME: We should allow the user to force contiguous RMA
2150     * allocation in the cap_ppc_rma==1 case.
2151     */
2152    if (cap_ppc_rma < 2) {
2153        return 0;
2154    }
2155
2156    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2157    if (fd < 0) {
2158        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2159                strerror(errno));
2160        return -1;
2161    }
2162
2163    size = MIN(ret.rma_size, 256ul << 20);
2164
2165    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2166    if (*rma == MAP_FAILED) {
2167        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2168        return -1;
2169    }
2170
2171    return size;
2172}
2173
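/*
 * Worked example for the clamp applied below (illustrative numbers): with
 * a best hardware page shift of 16 (64 KiB pages) and a hash_shift of 24
 * (a 16 MiB HPT), the RMA is limited to 1ULL << (16 + 24 - 7) = 2^33
 * bytes, i.e. 8 GiB.
 */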
2174uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2175{
2176    struct kvm_ppc_smmu_info info;
2177    long rampagesize, best_page_shift;
2178    int i;
2179
2180    if (cap_ppc_rma >= 2) {
2181        return current_size;
2182    }
2183
2184    /* Find the largest hardware supported page size that's less than
2185     * or equal to the (logical) backing page size of guest RAM */
2186    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2187    rampagesize = qemu_getrampagesize();
2188    best_page_shift = 0;
2189
2190    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2191        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2192
2193        if (!sps->page_shift) {
2194            continue;
2195        }
2196
2197        if ((sps->page_shift > best_page_shift)
2198            && ((1UL << sps->page_shift) <= rampagesize)) {
2199            best_page_shift = sps->page_shift;
2200        }
2201    }
2202
2203    return MIN(current_size,
2204               1ULL << (best_page_shift + hash_shift - 7));
2205}
2206#endif
2207
2208bool kvmppc_spapr_use_multitce(void)
2209{
2210    return cap_spapr_multitce;
2211}
2212
2213int kvmppc_spapr_enable_inkernel_multitce(void)
2214{
2215    int ret;
2216
2217    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2218                            H_PUT_TCE_INDIRECT, 1);
2219    if (!ret) {
2220        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2221                                H_STUFF_TCE, 1);
2222    }
2223
2224    return ret;
2225}
2226
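/*
 * Sizing example (illustrative numbers): a 1 GiB DMA window with a 64 KiB
 * IOMMU page size (page_shift == 16) needs nb_table = 16384 TCE entries,
 * so the table mapped below is 16384 * sizeof(uint64_t) = 128 KiB long.
 */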
2227void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2228                              uint64_t bus_offset, uint32_t nb_table,
2229                              int *pfd, bool need_vfio)
2230{
2231    long len;
2232    int fd;
2233    void *table;
2234
2235    /* Must set fd to -1 so we don't try to munmap when called for
2236     * destroying the table, which the upper layers -will- do
2237     */
2238    *pfd = -1;
2239    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2240        return NULL;
2241    }
2242
2243    if (cap_spapr_tce_64) {
2244        struct kvm_create_spapr_tce_64 args = {
2245            .liobn = liobn,
2246            .page_shift = page_shift,
2247            .offset = bus_offset >> page_shift,
2248            .size = nb_table,
2249            .flags = 0
2250        };
2251        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2252        if (fd < 0) {
2253            fprintf(stderr,
2254                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2255                    liobn);
2256            return NULL;
2257        }
2258    } else if (cap_spapr_tce) {
2259        uint64_t window_size = (uint64_t) nb_table << page_shift;
2260        struct kvm_create_spapr_tce args = {
2261            .liobn = liobn,
2262            .window_size = window_size,
2263        };
2264        if ((window_size != args.window_size) || bus_offset) {
2265            return NULL;
2266        }
2267        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2268        if (fd < 0) {
2269            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2270                    liobn);
2271            return NULL;
2272        }
2273    } else {
2274        return NULL;
2275    }
2276
2277    len = nb_table * sizeof(uint64_t);
2278    /* FIXME: round this up to page size */
2279
2280    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2281    if (table == MAP_FAILED) {
2282        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2283                liobn);
2284        close(fd);
2285        return NULL;
2286    }
2287
2288    *pfd = fd;
2289    return table;
2290}
2291
2292int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2293{
2294    long len;
2295
2296    if (fd < 0) {
2297        return -1;
2298    }
2299
2300    len = nb_table * sizeof(uint64_t);
2301    if ((munmap(table, len) < 0) ||
2302        (close(fd) < 0)) {
2303        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2304                strerror(errno));
2305        /* Leak the table */
2306    }
2307
2308    return 0;
2309}
2310
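/*
 * The hash table size is expressed as a power-of-two shift: for example,
 * the value 24 returned below for old HV kernels corresponds to a
 * 1 << 24 byte (16 MiB) HPT.
 */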
2311int kvmppc_reset_htab(int shift_hint)
2312{
2313    uint32_t shift = shift_hint;
2314
2315    if (!kvm_enabled()) {
2316        /* Full emulation, tell caller to allocate htab itself */
2317        return 0;
2318    }
2319    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2320        int ret;
2321        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2322        if (ret == -ENOTTY) {
2323            /* At least some versions of PR KVM advertise the
2324             * capability, but don't implement the ioctl().  Oops.
2325             * Return 0 so that we allocate the htab in qemu, as is
2326             * correct for PR. */
2327            return 0;
2328        } else if (ret < 0) {
2329            return ret;
2330        }
2331        return shift;
2332    }
2333
2334    /* We have a kernel that predates the htab reset calls.  For PR
2335     * KVM we need to allocate the htab ourselves; an HV KVM of this
2336     * era has already allocated a fixed 16MB hash table. */
2337    if (kvmppc_is_pr(kvm_state)) {
2338        /* PR - tell caller to allocate htab */
2339        return 0;
2340    } else {
2341        /* HV - assume 16MB kernel allocated htab */
2342        return 24;
2343    }
2344}
2345
2346static inline uint32_t mfpvr(void)
2347{
2348    uint32_t pvr;
2349
2350    asm ("mfpvr %0"
2351         : "=r"(pvr));
2352    return pvr;
2353}
2354
2355static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2356{
2357    if (on) {
2358        *word |= flags;
2359    } else {
2360        *word &= ~flags;
2361    }
2362}
2363
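/*
 * How the "ibm,vmx" level is interpreted below: vmx > 0 enables Altivec
 * (PPC_ALTIVEC) and vmx > 1 additionally enables VSX (PPC2_VSX); a value
 * of -1 means the property could not be read from the host device tree,
 * so the class defaults are left untouched.
 */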
2364static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2365{
2366    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2367    uint32_t vmx = kvmppc_get_vmx();
2368    uint32_t dfp = kvmppc_get_dfp();
2369    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2370    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2371
2372    /* Now fix up the class with information we can query from the host */
2373    pcc->pvr = mfpvr();
2374
2375    if (vmx != -1) {
2376        /* Only override when we know what the host supports */
2377        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2378        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2379    }
2380    if (dfp != -1) {
2381        /* Only override when we know what the host supports */
2382        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2383    }
2384
2385    if (dcache_size != -1) {
2386        pcc->l1_dcache_size = dcache_size;
2387    }
2388
2389    if (icache_size != -1) {
2390        pcc->l1_icache_size = icache_size;
2391    }
2392
2393#if defined(TARGET_PPC64)
2394    pcc->radix_page_info = kvm_get_radix_page_info();
2395
2396    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2397        /*
2398         * POWER9 DD1 has some bugs which make it not really ISA 3.00
2399         * compliant.  More importantly, advertising ISA 3.00
2400         * architected mode may prevent guests from activating
2401         * necessary DD1 workarounds.
2402         */
2403        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2404                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2405    }
2406#endif /* defined(TARGET_PPC64) */
2407}
2408
2409bool kvmppc_has_cap_epr(void)
2410{
2411    return cap_epr;
2412}
2413
2414bool kvmppc_has_cap_htab_fd(void)
2415{
2416    return cap_htab_fd;
2417}
2418
2419bool kvmppc_has_cap_fixup_hcalls(void)
2420{
2421    return cap_fixup_hcalls;
2422}
2423
2424bool kvmppc_has_cap_htm(void)
2425{
2426    return cap_htm;
2427}
2428
2429bool kvmppc_has_cap_mmu_radix(void)
2430{
2431    return cap_mmu_radix;
2432}
2433
2434bool kvmppc_has_cap_mmu_hash_v3(void)
2435{
2436    return cap_mmu_hash_v3;
2437}
2438
2439PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2440{
2441    uint32_t host_pvr = mfpvr();
2442    PowerPCCPUClass *pvr_pcc;
2443
2444    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2445    if (pvr_pcc == NULL) {
2446        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2447    }
2448
2449    return pvr_pcc;
2450}
2451
2452static int kvm_ppc_register_host_cpu_type(void)
2453{
2454    TypeInfo type_info = {
2455        .name = TYPE_HOST_POWERPC_CPU,
2456        .class_init = kvmppc_host_cpu_class_init,
2457    };
2458    PowerPCCPUClass *pvr_pcc;
2459    ObjectClass *oc;
2460    DeviceClass *dc;
2461    int i;
2462
2463    pvr_pcc = kvm_ppc_get_host_cpu_class();
2464    if (pvr_pcc == NULL) {
2465        return -1;
2466    }
2467    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2468    type_register(&type_info);
2469
2470    oc = object_class_by_name(type_info.name);
2471    g_assert(oc);
2472
2473#if defined(TARGET_PPC64)
2474    type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2475    type_info.parent = TYPE_SPAPR_CPU_CORE,
2476    type_info.instance_size = sizeof(sPAPRCPUCore);
2477    type_info.instance_init = NULL;
2478    type_info.class_init = spapr_cpu_core_class_init;
2479    type_info.class_data = (void *) "host";
2480    type_register(&type_info);
2481    g_free((void *)type_info.name);
2482#endif
2483
2484    /*
2485     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2486     * we want "POWER8" to be a "family" alias that points to the current
2487     * host CPU type, too)
2488     */
2489    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2490    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2491        if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2492            char *suffix;
2493
2494            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2495            suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2496            if (suffix) {
2497                *suffix = 0;
2498            }
2499            ppc_cpu_aliases[i].oc = oc;
2500            break;
2501        }
2502    }
2503
2504    return 0;
2505}
2506
2507int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2508{
2509    struct kvm_rtas_token_args args = {
2510        .token = token,
2511    };
2512
2513    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2514        return -ENOENT;
2515    }
2516
2517    strncpy(args.name, function, sizeof(args.name));
2518
2519    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2520}
2521
2522int kvmppc_get_htab_fd(bool write)
2523{
2524    struct kvm_get_htab_fd s = {
2525        .flags = write ? KVM_GET_HTAB_WRITE : 0,
2526        .start_index = 0,
2527    };
2528
2529    if (!cap_htab_fd) {
2530        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2531        return -1;
2532    }
2533
2534    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2535}
2536
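/*
 * Shape of the migration stream written below (it mirrors the chunks
 * read from the KVM HTAB fd): each chunk is a 32-bit HPTE index, a
 * 16-bit n_valid count and a 16-bit n_invalid count, followed by
 * n_valid entries of HASH_PTE_SIZE_64 bytes each.
 */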
2537int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2538{
2539    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2540    uint8_t buf[bufsize];
2541    ssize_t rc;
2542
2543    do {
2544        rc = read(fd, buf, bufsize);
2545        if (rc < 0) {
2546            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2547                    strerror(errno));
2548            return rc;
2549        } else if (rc) {
2550            uint8_t *buffer = buf;
2551            ssize_t n = rc;
2552            while (n) {
2553                struct kvm_get_htab_header *head =
2554                    (struct kvm_get_htab_header *) buffer;
2555                size_t chunksize = sizeof(*head) +
2556                     HASH_PTE_SIZE_64 * head->n_valid;
2557
2558                qemu_put_be32(f, head->index);
2559                qemu_put_be16(f, head->n_valid);
2560                qemu_put_be16(f, head->n_invalid);
2561                qemu_put_buffer(f, (void *)(head + 1),
2562                                HASH_PTE_SIZE_64 * head->n_valid);
2563
2564                buffer += chunksize;
2565                n -= chunksize;
2566            }
2567        }
2568    } while ((rc != 0)
2569             && ((max_ns < 0)
2570                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2571
2572    return (rc == 0) ? 1 : 0;
2573}
2574
2575int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2576                           uint16_t n_valid, uint16_t n_invalid)
2577{
2578    struct kvm_get_htab_header *buf;
2579    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2580    ssize_t rc;
2581
2582    buf = alloca(chunksize);
2583    buf->index = index;
2584    buf->n_valid = n_valid;
2585    buf->n_invalid = n_invalid;
2586
2587    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2588
2589    rc = write(fd, buf, chunksize);
2590    if (rc < 0) {
2591        fprintf(stderr, "Error writing KVM hash table: %s\n",
2592                strerror(errno));
2593        return rc;
2594    }
2595    if (rc != chunksize) {
2596        /* We should never get a short write on a single chunk */
2597        fprintf(stderr, "Short write, restoring KVM hash table\n");
2598        return -1;
2599    }
2600    return 0;
2601}
2602
2603bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2604{
2605    return true;
2606}
2607
2608void kvm_arch_init_irq_routing(KVMState *s)
2609{
2610}
2611
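/*
 * Record format consumed below from the KVM HTAB fd: a
 * kvm_get_htab_header followed by n_valid HPTEs of HASH_PTE_SIZE_64
 * bytes each; n_invalid describes a run of empty slots that follows in
 * the guest HPT but carries no data, which is why those entries are
 * simply zero-filled here.
 */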
2612void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2613{
2614    struct kvm_get_htab_fd ghf = {
2615        .flags = 0,
2616        .start_index = ptex,
2617    };
2618    int fd, rc;
2619    int i;
2620
2621    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2622    if (fd < 0) {
2623        hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2624    }
2625
2626    i = 0;
2627    while (i < n) {
2628        struct kvm_get_htab_header *hdr;
2629        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2630        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2631
2632        rc = read(fd, buf, sizeof(buf));
2633        if (rc < 0) {
2634            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2635        }
2636
2637        hdr = (struct kvm_get_htab_header *)buf;
2638        while ((i < n) && ((char *)hdr < (buf + rc))) {
2639            int invalid = hdr->n_invalid;
2640
2641            if (hdr->index != (ptex + i)) {
2642                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2643                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2644            }
2645
2646            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2647            i += hdr->n_valid;
2648
2649            if ((n - i) < invalid) {
2650                invalid = n - i;
2651            }
2652            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2653            i += hdr->n_invalid;
2654
2655            hdr = (struct kvm_get_htab_header *)
2656                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2657        }
2658    }
2659
2660    close(fd);
2661}
2662
2663void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2664{
2665    int fd, rc;
2666    struct kvm_get_htab_fd ghf;
2667    struct {
2668        struct kvm_get_htab_header hdr;
2669        uint64_t pte0;
2670        uint64_t pte1;
2671    } buf;
2672
2673    ghf.flags = 0;
2674    ghf.start_index = 0;     /* Ignored */
2675    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2676    if (fd < 0) {
2677        hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2678    }
2679
2680    buf.hdr.n_valid = 1;
2681    buf.hdr.n_invalid = 0;
2682    buf.hdr.index = ptex;
2683    buf.pte0 = cpu_to_be64(pte0);
2684    buf.pte1 = cpu_to_be64(pte1);
2685
2686    rc = write(fd, &buf, sizeof(buf));
2687    if (rc != sizeof(buf)) {
2688        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2689    }
2690    close(fd);
2691}
2692
2693int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2694                             uint64_t address, uint32_t data, PCIDevice *dev)
2695{
2696    return 0;
2697}
2698
2699int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2700                                int vector, PCIDevice *dev)
2701{
2702    return 0;
2703}
2704
2705int kvm_arch_release_virq_post(int virq)
2706{
2707    return 0;
2708}
2709
2710int kvm_arch_msi_data_to_gsi(uint32_t data)
2711{
2712    return data & 0xffff;
2713}
2714
2715int kvmppc_enable_hwrng(void)
2716{
2717    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2718        return -1;
2719    }
2720
2721    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2722}
2723
2724void kvmppc_check_papr_resize_hpt(Error **errp)
2725{
2726    if (!kvm_enabled()) {
2727        return; /* No KVM, we're good */
2728    }
2729
2730    if (cap_resize_hpt) {
2731        return; /* Kernel has explicit support, we're good */
2732    }
2733
2734    /* Otherwise fallback on looking for PR KVM */
2735    if (kvmppc_is_pr(kvm_state)) {
2736        return;
2737    }
2738
2739    error_setg(errp,
2740               "Hash page table resizing not available with this KVM version");
2741}
2742
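/*
 * Expected usage of the two calls below (a sketch, not a full protocol
 * description): the caller first issues kvmppc_resize_hpt_prepare() with
 * the desired shift and, once that succeeds, applies the change with
 * kvmppc_resize_hpt_commit() using the same flags and shift.
 */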
2743int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2744{
2745    CPUState *cs = CPU(cpu);
2746    struct kvm_ppc_resize_hpt rhpt = {
2747        .flags = flags,
2748        .shift = shift,
2749    };
2750
2751    if (!cap_resize_hpt) {
2752        return -ENOSYS;
2753    }
2754
2755    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2756}
2757
2758int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2759{
2760    CPUState *cs = CPU(cpu);
2761    struct kvm_ppc_resize_hpt rhpt = {
2762        .flags = flags,
2763        .shift = shift,
2764    };
2765
2766    if (!cap_resize_hpt) {
2767        return -ENOSYS;
2768    }
2769
2770    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2771}
2772
2773static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg)
2774{
2775    target_ulong sdr1 = arg.target_ptr;
2776    PowerPCCPU *cpu = POWERPC_CPU(cs);
2777    CPUPPCState *env = &cpu->env;
2778
2779    /* This is just for the benefit of PR KVM */
2780    cpu_synchronize_state(cs);
2781    env->spr[SPR_SDR1] = sdr1;
2782    if (kvmppc_put_books_sregs(cpu) < 0) {
2783        error_report("Unable to update SDR1 in KVM");
2784        exit(1);
2785    }
2786}
2787
2788void kvmppc_update_sdr1(target_ulong sdr1)
2789{
2790    CPUState *cs;
2791
2792    CPU_FOREACH(cs) {
2793        run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1));
2794    }
2795}
2796
2797/*
2798 * This is a helper function to detect a post migration scenario
2799 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2800 * the guest kernel can't handle a PVR value other than the actual host
2801 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2802 *
2803 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2804 * (so, we're HV), return true. The workaround itself is done in
2805 * cpu_post_load.
2806 *
2807 * The order here is important: we'll only check for KVM PR as a
2808 * fallback if the guest kernel can't handle the situation itself.
2809 * We want to avoid, as much as possible, querying the running KVM
2810 * type at the QEMU level.
2811 */
2812bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2813{
2814    CPUState *cs = CPU(cpu);
2815
2816    if (!kvm_enabled()) {
2817        return false;
2818    }
2819
2820    if (cap_ppc_pvr_compat) {
2821        return false;
2822    }
2823
2824    return !kvmppc_is_pr(cs->kvm_state);
2825}
2826