qemu/target/ppc/kvm.c
/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "cpu-models.h"
#include "qemu/timer.h"
#include "sysemu/hw_accel.h"
#include "kvm_ppc.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/hw.h"
#include "hw/ppc/ppc.h"
#include "migration/qemu-file-types.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "exec/ram_addr.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"
#include "qemu/main-loop.h"
#include "qemu/mmap-alloc.h"
#include "elf.h"
#include "sysemu/kvm_int.h"

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_smt_possible;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_xive;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;
static int cap_ppc_safe_cache;
static int cap_ppc_safe_bounds_check;
static int cap_ppc_safe_indirect_branch;
static int cap_ppc_count_cache_flush_assist;
static int cap_ppc_nested_kvm_hv;
static int cap_large_decr;

static uint32_t debug_inst_opcode;

/*
 * Check whether we are running with KVM-PR (instead of KVM-HV).  This
 * should only be used for fallback tests - generally we should use
 * explicit capabilities for the features we want, rather than
 * assuming what is/isn't available depending on the KVM variant.
 */
static bool kvmppc_is_pr(KVMState *ks)
{
    /* Assume KVM-PR if the GET_PVINFO capability is available */
    return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
}

static int kvm_ppc_register_host_cpu_type(void);
static void kvmppc_get_cpu_characteristics(KVMState *s);
static int kvmppc_get_dec_bits(void);

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /*
     * Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr()
     */
    cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
    cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
    cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
    cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE);
    cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
    kvmppc_get_cpu_characteristics(s);
    cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
    cap_large_decr = kvmppc_get_dec_bits();
    /*
     * Note: set to false because there is no such capability
     * in KVM at this moment.
     *
     * TODO: call kvm_vm_check_extension() with the right capability
     * after the kernel starts implementing it.
     */
    cap_ppc_pvr_compat = false;

    if (!kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL)) {
        error_report("KVM: Host kernel doesn't have level irq capability");
        exit(1);
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}

int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}

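/*
 * Tell KVM which PVR the vCPU should use.  BookE guests keep the
 * native PVR, while on Book3S the PVR of the modelled CPU is pushed
 * into KVM via KVM_SET_SREGS.
 */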
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /*
         * What we're really trying to say is "if we're on BookE, we
         * use the native PVR for now". This is the only sane way to
         * check it though, which may mislead users into thinking
         * they can run BookE guests on BookS. Let's hope nobody
         * dares try :)
         */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}


#if defined(TARGET_PPC64)
static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
{
    int ret;

    assert(kvm_state != NULL);

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        error_setg(errp, "KVM doesn't expose the MMU features it supports");
        error_append_hint(errp, "Consider switching to a newer KVM\n");
        return;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
    if (ret == 0) {
        return;
    }

    error_setg_errno(errp, -ret,
                     "KVM failed to provide the MMU features it supports");
}

struct ppc_radix_page_info *kvm_get_radix_page_info(void)
{
    KVMState *s = KVM_STATE(current_machine->accelerator);
    struct ppc_radix_page_info *radix_page_info;
    struct kvm_ppc_rmmu_info rmmu_info;
    int i;

    if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
        return NULL;
    }
    if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
        return NULL;
    }
    radix_page_info = g_malloc0(sizeof(*radix_page_info));
    radix_page_info->count = 0;
    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
        if (rmmu_info.ap_encodings[i]) {
            radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
            radix_page_info->count++;
        }
    }
    return radix_page_info;
}

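/*
 * Configure the guest's ISA v3.00 (POWER9) MMU mode via
 * KVM_PPC_CONFIGURE_V3_MMU, mapping the ioctl result onto the PAPR
 * hcall return codes that the caller hands back to the guest.
 */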
target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
                                     bool radix, bool gtse,
                                     uint64_t proc_tbl)
{
    CPUState *cs = CPU(cpu);
    int ret;
    uint64_t flags = 0;
    struct kvm_ppc_mmuv3_cfg cfg = {
        .process_table = proc_tbl,
    };

    if (radix) {
        flags |= KVM_PPC_MMUV3_RADIX;
    }
    if (gtse) {
        flags |= KVM_PPC_MMUV3_GTSE;
    }
    cfg.flags = flags;
    ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
    switch (ret) {
    case 0:
        return H_SUCCESS;
    case -EINVAL:
        return H_PARAMETER;
    case -ENODEV:
        return H_NOT_AVAILABLE;
    default:
        return H_HARDWARE;
    }
}

bool kvmppc_hpt_needs_host_contiguous_pages(void)
{
    static struct kvm_ppc_smmu_info smmu_info;

    if (!kvm_enabled()) {
        return false;
    }

    kvm_get_smmu_info(&smmu_info, &error_fatal);
    return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
}

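/*
 * Check that KVM supports the MMU configuration the guest CPU model
 * expects: 1TiB segments, the SLB size, and every segment/page size
 * encoding must have a matching entry in the host's kvm_ppc_smmu_info.
 */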
void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
{
    struct kvm_ppc_smmu_info smmu_info;
    int iq, ik, jq, jk;
    Error *local_err = NULL;

    /* For now, we only have anything to check on hash64 MMUs */
    if (!cpu->hash64_opts || !kvm_enabled()) {
        return;
    }

    kvm_get_smmu_info(&smmu_info, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
        && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        error_setg(errp,
                   "KVM does not support 1TiB segments which guest expects");
        return;
    }

    if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
        error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
                   smmu_info.slb_size, cpu->hash64_opts->slb_size);
        return;
    }

    /*
     * Verify that every pagesize supported by the cpu model is
     * supported by KVM with the same encodings
     */
    for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
        PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps;

        for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
            if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
                break;
            }
        }
        if (ik >= ARRAY_SIZE(smmu_info.sps)) {
            error_setg(errp, "KVM doesn't support base page shift %u",
                       qsps->page_shift);
            return;
        }

        ksps = &smmu_info.sps[ik];
        if (ksps->slb_enc != qsps->slb_enc) {
            error_setg(errp,
"KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
                       ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
            return;
        }

        for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
            for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
                if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
                    break;
                }
            }

            if (jk >= ARRAY_SIZE(ksps->enc)) {
                error_setg(errp, "KVM doesn't support page shift %u/%u",
                           qsps->enc[jq].page_shift, qsps->page_shift);
                return;
            }
            if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
                error_setg(errp,
"KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
                           ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
                           qsps->page_shift, qsps->enc[jq].pte_enc);
                return;
            }
        }
    }

    if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
        /*
         * Mostly what guest pagesizes we can use are related to the
         * host pages used to map guest RAM, which is handled in the
         * platform code. Cache-Inhibited largepages (64k) however are
         * used for I/O, so if they're mapped to the host at all it
         * will be a normal mapping, not a special hugepage one used
         * for RAM.
         */
        if (qemu_real_host_page_size < 0x10000) {
            error_setg(errp,
                       "KVM can't supply 64kiB CI pages, which guest expects");
        }
    }
}
#endif /* defined(TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return POWERPC_CPU(cpu)->vcpu_id;
}

/*
 * e500 supports 2 h/w breakpoints and 2 watchpoints.  book3s supports
 * only 1 watchpoint, so an array size of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* By default, no hardware breakpoints or watchpoints are supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;

static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        /* This target supports access to KVM's guest TLB */
        ret = kvm_booke206_tlb_init(cpu);
        break;
    case POWERPC_MMU_2_07:
        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /*
             * KVM-HV has transactional memory on POWER8 even without
             * the KVM_CAP_PPC_HTM extension, so enable it here
             * instead, as long as it's available to userspace on the
             * host.
             */
            if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
                cap_htm = true;
            }
        }
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}

int kvm_arch_destroy_vcpu(CPUState *cs)
{
    return 0;
}

static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

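/*
 * Fetch a single SPR from KVM through the ONE_REG interface and store
 * it in env->spr[]; kvm_put_one_spr() below is the mirror-image setter.
 */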
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}

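/*
 * Push the FP/VSX and Altivec state to KVM.  An FPR is architecturally
 * the most-significant doubleword of the matching VSR, so both halves
 * are packed into vsr[] in host-endian doubleword order before the
 * ONE_REG call.
 */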
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            trace_kvm_failed_fpscr_set(strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];
            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(*fpr);
            vsr[1] = *vsrl;
#else
            vsr[0] = *vsrl;
            vsr[1] = float64_val(*fpr);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                trace_kvm_failed_fp_set(vsx ? "VSR" : "FPR", i,
                                        strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            trace_kvm_failed_vscr_set(strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                trace_kvm_failed_vr_set(i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            trace_kvm_failed_fpscr_get(strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];
            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                trace_kvm_failed_fp_get(vsx ? "VSR" : "FPR", i,
                                        strerror(errno));
                return ret;
            } else {
#ifdef HOST_WORDS_BIGENDIAN
                *fpr = vsr[0];
                if (vsx) {
                    *vsrl = vsr[1];
                }
#else
                *fpr = vsr[1];
                if (vsx) {
                    *vsrl = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            trace_kvm_failed_vscr_get(strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                trace_kvm_failed_vr_get(i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

#if defined(TARGET_PPC64)
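/*
 * Read the PAPR Virtual Processor Area registration state (VPA
 * address, SLB shadow buffer, dispatch trace log) from KVM.  Each
 * area's size field must directly follow its address field; the
 * asserts below verify that layout.
 */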
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        trace_kvm_failed_vpa_addr_get(strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->slb_shadow_size
           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        trace_kvm_failed_slb_get(strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->dtl_size
           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        trace_kvm_failed_dtl_get(strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
    struct kvm_one_reg reg;
    int ret;

    /*
     * SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA
     */
    assert(spapr_cpu->vpa_addr
           || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));

    if (spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            trace_kvm_failed_vpa_addr_set(strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&spapr_cpu->slb_shadow_size
           == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        trace_kvm_failed_slb_set(strerror(errno));
        return ret;
    }

    assert((uintptr_t)&spapr_cpu->dtl_size
           == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        trace_kvm_failed_dtl_set(strerror(errno));
        return ret;
    }

    if (!spapr_cpu->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            trace_kvm_failed_null_vpa_addr_set(strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */

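/*
 * Write the Book3S segment state (PVR, SDR1, SLB, segment registers
 * and BATs) to KVM via KVM_SET_SREGS.
 */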
int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    if (cpu->vhyp) {
        PPCVirtualHypervisorClass *vhc =
            PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
        sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
    } else {
        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware. We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}

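/*
 * Push QEMU's CPU state into KVM: the core registers via KVM_SET_REGS,
 * then FP/vector state, the shadow TLB, the sregs and ONE_REG-backed
 * SPRs, depending on the sync level and available capabilities.
 */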
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /*
         * We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
         */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                trace_kvm_failed_put_vpa();
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);

        if (level > KVM_PUT_RUNTIME_STATE) {
            kvm_put_one_spr(cs, KVM_REG_PPC_DPDES, SPR_DPDES);
        }
#endif /* TARGET_PPC64 */
    }

    return ret;
}

static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
{
    env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
}

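/*
 * Read the BookE sregs back from KVM, copying only the register
 * groups the kernel advertises in sregs.u.e.features.
 */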
static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
        env->spr[SPR_DECR] = sregs.u.e.dec;
        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_64) {
        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
    }

    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);

        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PM) {
            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PC) {
            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
        }
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
    }

    if (sregs.u.e.features & KVM_SREGS_EXP) {
        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_PD) {
        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
    }

    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
        }
    }

    return 0;
}

static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (!cpu->vhyp) {
        ppc_store_sdr1(env, sregs.u.s.sdr1);
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    /*
     * The packed SLB array we get from KVM_GET_SREGS only contains
     * information about valid entries. So we flush our internal copy
     * to get rid of stale ones, then put all valid SLB entries back
     * in.
     */
    memset(env->slb, 0, sizeof(env->slb));
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
        /*
         * Only restore valid entries
         */
        if (rb & SLB_ESID_V) {
            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
        }
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        env->sr[i] = sregs.u.s.ppc32.sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
    }

    return 0;
}

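/*
 * Pull the full CPU state back out of KVM, mirroring
 * kvm_arch_put_registers() above.
 */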
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvmppc_get_booke_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_segstate) {
        ret = kvmppc_get_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /*
         * We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate.
         */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                trace_kvm_failed_get_vpa();
            }
        }

        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
        kvm_get_one_spr(cs, KVM_REG_PPC_DPDES, SPR_DPDES);
#endif
    }

    return 0;
}

int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    return;
}

MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}

/* Map DCR accesses to the existing QEMU DCR emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env,
                                  uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env,
                                   uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    /* Mixed endian case is not handled */
    uint32_t sc = debug_inst_opcode;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 0) ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    uint32_t sc;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
        sc != debug_inst_opcode ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}

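/*
 * Return the index of the debug point matching addr and type in
 * hw_debug_points[], or -1 if there is none.
 */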
static int find_hw_breakpoint(target_ulong addr, int type)
{
    int n;

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));

    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
        if (hw_debug_points[n].addr == addr &&
             hw_debug_points[n].type == type) {
            return n;
        }
    }

    return -1;
}

static int find_hw_watchpoint(target_ulong addr, int *flag)
{
    int n;

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
    if (n >= 0) {
        *flag = BP_MEM_ACCESS;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
    if (n >= 0) {
        *flag = BP_MEM_WRITE;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
    if (n >= 0) {
        *flag = BP_MEM_READ;
        return n;
    }

    return -1;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
        return -ENOBUFS;
    }

    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;

    switch (type) {
    case GDB_BREAKPOINT_HW:
        if (nb_hw_breakpoint >= max_hw_breakpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_breakpoint++;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        if (nb_hw_watchpoint >= max_hw_watchpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_watchpoint++;
        break;

    default:
        return -ENOSYS;
    }

    return 0;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, type);
    if (n < 0) {
        return -ENOENT;
    }

    switch (type) {
    case GDB_BREAKPOINT_HW:
        nb_hw_breakpoint--;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        nb_hw_watchpoint--;
        break;

    default:
        return -ENOSYS;
    }
    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
}

void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    int n;

    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
                break;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
                break;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
                break;
            case GDB_WATCHPOINT_ACCESS:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                        KVMPPC_DEBUG_WATCH_READ;
                break;
            default:
                cpu_abort(cs, "Unsupported breakpoint type\n");
            }
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
        }
    }
}

static int kvm_handle_hw_breakpoint(CPUState *cs,
                                    struct kvm_debug_exit_arch *arch_info)
{
    int handle = 0;
    int n;
    int flag = 0;

    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
            n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
            if (n >= 0) {
                handle = 1;
            }
        } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                        KVMPPC_DEBUG_WATCH_WRITE)) {
            n = find_hw_watchpoint(arch_info->address, &flag);
            if (n >= 0) {
                handle = 1;
                cs->watchpoint_hit = &hw_watchpoint;
                hw_watchpoint.vaddr = hw_debug_points[n].addr;
                hw_watchpoint.flags = flag;
            }
        }
    }
    return handle;
}

static int kvm_handle_singlestep(void)
{
    return 1;
}

static int kvm_handle_sw_breakpoint(void)
{
    return 1;
}

static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;

    if (cs->singlestep_enabled) {
        return kvm_handle_singlestep();
    }

    if (arch_info->status) {
        return kvm_handle_hw_breakpoint(cs, arch_info);
    }

    if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        return kvm_handle_sw_breakpoint();
    }

    /*
     * QEMU is not able to handle the debug exception, so inject a
     * program exception into the guest.  Yes, a program exception,
     * NOT a debug exception!
     * While QEMU is using the debug resources, the debug exception
     * must always stay enabled; we achieve this by setting MSR_DE
     * and also MSRP_DEP, so the guest cannot change MSR_DE.
     * When emulating debug resources for the guest, we instead want
     * the guest to control MSR_DE (enabling/disabling the debug
     * interrupt as needed).  Supporting both configurations at once
     * is not possible, so debug resources cannot be shared between
     * QEMU and the guest on the BookE architecture.
     * In the current design QEMU gets priority over the guest: if
     * QEMU is using the debug resources, the guest cannot use them.
     * For software breakpoints QEMU uses a privileged instruction,
     * so there is no way we can be here because the guest set a
     * debug exception.  The only remaining possibility is that the
     * guest executed a privileged / illegal instruction, which is
     * why we inject a program interrupt.
     */
    cpu_synchronize_state(cs);
    /*
     * env->nip is the PC, so increment it by 4 to use
     * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
     */
    env->nip += 4;
    cs->exception_index = POWERPC_EXCP_PROGRAM;
    env->error_code = POWERPC_EXCP_INVAL;
    ppc_cpu_do_interrupt(cs);

    return 0;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            trace_kvm_handle_dcr_write();
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            trace_kvm_handle_dcr_read();
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        trace_kvm_handle_halt();
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        trace_kvm_handle_papr_hcall();
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        trace_kvm_handle_epr();
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        trace_kvm_handle_watchdog_expiry();
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        trace_kvm_handle_debug_exception();
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}

1721int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1722{
1723    CPUState *cs = CPU(cpu);
1724    uint32_t bits = tsr_bits;
1725    struct kvm_one_reg reg = {
1726        .id = KVM_REG_PPC_OR_TSR,
1727        .addr = (uintptr_t) &bits,
1728    };
1729
1730    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1731}
1732
1733int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1734{
1736    CPUState *cs = CPU(cpu);
1737    uint32_t bits = tsr_bits;
1738    struct kvm_one_reg reg = {
1739        .id = KVM_REG_PPC_CLEAR_TSR,
1740        .addr = (uintptr_t) &bits,
1741    };
1742
1743    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1744}
1745
1746int kvmppc_set_tcr(PowerPCCPU *cpu)
1747{
1748    CPUState *cs = CPU(cpu);
1749    CPUPPCState *env = &cpu->env;
1750    uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1751
1752    struct kvm_one_reg reg = {
1753        .id = KVM_REG_PPC_TCR,
1754        .addr = (uintptr_t) &tcr,
1755    };
1756
1757    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1758}
1759
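/*
 * The TSR/TCR helpers above all go through the ONE_REG interface: the
 * register is named by a KVM_REG_PPC_* id and the value is passed
 * indirectly via .addr.  As an illustrative (hypothetical) use, a
 * BookE watchdog implementation could acknowledge an expired watchdog
 * by clearing the enable/status bits:
 *
 *     kvmppc_clear_tsr_bits(cpu, TSR_ENW | TSR_WIS);
 *
 * (TSR_ENW / TSR_WIS here assume the usual BookE TSR bit names.)
 */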
1760int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1761{
1762    CPUState *cs = CPU(cpu);
1763    int ret;
1764
1765    if (!kvm_enabled()) {
1766        return -1;
1767    }
1768
1769    if (!cap_ppc_watchdog) {
1770        printf("warning: KVM does not support watchdog\n");
1771        return -1;
1772    }
1773
1774    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1775    if (ret < 0) {
1776        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1777                __func__, strerror(-ret));
1778        return ret;
1779    }
1780
1781    return ret;
1782}
1783
1784static int read_cpuinfo(const char *field, char *value, int len)
1785{
1786    FILE *f;
1787    int ret = -1;
1788    int field_len = strlen(field);
1789    char line[512];
1790
1791    f = fopen("/proc/cpuinfo", "r");
1792    if (!f) {
1793        return -1;
1794    }
1795
1796    do {
1797        if (!fgets(line, sizeof(line), f)) {
1798            break;
1799        }
1800        if (!strncmp(line, field, field_len)) {
1801            pstrcpy(value, len, line);
1802            ret = 0;
1803            break;
1804        }
1805    } while (*line);
1806
1807    fclose(f);
1808
1809    return ret;
1810}
1811
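/*
 * kvmppc_get_tbfreq() parses the host timebase frequency out of
 * /proc/cpuinfo, which on a ppc64 host contains a line like
 * (example value):
 *
 *     timebase        : 512000000
 *
 * The number after the ':' is the timebase frequency in Hz; if the
 * field can't be found, we fall back to NANOSECONDS_PER_SECOND.
 */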
1812uint32_t kvmppc_get_tbfreq(void)
1813{
1814    char line[512];
1815    char *ns;
1816    uint32_t retval = NANOSECONDS_PER_SECOND;
1817
1818    if (read_cpuinfo("timebase", line, sizeof(line))) {
1819        return retval;
1820    }
1821
1822    ns = strchr(line, ':');
1823    if (!ns) {
1824        return retval;
1825    }
1826
1827    ns++;
1828
1829    return atoi(ns);
1830}
1831
1832bool kvmppc_get_host_serial(char **value)
1833{
1834    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1835                               NULL);
1836}
1837
1838bool kvmppc_get_host_model(char **value)
1839{
1840    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1841}
1842
1843/* Try to find a device tree node for a CPU with clock-frequency property */
1844static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1845{
1846    struct dirent *dirp;
1847    DIR *dp;
1848
1849    dp = opendir(PROC_DEVTREE_CPU);
1850    if (!dp) {
1851        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1852        return -1;
1853    }
1854
1855    buf[0] = '\0';
1856    while ((dirp = readdir(dp)) != NULL) {
1857        FILE *f;
1858        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1859                 dirp->d_name);
1860        f = fopen(buf, "r");
1861        if (f) {
1862            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1863            fclose(f);
1864            break;
1865        }
1866        buf[0] = '\0';
1867    }
1868    closedir(dp);
1869    if (buf[0] == '\0') {
1870        printf("Unknown host!\n");
1871        return -1;
1872    }
1873
1874    return 0;
1875}
1876
1877static uint64_t kvmppc_read_int_dt(const char *filename)
1878{
1879    union {
1880        uint32_t v32;
1881        uint64_t v64;
1882    } u;
1883    FILE *f;
1884    int len;
1885
1886    f = fopen(filename, "rb");
1887    if (!f) {
1888        return -1;
1889    }
1890
1891    len = fread(&u, 1, sizeof(u), f);
1892    fclose(f);
1893    switch (len) {
1894    case 4:
1895        /* property is a 32-bit quantity */
1896        return be32_to_cpu(u.v32);
1897    case 8:
1898        return be64_to_cpu(u.v64);
1899    }
1900
1901    return 0;
1902}
1903
1904/*
1905 * Read a CPU node property from the host device tree that's a single
1906 * integer (32-bit or 64-bit).  Returns -1 (as a uint64_t) if the node or
1907 * property can't be found or opened, and 0 if the format isn't understood.
1908 */
1909static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1910{
1911    char buf[PATH_MAX], *tmp;
1912    uint64_t val;
1913
1914    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1915        return -1;
1916    }
1917
1918    tmp = g_strdup_printf("%s/%s", buf, propname);
1919    val = kvmppc_read_int_dt(tmp);
1920    g_free(tmp);
1921
1922    return val;
1923}
1924
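/*
 * For example, on a POWER8 host kvmppc_get_clockfreq() ends up reading
 * a big-endian cell from a path such as (the node name is host
 * dependent):
 *
 *     /proc/device-tree/cpus/PowerPC,POWER8@0/clock-frequency
 */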
1925uint64_t kvmppc_get_clockfreq(void)
1926{
1927    return kvmppc_read_int_cpu_dt("clock-frequency");
1928}
1929
1930static int kvmppc_get_dec_bits(void)
1931{
1932    int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits");
1933
1934    if (nr_bits > 0) {
1935        return nr_bits;
1936    }
1937    return 0;
1938}
1939
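/*
 * Note the return convention: 0 when the pvinfo was fetched
 * successfully, 1 otherwise (not a -errno), which is why the callers
 * below test the result with '!'.
 */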
1940static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1941{
1942    CPUState *cs = env_cpu(env);
1943
1944    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1945        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1946        return 0;
1947    }
1948
1949    return 1;
1950}
1951
1952int kvmppc_get_hasidle(CPUPPCState *env)
1953{
1954    struct kvm_ppc_pvinfo pvinfo;
1955
1956    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1957        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1958        return 1;
1959    }
1960
1961    return 0;
1962}
1963
1964int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1965{
1966    uint32_t *hc = (uint32_t *)buf;
1967    struct kvm_ppc_pvinfo pvinfo;
1968
1969    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1970        memcpy(buf, pvinfo.hcall, buf_len);
1971        return 0;
1972    }
1973
1974    /*
1975     * Fall back to hypercalls that always fail, regardless of endianness:
1976     *
1977     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1978     *     li r3, -1
1979     *     b .+8       (becomes nop in wrong endian)
1980     *     bswap32(li r3, -1)
1981     */
1982
1983    hc[0] = cpu_to_be32(0x08000048);
1984    hc[1] = cpu_to_be32(0x3860ffff);
1985    hc[2] = cpu_to_be32(0x48000008);
1986    hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1987
1988    return 1;
1989}
1990
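/*
 * Whitelist an sPAPR hypercall for in-kernel handling.  With
 * KVM_CAP_PPC_ENABLE_HCALL the two extra cap arguments are the hcall
 * number and an enable flag (1 = let the kernel handle it).
 */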
1991static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1992{
1993    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1994}
1995
1996void kvmppc_enable_logical_ci_hcalls(void)
1997{
1998    /*
1999     * FIXME: it would be nice if we could detect the case where
2000     * we're using a device which requires the in-kernel
2001     * implementation of these hcalls but the kernel lacks it, and
2002     * produce a warning in that case.
2003     */
2004    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2005    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2006}
2007
2008void kvmppc_enable_set_mode_hcall(void)
2009{
2010    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2011}
2012
2013void kvmppc_enable_clear_ref_mod_hcalls(void)
2014{
2015    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2016    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2017}
2018
2019void kvmppc_enable_h_page_init(void)
2020{
2021    kvmppc_enable_hcall(kvm_state, H_PAGE_INIT);
2022}
2023
2024void kvmppc_set_papr(PowerPCCPU *cpu)
2025{
2026    CPUState *cs = CPU(cpu);
2027    int ret;
2028
2029    if (!kvm_enabled()) {
2030        return;
2031    }
2032
2033    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2034    if (ret) {
2035        error_report("This vCPU type or KVM version does not support PAPR");
2036        exit(1);
2037    }
2038
2039    /*
2040     * Update the capability flag so we sync the right information
2041     * with KVM.
2042     */
2043    cap_papr = 1;
2044}
2045
2046int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2047{
2048    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2049}
2050
2051void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2052{
2053    CPUState *cs = CPU(cpu);
2054    int ret;
2055
2056    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2057    if (ret && mpic_proxy) {
2058        error_report("This KVM version does not support EPR");
2059        exit(1);
2060    }
2061}
2062
2063int kvmppc_smt_threads(void)
2064{
2065    return cap_ppc_smt ? cap_ppc_smt : 1;
2066}
2067
2068int kvmppc_set_smt_threads(int smt)
2069{
2070    int ret;
2071
2072    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2073    if (!ret) {
2074        cap_ppc_smt = smt;
2075    }
2076    return ret;
2077}
2078
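/*
 * cap_ppc_smt_possible is a bitmask of the SMT modes the host
 * supports, bit n meaning "2^n threads per core"; e.g. a
 * (hypothetical) value of 0x0f would advertise VSMT modes 1, 2, 4
 * and 8, which the loop below prints largest-first.
 */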
2079void kvmppc_hint_smt_possible(Error **errp)
2080{
2081    int i;
2082    GString *g;
2083    char *s;
2084
2085    assert(kvm_enabled());
2086    if (cap_ppc_smt_possible) {
2087        g = g_string_new("Available VSMT modes:");
2088        for (i = 63; i >= 0; i--) {
2089            if ((1UL << i) & cap_ppc_smt_possible) {
2090                g_string_append_printf(g, " %lu", (1UL << i));
2091            }
2092        }
2093        s = g_string_free(g, false);
2094        error_append_hint(errp, "%s.\n", s);
2095        g_free(s);
2096    } else {
2097        error_append_hint(errp,
2098                          "This KVM seems to be too old to support VSMT.\n");
2099    }
2100}
2101
2102
2103#ifdef TARGET_PPC64
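/*
 * Clamp the requested RMA size against the host MMU: the result is
 * never larger than 2^(best_page_shift + hash_shift - 7) bytes, i.e.
 * 2^(hash_shift - 7) backing pages -- one HPTE per RMA page, touching
 * at most 1/8 of the HPT's 16-byte entries.
 */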
2104uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2105{
2106    struct kvm_ppc_smmu_info info;
2107    long rampagesize, best_page_shift;
2108    int i;
2109
2110    /*
2111     * Find the largest hardware supported page size that's less than
2112     * or equal to the (logical) backing page size of guest RAM
2113     */
2114    kvm_get_smmu_info(&info, &error_fatal);
2115    rampagesize = qemu_minrampagesize();
2116    best_page_shift = 0;
2117
2118    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2119        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2120
2121        if (!sps->page_shift) {
2122            continue;
2123        }
2124
2125        if ((sps->page_shift > best_page_shift)
2126            && ((1UL << sps->page_shift) <= rampagesize)) {
2127            best_page_shift = sps->page_shift;
2128        }
2129    }
2130
2131    return MIN(current_size,
2132               1ULL << (best_page_shift + hash_shift - 7));
2133}
2134#endif
2135
2136bool kvmppc_spapr_use_multitce(void)
2137{
2138    return cap_spapr_multitce;
2139}
2140
2141int kvmppc_spapr_enable_inkernel_multitce(void)
2142{
2143    int ret;
2144
2145    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2146                            H_PUT_TCE_INDIRECT, 1);
2147    if (!ret) {
2148        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2149                                H_STUFF_TCE, 1);
2150    }
2151
2152    return ret;
2153}
2154
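/*
 * Create an in-kernel TCE (guest IOMMU) table and mmap() it into
 * QEMU.  On success, returns the mapped table and passes the fd back
 * through *pfd; returns NULL (with *pfd == -1) when the kernel lacks
 * the needed capability, in which case the caller falls back to a
 * userspace-managed table.
 */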
2155void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2156                              uint64_t bus_offset, uint32_t nb_table,
2157                              int *pfd, bool need_vfio)
2158{
2159    long len;
2160    int fd;
2161    void *table;
2162
2163    /*
2164     * Must set fd to -1 so we don't try to munmap when called for
2165     * destroying the table, which the upper layers -will- do
2166     */
2167    *pfd = -1;
2168    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2169        return NULL;
2170    }
2171
2172    if (cap_spapr_tce_64) {
2173        struct kvm_create_spapr_tce_64 args = {
2174            .liobn = liobn,
2175            .page_shift = page_shift,
2176            .offset = bus_offset >> page_shift,
2177            .size = nb_table,
2178            .flags = 0
2179        };
2180        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2181        if (fd < 0) {
2182            fprintf(stderr,
2183                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2184                    liobn);
2185            return NULL;
2186        }
2187    } else if (cap_spapr_tce) {
2188        uint64_t window_size = (uint64_t) nb_table << page_shift;
2189        struct kvm_create_spapr_tce args = {
2190            .liobn = liobn,
2191            .window_size = window_size,
2192        };
2193        if ((window_size != args.window_size) || bus_offset) {
2194            return NULL;
2195        }
2196        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2197        if (fd < 0) {
2198            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2199                    liobn);
2200            return NULL;
2201        }
2202    } else {
2203        return NULL;
2204    }
2205
2206    len = nb_table * sizeof(uint64_t);
2207    /* FIXME: round this up to page size */
2208
2209    table = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
2210    if (table == MAP_FAILED) {
2211        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2212                liobn);
2213        close(fd);
2214        return NULL;
2215    }
2216
2217    *pfd = fd;
2218    return table;
2219}
2220
2221int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2222{
2223    long len;
2224
2225    if (fd < 0) {
2226        return -1;
2227    }
2228
2229    len = nb_table * sizeof(uint64_t);
2230    if ((munmap(table, len) < 0) ||
2231        (close(fd) < 0)) {
2232        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2233                strerror(errno));
2234        /* Leak the table */
2235    }
2236
2237    return 0;
2238}
2239
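/*
 * Return convention for kvmppc_reset_htab(): 0 means the caller
 * (QEMU) must allocate the hash table itself, a positive value is the
 * shift of a kernel-allocated HPT (2^shift bytes), and a negative
 * value is an error from the allocation ioctl.
 */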
2240int kvmppc_reset_htab(int shift_hint)
2241{
2242    uint32_t shift = shift_hint;
2243
2244    if (!kvm_enabled()) {
2245        /* Full emulation, tell caller to allocate htab itself */
2246        return 0;
2247    }
2248    if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2249        int ret;
2250        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2251        if (ret == -ENOTTY) {
2252            /*
2253             * At least some versions of PR KVM advertise the
2254             * capability, but don't implement the ioctl().  Oops.
2255             * Return 0 so that we allocate the htab in qemu, as is
2256             * correct for PR.
2257             */
2258            return 0;
2259        } else if (ret < 0) {
2260            return ret;
2261        }
2262        return shift;
2263    }
2264
2265    /*
2266     * We have a kernel that predates the htab reset calls.  For PR
2267     * KVM, we need to allocate the htab ourselves; an HV KVM of
2268     * this era will already have allocated a 16MB fixed-size hash
2269     * table.
2270     */
2271    if (kvmppc_is_pr(kvm_state)) {
2272        /* PR - tell caller to allocate htab */
2273        return 0;
2274    } else {
2275        /* HV - assume 16MB kernel allocated htab */
2276        return 24;
2277    }
2278}
2279
2280static inline uint32_t mfpvr(void)
2281{
2282    uint32_t pvr;
2283
2284    asm ("mfpvr %0"
2285         : "=r"(pvr));
2286    return pvr;
2287}
2288
2289static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2290{
2291    if (on) {
2292        *word |= flags;
2293    } else {
2294        *word &= ~flags;
2295    }
2296}
2297
2298static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2299{
2300    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2301    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2302    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2303
2304    /* Now fix up the class with information we can query from the host */
2305    pcc->pvr = mfpvr();
2306
2307    alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2308                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2309    alter_insns(&pcc->insns_flags2, PPC2_VSX,
2310                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2311    alter_insns(&pcc->insns_flags2, PPC2_DFP,
2312                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2313
2314    if (dcache_size != -1) {
2315        pcc->l1_dcache_size = dcache_size;
2316    }
2317
2318    if (icache_size != -1) {
2319        pcc->l1_icache_size = icache_size;
2320    }
2321
2322#if defined(TARGET_PPC64)
2323    pcc->radix_page_info = kvm_get_radix_page_info();
2324
2325    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2326        /*
2327         * POWER9 DD1 has some bugs which make it not really ISA 3.00
2328         * compliant.  More importantly, advertising ISA 3.00
2329         * architected mode may prevent guests from activating
2330         * necessary DD1 workarounds.
2331         */
2332        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2333                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2334    }
2335#endif /* defined(TARGET_PPC64) */
2336}
2337
2338bool kvmppc_has_cap_epr(void)
2339{
2340    return cap_epr;
2341}
2342
2343bool kvmppc_has_cap_fixup_hcalls(void)
2344{
2345    return cap_fixup_hcalls;
2346}
2347
2348bool kvmppc_has_cap_htm(void)
2349{
2350    return cap_htm;
2351}
2352
2353bool kvmppc_has_cap_mmu_radix(void)
2354{
2355    return cap_mmu_radix;
2356}
2357
2358bool kvmppc_has_cap_mmu_hash_v3(void)
2359{
2360    return cap_mmu_hash_v3;
2361}
2362
2363static bool kvmppc_power8_host(void)
2364{
2365    bool ret = false;
2366#ifdef TARGET_PPC64
2367    {
2368        uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2369        ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2370              (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2371              (base_pvr == CPU_POWERPC_POWER8_BASE);
2372    }
2373#endif /* TARGET_PPC64 */
2374    return ret;
2375}
2376
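/*
 * The parse_cap_ppc_safe_* helpers translate KVM_PPC_GET_CPU_CHAR
 * bits into the sPAPR capability tristate: 0 for broken/vulnerable,
 * 1 for a workaround available, 2 for fixed in hardware.  The
 * indirect-branch parser returns the more specific SPAPR_CAP_FIXED_*
 * and SPAPR_CAP_WORKAROUND values directly.
 */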
2377static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2378{
2379    bool l1d_thread_priv_req = !kvmppc_power8_host();
2380
2381    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2382        return 2;
2383    } else if ((!l1d_thread_priv_req ||
2384                c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2385               (c.character & c.character_mask
2386                & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2387        return 1;
2388    }
2389
2390    return 0;
2391}
2392
2393static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2394{
2395    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2396        return 2;
2397    } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2398        return 1;
2399    }
2400
2401    return 0;
2402}
2403
2404static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2405{
2406    if ((~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) &&
2407        (~c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) &&
2408        (~c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED)) {
2409        return SPAPR_CAP_FIXED_NA;
2410    } else if (c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) {
2411        return SPAPR_CAP_WORKAROUND;
2412    } else if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2413        return  SPAPR_CAP_FIXED_CCD;
2414    } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2415        return SPAPR_CAP_FIXED_IBS;
2416    }
2417
2418    return 0;
2419}
2420
2421static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
2422{
2423    if (c.character & c.character_mask & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) {
2424        return 1;
2425    }
2426    return 0;
2427}
2428
2429bool kvmppc_has_cap_xive(void)
2430{
2431    return cap_xive;
2432}
2433
2434static void kvmppc_get_cpu_characteristics(KVMState *s)
2435{
2436    struct kvm_ppc_cpu_char c;
2437    int ret;
2438
2439    /* Assume broken */
2440    cap_ppc_safe_cache = 0;
2441    cap_ppc_safe_bounds_check = 0;
2442    cap_ppc_safe_indirect_branch = 0;
2443
2444    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2445    if (!ret) {
2446        return;
2447    }
2448    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2449    if (ret < 0) {
2450        return;
2451    }
2452
2453    cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2454    cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2455    cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2456    cap_ppc_count_cache_flush_assist =
2457        parse_cap_ppc_count_cache_flush_assist(c);
2458}
2459
2460int kvmppc_get_cap_safe_cache(void)
2461{
2462    return cap_ppc_safe_cache;
2463}
2464
2465int kvmppc_get_cap_safe_bounds_check(void)
2466{
2467    return cap_ppc_safe_bounds_check;
2468}
2469
2470int kvmppc_get_cap_safe_indirect_branch(void)
2471{
2472    return cap_ppc_safe_indirect_branch;
2473}
2474
2475int kvmppc_get_cap_count_cache_flush_assist(void)
2476{
2477    return cap_ppc_count_cache_flush_assist;
2478}
2479
2480bool kvmppc_has_cap_nested_kvm_hv(void)
2481{
2482    return !!cap_ppc_nested_kvm_hv;
2483}
2484
2485int kvmppc_set_cap_nested_kvm_hv(int enable)
2486{
2487    return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2488}
2489
2490bool kvmppc_has_cap_spapr_vfio(void)
2491{
2492    return cap_spapr_vfio;
2493}
2494
2495int kvmppc_get_cap_large_decr(void)
2496{
2497    return cap_large_decr;
2498}
2499
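/*
 * Flip the large-decrementer enable (LPCR_LD) through the ONE_REG
 * LPCR; the bit is read back afterwards because KVM may silently
 * refuse the change, in which case we report failure.
 */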
2500int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
2501{
2502    CPUState *cs = CPU(cpu);
2503    uint64_t lpcr;
2504
2505    kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2506    /* Do we need to modify the LPCR? */
2507    if (!!(lpcr & LPCR_LD) != !!enable) {
2508        if (enable) {
2509            lpcr |= LPCR_LD;
2510        } else {
2511            lpcr &= ~LPCR_LD;
2512        }
2513        kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2514        kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2515
2516        if (!!(lpcr & LPCR_LD) != !!enable) {
2517            return -1;
2518        }
2519    }
2520
2521    return 0;
2522}
2523
2524PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2525{
2526    uint32_t host_pvr = mfpvr();
2527    PowerPCCPUClass *pvr_pcc;
2528
2529    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2530    if (pvr_pcc == NULL) {
2531        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2532    }
2533
2534    return pvr_pcc;
2535}
2536
2537static void pseries_machine_class_fixup(ObjectClass *oc, void *opaque)
2538{
2539    MachineClass *mc = MACHINE_CLASS(oc);
2540
2541    mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2542}
2543
2544static int kvm_ppc_register_host_cpu_type(void)
2545{
2546    TypeInfo type_info = {
2547        .name = TYPE_HOST_POWERPC_CPU,
2548        .class_init = kvmppc_host_cpu_class_init,
2549    };
2550    PowerPCCPUClass *pvr_pcc;
2551    ObjectClass *oc;
2552    DeviceClass *dc;
2553    int i;
2554
2555    pvr_pcc = kvm_ppc_get_host_cpu_class();
2556    if (pvr_pcc == NULL) {
2557        return -1;
2558    }
2559    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2560    type_register(&type_info);
2561    /* override TCG default cpu type with 'host' cpu model */
2562    object_class_foreach(pseries_machine_class_fixup, TYPE_SPAPR_MACHINE,
2563                         false, NULL);
2564
2565    oc = object_class_by_name(type_info.name);
2566    g_assert(oc);
2567
2568    /*
2569     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2570     * we want "POWER8" to be a "family" alias that points to the current
2571     * host CPU type, too)
2572     */
2573    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2574    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2575        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2576            char *suffix;
2577
2578            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2579            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2580            if (suffix) {
2581                *suffix = 0;
2582            }
2583            break;
2584        }
2585    }
2586
2587    return 0;
2588}
2589
2590int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2591{
2592    struct kvm_rtas_token_args args = {
2593        .token = token,
2594    };
2595
2596    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2597        return -ENOENT;
2598    }
2599
2600    strncpy(args.name, function, sizeof(args.name) - 1);
2601
2602    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2603}
2604
2605int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2606{
2607    struct kvm_get_htab_fd s = {
2608        .flags = write ? KVM_GET_HTAB_WRITE : 0,
2609        .start_index = index,
2610    };
2611    int ret;
2612
2613    if (!cap_htab_fd) {
2614        error_setg(errp, "KVM version doesn't support %s the HPT",
2615                   write ? "writing" : "reading");
2616        return -ENOTSUP;
2617    }
2618
2619    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2620    if (ret < 0) {
2621        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2622                   write ? "writing" : "reading", write ? "to" : "from",
2623                   strerror(errno));
2624        return -errno;
2625    }
2626
2627    return ret;
2628}
2629
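/*
 * The KVM HTAB fd yields a stream of chunks, each a
 * struct kvm_get_htab_header (index, n_valid, n_invalid) followed by
 * n_valid 16-byte HPTEs.  kvmppc_save_htab() re-encodes those chunks
 * into the (big-endian) migration stream until the fd signals
 * end-of-table (read() returning 0) or max_ns expires;
 * kvmppc_load_htab_chunk() writes one such chunk back.
 */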
2630int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2631{
2632    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2633    uint8_t buf[bufsize];
2634    ssize_t rc;
2635
2636    do {
2637        rc = read(fd, buf, bufsize);
2638        if (rc < 0) {
2639            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2640                    strerror(errno));
2641            return rc;
2642        } else if (rc) {
2643            uint8_t *buffer = buf;
2644            ssize_t n = rc;
2645            while (n) {
2646                struct kvm_get_htab_header *head =
2647                    (struct kvm_get_htab_header *) buffer;
2648                size_t chunksize = sizeof(*head) +
2649                     HASH_PTE_SIZE_64 * head->n_valid;
2650
2651                qemu_put_be32(f, head->index);
2652                qemu_put_be16(f, head->n_valid);
2653                qemu_put_be16(f, head->n_invalid);
2654                qemu_put_buffer(f, (void *)(head + 1),
2655                                HASH_PTE_SIZE_64 * head->n_valid);
2656
2657                buffer += chunksize;
2658                n -= chunksize;
2659            }
2660        }
2661    } while ((rc != 0)
2662             && ((max_ns < 0) ||
2663                 ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2664
2665    return (rc == 0) ? 1 : 0;
2666}
2667
2668int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2669                           uint16_t n_valid, uint16_t n_invalid)
2670{
2671    struct kvm_get_htab_header *buf;
2672    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2673    ssize_t rc;
2674
2675    buf = alloca(chunksize);
2676    buf->index = index;
2677    buf->n_valid = n_valid;
2678    buf->n_invalid = n_invalid;
2679
2680    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2681
2682    rc = write(fd, buf, chunksize);
2683    if (rc < 0) {
2684        fprintf(stderr, "Error writing KVM hash table: %s\n",
2685                strerror(errno));
2686        return rc;
2687    }
2688    if (rc != chunksize) {
2689        /* We should never get a short write on a single chunk */
2690        fprintf(stderr, "Short write, restoring KVM hash table\n");
2691        return -1;
2692    }
2693    return 0;
2694}
2695
2696bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2697{
2698    return true;
2699}
2700
2701void kvm_arch_init_irq_routing(KVMState *s)
2702{
2703}
2704
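/*
 * Read n HPTEs starting at ptex from the HTAB fd into hptes[].
 * Ranges the stream marks as invalid carry no payload, so they are
 * materialized here as zeroed HPTEs.
 */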
2705void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2706{
2707    int fd, rc;
2708    int i;
2709
2710    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2711
2712    i = 0;
2713    while (i < n) {
2714        struct kvm_get_htab_header *hdr;
2715        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2716        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2717
2718        rc = read(fd, buf, sizeof(buf));
2719        if (rc < 0) {
2720            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2721        }
2722
2723        hdr = (struct kvm_get_htab_header *)buf;
2724        while ((i < n) && ((char *)hdr < (buf + rc))) {
2725            int invalid = hdr->n_invalid, valid = hdr->n_valid;
2726
2727            if (hdr->index != (ptex + i)) {
2728                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2729                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2730            }
2731
2732            if (n - i < valid) {
2733                valid = n - i;
2734            }
2735            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2736            i += valid;
2737
2738            if ((n - i) < invalid) {
2739                invalid = n - i;
2740            }
2741            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2742            i += invalid;
2743
2744            hdr = (struct kvm_get_htab_header *)
2745                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2746        }
2747    }
2748
2749    close(fd);
2750}
2751
2752void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2753{
2754    int fd, rc;
2755    struct {
2756        struct kvm_get_htab_header hdr;
2757        uint64_t pte0;
2758        uint64_t pte1;
2759    } buf;
2760
2761    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2762
2763    buf.hdr.n_valid = 1;
2764    buf.hdr.n_invalid = 0;
2765    buf.hdr.index = ptex;
2766    buf.pte0 = cpu_to_be64(pte0);
2767    buf.pte1 = cpu_to_be64(pte1);
2768
2769    rc = write(fd, &buf, sizeof(buf));
2770    if (rc != sizeof(buf)) {
2771        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2772    }
2773    close(fd);
2774}
2775
2776int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2777                             uint64_t address, uint32_t data, PCIDevice *dev)
2778{
2779    return 0;
2780}
2781
2782int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2783                                int vector, PCIDevice *dev)
2784{
2785    return 0;
2786}
2787
2788int kvm_arch_release_virq_post(int virq)
2789{
2790    return 0;
2791}
2792
2793int kvm_arch_msi_data_to_gsi(uint32_t data)
2794{
2795    return data & 0xffff;
2796}
2797
2798int kvmppc_enable_hwrng(void)
2799{
2800    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2801        return -1;
2802    }
2803
2804    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2805}
2806
2807void kvmppc_check_papr_resize_hpt(Error **errp)
2808{
2809    if (!kvm_enabled()) {
2810        return; /* No KVM, we're good */
2811    }
2812
2813    if (cap_resize_hpt) {
2814        return; /* Kernel has explicit support, we're good */
2815    }
2816
2817    /* Otherwise fallback on looking for PR KVM */
2818    if (kvmppc_is_pr(kvm_state)) {
2819        return;
2820    }
2821
2822    error_setg(errp,
2823               "Hash page table resizing not available with this KVM version");
2824}
2825
2826int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2827{
2828    CPUState *cs = CPU(cpu);
2829    struct kvm_ppc_resize_hpt rhpt = {
2830        .flags = flags,
2831        .shift = shift,
2832    };
2833
2834    if (!cap_resize_hpt) {
2835        return -ENOSYS;
2836    }
2837
2838    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2839}
2840
2841int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2842{
2843    CPUState *cs = CPU(cpu);
2844    struct kvm_ppc_resize_hpt rhpt = {
2845        .flags = flags,
2846        .shift = shift,
2847    };
2848
2849    if (!cap_resize_hpt) {
2850        return -ENOSYS;
2851    }
2852
2853    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2854}
2855
2856/*
2857 * This is a helper function to detect a post-migration scenario in
2858 * which a guest, running under KVM-HV, freezes in cpu_post_load
2859 * because the guest kernel can't handle a PVR value other than the
2860 * actual host PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2861 *
2862 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2863 * (so we're HV), return true. The workaround itself is done in
2864 * cpu_post_load.
2865 *
2866 * The order here is important: we only check for KVM PR as a
2867 * fallback if the guest kernel can't handle the situation itself.
2868 * We want to avoid querying the running KVM type at the QEMU level
2869 * as much as possible.
2870 */
2871bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2872{
2873    CPUState *cs = CPU(cpu);
2874
2875    if (!kvm_enabled()) {
2876        return false;
2877    }
2878
2879    if (cap_ppc_pvr_compat) {
2880        return false;
2881    }
2882
2883    return !kvmppc_is_pr(cs->kvm_state);
2884}
2885
2886void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2887{
2888    CPUState *cs = CPU(cpu);
2889
2890    if (kvm_enabled()) {
2891        kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
2892    }
2893}
2894
2895void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset)
2896{
2897    CPUState *cs = CPU(cpu);
2898
2899    if (kvm_enabled()) {
2900        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &tb_offset);
2901    }
2902}
2903