qemu/target/arm/kvm64.c
<<
>>
Prefs
   1/*
   2 * ARM implementation of KVM hooks, 64 bit specific code
   3 *
   4 * Copyright Mian-M. Hamayun 2013, Virtual Open Systems
   5 * Copyright Alex Bennée 2014, Linaro
   6 *
   7 * This work is licensed under the terms of the GNU GPL, version 2 or later.
   8 * See the COPYING file in the top-level directory.
   9 *
  10 */
  11
  12#include "qemu/osdep.h"
  13#include <sys/ioctl.h>
  14#include <sys/ptrace.h>
  15
  16#include <linux/elf.h>
  17#include <linux/kvm.h>
  18
  19#include "qapi/error.h"
  20#include "cpu.h"
  21#include "qemu/timer.h"
  22#include "qemu/error-report.h"
  23#include "qemu/host-utils.h"
  24#include "qemu/main-loop.h"
  25#include "exec/gdbstub.h"
  26#include "sysemu/runstate.h"
  27#include "sysemu/kvm.h"
  28#include "sysemu/kvm_int.h"
  29#include "kvm_arm.h"
  30#include "internals.h"
  31#include "hw/acpi/acpi.h"
  32#include "hw/acpi/ghes.h"
  33#include "hw/arm/virt.h"
  34
  35static bool have_guest_debug;
  36
  37void kvm_arm_init_debug(KVMState *s)
  38{
  39    have_guest_debug = kvm_check_extension(s,
  40                                           KVM_CAP_SET_GUEST_DEBUG);
  41
  42    max_hw_wps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS);
  43    hw_watchpoints = g_array_sized_new(true, true,
  44                                       sizeof(HWWatchpoint), max_hw_wps);
  45
  46    max_hw_bps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS);
  47    hw_breakpoints = g_array_sized_new(true, true,
  48                                       sizeof(HWBreakpoint), max_hw_bps);
  49    return;
  50}
  51
  52int kvm_arch_insert_hw_breakpoint(target_ulong addr,
  53                                  target_ulong len, int type)
  54{
  55    switch (type) {
  56    case GDB_BREAKPOINT_HW:
  57        return insert_hw_breakpoint(addr);
  58        break;
  59    case GDB_WATCHPOINT_READ:
  60    case GDB_WATCHPOINT_WRITE:
  61    case GDB_WATCHPOINT_ACCESS:
  62        return insert_hw_watchpoint(addr, len, type);
  63    default:
  64        return -ENOSYS;
  65    }
  66}
  67
  68int kvm_arch_remove_hw_breakpoint(target_ulong addr,
  69                                  target_ulong len, int type)
  70{
  71    switch (type) {
  72    case GDB_BREAKPOINT_HW:
  73        return delete_hw_breakpoint(addr);
  74    case GDB_WATCHPOINT_READ:
  75    case GDB_WATCHPOINT_WRITE:
  76    case GDB_WATCHPOINT_ACCESS:
  77        return delete_hw_watchpoint(addr, len, type);
  78    default:
  79        return -ENOSYS;
  80    }
  81}
  82
  83
  84void kvm_arch_remove_all_hw_breakpoints(void)
  85{
  86    if (cur_hw_wps > 0) {
  87        g_array_remove_range(hw_watchpoints, 0, cur_hw_wps);
  88    }
  89    if (cur_hw_bps > 0) {
  90        g_array_remove_range(hw_breakpoints, 0, cur_hw_bps);
  91    }
  92}
  93
  94void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr)
  95{
  96    int i;
  97    memset(ptr, 0, sizeof(struct kvm_guest_debug_arch));
  98
  99    for (i = 0; i < max_hw_wps; i++) {
 100        HWWatchpoint *wp = get_hw_wp(i);
 101        ptr->dbg_wcr[i] = wp->wcr;
 102        ptr->dbg_wvr[i] = wp->wvr;
 103    }
 104    for (i = 0; i < max_hw_bps; i++) {
 105        HWBreakpoint *bp = get_hw_bp(i);
 106        ptr->dbg_bcr[i] = bp->bcr;
 107        ptr->dbg_bvr[i] = bp->bvr;
 108    }
 109}
 110
 111bool kvm_arm_hw_debug_active(CPUState *cs)
 112{
 113    return ((cur_hw_wps > 0) || (cur_hw_bps > 0));
 114}
 115
 116static bool kvm_arm_set_device_attr(CPUState *cs, struct kvm_device_attr *attr,
 117                                    const char *name)
 118{
 119    int err;
 120
 121    err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr);
 122    if (err != 0) {
 123        error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err));
 124        return false;
 125    }
 126
 127    err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr);
 128    if (err != 0) {
 129        error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err));
 130        return false;
 131    }
 132
 133    return true;
 134}
 135
 136void kvm_arm_pmu_init(CPUState *cs)
 137{
 138    struct kvm_device_attr attr = {
 139        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
 140        .attr = KVM_ARM_VCPU_PMU_V3_INIT,
 141    };
 142
 143    if (!ARM_CPU(cs)->has_pmu) {
 144        return;
 145    }
 146    if (!kvm_arm_set_device_attr(cs, &attr, "PMU")) {
 147        error_report("failed to init PMU");
 148        abort();
 149    }
 150}
 151
 152void kvm_arm_pmu_set_irq(CPUState *cs, int irq)
 153{
 154    struct kvm_device_attr attr = {
 155        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
 156        .addr = (intptr_t)&irq,
 157        .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
 158    };
 159
 160    if (!ARM_CPU(cs)->has_pmu) {
 161        return;
 162    }
 163    if (!kvm_arm_set_device_attr(cs, &attr, "PMU")) {
 164        error_report("failed to set irq for PMU");
 165        abort();
 166    }
 167}
 168
 169void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa)
 170{
 171    struct kvm_device_attr attr = {
 172        .group = KVM_ARM_VCPU_PVTIME_CTRL,
 173        .attr = KVM_ARM_VCPU_PVTIME_IPA,
 174        .addr = (uint64_t)&ipa,
 175    };
 176
 177    if (ARM_CPU(cs)->kvm_steal_time == ON_OFF_AUTO_OFF) {
 178        return;
 179    }
 180    if (!kvm_arm_set_device_attr(cs, &attr, "PVTIME IPA")) {
 181        error_report("failed to init PVTIME IPA");
 182        abort();
 183    }
 184}
 185
 186static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id)
 187{
 188    uint64_t ret;
 189    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret };
 190    int err;
 191
 192    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
 193    err = ioctl(fd, KVM_GET_ONE_REG, &idreg);
 194    if (err < 0) {
 195        return -1;
 196    }
 197    *pret = ret;
 198    return 0;
 199}
 200
 201static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id)
 202{
 203    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret };
 204
 205    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
 206    return ioctl(fd, KVM_GET_ONE_REG, &idreg);
 207}
 208
 209static bool kvm_arm_pauth_supported(void)
 210{
 211    return (kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_ADDRESS) &&
 212            kvm_check_extension(kvm_state, KVM_CAP_ARM_PTRAUTH_GENERIC));
 213}
 214
 215bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
 216{
 217    /* Identify the feature bits corresponding to the host CPU, and
 218     * fill out the ARMHostCPUClass fields accordingly. To do this
 219     * we have to create a scratch VM, create a single CPU inside it,
 220     * and then query that CPU for the relevant ID registers.
 221     */
 222    int fdarray[3];
 223    bool sve_supported;
 224    bool pmu_supported = false;
 225    uint64_t features = 0;
 226    int err;
 227
 228    /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
 229     * we know these will only support creating one kind of guest CPU,
 230     * which is its preferred CPU type. Fortunately these old kernels
 231     * support only a very limited number of CPUs.
 232     */
 233    static const uint32_t cpus_to_try[] = {
 234        KVM_ARM_TARGET_AEM_V8,
 235        KVM_ARM_TARGET_FOUNDATION_V8,
 236        KVM_ARM_TARGET_CORTEX_A57,
 237        QEMU_KVM_ARM_TARGET_NONE
 238    };
 239    /*
 240     * target = -1 informs kvm_arm_create_scratch_host_vcpu()
 241     * to use the preferred target
 242     */
 243    struct kvm_vcpu_init init = { .target = -1, };
 244
 245    /*
 246     * Ask for SVE if supported, so that we can query ID_AA64ZFR0,
 247     * which is otherwise RAZ.
 248     */
 249    sve_supported = kvm_arm_sve_supported();
 250    if (sve_supported) {
 251        init.features[0] |= 1 << KVM_ARM_VCPU_SVE;
 252    }
 253
 254    /*
 255     * Ask for Pointer Authentication if supported, so that we get
 256     * the unsanitized field values for AA64ISAR1_EL1.
 257     */
 258    if (kvm_arm_pauth_supported()) {
 259        init.features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS |
 260                             1 << KVM_ARM_VCPU_PTRAUTH_GENERIC);
 261    }
 262
 263    if (kvm_arm_pmu_supported()) {
 264        init.features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
 265        pmu_supported = true;
 266    }
 267
 268    if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
 269        return false;
 270    }
 271
 272    ahcf->target = init.target;
 273    ahcf->dtb_compatible = "arm,arm-v8";
 274
 275    err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0,
 276                         ARM64_SYS_REG(3, 0, 0, 4, 0));
 277    if (unlikely(err < 0)) {
 278        /*
 279         * Before v4.15, the kernel only exposed a limited number of system
 280         * registers, not including any of the interesting AArch64 ID regs.
 281         * For the most part we could leave these fields as zero with minimal
 282         * effect, since this does not affect the values seen by the guest.
 283         *
 284         * However, it could cause problems down the line for QEMU,
 285         * so provide a minimal v8.0 default.
 286         *
 287         * ??? Could read MIDR and use knowledge from cpu64.c.
 288         * ??? Could map a page of memory into our temp guest and
 289         *     run the tiniest of hand-crafted kernels to extract
 290         *     the values seen by the guest.
 291         * ??? Either of these sounds like too much effort just
 292         *     to work around running a modern host kernel.
 293         */
 294        ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */
 295        err = 0;
 296    } else {
 297        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1,
 298                              ARM64_SYS_REG(3, 0, 0, 4, 1));
 299        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0,
 300                              ARM64_SYS_REG(3, 0, 0, 4, 5));
 301        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0,
 302                              ARM64_SYS_REG(3, 0, 0, 5, 0));
 303        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1,
 304                              ARM64_SYS_REG(3, 0, 0, 5, 1));
 305        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0,
 306                              ARM64_SYS_REG(3, 0, 0, 6, 0));
 307        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
 308                              ARM64_SYS_REG(3, 0, 0, 6, 1));
 309        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0,
 310                              ARM64_SYS_REG(3, 0, 0, 7, 0));
 311        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1,
 312                              ARM64_SYS_REG(3, 0, 0, 7, 1));
 313        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2,
 314                              ARM64_SYS_REG(3, 0, 0, 7, 2));
 315
 316        /*
 317         * Note that if AArch32 support is not present in the host,
 318         * the AArch32 sysregs are present to be read, but will
 319         * return UNKNOWN values.  This is neither better nor worse
 320         * than skipping the reads and leaving 0, as we must avoid
 321         * considering the values in every case.
 322         */
 323        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr0,
 324                              ARM64_SYS_REG(3, 0, 0, 1, 0));
 325        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr1,
 326                              ARM64_SYS_REG(3, 0, 0, 1, 1));
 327        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0,
 328                              ARM64_SYS_REG(3, 0, 0, 1, 2));
 329        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0,
 330                              ARM64_SYS_REG(3, 0, 0, 1, 4));
 331        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1,
 332                              ARM64_SYS_REG(3, 0, 0, 1, 5));
 333        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2,
 334                              ARM64_SYS_REG(3, 0, 0, 1, 6));
 335        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3,
 336                              ARM64_SYS_REG(3, 0, 0, 1, 7));
 337        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0,
 338                              ARM64_SYS_REG(3, 0, 0, 2, 0));
 339        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1,
 340                              ARM64_SYS_REG(3, 0, 0, 2, 1));
 341        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2,
 342                              ARM64_SYS_REG(3, 0, 0, 2, 2));
 343        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3,
 344                              ARM64_SYS_REG(3, 0, 0, 2, 3));
 345        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4,
 346                              ARM64_SYS_REG(3, 0, 0, 2, 4));
 347        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5,
 348                              ARM64_SYS_REG(3, 0, 0, 2, 5));
 349        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4,
 350                              ARM64_SYS_REG(3, 0, 0, 2, 6));
 351        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6,
 352                              ARM64_SYS_REG(3, 0, 0, 2, 7));
 353
 354        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0,
 355                              ARM64_SYS_REG(3, 0, 0, 3, 0));
 356        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1,
 357                              ARM64_SYS_REG(3, 0, 0, 3, 1));
 358        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2,
 359                              ARM64_SYS_REG(3, 0, 0, 3, 2));
 360        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr2,
 361                              ARM64_SYS_REG(3, 0, 0, 3, 4));
 362        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr1,
 363                              ARM64_SYS_REG(3, 0, 0, 3, 5));
 364        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr5,
 365                              ARM64_SYS_REG(3, 0, 0, 3, 6));
 366
 367        /*
 368         * DBGDIDR is a bit complicated because the kernel doesn't
 369         * provide an accessor for it in 64-bit mode, which is what this
 370         * scratch VM is in, and there's no architected "64-bit sysreg
 371         * which reads the same as the 32-bit register" the way there is
 372         * for other ID registers. Instead we synthesize a value from the
 373         * AArch64 ID_AA64DFR0, the same way the kernel code in
 374         * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does.
 375         * We only do this if the CPU supports AArch32 at EL1.
 376         */
 377        if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) {
 378            int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS);
 379            int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS);
 380            int ctx_cmps =
 381                FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS);
 382            int version = 6; /* ARMv8 debug architecture */
 383            bool has_el3 =
 384                !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3);
 385            uint32_t dbgdidr = 0;
 386
 387            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps);
 388            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, BRPS, brps);
 389            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, CTX_CMPS, ctx_cmps);
 390            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, VERSION, version);
 391            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3);
 392            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3);
 393            dbgdidr |= (1 << 15); /* RES1 bit */
 394            ahcf->isar.dbgdidr = dbgdidr;
 395        }
 396
 397        if (pmu_supported) {
 398            /* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */
 399            err |= read_sys_reg64(fdarray[2], &ahcf->isar.reset_pmcr_el0,
 400                                  ARM64_SYS_REG(3, 3, 9, 12, 0));
 401        }
 402
 403        if (sve_supported) {
 404            /*
 405             * There is a range of kernels between kernel commit 73433762fcae
 406             * and f81cb2c3ad41 which have a bug where the kernel doesn't
 407             * expose SYS_ID_AA64ZFR0_EL1 via the ONE_REG API unless the VM has
 408             * enabled SVE support, which resulted in an error rather than RAZ.
 409             * So only read the register if we set KVM_ARM_VCPU_SVE above.
 410             */
 411            err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0,
 412                                  ARM64_SYS_REG(3, 0, 0, 4, 4));
 413        }
 414    }
 415
 416    kvm_arm_destroy_scratch_host_vcpu(fdarray);
 417
 418    if (err < 0) {
 419        return false;
 420    }
 421
 422    /*
 423     * We can assume any KVM supporting CPU is at least a v8
 424     * with VFPv4+Neon; this in turn implies most of the other
 425     * feature bits.
 426     */
 427    features |= 1ULL << ARM_FEATURE_V8;
 428    features |= 1ULL << ARM_FEATURE_NEON;
 429    features |= 1ULL << ARM_FEATURE_AARCH64;
 430    features |= 1ULL << ARM_FEATURE_PMU;
 431    features |= 1ULL << ARM_FEATURE_GENERIC_TIMER;
 432
 433    ahcf->features = features;
 434
 435    return true;
 436}
 437
 438void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp)
 439{
 440    bool has_steal_time = kvm_arm_steal_time_supported();
 441
 442    if (cpu->kvm_steal_time == ON_OFF_AUTO_AUTO) {
 443        if (!has_steal_time || !arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
 444            cpu->kvm_steal_time = ON_OFF_AUTO_OFF;
 445        } else {
 446            cpu->kvm_steal_time = ON_OFF_AUTO_ON;
 447        }
 448    } else if (cpu->kvm_steal_time == ON_OFF_AUTO_ON) {
 449        if (!has_steal_time) {
 450            error_setg(errp, "'kvm-steal-time' cannot be enabled "
 451                             "on this host");
 452            return;
 453        } else if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
 454            /*
 455             * DEN0057A chapter 2 says "This specification only covers
 456             * systems in which the Execution state of the hypervisor
 457             * as well as EL1 of virtual machines is AArch64.". And,
 458             * to ensure that, the smc/hvc calls are only specified as
 459             * smc64/hvc64.
 460             */
 461            error_setg(errp, "'kvm-steal-time' cannot be enabled "
 462                             "for AArch32 guests");
 463            return;
 464        }
 465    }
 466}
 467
 468bool kvm_arm_aarch32_supported(void)
 469{
 470    return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL1_32BIT);
 471}
 472
 473bool kvm_arm_sve_supported(void)
 474{
 475    return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
 476}
 477
 478bool kvm_arm_steal_time_supported(void)
 479{
 480    return kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME);
 481}
 482
 483QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
 484
 485uint32_t kvm_arm_sve_get_vls(CPUState *cs)
 486{
 487    /* Only call this function if kvm_arm_sve_supported() returns true. */
 488    static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS];
 489    static bool probed;
 490    uint32_t vq = 0;
 491    int i;
 492
 493    /*
 494     * KVM ensures all host CPUs support the same set of vector lengths.
 495     * So we only need to create the scratch VCPUs once and then cache
 496     * the results.
 497     */
 498    if (!probed) {
 499        struct kvm_vcpu_init init = {
 500            .target = -1,
 501            .features[0] = (1 << KVM_ARM_VCPU_SVE),
 502        };
 503        struct kvm_one_reg reg = {
 504            .id = KVM_REG_ARM64_SVE_VLS,
 505            .addr = (uint64_t)&vls[0],
 506        };
 507        int fdarray[3], ret;
 508
 509        probed = true;
 510
 511        if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) {
 512            error_report("failed to create scratch VCPU with SVE enabled");
 513            abort();
 514        }
 515        ret = ioctl(fdarray[2], KVM_GET_ONE_REG, &reg);
 516        kvm_arm_destroy_scratch_host_vcpu(fdarray);
 517        if (ret) {
 518            error_report("failed to get KVM_REG_ARM64_SVE_VLS: %s",
 519                         strerror(errno));
 520            abort();
 521        }
 522
 523        for (i = KVM_ARM64_SVE_VLS_WORDS - 1; i >= 0; --i) {
 524            if (vls[i]) {
 525                vq = 64 - clz64(vls[i]) + i * 64;
 526                break;
 527            }
 528        }
 529        if (vq > ARM_MAX_VQ) {
 530            warn_report("KVM supports vector lengths larger than "
 531                        "QEMU can enable");
 532            vls[0] &= MAKE_64BIT_MASK(0, ARM_MAX_VQ);
 533        }
 534    }
 535
 536    return vls[0];
 537}
 538
 539static int kvm_arm_sve_set_vls(CPUState *cs)
 540{
 541    ARMCPU *cpu = ARM_CPU(cs);
 542    uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = { cpu->sve_vq.map };
 543    struct kvm_one_reg reg = {
 544        .id = KVM_REG_ARM64_SVE_VLS,
 545        .addr = (uint64_t)&vls[0],
 546    };
 547
 548    assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX);
 549
 550    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 551}
 552
 553#define ARM_CPU_ID_MPIDR       3, 0, 0, 0, 5
 554
 555int kvm_arch_init_vcpu(CPUState *cs)
 556{
 557    int ret;
 558    uint64_t mpidr;
 559    ARMCPU *cpu = ARM_CPU(cs);
 560    CPUARMState *env = &cpu->env;
 561    uint64_t psciver;
 562
 563    if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
 564        !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
 565        error_report("KVM is not supported for this guest CPU type");
 566        return -EINVAL;
 567    }
 568
 569    qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs);
 570
 571    /* Determine init features for this CPU */
 572    memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
 573    if (cs->start_powered_off) {
 574        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
 575    }
 576    if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
 577        cpu->psci_version = QEMU_PSCI_VERSION_0_2;
 578        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
 579    }
 580    if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
 581        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
 582    }
 583    if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
 584        cpu->has_pmu = false;
 585    }
 586    if (cpu->has_pmu) {
 587        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
 588    } else {
 589        env->features &= ~(1ULL << ARM_FEATURE_PMU);
 590    }
 591    if (cpu_isar_feature(aa64_sve, cpu)) {
 592        assert(kvm_arm_sve_supported());
 593        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE;
 594    }
 595    if (cpu_isar_feature(aa64_pauth, cpu)) {
 596        cpu->kvm_init_features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS |
 597                                      1 << KVM_ARM_VCPU_PTRAUTH_GENERIC);
 598    }
 599
 600    /* Do KVM_ARM_VCPU_INIT ioctl */
 601    ret = kvm_arm_vcpu_init(cs);
 602    if (ret) {
 603        return ret;
 604    }
 605
 606    if (cpu_isar_feature(aa64_sve, cpu)) {
 607        ret = kvm_arm_sve_set_vls(cs);
 608        if (ret) {
 609            return ret;
 610        }
 611        ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_SVE);
 612        if (ret) {
 613            return ret;
 614        }
 615    }
 616
 617    /*
 618     * KVM reports the exact PSCI version it is implementing via a
 619     * special sysreg. If it is present, use its contents to determine
 620     * what to report to the guest in the dtb (it is the PSCI version,
 621     * in the same 15-bits major 16-bits minor format that PSCI_VERSION
 622     * returns).
 623     */
 624    if (!kvm_get_one_reg(cs, KVM_REG_ARM_PSCI_VERSION, &psciver)) {
 625        cpu->psci_version = psciver;
 626    }
 627
 628    /*
 629     * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
 630     * Currently KVM has its own idea about MPIDR assignment, so we
 631     * override our defaults with what we get from KVM.
 632     */
 633    ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr);
 634    if (ret) {
 635        return ret;
 636    }
 637    cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;
 638
 639    /* Check whether user space can specify guest syndrome value */
 640    kvm_arm_init_serror_injection(cs);
 641
 642    return kvm_arm_init_cpreg_list(cpu);
 643}
 644
 645int kvm_arch_destroy_vcpu(CPUState *cs)
 646{
 647    return 0;
 648}
 649
 650bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
 651{
 652    /* Return true if the regidx is a register we should synchronize
 653     * via the cpreg_tuples array (ie is not a core or sve reg that
 654     * we sync by hand in kvm_arch_get/put_registers())
 655     */
 656    switch (regidx & KVM_REG_ARM_COPROC_MASK) {
 657    case KVM_REG_ARM_CORE:
 658    case KVM_REG_ARM64_SVE:
 659        return false;
 660    default:
 661        return true;
 662    }
 663}
 664
 665typedef struct CPRegStateLevel {
 666    uint64_t regidx;
 667    int level;
 668} CPRegStateLevel;
 669
 670/* All system registers not listed in the following table are assumed to be
 671 * of the level KVM_PUT_RUNTIME_STATE. If a register should be written less
 672 * often, you must add it to this table with a state of either
 673 * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
 674 */
 675static const CPRegStateLevel non_runtime_cpregs[] = {
 676    { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
 677};
 678
 679int kvm_arm_cpreg_level(uint64_t regidx)
 680{
 681    int i;
 682
 683    for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) {
 684        const CPRegStateLevel *l = &non_runtime_cpregs[i];
 685        if (l->regidx == regidx) {
 686            return l->level;
 687        }
 688    }
 689
 690    return KVM_PUT_RUNTIME_STATE;
 691}
 692
 693/* Callers must hold the iothread mutex lock */
 694static void kvm_inject_arm_sea(CPUState *c)
 695{
 696    ARMCPU *cpu = ARM_CPU(c);
 697    CPUARMState *env = &cpu->env;
 698    uint32_t esr;
 699    bool same_el;
 700
 701    c->exception_index = EXCP_DATA_ABORT;
 702    env->exception.target_el = 1;
 703
 704    /*
 705     * Set the DFSC to synchronous external abort and set FnV to not valid,
 706     * this will tell guest the FAR_ELx is UNKNOWN for this abort.
 707     */
 708    same_el = arm_current_el(env) == env->exception.target_el;
 709    esr = syn_data_abort_no_iss(same_el, 1, 0, 0, 0, 0, 0x10);
 710
 711    env->exception.syndrome = esr;
 712
 713    arm_cpu_do_interrupt(c);
 714}
 715
 716#define AARCH64_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
 717                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
 718
 719#define AARCH64_SIMD_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
 720                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
 721
 722#define AARCH64_SIMD_CTRL_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
 723                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
 724
 725static int kvm_arch_put_fpsimd(CPUState *cs)
 726{
 727    CPUARMState *env = &ARM_CPU(cs)->env;
 728    struct kvm_one_reg reg;
 729    int i, ret;
 730
 731    for (i = 0; i < 32; i++) {
 732        uint64_t *q = aa64_vfp_qreg(env, i);
 733#if HOST_BIG_ENDIAN
 734        uint64_t fp_val[2] = { q[1], q[0] };
 735        reg.addr = (uintptr_t)fp_val;
 736#else
 737        reg.addr = (uintptr_t)q;
 738#endif
 739        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
 740        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 741        if (ret) {
 742            return ret;
 743        }
 744    }
 745
 746    return 0;
 747}
 748
 749/*
 750 * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
 751 * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
 752 * code the slice index to zero for now as it's unlikely we'll need more than
 753 * one slice for quite some time.
 754 */
 755static int kvm_arch_put_sve(CPUState *cs)
 756{
 757    ARMCPU *cpu = ARM_CPU(cs);
 758    CPUARMState *env = &cpu->env;
 759    uint64_t tmp[ARM_MAX_VQ * 2];
 760    uint64_t *r;
 761    struct kvm_one_reg reg;
 762    int n, ret;
 763
 764    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
 765        r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2);
 766        reg.addr = (uintptr_t)r;
 767        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
 768        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 769        if (ret) {
 770            return ret;
 771        }
 772    }
 773
 774    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
 775        r = sve_bswap64(tmp, r = &env->vfp.pregs[n].p[0],
 776                        DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
 777        reg.addr = (uintptr_t)r;
 778        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
 779        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 780        if (ret) {
 781            return ret;
 782        }
 783    }
 784
 785    r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0],
 786                    DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
 787    reg.addr = (uintptr_t)r;
 788    reg.id = KVM_REG_ARM64_SVE_FFR(0);
 789    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 790    if (ret) {
 791        return ret;
 792    }
 793
 794    return 0;
 795}
 796
 797int kvm_arch_put_registers(CPUState *cs, int level)
 798{
 799    struct kvm_one_reg reg;
 800    uint64_t val;
 801    uint32_t fpr;
 802    int i, ret;
 803    unsigned int el;
 804
 805    ARMCPU *cpu = ARM_CPU(cs);
 806    CPUARMState *env = &cpu->env;
 807
 808    /* If we are in AArch32 mode then we need to copy the AArch32 regs to the
 809     * AArch64 registers before pushing them out to 64-bit KVM.
 810     */
 811    if (!is_a64(env)) {
 812        aarch64_sync_32_to_64(env);
 813    }
 814
 815    for (i = 0; i < 31; i++) {
 816        reg.id = AARCH64_CORE_REG(regs.regs[i]);
 817        reg.addr = (uintptr_t) &env->xregs[i];
 818        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 819        if (ret) {
 820            return ret;
 821        }
 822    }
 823
 824    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
 825     * QEMU side we keep the current SP in xregs[31] as well.
 826     */
 827    aarch64_save_sp(env, 1);
 828
 829    reg.id = AARCH64_CORE_REG(regs.sp);
 830    reg.addr = (uintptr_t) &env->sp_el[0];
 831    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 832    if (ret) {
 833        return ret;
 834    }
 835
 836    reg.id = AARCH64_CORE_REG(sp_el1);
 837    reg.addr = (uintptr_t) &env->sp_el[1];
 838    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 839    if (ret) {
 840        return ret;
 841    }
 842
 843    /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */
 844    if (is_a64(env)) {
 845        val = pstate_read(env);
 846    } else {
 847        val = cpsr_read(env);
 848    }
 849    reg.id = AARCH64_CORE_REG(regs.pstate);
 850    reg.addr = (uintptr_t) &val;
 851    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 852    if (ret) {
 853        return ret;
 854    }
 855
 856    reg.id = AARCH64_CORE_REG(regs.pc);
 857    reg.addr = (uintptr_t) &env->pc;
 858    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 859    if (ret) {
 860        return ret;
 861    }
 862
 863    reg.id = AARCH64_CORE_REG(elr_el1);
 864    reg.addr = (uintptr_t) &env->elr_el[1];
 865    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 866    if (ret) {
 867        return ret;
 868    }
 869
 870    /* Saved Program State Registers
 871     *
 872     * Before we restore from the banked_spsr[] array we need to
 873     * ensure that any modifications to env->spsr are correctly
 874     * reflected in the banks.
 875     */
 876    el = arm_current_el(env);
 877    if (el > 0 && !is_a64(env)) {
 878        i = bank_number(env->uncached_cpsr & CPSR_M);
 879        env->banked_spsr[i] = env->spsr;
 880    }
 881
 882    /* KVM 0-4 map to QEMU banks 1-5 */
 883    for (i = 0; i < KVM_NR_SPSR; i++) {
 884        reg.id = AARCH64_CORE_REG(spsr[i]);
 885        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
 886        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 887        if (ret) {
 888            return ret;
 889        }
 890    }
 891
 892    if (cpu_isar_feature(aa64_sve, cpu)) {
 893        ret = kvm_arch_put_sve(cs);
 894    } else {
 895        ret = kvm_arch_put_fpsimd(cs);
 896    }
 897    if (ret) {
 898        return ret;
 899    }
 900
 901    reg.addr = (uintptr_t)(&fpr);
 902    fpr = vfp_get_fpsr(env);
 903    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
 904    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 905    if (ret) {
 906        return ret;
 907    }
 908
 909    reg.addr = (uintptr_t)(&fpr);
 910    fpr = vfp_get_fpcr(env);
 911    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
 912    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 913    if (ret) {
 914        return ret;
 915    }
 916
 917    write_cpustate_to_list(cpu, true);
 918
 919    if (!write_list_to_kvmstate(cpu, level)) {
 920        return -EINVAL;
 921    }
 922
 923   /*
 924    * Setting VCPU events should be triggered after syncing the registers
 925    * to avoid overwriting potential changes made by KVM upon calling
 926    * KVM_SET_VCPU_EVENTS ioctl
 927    */
 928    ret = kvm_put_vcpu_events(cpu);
 929    if (ret) {
 930        return ret;
 931    }
 932
 933    kvm_arm_sync_mpstate_to_kvm(cpu);
 934
 935    return ret;
 936}
 937
 938static int kvm_arch_get_fpsimd(CPUState *cs)
 939{
 940    CPUARMState *env = &ARM_CPU(cs)->env;
 941    struct kvm_one_reg reg;
 942    int i, ret;
 943
 944    for (i = 0; i < 32; i++) {
 945        uint64_t *q = aa64_vfp_qreg(env, i);
 946        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
 947        reg.addr = (uintptr_t)q;
 948        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 949        if (ret) {
 950            return ret;
 951        } else {
 952#if HOST_BIG_ENDIAN
 953            uint64_t t;
 954            t = q[0], q[0] = q[1], q[1] = t;
 955#endif
 956        }
 957    }
 958
 959    return 0;
 960}
 961
 962/*
 963 * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
 964 * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
 965 * code the slice index to zero for now as it's unlikely we'll need more than
 966 * one slice for quite some time.
 967 */
 968static int kvm_arch_get_sve(CPUState *cs)
 969{
 970    ARMCPU *cpu = ARM_CPU(cs);
 971    CPUARMState *env = &cpu->env;
 972    struct kvm_one_reg reg;
 973    uint64_t *r;
 974    int n, ret;
 975
 976    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
 977        r = &env->vfp.zregs[n].d[0];
 978        reg.addr = (uintptr_t)r;
 979        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
 980        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 981        if (ret) {
 982            return ret;
 983        }
 984        sve_bswap64(r, r, cpu->sve_max_vq * 2);
 985    }
 986
 987    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
 988        r = &env->vfp.pregs[n].p[0];
 989        reg.addr = (uintptr_t)r;
 990        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
 991        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 992        if (ret) {
 993            return ret;
 994        }
 995        sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
 996    }
 997
 998    r = &env->vfp.pregs[FFR_PRED_NUM].p[0];
 999    reg.addr = (uintptr_t)r;
1000    reg.id = KVM_REG_ARM64_SVE_FFR(0);
1001    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1002    if (ret) {
1003        return ret;
1004    }
1005    sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
1006
1007    return 0;
1008}
1009
1010int kvm_arch_get_registers(CPUState *cs)
1011{
1012    struct kvm_one_reg reg;
1013    uint64_t val;
1014    unsigned int el;
1015    uint32_t fpr;
1016    int i, ret;
1017
1018    ARMCPU *cpu = ARM_CPU(cs);
1019    CPUARMState *env = &cpu->env;
1020
1021    for (i = 0; i < 31; i++) {
1022        reg.id = AARCH64_CORE_REG(regs.regs[i]);
1023        reg.addr = (uintptr_t) &env->xregs[i];
1024        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1025        if (ret) {
1026            return ret;
1027        }
1028    }
1029
1030    reg.id = AARCH64_CORE_REG(regs.sp);
1031    reg.addr = (uintptr_t) &env->sp_el[0];
1032    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1033    if (ret) {
1034        return ret;
1035    }
1036
1037    reg.id = AARCH64_CORE_REG(sp_el1);
1038    reg.addr = (uintptr_t) &env->sp_el[1];
1039    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1040    if (ret) {
1041        return ret;
1042    }
1043
1044    reg.id = AARCH64_CORE_REG(regs.pstate);
1045    reg.addr = (uintptr_t) &val;
1046    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1047    if (ret) {
1048        return ret;
1049    }
1050
1051    env->aarch64 = ((val & PSTATE_nRW) == 0);
1052    if (is_a64(env)) {
1053        pstate_write(env, val);
1054    } else {
1055        cpsr_write(env, val, 0xffffffff, CPSRWriteRaw);
1056    }
1057
1058    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
1059     * QEMU side we keep the current SP in xregs[31] as well.
1060     */
1061    aarch64_restore_sp(env, 1);
1062
1063    reg.id = AARCH64_CORE_REG(regs.pc);
1064    reg.addr = (uintptr_t) &env->pc;
1065    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1066    if (ret) {
1067        return ret;
1068    }
1069
1070    /* If we are in AArch32 mode then we need to sync the AArch32 regs with the
1071     * incoming AArch64 regs received from 64-bit KVM.
1072     * We must perform this after all of the registers have been acquired from
1073     * the kernel.
1074     */
1075    if (!is_a64(env)) {
1076        aarch64_sync_64_to_32(env);
1077    }
1078
1079    reg.id = AARCH64_CORE_REG(elr_el1);
1080    reg.addr = (uintptr_t) &env->elr_el[1];
1081    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1082    if (ret) {
1083        return ret;
1084    }
1085
1086    /* Fetch the SPSR registers
1087     *
1088     * KVM SPSRs 0-4 map to QEMU banks 1-5
1089     */
1090    for (i = 0; i < KVM_NR_SPSR; i++) {
1091        reg.id = AARCH64_CORE_REG(spsr[i]);
1092        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
1093        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1094        if (ret) {
1095            return ret;
1096        }
1097    }
1098
1099    el = arm_current_el(env);
1100    if (el > 0 && !is_a64(env)) {
1101        i = bank_number(env->uncached_cpsr & CPSR_M);
1102        env->spsr = env->banked_spsr[i];
1103    }
1104
1105    if (cpu_isar_feature(aa64_sve, cpu)) {
1106        ret = kvm_arch_get_sve(cs);
1107    } else {
1108        ret = kvm_arch_get_fpsimd(cs);
1109    }
1110    if (ret) {
1111        return ret;
1112    }
1113
1114    reg.addr = (uintptr_t)(&fpr);
1115    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
1116    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1117    if (ret) {
1118        return ret;
1119    }
1120    vfp_set_fpsr(env, fpr);
1121
1122    reg.addr = (uintptr_t)(&fpr);
1123    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
1124    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1125    if (ret) {
1126        return ret;
1127    }
1128    vfp_set_fpcr(env, fpr);
1129
1130    ret = kvm_get_vcpu_events(cpu);
1131    if (ret) {
1132        return ret;
1133    }
1134
1135    if (!write_kvmstate_to_list(cpu)) {
1136        return -EINVAL;
1137    }
1138    /* Note that it's OK to have registers which aren't in CPUState,
1139     * so we can ignore a failure return here.
1140     */
1141    write_list_to_cpustate(cpu);
1142
1143    kvm_arm_sync_mpstate_to_qemu(cpu);
1144
1145    /* TODO: other registers */
1146    return ret;
1147}
1148
1149void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
1150{
1151    ram_addr_t ram_addr;
1152    hwaddr paddr;
1153
1154    assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
1155
1156    if (acpi_ghes_present() && addr) {
1157        ram_addr = qemu_ram_addr_from_host(addr);
1158        if (ram_addr != RAM_ADDR_INVALID &&
1159            kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
1160            kvm_hwpoison_page_add(ram_addr);
1161            /*
1162             * If this is a BUS_MCEERR_AR, we know we have been called
1163             * synchronously from the vCPU thread, so we can easily
1164             * synchronize the state and inject an error.
1165             *
1166             * TODO: we currently don't tell the guest at all about
1167             * BUS_MCEERR_AO. In that case we might either be being
1168             * called synchronously from the vCPU thread, or a bit
1169             * later from the main thread, so doing the injection of
1170             * the error would be more complicated.
1171             */
1172            if (code == BUS_MCEERR_AR) {
1173                kvm_cpu_synchronize_state(c);
1174                if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
1175                    kvm_inject_arm_sea(c);
1176                } else {
1177                    error_report("failed to record the error");
1178                    abort();
1179                }
1180            }
1181            return;
1182        }
1183        if (code == BUS_MCEERR_AO) {
1184            error_report("Hardware memory error at addr %p for memory used by "
1185                "QEMU itself instead of guest system!", addr);
1186        }
1187    }
1188
1189    if (code == BUS_MCEERR_AR) {
1190        error_report("Hardware memory error!");
1191        exit(1);
1192    }
1193}
1194
1195/* C6.6.29 BRK instruction */
1196static const uint32_t brk_insn = 0xd4200000;
1197
1198int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1199{
1200    if (have_guest_debug) {
1201        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) ||
1202            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) {
1203            return -EINVAL;
1204        }
1205        return 0;
1206    } else {
1207        error_report("guest debug not supported on this kernel");
1208        return -EINVAL;
1209    }
1210}
1211
1212int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1213{
1214    static uint32_t brk;
1215
1216    if (have_guest_debug) {
1217        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) ||
1218            brk != brk_insn ||
1219            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) {
1220            return -EINVAL;
1221        }
1222        return 0;
1223    } else {
1224        error_report("guest debug not supported on this kernel");
1225        return -EINVAL;
1226    }
1227}
1228
1229/* See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register
1230 *
1231 * To minimise translating between kernel and user-space the kernel
1232 * ABI just provides user-space with the full exception syndrome
1233 * register value to be decoded in QEMU.
1234 */
1235
1236bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
1237{
1238    int hsr_ec = syn_get_ec(debug_exit->hsr);
1239    ARMCPU *cpu = ARM_CPU(cs);
1240    CPUARMState *env = &cpu->env;
1241
1242    /* Ensure PC is synchronised */
1243    kvm_cpu_synchronize_state(cs);
1244
1245    switch (hsr_ec) {
1246    case EC_SOFTWARESTEP:
1247        if (cs->singlestep_enabled) {
1248            return true;
1249        } else {
1250            /*
1251             * The kernel should have suppressed the guest's ability to
1252             * single step at this point so something has gone wrong.
1253             */
1254            error_report("%s: guest single-step while debugging unsupported"
1255                         " (%"PRIx64", %"PRIx32")",
1256                         __func__, env->pc, debug_exit->hsr);
1257            return false;
1258        }
1259        break;
1260    case EC_AA64_BKPT:
1261        if (kvm_find_sw_breakpoint(cs, env->pc)) {
1262            return true;
1263        }
1264        break;
1265    case EC_BREAKPOINT:
1266        if (find_hw_breakpoint(cs, env->pc)) {
1267            return true;
1268        }
1269        break;
1270    case EC_WATCHPOINT:
1271    {
1272        CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far);
1273        if (wp) {
1274            cs->watchpoint_hit = wp;
1275            return true;
1276        }
1277        break;
1278    }
1279    default:
1280        error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")",
1281                     __func__, debug_exit->hsr, env->pc);
1282    }
1283
1284    /* If we are not handling the debug exception it must belong to
1285     * the guest. Let's re-use the existing TCG interrupt code to set
1286     * everything up properly.
1287     */
1288    cs->exception_index = EXCP_BKPT;
1289    env->exception.syndrome = debug_exit->hsr;
1290    env->exception.vaddress = debug_exit->far;
1291    env->exception.target_el = 1;
1292    qemu_mutex_lock_iothread();
1293    arm_cpu_do_interrupt(cs);
1294    qemu_mutex_unlock_iothread();
1295
1296    return false;
1297}
1298
#define ARM64_REG_ESR_EL1 ARM64_SYS_REG(3, 0, 5, 2, 0)
#define ARM64_REG_TCR_EL1 ARM64_SYS_REG(3, 0, 2, 0, 2)

/*
 * ESR_EL1
 * ISS encoding
 * AARCH64: DFSC,   bits [5:0]
 * AARCH32:
 *      TTBCR.EAE == 0
 *          FS[4]   - DFSR[10]
 *          FS[3:0] - DFSR[3:0]
 *      TTBCR.EAE == 1
 *          FS, bits [5:0]
 *
 * ESR_DFSC(aarch64, lpae, v) extracts the fault status code from the
 * register value v according to the table above. For the short-descriptor
 * (TTBCR.EAE == 0) layout only DFSR[10] and DFSR[3:0] contribute: every
 * other bit of v is masked off so that it cannot disturb the comparison
 * against ESR_DFSC_EXTABT().
 */
#define ESR_DFSC(aarch64, lpae, v)                          \
    (((aarch64) || (lpae)) ? ((v) & 0x3F)                   \
                           : ((((v) >> 6) & 0x10) | ((v) & 0x0F)))

/*
 * Fault status code that ESR_DFSC() is compared against to detect an
 * external abort: 0x10 for the AArch64 and LPAE encodings, 0x8 for the
 * short-descriptor encoding.
 */
#define ESR_DFSC_EXTABT(aarch64, lpae) \
    ((aarch64) ? 0x10 : (lpae) ? 0x10 : 0x8)
1319
1320bool kvm_arm_verify_ext_dabt_pending(CPUState *cs)
1321{
1322    uint64_t dfsr_val;
1323
1324    if (!kvm_get_one_reg(cs, ARM64_REG_ESR_EL1, &dfsr_val)) {
1325        ARMCPU *cpu = ARM_CPU(cs);
1326        CPUARMState *env = &cpu->env;
1327        int aarch64_mode = arm_feature(env, ARM_FEATURE_AARCH64);
1328        int lpae = 0;
1329
1330        if (!aarch64_mode) {
1331            uint64_t ttbcr;
1332
1333            if (!kvm_get_one_reg(cs, ARM64_REG_TCR_EL1, &ttbcr)) {
1334                lpae = arm_feature(env, ARM_FEATURE_LPAE)
1335                        && (ttbcr & TTBCR_EAE);
1336            }
1337        }
1338        /*
1339         * The verification here is based on the DFSC bits
1340         * of the ESR_EL1 reg only
1341         */
1342         return (ESR_DFSC(aarch64_mode, lpae, dfsr_val) ==
1343                ESR_DFSC_EXTABT(aarch64_mode, lpae));
1344    }
1345    return false;
1346}
1347