qemu/target/arm/kvm64.c
   1/*
   2 * ARM implementation of KVM hooks, 64 bit specific code
   3 *
   4 * Copyright Mian-M. Hamayun 2013, Virtual Open Systems
   5 * Copyright Alex Bennée 2014, Linaro
   6 *
   7 * This work is licensed under the terms of the GNU GPL, version 2 or later.
   8 * See the COPYING file in the top-level directory.
   9 *
  10 */
  11
  12#include "qemu/osdep.h"
  13#include <sys/ioctl.h>
  14#include <sys/ptrace.h>
  15
  16#include <linux/elf.h>
  17#include <linux/kvm.h>
  18
  19#include "qemu-common.h"
  20#include "cpu.h"
  21#include "qemu/timer.h"
  22#include "qemu/error-report.h"
  23#include "qemu/host-utils.h"
  24#include "qemu/main-loop.h"
  25#include "exec/gdbstub.h"
  26#include "sysemu/kvm.h"
  27#include "sysemu/kvm_int.h"
  28#include "kvm_arm.h"
  29#include "hw/boards.h"
  30#include "internals.h"
  31
  32static bool have_guest_debug;
  33
  34/*
  35 * Although the ARM implementation of hardware assisted debugging
  36 * allows for different breakpoints per-core, the current GDB
  37 * interface treats them as a global pool of registers (which seems to
  38 * be the case for x86, ppc and s390). As a result we store one copy
  39 * of registers which is used for all active cores.
  40 *
  41 * Write access is serialised by virtue of the GDB protocol which
  42 * updates things. Read access (i.e. when the values are copied to the
  43 * vCPU) is also gated by GDB's run control.
  44 *
  45 * This is not unreasonable: when debugging a kernel you rarely know
  46 * which core will eventually execute the code you are interested in.
  47 */
  48
  49typedef struct {
  50    uint64_t bcr;
  51    uint64_t bvr;
  52} HWBreakpoint;
  53
  54/* The watchpoint registers can cover more area than the requested
  55 * watchpoint so we need to store the additional information
  56 * somewhere. We also need to supply a CPUWatchpoint to the GDB stub
  57 * when the watchpoint is hit.
  58 */
  59typedef struct {
  60    uint64_t wcr;
  61    uint64_t wvr;
  62    CPUWatchpoint details;
  63} HWWatchpoint;
  64
  65/* Maximum and current break/watch point counts */
  66int max_hw_bps, max_hw_wps;
  67GArray *hw_breakpoints, *hw_watchpoints;
  68
  69#define cur_hw_wps      (hw_watchpoints->len)
  70#define cur_hw_bps      (hw_breakpoints->len)
  71#define get_hw_bp(i)    (&g_array_index(hw_breakpoints, HWBreakpoint, i))
  72#define get_hw_wp(i)    (&g_array_index(hw_watchpoints, HWWatchpoint, i))
  73
  74/**
  75 * kvm_arm_init_debug() - check for guest debug capabilities
  76 * @cs: CPUState
  77 *
  78 * kvm_check_extension returns the number of debug registers we have
  79 * or 0 if we have none.
  80 *
  81 */
  82static void kvm_arm_init_debug(CPUState *cs)
  83{
  84    have_guest_debug = kvm_check_extension(cs->kvm_state,
  85                                           KVM_CAP_SET_GUEST_DEBUG);
  86
  87    max_hw_wps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_WPS);
  88    hw_watchpoints = g_array_sized_new(true, true,
  89                                       sizeof(HWWatchpoint), max_hw_wps);
  90
  91    max_hw_bps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_BPS);
  92    hw_breakpoints = g_array_sized_new(true, true,
  93                                       sizeof(HWBreakpoint), max_hw_bps);
  94    return;
  95}
  96
  97/**
  98 * insert_hw_breakpoint()
  99 * @addr: address of breakpoint
 100 *
 101 * See ARM ARM D2.9.1 for details but here we are only going to create
 102 * simple un-linked breakpoints (i.e. we don't chain breakpoints
 103 * together to match address and context or vmid). The hardware is
 104 * capable of fancier matching but that will require exposing that
 105 * fanciness to GDB's interface
 106 *
 107 * DBGBCR<n>_EL1, Debug Breakpoint Control Registers
 108 *
 109 *  31  24 23  20 19   16 15 14  13  12   9 8   5 4    3 2   1  0
 110 * +------+------+-------+-----+----+------+-----+------+-----+---+
 111 * | RES0 |  BT  |  LBN  | SSC | HMC| RES0 | BAS | RES0 | PMC | E |
 112 * +------+------+-------+-----+----+------+-----+------+-----+---+
 113 *
 114 * BT: Breakpoint type (0 = unlinked address match)
 115 * LBN: Linked BP number (0 = unused)
 116 * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12)
 117 * BAS: Byte Address Select (RES1 for AArch64)
 118 * E: Enable bit
 119 *
 120 * DBGBVR<n>_EL1, Debug Breakpoint Value Registers
 121 *
 122 *  63  53 52       49 48       2  1 0
 123 * +------+-----------+----------+-----+
 124 * | RESS | VA[52:49] | VA[48:2] | 0 0 |
 125 * +------+-----------+----------+-----+
 126 *
 127 * Depending on the addressing mode bits the top bits of the register
 128 * are a sign extension of the highest applicable VA bit. Some versions
 129 * of GDB don't supply correctly sign-extended addresses, so we make sure
 130 * they are correct here so that future PC comparisons work as expected.
 131 */
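/*
 * As a worked example of the encoding built below: insert_hw_breakpoint()
 * sets E=1 (bit 0), PMC=0b11 (bits [2:1]) and BAS=0b1111 (bits [8:5]),
 * giving BCR = 0x1e7, i.e. a simple unlinked address-match breakpoint
 * that fires at EL1 and EL0, while BVR holds the sign-extended breakpoint
 * address.
 */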
 132
 133static int insert_hw_breakpoint(target_ulong addr)
 134{
 135    HWBreakpoint brk = {
 136        .bcr = 0x1,                             /* BCR E=1, enable */
 137        .bvr = sextract64(addr, 0, 53)
 138    };
 139
 140    if (cur_hw_bps >= max_hw_bps) {
 141        return -ENOBUFS;
 142    }
 143
 144    brk.bcr = deposit32(brk.bcr, 1, 2, 0x3);   /* PMC = 11 */
 145    brk.bcr = deposit32(brk.bcr, 5, 4, 0xf);   /* BAS = RES1 */
 146
 147    g_array_append_val(hw_breakpoints, brk);
 148
 149    return 0;
 150}
 151
 152/**
 153 * delete_hw_breakpoint()
 154 * @pc: address of breakpoint
 155 *
 156 * Delete a breakpoint and shuffle any above down
 157 */
 158
 159static int delete_hw_breakpoint(target_ulong pc)
 160{
 161    int i;
 162    for (i = 0; i < hw_breakpoints->len; i++) {
 163        HWBreakpoint *brk = get_hw_bp(i);
 164        if (brk->bvr == pc) {
 165            g_array_remove_index(hw_breakpoints, i);
 166            return 0;
 167        }
 168    }
 169    return -ENOENT;
 170}
 171
 172/**
 173 * insert_hw_watchpoint()
 174 * @addr: address of watch point
 175 * @len: size of area
 176 * @type: type of watch point
 177 *
 178 * See ARM ARM D2.10. As with the breakpoints we can do some advanced
 179 * stuff if we want to. The watch points can be linked with the break
 180 * points above to make them context aware. However for simplicity
 181 * currently we only deal with simple read/write watch points.
 182 *
 183 * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers
 184 *
 185 *  31  29 28   24 23  21  20  19 16 15 14  13   12  5 4   3 2   1  0
 186 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 187 * | RES0 |  MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E |
 188 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 189 *
 190 * MASK: num bits addr mask (0=none,01/10=res,11=3 bits (8 bytes))
 191 * WT: 0 - unlinked, 1 - linked (not currently used)
 192 * LBN: Linked BP number (not currently used)
 193 * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11)
 194 * BAS: Byte Address Select
 195 * LSC: Load/Store control (01: load, 10: store, 11: both)
 196 * E: Enable
 197 *
 198 * The bottom 2 bits of the value register are masked. Therefore to
 199 * watch a region of up to 8 bytes (possibly unaligned) set MASK=0 and
 200 * BAS to one bit per byte to be watched. For larger regions (which must
 201 * be a power of 2 in size) mask the address as required and set BAS=0xff.
 202 */
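/*
 * Worked example of the encoding insert_hw_watchpoint() produces for a
 * 2-byte write watchpoint at address 0x1006: WVR = 0x1000 (addr & ~7) and
 * WCR = 0x1817 (E=1, PAC=0b11, LSC=0b10 for stores, BAS=0b11 shifted up by
 * the byte offset 6, selecting bytes 6 and 7 of the doubleword at WVR).
 * A 16-byte watchpoint instead takes the power-of-2 path: the address is
 * masked down to a 16-byte boundary, MASK=4 and BAS=0xff.
 */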
 203
 204static int insert_hw_watchpoint(target_ulong addr,
 205                                target_ulong len, int type)
 206{
 207    HWWatchpoint wp = {
 208        .wcr = 1, /* E=1, enable */
 209        .wvr = addr & (~0x7ULL),
 210        .details = { .vaddr = addr, .len = len }
 211    };
 212
 213    if (cur_hw_wps >= max_hw_wps) {
 214        return -ENOBUFS;
 215    }
 216
 217    /*
 218     * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state,
 219     * valid whether EL3 is implemented or not
 220     */
 221    wp.wcr = deposit32(wp.wcr, 1, 2, 3);
 222
 223    switch (type) {
 224    case GDB_WATCHPOINT_READ:
 225        wp.wcr = deposit32(wp.wcr, 3, 2, 1);
 226        wp.details.flags = BP_MEM_READ;
 227        break;
 228    case GDB_WATCHPOINT_WRITE:
 229        wp.wcr = deposit32(wp.wcr, 3, 2, 2);
 230        wp.details.flags = BP_MEM_WRITE;
 231        break;
 232    case GDB_WATCHPOINT_ACCESS:
 233        wp.wcr = deposit32(wp.wcr, 3, 2, 3);
 234        wp.details.flags = BP_MEM_ACCESS;
 235        break;
 236    default:
 237        g_assert_not_reached();
 238        break;
 239    }
 240    if (len <= 8) {
 241        /* we align the address and set the bits in BAS */
 242        int off = addr & 0x7;
 243        int bas = (1 << len) - 1;
 244
 245        wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas);
 246    } else {
 247        /* For ranges above 8 bytes we need to be a power of 2 */
 248        if (is_power_of_2(len)) {
 249            int bits = ctz64(len);
 250
 251            wp.wvr &= ~((1ULL << bits) - 1);
 252            wp.wcr = deposit32(wp.wcr, 24, 4, bits);
 253            wp.wcr = deposit32(wp.wcr, 5, 8, 0xff);
 254        } else {
 255            return -ENOBUFS;
 256        }
 257    }
 258
 259    g_array_append_val(hw_watchpoints, wp);
 260    return 0;
 261}
 262
 263
 264static bool check_watchpoint_in_range(int i, target_ulong addr)
 265{
 266    HWWatchpoint *wp = get_hw_wp(i);
 267    uint64_t addr_top, addr_bottom = wp->wvr;
 268    int bas = extract32(wp->wcr, 5, 8);
 269    int mask = extract32(wp->wcr, 24, 4);
 270
 271    if (mask) {
 272        addr_top = addr_bottom + (1 << mask);
 273    } else {
 274        /* BAS must be a contiguous run of set bits, but it can be
 275         * offset against the base address in DBGWVR */
 276        addr_bottom = addr_bottom + ctz32(bas);
 277        addr_top = addr_bottom + ctpop32(bas) - 1;
 278    }
 279
 280    if (addr >= addr_bottom && addr <= addr_top) {
 281        return true;
 282    }
 283
 284    return false;
 285}
 286
 287/**
 288 * delete_hw_watchpoint()
 289 * @addr: address of breakpoint
 290 *
 291 * Delete a breakpoint and shuffle any above down
 292 */
 293
 294static int delete_hw_watchpoint(target_ulong addr,
 295                                target_ulong len, int type)
 296{
 297    int i;
 298    for (i = 0; i < cur_hw_wps; i++) {
 299        if (check_watchpoint_in_range(i, addr)) {
 300            g_array_remove_index(hw_watchpoints, i);
 301            return 0;
 302        }
 303    }
 304    return -ENOENT;
 305}
 306
 307
 308int kvm_arch_insert_hw_breakpoint(target_ulong addr,
 309                                  target_ulong len, int type)
 310{
 311    switch (type) {
 312    case GDB_BREAKPOINT_HW:
 313        return insert_hw_breakpoint(addr);
 314        break;
 315    case GDB_WATCHPOINT_READ:
 316    case GDB_WATCHPOINT_WRITE:
 317    case GDB_WATCHPOINT_ACCESS:
 318        return insert_hw_watchpoint(addr, len, type);
 319    default:
 320        return -ENOSYS;
 321    }
 322}
 323
 324int kvm_arch_remove_hw_breakpoint(target_ulong addr,
 325                                  target_ulong len, int type)
 326{
 327    switch (type) {
 328    case GDB_BREAKPOINT_HW:
 329        return delete_hw_breakpoint(addr);
 330        break;
 331    case GDB_WATCHPOINT_READ:
 332    case GDB_WATCHPOINT_WRITE:
 333    case GDB_WATCHPOINT_ACCESS:
 334        return delete_hw_watchpoint(addr, len, type);
 335    default:
 336        return -ENOSYS;
 337    }
 338}
 339
 340
 341void kvm_arch_remove_all_hw_breakpoints(void)
 342{
 343    if (cur_hw_wps > 0) {
 344        g_array_remove_range(hw_watchpoints, 0, cur_hw_wps);
 345    }
 346    if (cur_hw_bps > 0) {
 347        g_array_remove_range(hw_breakpoints, 0, cur_hw_bps);
 348    }
 349}
 350
 351void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr)
 352{
 353    int i;
 354    memset(ptr, 0, sizeof(struct kvm_guest_debug_arch));
 355
 356    for (i = 0; i < max_hw_wps; i++) {
 357        HWWatchpoint *wp = get_hw_wp(i);
 358        ptr->dbg_wcr[i] = wp->wcr;
 359        ptr->dbg_wvr[i] = wp->wvr;
 360    }
 361    for (i = 0; i < max_hw_bps; i++) {
 362        HWBreakpoint *bp = get_hw_bp(i);
 363        ptr->dbg_bcr[i] = bp->bcr;
 364        ptr->dbg_bvr[i] = bp->bvr;
 365    }
 366}
 367
 368bool kvm_arm_hw_debug_active(CPUState *cs)
 369{
 370    return ((cur_hw_wps > 0) || (cur_hw_bps > 0));
 371}
 372
 373static bool find_hw_breakpoint(CPUState *cpu, target_ulong pc)
 374{
 375    int i;
 376
 377    for (i = 0; i < cur_hw_bps; i++) {
 378        HWBreakpoint *bp = get_hw_bp(i);
 379        if (bp->bvr == pc) {
 380            return true;
 381        }
 382    }
 383    return false;
 384}
 385
 386static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr)
 387{
 388    int i;
 389
 390    for (i = 0; i < cur_hw_wps; i++) {
 391        if (check_watchpoint_in_range(i, addr)) {
 392            return &get_hw_wp(i)->details;
 393        }
 394    }
 395    return NULL;
 396}
 397
 398static bool kvm_arm_pmu_set_attr(CPUState *cs, struct kvm_device_attr *attr)
 399{
 400    int err;
 401
 402    err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr);
 403    if (err != 0) {
 404        error_report("PMU: KVM_HAS_DEVICE_ATTR: %s", strerror(-err));
 405        return false;
 406    }
 407
 408    err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr);
 409    if (err != 0) {
 410        error_report("PMU: KVM_SET_DEVICE_ATTR: %s", strerror(-err));
 411        return false;
 412    }
 413
 414    return true;
 415}
 416
 417void kvm_arm_pmu_init(CPUState *cs)
 418{
 419    struct kvm_device_attr attr = {
 420        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
 421        .attr = KVM_ARM_VCPU_PMU_V3_INIT,
 422    };
 423
 424    if (!ARM_CPU(cs)->has_pmu) {
 425        return;
 426    }
 427    if (!kvm_arm_pmu_set_attr(cs, &attr)) {
 428        error_report("failed to init PMU");
 429        abort();
 430    }
 431}
 432
 433void kvm_arm_pmu_set_irq(CPUState *cs, int irq)
 434{
 435    struct kvm_device_attr attr = {
 436        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
 437        .addr = (intptr_t)&irq,
 438        .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
 439    };
 440
 441    if (!ARM_CPU(cs)->has_pmu) {
 442        return;
 443    }
 444    if (!kvm_arm_pmu_set_attr(cs, &attr)) {
 445        error_report("failed to set irq for PMU");
 446        abort();
 447    }
 448}
 449
 450static inline void set_feature(uint64_t *features, int feature)
 451{
 452    *features |= 1ULL << feature;
 453}
 454
 455static inline void unset_feature(uint64_t *features, int feature)
 456{
 457    *features &= ~(1ULL << feature);
 458}
 459
 460static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id)
 461{
 462    uint64_t ret;
 463    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret };
 464    int err;
 465
 466    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
 467    err = ioctl(fd, KVM_GET_ONE_REG, &idreg);
 468    if (err < 0) {
 469        return -1;
 470    }
 471    *pret = ret;
 472    return 0;
 473}
 474
 475static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id)
 476{
 477    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret };
 478
 479    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
 480    return ioctl(fd, KVM_GET_ONE_REG, &idreg);
 481}
 482
 483bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
 484{
 485    /* Identify the feature bits corresponding to the host CPU, and
 486     * fill out the ARMHostCPUClass fields accordingly. To do this
 487     * we have to create a scratch VM, create a single CPU inside it,
 488     * and then query that CPU for the relevant ID registers.
 489     */
 490    int fdarray[3];
 491    bool sve_supported;
 492    uint64_t features = 0;
 493    uint64_t t;
 494    int err;
 495
 496    /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
 497     * we know these will only support creating one kind of guest CPU,
 498     * which is their preferred CPU type. Fortunately these old kernels
 499     * support only a very limited number of CPUs.
 500     */
 501    static const uint32_t cpus_to_try[] = {
 502        KVM_ARM_TARGET_AEM_V8,
 503        KVM_ARM_TARGET_FOUNDATION_V8,
 504        KVM_ARM_TARGET_CORTEX_A57,
 505        QEMU_KVM_ARM_TARGET_NONE
 506    };
 507    /*
 508     * target = -1 informs kvm_arm_create_scratch_host_vcpu()
 509     * to use the preferred target
 510     */
 511    struct kvm_vcpu_init init = { .target = -1, };
 512
 513    if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
 514        return false;
 515    }
 516
 517    ahcf->target = init.target;
 518    ahcf->dtb_compatible = "arm,arm-v8";
 519
 520    err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0,
 521                         ARM64_SYS_REG(3, 0, 0, 4, 0));
 522    if (unlikely(err < 0)) {
 523        /*
 524         * Before v4.15, the kernel only exposed a limited number of system
 525         * registers, not including any of the interesting AArch64 ID regs.
 526         * For the most part we could leave these fields as zero with minimal
 527         * effect, since this does not affect the values seen by the guest.
 528         *
 529         * However, it could cause problems down the line for QEMU,
 530         * so provide a minimal v8.0 default.
 531         *
 532         * ??? Could read MIDR and use knowledge from cpu64.c.
 533         * ??? Could map a page of memory into our temp guest and
 534         *     run the tiniest of hand-crafted kernels to extract
 535         *     the values seen by the guest.
 536         * ??? Either of these sounds like too much effort just
 537         *     to work around running a modern host kernel.
 538         */
 539        ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */
 540        err = 0;
 541    } else {
 542        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1,
 543                              ARM64_SYS_REG(3, 0, 0, 4, 1));
 544        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0,
 545                              ARM64_SYS_REG(3, 0, 0, 6, 0));
 546        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
 547                              ARM64_SYS_REG(3, 0, 0, 6, 1));
 548        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0,
 549                              ARM64_SYS_REG(3, 0, 0, 7, 0));
 550        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1,
 551                              ARM64_SYS_REG(3, 0, 0, 7, 1));
 552
 553        /*
 554         * Note that if AArch32 support is not present in the host,
 555         * the AArch32 sysregs are present to be read, but will
 556         * return UNKNOWN values.  This is neither better nor worse
 557         * than skipping the reads and leaving 0, as we must avoid
 558         * considering the values in every case.
 559         */
 560        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0,
 561                              ARM64_SYS_REG(3, 0, 0, 2, 0));
 562        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1,
 563                              ARM64_SYS_REG(3, 0, 0, 2, 1));
 564        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2,
 565                              ARM64_SYS_REG(3, 0, 0, 2, 2));
 566        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3,
 567                              ARM64_SYS_REG(3, 0, 0, 2, 3));
 568        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4,
 569                              ARM64_SYS_REG(3, 0, 0, 2, 4));
 570        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5,
 571                              ARM64_SYS_REG(3, 0, 0, 2, 5));
 572        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6,
 573                              ARM64_SYS_REG(3, 0, 0, 2, 7));
 574
 575        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0,
 576                              ARM64_SYS_REG(3, 0, 0, 3, 0));
 577        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1,
 578                              ARM64_SYS_REG(3, 0, 0, 3, 1));
 579        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2,
 580                              ARM64_SYS_REG(3, 0, 0, 3, 2));
 581    }
 582
 583    sve_supported = ioctl(fdarray[0], KVM_CHECK_EXTENSION, KVM_CAP_ARM_SVE) > 0;
 584
 585    kvm_arm_destroy_scratch_host_vcpu(fdarray);
 586
 587    if (err < 0) {
 588        return false;
 589    }
 590
 591    /* Add feature bits that can't appear until after VCPU init. */
 592    if (sve_supported) {
 593        t = ahcf->isar.id_aa64pfr0;
 594        t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1);
 595        ahcf->isar.id_aa64pfr0 = t;
 596    }
 597
 598    /*
 599     * We can assume any KVM supporting CPU is at least a v8
 600     * with VFPv4+Neon; this in turn implies most of the other
 601     * feature bits.
 602     */
 603    set_feature(&features, ARM_FEATURE_V8);
 604    set_feature(&features, ARM_FEATURE_VFP4);
 605    set_feature(&features, ARM_FEATURE_NEON);
 606    set_feature(&features, ARM_FEATURE_AARCH64);
 607    set_feature(&features, ARM_FEATURE_PMU);
 608
 609    ahcf->features = features;
 610
 611    return true;
 612}
 613
 614bool kvm_arm_aarch32_supported(CPUState *cpu)
 615{
 616    KVMState *s = KVM_STATE(current_machine->accelerator);
 617
 618    return kvm_check_extension(s, KVM_CAP_ARM_EL1_32BIT);
 619}
 620
 621bool kvm_arm_sve_supported(CPUState *cpu)
 622{
 623    KVMState *s = KVM_STATE(current_machine->accelerator);
 624
 625    return kvm_check_extension(s, KVM_CAP_ARM_SVE);
 626}
 627
 628QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
 629
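/*
 * KVM_REG_ARM64_SVE_VLS is a bitmap of the supported vector lengths:
 * bit (vq - 1) of word (vq - 1) / 64 is set when vectors of vq * 128
 * bits are supported, so for example bit 0 of vls[0] covers 128-bit
 * vectors and bit 1 covers 256-bit vectors.
 */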
 630void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)
 631{
 632    /* Only call this function if kvm_arm_sve_supported() returns true. */
 633    static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS];
 634    static bool probed;
 635    uint32_t vq = 0;
 636    int i, j;
 637
 638    bitmap_clear(map, 0, ARM_MAX_VQ);
 639
 640    /*
 641     * KVM ensures all host CPUs support the same set of vector lengths.
 642     * So we only need to create the scratch VCPUs once and then cache
 643     * the results.
 644     */
 645    if (!probed) {
 646        struct kvm_vcpu_init init = {
 647            .target = -1,
 648            .features[0] = (1 << KVM_ARM_VCPU_SVE),
 649        };
 650        struct kvm_one_reg reg = {
 651            .id = KVM_REG_ARM64_SVE_VLS,
 652            .addr = (uint64_t)&vls[0],
 653        };
 654        int fdarray[3], ret;
 655
 656        probed = true;
 657
 658        if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) {
 659            error_report("failed to create scratch VCPU with SVE enabled");
 660            abort();
 661        }
 662        ret = ioctl(fdarray[2], KVM_GET_ONE_REG, &reg);
 663        kvm_arm_destroy_scratch_host_vcpu(fdarray);
 664        if (ret) {
 665            error_report("failed to get KVM_REG_ARM64_SVE_VLS: %s",
 666                         strerror(errno));
 667            abort();
 668        }
 669
 670        for (i = KVM_ARM64_SVE_VLS_WORDS - 1; i >= 0; --i) {
 671            if (vls[i]) {
 672                vq = 64 - clz64(vls[i]) + i * 64;
 673                break;
 674            }
 675        }
 676        if (vq > ARM_MAX_VQ) {
 677            warn_report("KVM supports vector lengths larger than "
 678                        "QEMU can enable");
 679        }
 680    }
 681
 682    for (i = 0; i < KVM_ARM64_SVE_VLS_WORDS; ++i) {
 683        if (!vls[i]) {
 684            continue;
 685        }
 686        for (j = 1; j <= 64; ++j) {
 687            vq = j + i * 64;
 688            if (vq > ARM_MAX_VQ) {
 689                return;
 690            }
 691            if (vls[i] & (1UL << (j - 1))) {
 692                set_bit(vq - 1, map);
 693            }
 694        }
 695    }
 696}
 697
 698static int kvm_arm_sve_set_vls(CPUState *cs)
 699{
 700    uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = {0};
 701    struct kvm_one_reg reg = {
 702        .id = KVM_REG_ARM64_SVE_VLS,
 703        .addr = (uint64_t)&vls[0],
 704    };
 705    ARMCPU *cpu = ARM_CPU(cs);
 706    uint32_t vq;
 707    int i, j;
 708
 709    assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX);
 710
 711    for (vq = 1; vq <= cpu->sve_max_vq; ++vq) {
 712        if (test_bit(vq - 1, cpu->sve_vq_map)) {
 713            i = (vq - 1) / 64;
 714            j = (vq - 1) % 64;
 715            vls[i] |= 1UL << j;
 716        }
 717    }
 718
 719    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 720}
 721
 722#define ARM_CPU_ID_MPIDR       3, 0, 0, 0, 5
 723
 724int kvm_arch_init_vcpu(CPUState *cs)
 725{
 726    int ret;
 727    uint64_t mpidr;
 728    ARMCPU *cpu = ARM_CPU(cs);
 729    CPUARMState *env = &cpu->env;
 730
 731    if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
 732        !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
 733        error_report("KVM is not supported for this guest CPU type");
 734        return -EINVAL;
 735    }
 736
 737    /* Determine init features for this CPU */
 738    memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
 739    if (cpu->start_powered_off) {
 740        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
 741    }
 742    if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
 743        cpu->psci_version = 2;
 744        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
 745    }
 746    if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
 747        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
 748    }
 749    if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
 750        cpu->has_pmu = false;
 751    }
 752    if (cpu->has_pmu) {
 753        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
 754    } else {
 755        unset_feature(&env->features, ARM_FEATURE_PMU);
 756    }
 757    if (cpu_isar_feature(aa64_sve, cpu)) {
 758        assert(kvm_arm_sve_supported(cs));
 759        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE;
 760    }
 761
 762    /* Do KVM_ARM_VCPU_INIT ioctl */
 763    ret = kvm_arm_vcpu_init(cs);
 764    if (ret) {
 765        return ret;
 766    }
 767
 768    if (cpu_isar_feature(aa64_sve, cpu)) {
 769        ret = kvm_arm_sve_set_vls(cs);
 770        if (ret) {
 771            return ret;
 772        }
 773        ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_SVE);
 774        if (ret) {
 775            return ret;
 776        }
 777    }
 778
 779    /*
 780     * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
 781     * Currently KVM has its own idea about MPIDR assignment, so we
 782     * override our defaults with what we get from KVM.
 783     */
 784    ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr);
 785    if (ret) {
 786        return ret;
 787    }
 788    cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;
 789
 790    kvm_arm_init_debug(cs);
 791
 792    /* Check whether user space can specify guest syndrome value */
 793    kvm_arm_init_serror_injection(cs);
 794
 795    return kvm_arm_init_cpreg_list(cpu);
 796}
 797
 798int kvm_arch_destroy_vcpu(CPUState *cs)
 799{
 800    return 0;
 801}
 802
 803bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
 804{
 805    /* Return true if the regidx is a register we should synchronize
 806     * via the cpreg_tuples array (ie is not a core or sve reg that
 807     * we sync by hand in kvm_arch_get/put_registers())
 808     */
 809    switch (regidx & KVM_REG_ARM_COPROC_MASK) {
 810    case KVM_REG_ARM_CORE:
 811    case KVM_REG_ARM64_SVE:
 812        return false;
 813    default:
 814        return true;
 815    }
 816}
 817
 818typedef struct CPRegStateLevel {
 819    uint64_t regidx;
 820    int level;
 821} CPRegStateLevel;
 822
 823/* All system registers not listed in the following table are assumed to be
 824 * of the level KVM_PUT_RUNTIME_STATE. If a register should be written less
 825 * often, you must add it to this table with a state of either
 826 * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
 827 */
 828static const CPRegStateLevel non_runtime_cpregs[] = {
 829    { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
 830};
 831
 832int kvm_arm_cpreg_level(uint64_t regidx)
 833{
 834    int i;
 835
 836    for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) {
 837        const CPRegStateLevel *l = &non_runtime_cpregs[i];
 838        if (l->regidx == regidx) {
 839            return l->level;
 840        }
 841    }
 842
 843    return KVM_PUT_RUNTIME_STATE;
 844}
 845
 846#define AARCH64_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
 847                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
 848
 849#define AARCH64_SIMD_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
 850                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
 851
 852#define AARCH64_SIMD_CTRL_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
 853                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
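/*
 * On the kernel side KVM_REG_ARM_CORE_REG(x) is offsetof(struct kvm_regs, x)
 * expressed in 32-bit words, so e.g. AARCH64_CORE_REG(regs.pc) names the
 * 64-bit core register backed by kvm_regs.regs.pc.
 */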
 854
 855static int kvm_arch_put_fpsimd(CPUState *cs)
 856{
 857    CPUARMState *env = &ARM_CPU(cs)->env;
 858    struct kvm_one_reg reg;
 859    int i, ret;
 860
 861    for (i = 0; i < 32; i++) {
 862        uint64_t *q = aa64_vfp_qreg(env, i);
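        /*
         * KVM_SET_ONE_REG copies these 16 bytes straight into the
         * kernel's __uint128_t vreg, so the buffer must hold the value
         * in host byte order. QEMU keeps Q registers as two uint64_t
         * with the least significant half at index 0, which already
         * matches on little-endian hosts but needs the halves swapping
         * on big-endian ones.
         */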
 863#ifdef HOST_WORDS_BIGENDIAN
 864        uint64_t fp_val[2] = { q[1], q[0] };
 865        reg.addr = (uintptr_t)fp_val;
 866#else
 867        reg.addr = (uintptr_t)q;
 868#endif
 869        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
 870        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 871        if (ret) {
 872            return ret;
 873        }
 874    }
 875
 876    return 0;
 877}
 878
 879/*
 880 * SVE registers are encoded in KVM's memory in an endianness-invariant format.
 881 * The byte at offset i from the start of the in-memory representation contains
 882 * the bits [(7 + 8 * i) : (8 * i)] of the register value. Since this means
 883 * the lowest offsets are stored at the lowest memory addresses, it nearly
 884 * matches QEMU's representation, which is to use an array of host-endian
 885 * uint64_t's, where the lower offsets are at the lower indices. To complete
 886 * the translation we just need to byte swap the uint64_t's on big-endian hosts.
 887 */
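/*
 * For example, a Z-register doubleword holding 0x0706050403020100 is
 * presented by KVM as the bytes 00 01 02 ... 07 at increasing offsets.
 * A little-endian host reading those bytes as a uint64_t sees the
 * original value, while a big-endian host would see 0x0001020304050607,
 * hence the per-doubleword byte swap below.
 */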
 888static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
 889{
 890#ifdef HOST_WORDS_BIGENDIAN
 891    int i;
 892
 893    for (i = 0; i < nr; ++i) {
 894        dst[i] = bswap64(src[i]);
 895    }
 896
 897    return dst;
 898#else
 899    return src;
 900#endif
 901}
 902
 903/*
 904 * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
 905 * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
 906 * code the slice index to zero for now as it's unlikely we'll need more than
 907 * one slice for quite some time.
 908 */
 909static int kvm_arch_put_sve(CPUState *cs)
 910{
 911    ARMCPU *cpu = ARM_CPU(cs);
 912    CPUARMState *env = &cpu->env;
 913    uint64_t tmp[ARM_MAX_VQ * 2];
 914    uint64_t *r;
 915    struct kvm_one_reg reg;
 916    int n, ret;
 917
 918    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
 919        r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2);
 920        reg.addr = (uintptr_t)r;
 921        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
 922        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 923        if (ret) {
 924            return ret;
 925        }
 926    }
 927
 928    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
 929        r = sve_bswap64(tmp, &env->vfp.pregs[n].p[0],
 930                        DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
 931        reg.addr = (uintptr_t)r;
 932        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
 933        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 934        if (ret) {
 935            return ret;
 936        }
 937    }
 938
 939    r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0],
 940                    DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
 941    reg.addr = (uintptr_t)r;
 942    reg.id = KVM_REG_ARM64_SVE_FFR(0);
 943    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 944    if (ret) {
 945        return ret;
 946    }
 947
 948    return 0;
 949}
 950
 951int kvm_arch_put_registers(CPUState *cs, int level)
 952{
 953    struct kvm_one_reg reg;
 954    uint64_t val;
 955    uint32_t fpr;
 956    int i, ret;
 957    unsigned int el;
 958
 959    ARMCPU *cpu = ARM_CPU(cs);
 960    CPUARMState *env = &cpu->env;
 961
 962    /* If we are in AArch32 mode then we need to copy the AArch32 regs to the
 963     * AArch64 registers before pushing them out to 64-bit KVM.
 964     */
 965    if (!is_a64(env)) {
 966        aarch64_sync_32_to_64(env);
 967    }
 968
 969    for (i = 0; i < 31; i++) {
 970        reg.id = AARCH64_CORE_REG(regs.regs[i]);
 971        reg.addr = (uintptr_t) &env->xregs[i];
 972        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 973        if (ret) {
 974            return ret;
 975        }
 976    }
 977
 978    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
 979     * QEMU side we keep the current SP in xregs[31] as well.
 980     */
 981    aarch64_save_sp(env, 1);
 982
 983    reg.id = AARCH64_CORE_REG(regs.sp);
 984    reg.addr = (uintptr_t) &env->sp_el[0];
 985    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 986    if (ret) {
 987        return ret;
 988    }
 989
 990    reg.id = AARCH64_CORE_REG(sp_el1);
 991    reg.addr = (uintptr_t) &env->sp_el[1];
 992    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 993    if (ret) {
 994        return ret;
 995    }
 996
 997    /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */
 998    if (is_a64(env)) {
 999        val = pstate_read(env);
1000    } else {
1001        val = cpsr_read(env);
1002    }
1003    reg.id = AARCH64_CORE_REG(regs.pstate);
1004    reg.addr = (uintptr_t) &val;
1005    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1006    if (ret) {
1007        return ret;
1008    }
1009
1010    reg.id = AARCH64_CORE_REG(regs.pc);
1011    reg.addr = (uintptr_t) &env->pc;
1012    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1013    if (ret) {
1014        return ret;
1015    }
1016
1017    reg.id = AARCH64_CORE_REG(elr_el1);
1018    reg.addr = (uintptr_t) &env->elr_el[1];
1019    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1020    if (ret) {
1021        return ret;
1022    }
1023
1024    /* Saved Program State Registers
1025     *
1026     * Before we restore from the banked_spsr[] array we need to
1027     * ensure that any modifications to env->spsr are correctly
1028     * reflected in the banks.
1029     */
1030    el = arm_current_el(env);
1031    if (el > 0 && !is_a64(env)) {
1032        i = bank_number(env->uncached_cpsr & CPSR_M);
1033        env->banked_spsr[i] = env->spsr;
1034    }
1035
1036    /* KVM 0-4 map to QEMU banks 1-5 */
1037    for (i = 0; i < KVM_NR_SPSR; i++) {
1038        reg.id = AARCH64_CORE_REG(spsr[i]);
1039        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
1040        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1041        if (ret) {
1042            return ret;
1043        }
1044    }
1045
1046    if (cpu_isar_feature(aa64_sve, cpu)) {
1047        ret = kvm_arch_put_sve(cs);
1048    } else {
1049        ret = kvm_arch_put_fpsimd(cs);
1050    }
1051    if (ret) {
1052        return ret;
1053    }
1054
1055    reg.addr = (uintptr_t)(&fpr);
1056    fpr = vfp_get_fpsr(env);
1057    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
1058    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1059    if (ret) {
1060        return ret;
1061    }
1062
1063    reg.addr = (uintptr_t)(&fpr);
1064    fpr = vfp_get_fpcr(env);
1065    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
1066    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1067    if (ret) {
1068        return ret;
1069    }
1070
1071    ret = kvm_put_vcpu_events(cpu);
1072    if (ret) {
1073        return ret;
1074    }
1075
1076    write_cpustate_to_list(cpu, true);
1077
1078    if (!write_list_to_kvmstate(cpu, level)) {
1079        return -EINVAL;
1080    }
1081
1082    kvm_arm_sync_mpstate_to_kvm(cpu);
1083
1084    return ret;
1085}
1086
1087static int kvm_arch_get_fpsimd(CPUState *cs)
1088{
1089    CPUARMState *env = &ARM_CPU(cs)->env;
1090    struct kvm_one_reg reg;
1091    int i, ret;
1092
1093    for (i = 0; i < 32; i++) {
1094        uint64_t *q = aa64_vfp_qreg(env, i);
1095        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
1096        reg.addr = (uintptr_t)q;
1097        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1098        if (ret) {
1099            return ret;
1100        } else {
1101#ifdef HOST_WORDS_BIGENDIAN
1102            uint64_t t;
1103            t = q[0], q[0] = q[1], q[1] = t;
1104#endif
1105        }
1106    }
1107
1108    return 0;
1109}
1110
1111/*
1112 * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
1113 * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
1114 * code the slice index to zero for now as it's unlikely we'll need more than
1115 * one slice for quite some time.
1116 */
1117static int kvm_arch_get_sve(CPUState *cs)
1118{
1119    ARMCPU *cpu = ARM_CPU(cs);
1120    CPUARMState *env = &cpu->env;
1121    struct kvm_one_reg reg;
1122    uint64_t *r;
1123    int n, ret;
1124
1125    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
1126        r = &env->vfp.zregs[n].d[0];
1127        reg.addr = (uintptr_t)r;
1128        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
1129        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1130        if (ret) {
1131            return ret;
1132        }
1133        sve_bswap64(r, r, cpu->sve_max_vq * 2);
1134    }
1135
1136    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
1137        r = &env->vfp.pregs[n].p[0];
1138        reg.addr = (uintptr_t)r;
1139        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
1140        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1141        if (ret) {
1142            return ret;
1143        }
1144        sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
1145    }
1146
1147    r = &env->vfp.pregs[FFR_PRED_NUM].p[0];
1148    reg.addr = (uintptr_t)r;
1149    reg.id = KVM_REG_ARM64_SVE_FFR(0);
1150    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1151    if (ret) {
1152        return ret;
1153    }
1154    sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
1155
1156    return 0;
1157}
1158
1159int kvm_arch_get_registers(CPUState *cs)
1160{
1161    struct kvm_one_reg reg;
1162    uint64_t val;
1163    unsigned int el;
1164    uint32_t fpr;
1165    int i, ret;
1166
1167    ARMCPU *cpu = ARM_CPU(cs);
1168    CPUARMState *env = &cpu->env;
1169
1170    for (i = 0; i < 31; i++) {
1171        reg.id = AARCH64_CORE_REG(regs.regs[i]);
1172        reg.addr = (uintptr_t) &env->xregs[i];
1173        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1174        if (ret) {
1175            return ret;
1176        }
1177    }
1178
1179    reg.id = AARCH64_CORE_REG(regs.sp);
1180    reg.addr = (uintptr_t) &env->sp_el[0];
1181    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1182    if (ret) {
1183        return ret;
1184    }
1185
1186    reg.id = AARCH64_CORE_REG(sp_el1);
1187    reg.addr = (uintptr_t) &env->sp_el[1];
1188    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1189    if (ret) {
1190        return ret;
1191    }
1192
1193    reg.id = AARCH64_CORE_REG(regs.pstate);
1194    reg.addr = (uintptr_t) &val;
1195    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1196    if (ret) {
1197        return ret;
1198    }
1199
1200    env->aarch64 = ((val & PSTATE_nRW) == 0);
1201    if (is_a64(env)) {
1202        pstate_write(env, val);
1203    } else {
1204        cpsr_write(env, val, 0xffffffff, CPSRWriteRaw);
1205    }
1206
1207    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
1208     * QEMU side we keep the current SP in xregs[31] as well.
1209     */
1210    aarch64_restore_sp(env, 1);
1211
1212    reg.id = AARCH64_CORE_REG(regs.pc);
1213    reg.addr = (uintptr_t) &env->pc;
1214    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1215    if (ret) {
1216        return ret;
1217    }
1218
1219    /* If we are in AArch32 mode then we need to sync the AArch32 regs with the
1220     * incoming AArch64 regs received from 64-bit KVM.
1221     * We must perform this after all of the registers have been acquired from
1222     * the kernel.
1223     */
1224    if (!is_a64(env)) {
1225        aarch64_sync_64_to_32(env);
1226    }
1227
1228    reg.id = AARCH64_CORE_REG(elr_el1);
1229    reg.addr = (uintptr_t) &env->elr_el[1];
1230    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1231    if (ret) {
1232        return ret;
1233    }
1234
1235    /* Fetch the SPSR registers
1236     *
1237     * KVM SPSRs 0-4 map to QEMU banks 1-5
1238     */
1239    for (i = 0; i < KVM_NR_SPSR; i++) {
1240        reg.id = AARCH64_CORE_REG(spsr[i]);
1241        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
1242        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1243        if (ret) {
1244            return ret;
1245        }
1246    }
1247
1248    el = arm_current_el(env);
1249    if (el > 0 && !is_a64(env)) {
1250        i = bank_number(env->uncached_cpsr & CPSR_M);
1251        env->spsr = env->banked_spsr[i];
1252    }
1253
1254    if (cpu_isar_feature(aa64_sve, cpu)) {
1255        ret = kvm_arch_get_sve(cs);
1256    } else {
1257        ret = kvm_arch_get_fpsimd(cs);
1258    }
1259    if (ret) {
1260        return ret;
1261    }
1262
1263    reg.addr = (uintptr_t)(&fpr);
1264    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
1265    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1266    if (ret) {
1267        return ret;
1268    }
1269    vfp_set_fpsr(env, fpr);
1270
1271    reg.addr = (uintptr_t)(&fpr);
1272    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
1273    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
1274    if (ret) {
1275        return ret;
1276    }
1277    vfp_set_fpcr(env, fpr);
1278
1279    ret = kvm_get_vcpu_events(cpu);
1280    if (ret) {
1281        return ret;
1282    }
1283
1284    if (!write_kvmstate_to_list(cpu)) {
1285        return -EINVAL;
1286    }
1287    /* Note that it's OK to have registers which aren't in CPUState,
1288     * so we can ignore a failure return here.
1289     */
1290    write_list_to_cpustate(cpu);
1291
1292    kvm_arm_sync_mpstate_to_qemu(cpu);
1293
1294    /* TODO: other registers */
1295    return ret;
1296}
1297
1298/* C6.6.29 BRK instruction */
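/*
 * 0xd4200000 encodes BRK #0: the 16-bit immediate sits in bits [20:5]
 * of the instruction and is left as zero here; any immediate would trap
 * to the debugger in the same way.
 */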
1299static const uint32_t brk_insn = 0xd4200000;
1300
1301int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1302{
1303    if (have_guest_debug) {
1304        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) ||
1305            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) {
1306            return -EINVAL;
1307        }
1308        return 0;
1309    } else {
1310        error_report("guest debug not supported on this kernel");
1311        return -EINVAL;
1312    }
1313}
1314
1315int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1316{
1317    static uint32_t brk;
1318
1319    if (have_guest_debug) {
1320        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) ||
1321            brk != brk_insn ||
1322            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) {
1323            return -EINVAL;
1324        }
1325        return 0;
1326    } else {
1327        error_report("guest debug not supported on this kernel");
1328        return -EINVAL;
1329    }
1330}
1331
1332/* See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register
1333 *
1334 * To minimise translating between kernel and user-space the kernel
1335 * ABI just provides user-space with the full exception syndrome
1336 * register value to be decoded in QEMU.
1337 */
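/*
 * syn_get_ec() extracts the exception class (EC, syndrome bits [31:26]),
 * which tells us which kind of debug event occurred.
 */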
1338
1339bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
1340{
1341    int hsr_ec = syn_get_ec(debug_exit->hsr);
1342    ARMCPU *cpu = ARM_CPU(cs);
1343    CPUClass *cc = CPU_GET_CLASS(cs);
1344    CPUARMState *env = &cpu->env;
1345
1346    /* Ensure PC is synchronised */
1347    kvm_cpu_synchronize_state(cs);
1348
1349    switch (hsr_ec) {
1350    case EC_SOFTWARESTEP:
1351        if (cs->singlestep_enabled) {
1352            return true;
1353        } else {
1354            /*
1355             * The kernel should have suppressed the guest's ability to
1356             * single step at this point so something has gone wrong.
1357             */
1358            error_report("%s: guest single-step while debugging unsupported"
1359                         " (%"PRIx64", %"PRIx32")",
1360                         __func__, env->pc, debug_exit->hsr);
1361            return false;
1362        }
1363        break;
1364    case EC_AA64_BKPT:
1365        if (kvm_find_sw_breakpoint(cs, env->pc)) {
1366            return true;
1367        }
1368        break;
1369    case EC_BREAKPOINT:
1370        if (find_hw_breakpoint(cs, env->pc)) {
1371            return true;
1372        }
1373        break;
1374    case EC_WATCHPOINT:
1375    {
1376        CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far);
1377        if (wp) {
1378            cs->watchpoint_hit = wp;
1379            return true;
1380        }
1381        break;
1382    }
1383    default:
1384        error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")",
1385                     __func__, debug_exit->hsr, env->pc);
1386    }
1387
1388    /* If we are not handling the debug exception it must belong to
1389     * the guest. Let's re-use the existing TCG interrupt code to set
1390     * everything up properly.
1391     */
1392    cs->exception_index = EXCP_BKPT;
1393    env->exception.syndrome = debug_exit->hsr;
1394    env->exception.vaddress = debug_exit->far;
1395    env->exception.target_el = 1;
1396    qemu_mutex_lock_iothread();
1397    cc->do_interrupt(cs);
1398    qemu_mutex_unlock_iothread();
1399
1400    return false;
1401}
1402