linux/virt/kvm/arm/arm.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
   4 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
   5 */
   6
   7#include <linux/bug.h>
   8#include <linux/cpu_pm.h>
   9#include <linux/errno.h>
  10#include <linux/err.h>
  11#include <linux/kvm_host.h>
  12#include <linux/list.h>
  13#include <linux/module.h>
  14#include <linux/vmalloc.h>
  15#include <linux/fs.h>
  16#include <linux/mman.h>
  17#include <linux/sched.h>
  18#include <linux/kvm.h>
  19#include <linux/kvm_irqfd.h>
  20#include <linux/irqbypass.h>
  21#include <linux/sched/stat.h>
  22#include <trace/events/kvm.h>
  23#include <kvm/arm_pmu.h>
  24#include <kvm/arm_psci.h>
  25
  26#define CREATE_TRACE_POINTS
  27#include "trace.h"
  28
  29#include <linux/uaccess.h>
  30#include <asm/ptrace.h>
  31#include <asm/mman.h>
  32#include <asm/tlbflush.h>
  33#include <asm/cacheflush.h>
  34#include <asm/cpufeature.h>
  35#include <asm/virt.h>
  36#include <asm/kvm_arm.h>
  37#include <asm/kvm_asm.h>
  38#include <asm/kvm_mmu.h>
  39#include <asm/kvm_emulate.h>
  40#include <asm/kvm_coproc.h>
  41#include <asm/sections.h>
  42
  43#ifdef REQUIRES_VIRT
  44__asm__(".arch_extension        virt");
  45#endif
  46
  47DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data);
  48static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
  49
  50/* Per-CPU variable containing the currently running vcpu. */
  51static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
  52
  53/* The VMID used in the VTTBR */
  54static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
  55static u32 kvm_next_vmid;
  56static DEFINE_SPINLOCK(kvm_vmid_lock);
  57
  58static bool vgic_present;
  59
  60static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
  61
  62static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
  63{
  64        __this_cpu_write(kvm_arm_running_vcpu, vcpu);
  65}
  66
  67DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
  68
  69/**
  70 * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
   71 * Must be called from a non-preemptible context
  72 */
  73struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
  74{
  75        return __this_cpu_read(kvm_arm_running_vcpu);
  76}
  77
  78/**
   79 * kvm_get_running_vcpus - get the per-CPU array of currently running vcpus.
  80 */
  81struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
  82{
  83        return &kvm_arm_running_vcpu;
  84}
  85
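     /*
      * Only IPI the target vcpu if it is still in guest mode; if it has
      * already begun exiting, the pending request is noticed on the way out.
      */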
  86int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
  87{
  88        return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
  89}
  90
  91int kvm_arch_hardware_setup(void)
  92{
  93        return 0;
  94}
  95
  96int kvm_arch_check_processor_compat(void)
  97{
  98        return 0;
  99}
 100
 101
 102/**
 103 * kvm_arch_init_vm - initializes a VM data structure
 104 * @kvm:        pointer to the KVM struct
 105 */
 106int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 107{
 108        int ret, cpu;
 109
 110        ret = kvm_arm_setup_stage2(kvm, type);
 111        if (ret)
 112                return ret;
 113
 114        kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
 115        if (!kvm->arch.last_vcpu_ran)
 116                return -ENOMEM;
 117
 118        for_each_possible_cpu(cpu)
 119                *per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
 120
 121        ret = kvm_alloc_stage2_pgd(kvm);
 122        if (ret)
 123                goto out_fail_alloc;
 124
 125        ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
 126        if (ret)
 127                goto out_free_stage2_pgd;
 128
 129        kvm_vgic_early_init(kvm);
 130
 131        /* Mark the initial VMID generation invalid */
 132        kvm->arch.vmid.vmid_gen = 0;
 133
 134        /* The maximum number of VCPUs is limited by the host's GIC model */
 135        kvm->arch.max_vcpus = vgic_present ?
 136                                kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
 137
 138        return ret;
 139out_free_stage2_pgd:
 140        kvm_free_stage2_pgd(kvm);
 141out_fail_alloc:
 142        free_percpu(kvm->arch.last_vcpu_ran);
 143        kvm->arch.last_vcpu_ran = NULL;
 144        return ret;
 145}
 146
 147int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
 148{
 149        return 0;
 150}
 151
 152vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 153{
 154        return VM_FAULT_SIGBUS;
 155}
 156
 157
 158/**
 159 * kvm_arch_destroy_vm - destroy the VM data structure
 160 * @kvm:        pointer to the KVM struct
 161 */
 162void kvm_arch_destroy_vm(struct kvm *kvm)
 163{
 164        int i;
 165
 166        kvm_vgic_destroy(kvm);
 167
 168        free_percpu(kvm->arch.last_vcpu_ran);
 169        kvm->arch.last_vcpu_ran = NULL;
 170
 171        for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 172                if (kvm->vcpus[i]) {
 173                        kvm_arch_vcpu_free(kvm->vcpus[i]);
 174                        kvm->vcpus[i] = NULL;
 175                }
 176        }
 177        atomic_set(&kvm->online_vcpus, 0);
 178}
 179
 180int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 181{
 182        int r;
 183        switch (ext) {
 184        case KVM_CAP_IRQCHIP:
 185                r = vgic_present;
 186                break;
 187        case KVM_CAP_IOEVENTFD:
 188        case KVM_CAP_DEVICE_CTRL:
 189        case KVM_CAP_USER_MEMORY:
 190        case KVM_CAP_SYNC_MMU:
 191        case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 192        case KVM_CAP_ONE_REG:
 193        case KVM_CAP_ARM_PSCI:
 194        case KVM_CAP_ARM_PSCI_0_2:
 195        case KVM_CAP_READONLY_MEM:
 196        case KVM_CAP_MP_STATE:
 197        case KVM_CAP_IMMEDIATE_EXIT:
 198        case KVM_CAP_VCPU_EVENTS:
 199                r = 1;
 200                break;
 201        case KVM_CAP_ARM_SET_DEVICE_ADDR:
 202                r = 1;
 203                break;
 204        case KVM_CAP_NR_VCPUS:
 205                r = num_online_cpus();
 206                break;
 207        case KVM_CAP_MAX_VCPUS:
 208                r = KVM_MAX_VCPUS;
 209                break;
 210        case KVM_CAP_MAX_VCPU_ID:
 211                r = KVM_MAX_VCPU_ID;
 212                break;
 213        case KVM_CAP_MSI_DEVID:
 214                if (!kvm)
 215                        r = -EINVAL;
 216                else
 217                        r = kvm->arch.vgic.msis_require_devid;
 218                break;
 219        case KVM_CAP_ARM_USER_IRQ:
 220                /*
 221                 * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
 222                 * (bump this number if adding more devices)
 223                 */
 224                r = 1;
 225                break;
 226        default:
 227                r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
 228                break;
 229        }
 230        return r;
 231}
 232
 233long kvm_arch_dev_ioctl(struct file *filp,
 234                        unsigned int ioctl, unsigned long arg)
 235{
 236        return -EINVAL;
 237}
 238
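     /*
      * Without VHE the kvm structure is mapped into the hyp address space by
      * create_hyp_mappings(), which expects linear-map memory, so use
      * kzalloc(); with VHE no hyp mapping is needed and vzalloc() is fine.
      */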
 239struct kvm *kvm_arch_alloc_vm(void)
 240{
 241        if (!has_vhe())
 242                return kzalloc(sizeof(struct kvm), GFP_KERNEL);
 243
 244        return vzalloc(sizeof(struct kvm));
 245}
 246
 247void kvm_arch_free_vm(struct kvm *kvm)
 248{
 249        if (!has_vhe())
 250                kfree(kvm);
 251        else
 252                vfree(kvm);
 253}
 254
 255struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 256{
 257        int err;
 258        struct kvm_vcpu *vcpu;
 259
 260        if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
 261                err = -EBUSY;
 262                goto out;
 263        }
 264
 265        if (id >= kvm->arch.max_vcpus) {
 266                err = -EINVAL;
 267                goto out;
 268        }
 269
 270        vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 271        if (!vcpu) {
 272                err = -ENOMEM;
 273                goto out;
 274        }
 275
 276        err = kvm_vcpu_init(vcpu, kvm, id);
 277        if (err)
 278                goto free_vcpu;
 279
 280        err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
 281        if (err)
 282                goto vcpu_uninit;
 283
 284        return vcpu;
 285vcpu_uninit:
 286        kvm_vcpu_uninit(vcpu);
 287free_vcpu:
 288        kmem_cache_free(kvm_vcpu_cache, vcpu);
 289out:
 290        return ERR_PTR(err);
 291}
 292
 293void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 294{
 295}
 296
 297void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 298{
 299        if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
 300                static_branch_dec(&userspace_irqchip_in_use);
 301
 302        kvm_mmu_free_memory_caches(vcpu);
 303        kvm_timer_vcpu_terminate(vcpu);
 304        kvm_pmu_vcpu_destroy(vcpu);
 305        kvm_vcpu_uninit(vcpu);
 306        kmem_cache_free(kvm_vcpu_cache, vcpu);
 307}
 308
 309void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 310{
 311        kvm_arch_vcpu_free(vcpu);
 312}
 313
 314int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 315{
 316        return kvm_timer_is_pending(vcpu);
 317}
 318
 319void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 320{
 321        /*
 322         * If we're about to block (most likely because we've just hit a
 323         * WFI), we need to sync back the state of the GIC CPU interface
  324         * so that we have the latest PMR and group enables. This ensures
 325         * that kvm_arch_vcpu_runnable has up-to-date data to decide
 326         * whether we have pending interrupts.
 327         */
 328        preempt_disable();
 329        kvm_vgic_vmcr_sync(vcpu);
 330        preempt_enable();
 331
 332        kvm_vgic_v4_enable_doorbell(vcpu);
 333}
 334
 335void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 336{
 337        kvm_vgic_v4_disable_doorbell(vcpu);
 338}
 339
 340int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 341{
 342        /* Force users to call KVM_ARM_VCPU_INIT */
 343        vcpu->arch.target = -1;
 344        bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 345
 346        /* Set up the timer */
 347        kvm_timer_vcpu_init(vcpu);
 348
 349        kvm_pmu_vcpu_init(vcpu);
 350
 351        kvm_arm_reset_debug_ptr(vcpu);
 352
 353        return kvm_vgic_vcpu_init(vcpu);
 354}
 355
 356void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 357{
 358        int *last_ran;
 359        kvm_host_data_t *cpu_data;
 360
 361        last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
 362        cpu_data = this_cpu_ptr(&kvm_host_data);
 363
 364        /*
 365         * We might get preempted before the vCPU actually runs, but
 366         * over-invalidation doesn't affect correctness.
 367         */
 368        if (*last_ran != vcpu->vcpu_id) {
 369                kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
 370                *last_ran = vcpu->vcpu_id;
 371        }
 372
 373        vcpu->cpu = cpu;
 374        vcpu->arch.host_cpu_context = &cpu_data->host_ctxt;
 375
 376        kvm_arm_set_running_vcpu(vcpu);
 377        kvm_vgic_load(vcpu);
 378        kvm_timer_vcpu_load(vcpu);
 379        kvm_vcpu_load_sysregs(vcpu);
 380        kvm_arch_vcpu_load_fp(vcpu);
 381        kvm_vcpu_pmu_restore_guest(vcpu);
 382
 383        if (single_task_running())
 384                vcpu_clear_wfe_traps(vcpu);
 385        else
 386                vcpu_set_wfe_traps(vcpu);
 387
 388        vcpu_ptrauth_setup_lazy(vcpu);
 389}
 390
 391void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 392{
 393        kvm_arch_vcpu_put_fp(vcpu);
 394        kvm_vcpu_put_sysregs(vcpu);
 395        kvm_timer_vcpu_put(vcpu);
 396        kvm_vgic_put(vcpu);
 397        kvm_vcpu_pmu_restore_host(vcpu);
 398
 399        vcpu->cpu = -1;
 400
 401        kvm_arm_set_running_vcpu(NULL);
 402}
 403
 404static void vcpu_power_off(struct kvm_vcpu *vcpu)
 405{
 406        vcpu->arch.power_off = true;
 407        kvm_make_request(KVM_REQ_SLEEP, vcpu);
 408        kvm_vcpu_kick(vcpu);
 409}
 410
 411int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 412                                    struct kvm_mp_state *mp_state)
 413{
 414        if (vcpu->arch.power_off)
 415                mp_state->mp_state = KVM_MP_STATE_STOPPED;
 416        else
 417                mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
 418
 419        return 0;
 420}
 421
 422int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 423                                    struct kvm_mp_state *mp_state)
 424{
 425        int ret = 0;
 426
 427        switch (mp_state->mp_state) {
 428        case KVM_MP_STATE_RUNNABLE:
 429                vcpu->arch.power_off = false;
 430                break;
 431        case KVM_MP_STATE_STOPPED:
 432                vcpu_power_off(vcpu);
 433                break;
 434        default:
 435                ret = -EINVAL;
 436        }
 437
 438        return ret;
 439}
 440
 441/**
 442 * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
 443 * @v:          The VCPU pointer
 444 *
 445 * If the guest CPU is not waiting for interrupts or an interrupt line is
 446 * asserted, the CPU is by definition runnable.
 447 */
 448int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 449{
 450        bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
 451        return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
 452                && !v->arch.power_off && !v->arch.pause);
 453}
 454
 455bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
 456{
 457        return vcpu_mode_priv(vcpu);
 458}
 459
 460/* Just ensure a guest exit from a particular CPU */
 461static void exit_vm_noop(void *info)
 462{
 463}
 464
 465void force_vm_exit(const cpumask_t *mask)
 466{
 467        preempt_disable();
 468        smp_call_function_many(mask, exit_vm_noop, NULL, true);
 469        preempt_enable();
 470}
 471
 472/**
 473 * need_new_vmid_gen - check that the VMID is still valid
 474 * @vmid: The VMID to check
 475 *
  476 * Return true if there is a new generation of VMIDs being used
  477 *
  478 * The hardware supports a limited set of values with the value zero reserved
  479 * for the host, so we check if an assigned value belongs to a previous
  480 * generation, which requires us to assign a new value. If we're the
 481 * first to use a VMID for the new generation, we must flush necessary caches
 482 * and TLBs on all CPUs.
 483 */
 484static bool need_new_vmid_gen(struct kvm_vmid *vmid)
 485{
 486        u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
 487        smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
 488        return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
 489}
 490
 491/**
 492 * update_vmid - Update the vmid with a valid VMID for the current generation
  493 *
 494 * @vmid: The stage-2 VMID information struct
 495 */
 496static void update_vmid(struct kvm_vmid *vmid)
 497{
 498        if (!need_new_vmid_gen(vmid))
 499                return;
 500
 501        spin_lock(&kvm_vmid_lock);
 502
 503        /*
 504         * We need to re-check the vmid_gen here to ensure that if another vcpu
 505         * already allocated a valid vmid for this vm, then this vcpu should
 506         * use the same vmid.
 507         */
 508        if (!need_new_vmid_gen(vmid)) {
 509                spin_unlock(&kvm_vmid_lock);
 510                return;
 511        }
 512
 513        /* First user of a new VMID generation? */
 514        if (unlikely(kvm_next_vmid == 0)) {
 515                atomic64_inc(&kvm_vmid_gen);
 516                kvm_next_vmid = 1;
 517
 518                /*
 519                 * On SMP we know no other CPUs can use this CPU's or each
 520                 * other's VMID after force_vm_exit returns since the
 521                 * kvm_vmid_lock blocks them from reentry to the guest.
 522                 */
 523                force_vm_exit(cpu_all_mask);
 524                /*
 525                 * Now broadcast TLB + ICACHE invalidation over the inner
 526                 * shareable domain to make sure all data structures are
 527                 * clean.
 528                 */
 529                kvm_call_hyp(__kvm_flush_vm_context);
 530        }
 531
 532        vmid->vmid = kvm_next_vmid;
 533        kvm_next_vmid++;
 534        kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
 535
 536        smp_wmb();
 537        WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
 538
 539        spin_unlock(&kvm_vmid_lock);
 540}
 541
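     /*
      * One-time setup on a vcpu's first KVM_RUN: the vcpu must already be
      * finalized; map the VGIC resources if not done yet, account for
      * userspace-irqchip VMs, and enable timer and PMU emulation.
      */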
 542static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 543{
 544        struct kvm *kvm = vcpu->kvm;
 545        int ret = 0;
 546
 547        if (likely(vcpu->arch.has_run_once))
 548                return 0;
 549
 550        if (!kvm_arm_vcpu_is_finalized(vcpu))
 551                return -EPERM;
 552
 553        vcpu->arch.has_run_once = true;
 554
 555        if (likely(irqchip_in_kernel(kvm))) {
 556                /*
 557                 * Map the VGIC hardware resources before running a vcpu the
 558                 * first time on this VM.
 559                 */
 560                if (unlikely(!vgic_ready(kvm))) {
 561                        ret = kvm_vgic_map_resources(kvm);
 562                        if (ret)
 563                                return ret;
 564                }
 565        } else {
 566                /*
 567                 * Tell the rest of the code that there are userspace irqchip
 568                 * VMs in the wild.
 569                 */
 570                static_branch_inc(&userspace_irqchip_in_use);
 571        }
 572
 573        ret = kvm_timer_enable(vcpu);
 574        if (ret)
 575                return ret;
 576
 577        ret = kvm_arm_pmu_v3_enable(vcpu);
 578
 579        return ret;
 580}
 581
 582bool kvm_arch_intc_initialized(struct kvm *kvm)
 583{
 584        return vgic_initialized(kvm);
 585}
 586
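     /*
      * Pause all vcpus of a VM: set their pause flag and request that they
      * sleep, forcing them out of the guest and into vcpu_req_sleep().
      */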
 587void kvm_arm_halt_guest(struct kvm *kvm)
 588{
 589        int i;
 590        struct kvm_vcpu *vcpu;
 591
 592        kvm_for_each_vcpu(i, vcpu, kvm)
 593                vcpu->arch.pause = true;
 594        kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
 595}
 596
 597void kvm_arm_resume_guest(struct kvm *kvm)
 598{
 599        int i;
 600        struct kvm_vcpu *vcpu;
 601
 602        kvm_for_each_vcpu(i, vcpu, kvm) {
 603                vcpu->arch.pause = false;
 604                swake_up_one(kvm_arch_vcpu_wq(vcpu));
 605        }
 606}
 607
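     /*
      * Sleep until the vcpu is neither powered off nor paused. If we are
      * woken (e.g. by a signal) while still powered off or paused, re-post
      * the sleep request so we sleep again on the next entry attempt.
      */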
 608static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
 609{
 610        struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
 611
 612        swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
 613                                       (!vcpu->arch.pause)));
 614
 615        if (vcpu->arch.power_off || vcpu->arch.pause) {
 616                /* Awaken to handle a signal, request we sleep again later. */
 617                kvm_make_request(KVM_REQ_SLEEP, vcpu);
 618        }
 619
 620        /*
 621         * Make sure we will observe a potential reset request if we've
 622         * observed a change to the power state. Pairs with the smp_wmb() in
 623         * kvm_psci_vcpu_on().
 624         */
 625        smp_rmb();
 626}
 627
 628static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
 629{
 630        return vcpu->arch.target >= 0;
 631}
 632
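     /*
      * Handle pending vcpu requests before entering the guest: sleep, reset,
      * or simply consume IRQ_PENDING, which only exists to make the vcpu
      * notice newly injected virtual interrupts.
      */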
 633static void check_vcpu_requests(struct kvm_vcpu *vcpu)
 634{
 635        if (kvm_request_pending(vcpu)) {
 636                if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
 637                        vcpu_req_sleep(vcpu);
 638
 639                if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
 640                        kvm_reset_vcpu(vcpu);
 641
 642                /*
 643                 * Clear IRQ_PENDING requests that were made to guarantee
 644                 * that a VCPU sees new virtual interrupts.
 645                 */
 646                kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
 647        }
 648}
 649
 650/**
 651 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
 652 * @vcpu:       The VCPU pointer
 653 * @run:        The kvm_run structure pointer used for userspace state exchange
 654 *
  655 * This function is called through the KVM_RUN ioctl from user space. It
  656 * executes VM code in a loop until the time slice for the process is used up
  657 * or some emulation is needed from user space, in which case the function
  658 * returns 0 with the kvm_run structure filled in with the required data for
  659 * the requested emulation.
 660 */
 661int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 662{
 663        int ret;
 664
 665        if (unlikely(!kvm_vcpu_initialized(vcpu)))
 666                return -ENOEXEC;
 667
 668        ret = kvm_vcpu_first_run_init(vcpu);
 669        if (ret)
 670                return ret;
 671
 672        if (run->exit_reason == KVM_EXIT_MMIO) {
 673                ret = kvm_handle_mmio_return(vcpu, vcpu->run);
 674                if (ret)
 675                        return ret;
 676        }
 677
 678        if (run->immediate_exit)
 679                return -EINTR;
 680
 681        vcpu_load(vcpu);
 682
 683        kvm_sigset_activate(vcpu);
 684
 685        ret = 1;
 686        run->exit_reason = KVM_EXIT_UNKNOWN;
 687        while (ret > 0) {
 688                /*
 689                 * Check conditions before entering the guest
 690                 */
 691                cond_resched();
 692
 693                update_vmid(&vcpu->kvm->arch.vmid);
 694
 695                check_vcpu_requests(vcpu);
 696
 697                /*
 698                 * Preparing the interrupts to be injected also
 699                 * involves poking the GIC, which must be done in a
 700                 * non-preemptible context.
 701                 */
 702                preempt_disable();
 703
 704                kvm_pmu_flush_hwstate(vcpu);
 705
 706                local_irq_disable();
 707
 708                kvm_vgic_flush_hwstate(vcpu);
 709
 710                /*
 711                 * Exit if we have a signal pending so that we can deliver the
 712                 * signal to user space.
 713                 */
 714                if (signal_pending(current)) {
 715                        ret = -EINTR;
 716                        run->exit_reason = KVM_EXIT_INTR;
 717                }
 718
 719                /*
 720                 * If we're using a userspace irqchip, then check if we need
 721                 * to tell a userspace irqchip about timer or PMU level
 722                 * changes and if so, exit to userspace (the actual level
 723                 * state gets updated in kvm_timer_update_run and
 724                 * kvm_pmu_update_run below).
 725                 */
 726                if (static_branch_unlikely(&userspace_irqchip_in_use)) {
 727                        if (kvm_timer_should_notify_user(vcpu) ||
 728                            kvm_pmu_should_notify_user(vcpu)) {
 729                                ret = -EINTR;
 730                                run->exit_reason = KVM_EXIT_INTR;
 731                        }
 732                }
 733
 734                /*
 735                 * Ensure we set mode to IN_GUEST_MODE after we disable
 736                 * interrupts and before the final VCPU requests check.
 737                 * See the comment in kvm_vcpu_exiting_guest_mode() and
 738                 * Documentation/virt/kvm/vcpu-requests.rst
 739                 */
 740                smp_store_mb(vcpu->mode, IN_GUEST_MODE);
 741
 742                if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) ||
 743                    kvm_request_pending(vcpu)) {
 744                        vcpu->mode = OUTSIDE_GUEST_MODE;
 745                        isb(); /* Ensure work in x_flush_hwstate is committed */
 746                        kvm_pmu_sync_hwstate(vcpu);
 747                        if (static_branch_unlikely(&userspace_irqchip_in_use))
 748                                kvm_timer_sync_hwstate(vcpu);
 749                        kvm_vgic_sync_hwstate(vcpu);
 750                        local_irq_enable();
 751                        preempt_enable();
 752                        continue;
 753                }
 754
 755                kvm_arm_setup_debug(vcpu);
 756
 757                /**************************************************************
 758                 * Enter the guest
 759                 */
 760                trace_kvm_entry(*vcpu_pc(vcpu));
 761                guest_enter_irqoff();
 762
 763                if (has_vhe()) {
 764                        kvm_arm_vhe_guest_enter();
 765                        ret = kvm_vcpu_run_vhe(vcpu);
 766                        kvm_arm_vhe_guest_exit();
 767                } else {
 768                        ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
 769                }
 770
 771                vcpu->mode = OUTSIDE_GUEST_MODE;
 772                vcpu->stat.exits++;
 773                /*
 774                 * Back from guest
 775                 *************************************************************/
 776
 777                kvm_arm_clear_debug(vcpu);
 778
 779                /*
 780                 * We must sync the PMU state before the vgic state so
 781                 * that the vgic can properly sample the updated state of the
 782                 * interrupt line.
 783                 */
 784                kvm_pmu_sync_hwstate(vcpu);
 785
 786                /*
 787                 * Sync the vgic state before syncing the timer state because
 788                 * the timer code needs to know if the virtual timer
 789                 * interrupts are active.
 790                 */
 791                kvm_vgic_sync_hwstate(vcpu);
 792
 793                /*
 794                 * Sync the timer hardware state before enabling interrupts as
 795                 * we don't want vtimer interrupts to race with syncing the
 796                 * timer virtual interrupt state.
 797                 */
 798                if (static_branch_unlikely(&userspace_irqchip_in_use))
 799                        kvm_timer_sync_hwstate(vcpu);
 800
 801                kvm_arch_vcpu_ctxsync_fp(vcpu);
 802
 803                /*
 804                 * We may have taken a host interrupt in HYP mode (ie
 805                 * while executing the guest). This interrupt is still
 806                 * pending, as we haven't serviced it yet!
 807                 *
 808                 * We're now back in SVC mode, with interrupts
 809                 * disabled.  Enabling the interrupts now will have
 810                 * the effect of taking the interrupt again, in SVC
 811                 * mode this time.
 812                 */
 813                local_irq_enable();
 814
 815                /*
 816                 * We do local_irq_enable() before calling guest_exit() so
 817                 * that if a timer interrupt hits while running the guest we
 818                 * account that tick as being spent in the guest.  We enable
 819                 * preemption after calling guest_exit() so that if we get
  820                 * preempted we make sure ticks after that are not counted as
 821                 * guest time.
 822                 */
 823                guest_exit();
 824                trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
 825
 826                /* Exit types that need handling before we can be preempted */
 827                handle_exit_early(vcpu, run, ret);
 828
 829                preempt_enable();
 830
 831                ret = handle_exit(vcpu, run, ret);
 832        }
 833
 834        /* Tell userspace about in-kernel device output levels */
 835        if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
 836                kvm_timer_update_run(vcpu);
 837                kvm_pmu_update_run(vcpu);
 838        }
 839
 840        kvm_sigset_deactivate(vcpu);
 841
 842        vcpu_put(vcpu);
 843        return ret;
 844}
 845
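     /*
      * Inject or retire a virtual IRQ/FIQ for a VM using a userspace irqchip:
      * set or clear the VI/VF bit in the vcpu's HCR copy and, if the line
      * state actually changed, post an IRQ_PENDING request and kick the vcpu.
      */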
 846static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
 847{
 848        int bit_index;
 849        bool set;
 850        unsigned long *hcr;
 851
 852        if (number == KVM_ARM_IRQ_CPU_IRQ)
 853                bit_index = __ffs(HCR_VI);
 854        else /* KVM_ARM_IRQ_CPU_FIQ */
 855                bit_index = __ffs(HCR_VF);
 856
 857        hcr = vcpu_hcr(vcpu);
 858        if (level)
 859                set = test_and_set_bit(bit_index, hcr);
 860        else
 861                set = test_and_clear_bit(bit_index, hcr);
 862
 863        /*
 864         * If we didn't change anything, no need to wake up or kick other CPUs
 865         */
 866        if (set == level)
 867                return 0;
 868
 869        /*
 870         * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
 871         * trigger a world-switch round on the running physical CPU to set the
 872         * virtual IRQ/FIQ fields in the HCR appropriately.
 873         */
 874        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 875        kvm_vcpu_kick(vcpu);
 876
 877        return 0;
 878}
 879
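     /*
      * KVM_IRQ_LINE ioctl: the irq value packs the interrupt type, the target
      * vcpu index and the interrupt number. CPU IRQ/FIQ lines are only valid
      * without an in-kernel irqchip; PPIs and SPIs require one and are routed
      * through the vgic.
      */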
 880int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 881                          bool line_status)
 882{
 883        u32 irq = irq_level->irq;
 884        unsigned int irq_type, vcpu_idx, irq_num;
 885        int nrcpus = atomic_read(&kvm->online_vcpus);
 886        struct kvm_vcpu *vcpu = NULL;
 887        bool level = irq_level->level;
 888
 889        irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
 890        vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
 891        irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
 892
 893        trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
 894
 895        switch (irq_type) {
 896        case KVM_ARM_IRQ_TYPE_CPU:
 897                if (irqchip_in_kernel(kvm))
 898                        return -ENXIO;
 899
 900                if (vcpu_idx >= nrcpus)
 901                        return -EINVAL;
 902
 903                vcpu = kvm_get_vcpu(kvm, vcpu_idx);
 904                if (!vcpu)
 905                        return -EINVAL;
 906
 907                if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
 908                        return -EINVAL;
 909
 910                return vcpu_interrupt_line(vcpu, irq_num, level);
 911        case KVM_ARM_IRQ_TYPE_PPI:
 912                if (!irqchip_in_kernel(kvm))
 913                        return -ENXIO;
 914
 915                if (vcpu_idx >= nrcpus)
 916                        return -EINVAL;
 917
 918                vcpu = kvm_get_vcpu(kvm, vcpu_idx);
 919                if (!vcpu)
 920                        return -EINVAL;
 921
 922                if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
 923                        return -EINVAL;
 924
 925                return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
 926        case KVM_ARM_IRQ_TYPE_SPI:
 927                if (!irqchip_in_kernel(kvm))
 928                        return -ENXIO;
 929
 930                if (irq_num < VGIC_NR_PRIVATE_IRQS)
 931                        return -EINVAL;
 932
 933                return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
 934        }
 935
 936        return -EINVAL;
 937}
 938
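     /*
      * Apply KVM_ARM_VCPU_INIT: the requested target must match the host,
      * and any repeated init must use the same target and feature set. On
      * success the features are recorded and the vcpu is reset; on failure
      * the vcpu is left uninitialised (target == -1).
      */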
 939static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 940                               const struct kvm_vcpu_init *init)
 941{
 942        unsigned int i, ret;
 943        int phys_target = kvm_target_cpu();
 944
 945        if (init->target != phys_target)
 946                return -EINVAL;
 947
 948        /*
 949         * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
 950         * use the same target.
 951         */
 952        if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
 953                return -EINVAL;
 954
 955        /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
 956        for (i = 0; i < sizeof(init->features) * 8; i++) {
 957                bool set = (init->features[i / 32] & (1 << (i % 32)));
 958
 959                if (set && i >= KVM_VCPU_MAX_FEATURES)
 960                        return -ENOENT;
 961
 962                /*
 963                 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
 964                 * use the same feature set.
 965                 */
 966                if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
 967                    test_bit(i, vcpu->arch.features) != set)
 968                        return -EINVAL;
 969
 970                if (set)
 971                        set_bit(i, vcpu->arch.features);
 972        }
 973
 974        vcpu->arch.target = phys_target;
 975
 976        /* Now we know what it is, we can reset it. */
 977        ret = kvm_reset_vcpu(vcpu);
 978        if (ret) {
 979                vcpu->arch.target = -1;
 980                bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 981        }
 982
 983        return ret;
 984}
 985
 986static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 987                                         struct kvm_vcpu_init *init)
 988{
 989        int ret;
 990
 991        ret = kvm_vcpu_set_target(vcpu, init);
 992        if (ret)
 993                return ret;
 994
 995        /*
  996         * Ensure a rebooted VM will fault in RAM pages, so that we detect
  997         * whether the guest MMU is turned off and flush the caches as needed.
 998         */
 999        if (vcpu->arch.has_run_once)
1000                stage2_unmap_vm(vcpu->kvm);
1001
1002        vcpu_reset_hcr(vcpu);
1003
1004        /*
1005         * Handle the "start in power-off" case.
1006         */
1007        if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
1008                vcpu_power_off(vcpu);
1009        else
1010                vcpu->arch.power_off = false;
1011
1012        return 0;
1013}
1014
1015static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
1016                                 struct kvm_device_attr *attr)
1017{
1018        int ret = -ENXIO;
1019
1020        switch (attr->group) {
1021        default:
1022                ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
1023                break;
1024        }
1025
1026        return ret;
1027}
1028
1029static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
1030                                 struct kvm_device_attr *attr)
1031{
1032        int ret = -ENXIO;
1033
1034        switch (attr->group) {
1035        default:
1036                ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
1037                break;
1038        }
1039
1040        return ret;
1041}
1042
1043static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
1044                                 struct kvm_device_attr *attr)
1045{
1046        int ret = -ENXIO;
1047
1048        switch (attr->group) {
1049        default:
1050                ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
1051                break;
1052        }
1053
1054        return ret;
1055}
1056
1057static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
1058                                   struct kvm_vcpu_events *events)
1059{
1060        memset(events, 0, sizeof(*events));
1061
1062        return __kvm_arm_vcpu_get_events(vcpu, events);
1063}
1064
1065static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
1066                                   struct kvm_vcpu_events *events)
1067{
1068        int i;
1069
1070        /* check whether the reserved field is zero */
1071        for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
1072                if (events->reserved[i])
1073                        return -EINVAL;
1074
1075        /* check whether the pad field is zero */
1076        for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
1077                if (events->exception.pad[i])
1078                        return -EINVAL;
1079
1080        return __kvm_arm_vcpu_set_events(vcpu, events);
1081}
1082
1083long kvm_arch_vcpu_ioctl(struct file *filp,
1084                         unsigned int ioctl, unsigned long arg)
1085{
1086        struct kvm_vcpu *vcpu = filp->private_data;
1087        void __user *argp = (void __user *)arg;
1088        struct kvm_device_attr attr;
1089        long r;
1090
1091        switch (ioctl) {
1092        case KVM_ARM_VCPU_INIT: {
1093                struct kvm_vcpu_init init;
1094
1095                r = -EFAULT;
1096                if (copy_from_user(&init, argp, sizeof(init)))
1097                        break;
1098
1099                r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
1100                break;
1101        }
1102        case KVM_SET_ONE_REG:
1103        case KVM_GET_ONE_REG: {
1104                struct kvm_one_reg reg;
1105
1106                r = -ENOEXEC;
1107                if (unlikely(!kvm_vcpu_initialized(vcpu)))
1108                        break;
1109
1110                r = -EFAULT;
1111                if (copy_from_user(&reg, argp, sizeof(reg)))
1112                        break;
1113
1114                if (ioctl == KVM_SET_ONE_REG)
1115                        r = kvm_arm_set_reg(vcpu, &reg);
1116                else
1117                        r = kvm_arm_get_reg(vcpu, &reg);
1118                break;
1119        }
1120        case KVM_GET_REG_LIST: {
1121                struct kvm_reg_list __user *user_list = argp;
1122                struct kvm_reg_list reg_list;
1123                unsigned n;
1124
1125                r = -ENOEXEC;
1126                if (unlikely(!kvm_vcpu_initialized(vcpu)))
1127                        break;
1128
1129                r = -EPERM;
1130                if (!kvm_arm_vcpu_is_finalized(vcpu))
1131                        break;
1132
1133                r = -EFAULT;
1134                if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
1135                        break;
1136                n = reg_list.n;
1137                reg_list.n = kvm_arm_num_regs(vcpu);
1138                if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
1139                        break;
1140                r = -E2BIG;
1141                if (n < reg_list.n)
1142                        break;
1143                r = kvm_arm_copy_reg_indices(vcpu, user_list->reg);
1144                break;
1145        }
1146        case KVM_SET_DEVICE_ATTR: {
1147                r = -EFAULT;
1148                if (copy_from_user(&attr, argp, sizeof(attr)))
1149                        break;
1150                r = kvm_arm_vcpu_set_attr(vcpu, &attr);
1151                break;
1152        }
1153        case KVM_GET_DEVICE_ATTR: {
1154                r = -EFAULT;
1155                if (copy_from_user(&attr, argp, sizeof(attr)))
1156                        break;
1157                r = kvm_arm_vcpu_get_attr(vcpu, &attr);
1158                break;
1159        }
1160        case KVM_HAS_DEVICE_ATTR: {
1161                r = -EFAULT;
1162                if (copy_from_user(&attr, argp, sizeof(attr)))
1163                        break;
1164                r = kvm_arm_vcpu_has_attr(vcpu, &attr);
1165                break;
1166        }
1167        case KVM_GET_VCPU_EVENTS: {
1168                struct kvm_vcpu_events events;
1169
1170                if (kvm_arm_vcpu_get_events(vcpu, &events))
1171                        return -EINVAL;
1172
1173                if (copy_to_user(argp, &events, sizeof(events)))
1174                        return -EFAULT;
1175
1176                return 0;
1177        }
1178        case KVM_SET_VCPU_EVENTS: {
1179                struct kvm_vcpu_events events;
1180
1181                if (copy_from_user(&events, argp, sizeof(events)))
1182                        return -EFAULT;
1183
1184                return kvm_arm_vcpu_set_events(vcpu, &events);
1185        }
1186        case KVM_ARM_VCPU_FINALIZE: {
1187                int what;
1188
1189                if (!kvm_vcpu_initialized(vcpu))
1190                        return -ENOEXEC;
1191
1192                if (get_user(what, (const int __user *)argp))
1193                        return -EFAULT;
1194
1195                return kvm_arm_vcpu_finalize(vcpu, what);
1196        }
1197        default:
1198                r = -EINVAL;
1199        }
1200
1201        return r;
1202}
1203
1204/**
1205 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
1206 * @kvm: kvm instance
1207 * @log: slot id and address to which we copy the log
1208 *
 1209 * Steps 1-4 below provide a general overview of dirty page logging. See
 1210 * kvm_get_dirty_log_protect() function description for additional details.
 1211 *
 1212 * We call kvm_get_dirty_log_protect() to handle steps 1-3; upon return we
 1213 * always flush the TLB (step 4) even if a previous step failed and the dirty
 1214 * bitmap may be corrupt. Regardless of the previous outcome, the KVM logging
 1215 * API does not preclude a subsequent dirty log read from user space. Flushing
 1216 * the TLB ensures writes will be marked dirty for the next log read.
1217 *
1218 *   1. Take a snapshot of the bit and clear it if needed.
1219 *   2. Write protect the corresponding page.
1220 *   3. Copy the snapshot to the userspace.
1221 *   4. Flush TLB's if needed.
1222 */
1223int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
1224{
1225        bool flush = false;
1226        int r;
1227
1228        mutex_lock(&kvm->slots_lock);
1229
1230        r = kvm_get_dirty_log_protect(kvm, log, &flush);
1231
1232        if (flush)
1233                kvm_flush_remote_tlbs(kvm);
1234
1235        mutex_unlock(&kvm->slots_lock);
1236        return r;
1237}
1238
1239int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
1240{
1241        bool flush = false;
1242        int r;
1243
1244        mutex_lock(&kvm->slots_lock);
1245
1246        r = kvm_clear_dirty_log_protect(kvm, log, &flush);
1247
1248        if (flush)
1249                kvm_flush_remote_tlbs(kvm);
1250
1251        mutex_unlock(&kvm->slots_lock);
1252        return r;
1253}
1254
1255static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
1256                                        struct kvm_arm_device_addr *dev_addr)
1257{
1258        unsigned long dev_id, type;
1259
1260        dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
1261                KVM_ARM_DEVICE_ID_SHIFT;
1262        type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
1263                KVM_ARM_DEVICE_TYPE_SHIFT;
1264
1265        switch (dev_id) {
1266        case KVM_ARM_DEVICE_VGIC_V2:
1267                if (!vgic_present)
1268                        return -ENXIO;
1269                return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
1270        default:
1271                return -ENODEV;
1272        }
1273}
1274
1275long kvm_arch_vm_ioctl(struct file *filp,
1276                       unsigned int ioctl, unsigned long arg)
1277{
1278        struct kvm *kvm = filp->private_data;
1279        void __user *argp = (void __user *)arg;
1280
1281        switch (ioctl) {
1282        case KVM_CREATE_IRQCHIP: {
1283                int ret;
1284                if (!vgic_present)
1285                        return -ENXIO;
1286                mutex_lock(&kvm->lock);
1287                ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
1288                mutex_unlock(&kvm->lock);
1289                return ret;
1290        }
1291        case KVM_ARM_SET_DEVICE_ADDR: {
1292                struct kvm_arm_device_addr dev_addr;
1293
1294                if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
1295                        return -EFAULT;
1296                return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
1297        }
1298        case KVM_ARM_PREFERRED_TARGET: {
1299                int err;
1300                struct kvm_vcpu_init init;
1301
1302                err = kvm_vcpu_preferred_target(&init);
1303                if (err)
1304                        return err;
1305
1306                if (copy_to_user(argp, &init, sizeof(init)))
1307                        return -EFAULT;
1308
1309                return 0;
1310        }
1311        default:
1312                return -EINVAL;
1313        }
1314}
1315
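     /*
      * Initialise EL2 on this CPU for non-VHE operation: switch from the hyp
      * stub to KVM's init vectors, then pass the hyp page table base, the top
      * of this CPU's hyp stack and the runtime vector address down to EL2,
      * and set up the stage-2 translation controls.
      */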
1316static void cpu_init_hyp_mode(void *dummy)
1317{
1318        phys_addr_t pgd_ptr;
1319        unsigned long hyp_stack_ptr;
1320        unsigned long stack_page;
1321        unsigned long vector_ptr;
1322
1323        /* Switch from the HYP stub to our own HYP init vector */
1324        __hyp_set_vectors(kvm_get_idmap_vector());
1325
1326        pgd_ptr = kvm_mmu_get_httbr();
1327        stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
1328        hyp_stack_ptr = stack_page + PAGE_SIZE;
1329        vector_ptr = (unsigned long)kvm_get_hyp_vector();
1330
1331        __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
1332        __cpu_init_stage2();
1333}
1334
1335static void cpu_hyp_reset(void)
1336{
1337        if (!is_kernel_in_hyp_mode())
1338                __hyp_reset_vectors();
1339}
1340
1341static void cpu_hyp_reinit(void)
1342{
1343        kvm_init_host_cpu_context(&this_cpu_ptr(&kvm_host_data)->host_ctxt);
1344
1345        cpu_hyp_reset();
1346
1347        if (is_kernel_in_hyp_mode())
1348                kvm_timer_init_vhe();
1349        else
1350                cpu_init_hyp_mode(NULL);
1351
1352        kvm_arm_init_debug();
1353
1354        if (vgic_present)
1355                kvm_vgic_init_cpu_hardware();
1356}
1357
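     /* Enable EL2 on this CPU if it is not already enabled (tracked per cpu). */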
1358static void _kvm_arch_hardware_enable(void *discard)
1359{
1360        if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
1361                cpu_hyp_reinit();
1362                __this_cpu_write(kvm_arm_hardware_enabled, 1);
1363        }
1364}
1365
1366int kvm_arch_hardware_enable(void)
1367{
1368        _kvm_arch_hardware_enable(NULL);
1369        return 0;
1370}
1371
1372static void _kvm_arch_hardware_disable(void *discard)
1373{
1374        if (__this_cpu_read(kvm_arm_hardware_enabled)) {
1375                cpu_hyp_reset();
1376                __this_cpu_write(kvm_arm_hardware_enabled, 0);
1377        }
1378}
1379
1380void kvm_arch_hardware_disable(void)
1381{
1382        _kvm_arch_hardware_disable(NULL);
1383}
1384
1385#ifdef CONFIG_CPU_PM
1386static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
1387                                    unsigned long cmd,
1388                                    void *v)
1389{
1390        /*
1391         * kvm_arm_hardware_enabled is left with its old value over
1392         * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
1393         * re-enable hyp.
1394         */
1395        switch (cmd) {
1396        case CPU_PM_ENTER:
1397                if (__this_cpu_read(kvm_arm_hardware_enabled))
1398                        /*
1399                         * don't update kvm_arm_hardware_enabled here
1400                         * so that the hardware will be re-enabled
1401                         * when we resume. See below.
1402                         */
1403                        cpu_hyp_reset();
1404
1405                return NOTIFY_OK;
1406        case CPU_PM_ENTER_FAILED:
1407        case CPU_PM_EXIT:
1408                if (__this_cpu_read(kvm_arm_hardware_enabled))
1409                        /* The hardware was enabled before suspend. */
1410                        cpu_hyp_reinit();
1411
1412                return NOTIFY_OK;
1413
1414        default:
1415                return NOTIFY_DONE;
1416        }
1417}
1418
1419static struct notifier_block hyp_init_cpu_pm_nb = {
1420        .notifier_call = hyp_init_cpu_pm_notifier,
1421};
1422
1423static void __init hyp_cpu_pm_init(void)
1424{
1425        cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
1426}
1427static void __init hyp_cpu_pm_exit(void)
1428{
1429        cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
1430}
1431#else
1432static inline void hyp_cpu_pm_init(void)
1433{
1434}
1435static inline void hyp_cpu_pm_exit(void)
1436{
1437}
1438#endif
1439
1440static int init_common_resources(void)
1441{
1442        kvm_set_ipa_limit();
1443
1444        return 0;
1445}
1446
1447static int init_subsystems(void)
1448{
1449        int err = 0;
1450
1451        /*
1452         * Enable hardware so that subsystem initialisation can access EL2.
1453         */
1454        on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);
1455
1456        /*
 1457         * Register the CPU low-power (PM) notifier
1458         */
1459        hyp_cpu_pm_init();
1460
1461        /*
1462         * Init HYP view of VGIC
1463         */
1464        err = kvm_vgic_hyp_init();
1465        switch (err) {
1466        case 0:
1467                vgic_present = true;
1468                break;
1469        case -ENODEV:
1470        case -ENXIO:
1471                vgic_present = false;
1472                err = 0;
1473                break;
1474        default:
1475                goto out;
1476        }
1477
1478        /*
1479         * Init HYP architected timer support
1480         */
1481        err = kvm_timer_hyp_init(vgic_present);
1482        if (err)
1483                goto out;
1484
1485        kvm_perf_init();
1486        kvm_coproc_table_init();
1487
1488out:
1489        on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
1490
1491        return err;
1492}
1493
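     /*
      * Undo init_hyp_mode(): tear down the hyp page tables, free the per-cpu
      * hyp stack pages and unregister the CPU PM notifier.
      */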
1494static void teardown_hyp_mode(void)
1495{
1496        int cpu;
1497
1498        free_hyp_pgds();
1499        for_each_possible_cpu(cpu)
1500                free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
1501        hyp_cpu_pm_exit();
1502}
1503
1504/**
 1505 * Set up the Hyp page tables, stacks and mappings shared by all CPUs
1506 */
1507static int init_hyp_mode(void)
1508{
1509        int cpu;
1510        int err = 0;
1511
1512        /*
1513         * Allocate Hyp PGD and setup Hyp identity mapping
1514         */
1515        err = kvm_mmu_init();
1516        if (err)
1517                goto out_err;
1518
1519        /*
1520         * Allocate stack pages for Hypervisor-mode
1521         */
1522        for_each_possible_cpu(cpu) {
1523                unsigned long stack_page;
1524
1525                stack_page = __get_free_page(GFP_KERNEL);
1526                if (!stack_page) {
1527                        err = -ENOMEM;
1528                        goto out_err;
1529                }
1530
1531                per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
1532        }
1533
1534        /*
1535         * Map the Hyp-code called directly from the host
1536         */
1537        err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
1538                                  kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
1539        if (err) {
1540                kvm_err("Cannot map world-switch code\n");
1541                goto out_err;
1542        }
1543
1544        err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
1545                                  kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
1546        if (err) {
1547                kvm_err("Cannot map rodata section\n");
1548                goto out_err;
1549        }
1550
1551        err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
1552                                  kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
1553        if (err) {
1554                kvm_err("Cannot map bss section\n");
1555                goto out_err;
1556        }
1557
1558        err = kvm_map_vectors();
1559        if (err) {
1560                kvm_err("Cannot map vectors\n");
1561                goto out_err;
1562        }
1563
1564        /*
1565         * Map the Hyp stack pages
1566         */
1567        for_each_possible_cpu(cpu) {
1568                char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
1569                err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
1570                                          PAGE_HYP);
1571
1572                if (err) {
1573                        kvm_err("Cannot map hyp stack\n");
1574                        goto out_err;
1575                }
1576        }
1577
1578        for_each_possible_cpu(cpu) {
1579                kvm_host_data_t *cpu_data;
1580
1581                cpu_data = per_cpu_ptr(&kvm_host_data, cpu);
1582                err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP);
1583
1584                if (err) {
1585                        kvm_err("Cannot map host CPU state: %d\n", err);
1586                        goto out_err;
1587                }
1588        }
1589
1590        err = hyp_map_aux_data();
1591        if (err)
1592                kvm_err("Cannot map host auxiliary data: %d\n", err);
1593
1594        return 0;
1595
1596out_err:
1597        teardown_hyp_mode();
1598        kvm_err("error initializing Hyp mode: %d\n", err);
1599        return err;
1600}
1601
1602static void check_kvm_target_cpu(void *ret)
1603{
1604        *(int *)ret = kvm_target_cpu();
1605}
1606
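     /*
      * Find the vcpu whose MPIDR affinity matches @mpidr (used, for example,
      * to resolve the target of a PSCI CPU_ON request); returns NULL if none.
      */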
1607struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
1608{
1609        struct kvm_vcpu *vcpu;
1610        int i;
1611
1612        mpidr &= MPIDR_HWID_BITMASK;
1613        kvm_for_each_vcpu(i, vcpu, kvm) {
1614                if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
1615                        return vcpu;
1616        }
1617        return NULL;
1618}
1619
1620bool kvm_arch_has_irq_bypass(void)
1621{
1622        return true;
1623}
1624
1625int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
1626                                      struct irq_bypass_producer *prod)
1627{
1628        struct kvm_kernel_irqfd *irqfd =
1629                container_of(cons, struct kvm_kernel_irqfd, consumer);
1630
1631        return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
1632                                          &irqfd->irq_entry);
1633}
1634void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
1635                                      struct irq_bypass_producer *prod)
1636{
1637        struct kvm_kernel_irqfd *irqfd =
1638                container_of(cons, struct kvm_kernel_irqfd, consumer);
1639
1640        kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
1641                                     &irqfd->irq_entry);
1642}
1643
1644void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
1645{
1646        struct kvm_kernel_irqfd *irqfd =
1647                container_of(cons, struct kvm_kernel_irqfd, consumer);
1648
1649        kvm_arm_halt_guest(irqfd->kvm);
1650}
1651
1652void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
1653{
1654        struct kvm_kernel_irqfd *irqfd =
1655                container_of(cons, struct kvm_kernel_irqfd, consumer);
1656
1657        kvm_arm_resume_guest(irqfd->kvm);
1658}
1659
1660/**
1661 * Initialize Hyp-mode and memory mappings on all CPUs.
1662 */
1663int kvm_arch_init(void *opaque)
1664{
1665        int err;
1666        int ret, cpu;
1667        bool in_hyp_mode;
1668
1669        if (!is_hyp_mode_available()) {
1670                kvm_info("HYP mode not available\n");
1671                return -ENODEV;
1672        }
1673
1674        in_hyp_mode = is_kernel_in_hyp_mode();
1675
1676        if (!in_hyp_mode && kvm_arch_requires_vhe()) {
1677                kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n");
1678                return -ENODEV;
1679        }
1680
1681        for_each_online_cpu(cpu) {
1682                smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
1683                if (ret < 0) {
1684                        kvm_err("Error, CPU %d not supported!\n", cpu);
1685                        return -ENODEV;
1686                }
1687        }
1688
1689        err = init_common_resources();
1690        if (err)
1691                return err;
1692
1693        err = kvm_arm_init_sve();
1694        if (err)
1695                return err;
1696
1697        if (!in_hyp_mode) {
1698                err = init_hyp_mode();
1699                if (err)
1700                        goto out_err;
1701        }
1702
1703        err = init_subsystems();
1704        if (err)
1705                goto out_hyp;
1706
1707        if (in_hyp_mode)
1708                kvm_info("VHE mode initialized successfully\n");
1709        else
1710                kvm_info("Hyp mode initialized successfully\n");
1711
1712        return 0;
1713
1714out_hyp:
1715        if (!in_hyp_mode)
1716                teardown_hyp_mode();
1717out_err:
1718        return err;
1719}
1720
1721/* NOP: Compiling as a module not supported */
1722void kvm_arch_exit(void)
1723{
1724        kvm_perf_teardown();
1725}
1726
1727static int arm_init(void)
1728{
1729        int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
1730        return rc;
1731}
1732
1733module_init(arm_init);
1734