linux/arch/x86/kvm/svm/avic.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Kernel-based Virtual Machine driver for Linux
   4 *
   5 * AMD SVM support
   6 *
   7 * Copyright (C) 2006 Qumranet, Inc.
   8 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
   9 *
  10 * Authors:
  11 *   Yaniv Kamay  <yaniv@qumranet.com>
  12 *   Avi Kivity   <avi@qumranet.com>
  13 */
  14
  15#define pr_fmt(fmt) "SVM: " fmt
  16
  17#include <linux/kvm_types.h>
  18#include <linux/hashtable.h>
  19#include <linux/amd-iommu.h>
  20#include <linux/kvm_host.h>
  21
  22#include <asm/irq_remapping.h>
  23
  24#include "trace.h"
  25#include "lapic.h"
  26#include "x86.h"
  27#include "irq.h"
  28#include "svm.h"
  29
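     /*
      * AVIC doorbell MSR (C001_011Bh per the APM): writing the physical APIC
      * ID of the core running a target vCPU makes that core's AVIC re-scan
      * the vAPIC backing page for newly posted interrupts; see
      * svm_deliver_avic_intr() below.
      */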
  30#define SVM_AVIC_DOORBELL       0xc001011b
  31
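     /*
      * Mask that keeps bits 51:12 of a host physical address (clearing bits
      * 63:52 and 11:0), i.e. the 4KB-aligned page address programmed into
      * the AVIC backing-page and table-pointer fields in avic_init_vmcb().
      */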
  32#define AVIC_HPA_MASK   ~((0xFFFULL << 52) | 0xFFF)
  33
  34/*
  35 * 0xff is broadcast, so the max index allowed for physical APIC ID
  36 * table is 0xfe.  APIC IDs above 0xff are reserved.
  37 */
  38#define AVIC_MAX_PHYSICAL_ID_COUNT      255
  39
  40#define AVIC_UNACCEL_ACCESS_WRITE_MASK          1
  41#define AVIC_UNACCEL_ACCESS_OFFSET_MASK         0xFF0
  42#define AVIC_UNACCEL_ACCESS_VECTOR_MASK         0xFFFFFFFF
  43
  44/* AVIC GATAG is encoded using VM and VCPU IDs */
  45#define AVIC_VCPU_ID_BITS               8
  46#define AVIC_VCPU_ID_MASK               ((1 << AVIC_VCPU_ID_BITS) - 1)
  47
  48#define AVIC_VM_ID_BITS                 24
  49#define AVIC_VM_ID_NR                   (1 << AVIC_VM_ID_BITS)
  50#define AVIC_VM_ID_MASK                 ((1 << AVIC_VM_ID_BITS) - 1)
  51
  52#define AVIC_GATAG(x, y)                (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
  53                                                (y & AVIC_VCPU_ID_MASK))
  54#define AVIC_GATAG_TO_VMID(x)           ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
  55#define AVIC_GATAG_TO_VCPUID(x)         (x & AVIC_VCPU_ID_MASK)
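     /*
      * Example, derived from the definitions above: an interrupt posted to
      * vCPU 5 of the VM whose avic_vm_id is 2 carries
      * ga_tag = AVIC_GATAG(2, 5) = (2 << 8) | 5 = 0x205, which
      * avic_ga_log_notifier() decodes back into vm_id 2 and vcpu_id 5 via
      * AVIC_GATAG_TO_VMID() and AVIC_GATAG_TO_VCPUID().
      */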
  56
  57/* Note:
   58 * This hash table is used to map VM_ID to a struct kvm_svm
   59 * when handling an AMD IOMMU GALOG notification, in order to
   60 * schedule in a particular vCPU.
  61 */
  62#define SVM_VM_DATA_HASH_BITS   8
  63static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
  64static u32 next_vm_id = 0;
   65static bool next_vm_id_wrapped = false;
  66static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
  67
  68/*
  69 * This is a wrapper of struct amd_iommu_ir_data.
  70 */
  71struct amd_svm_iommu_ir {
  72        struct list_head node;  /* Used by SVM for per-vcpu ir_list */
  73        void *data;             /* Storing pointer to struct amd_ir_data */
  74};
  75
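     /*
      * Failure causes for an AVIC_INCOMPLETE_IPI #VMEXIT;
      * avic_incomplete_ipi_interception() below reads the cause from the
      * upper 32 bits of EXITINFO2 and the source APIC index from its low
      * bits.
      */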
  76enum avic_ipi_failure_cause {
  77        AVIC_IPI_FAILURE_INVALID_INT_TYPE,
  78        AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
  79        AVIC_IPI_FAILURE_INVALID_TARGET,
  80        AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
  81};
  82
  83/* Note:
   84 * This function is called from the IOMMU driver to notify
  85 * SVM to schedule in a particular vCPU of a particular VM.
  86 */
  87int avic_ga_log_notifier(u32 ga_tag)
  88{
  89        unsigned long flags;
  90        struct kvm_svm *kvm_svm;
  91        struct kvm_vcpu *vcpu = NULL;
  92        u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
  93        u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
  94
  95        pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
  96        trace_kvm_avic_ga_log(vm_id, vcpu_id);
  97
  98        spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
  99        hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
 100                if (kvm_svm->avic_vm_id != vm_id)
 101                        continue;
 102                vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
 103                break;
 104        }
 105        spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 106
 107        /* Note:
 108         * At this point, the IOMMU should have already set the pending
 109         * bit in the vAPIC backing page. So, we just need to schedule
 110         * in the vcpu.
 111         */
 112        if (vcpu)
 113                kvm_vcpu_wake_up(vcpu);
 114
 115        return 0;
 116}
 117
 118void avic_vm_destroy(struct kvm *kvm)
 119{
 120        unsigned long flags;
 121        struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
 122
 123        if (!enable_apicv)
 124                return;
 125
 126        if (kvm_svm->avic_logical_id_table_page)
 127                __free_page(kvm_svm->avic_logical_id_table_page);
 128        if (kvm_svm->avic_physical_id_table_page)
 129                __free_page(kvm_svm->avic_physical_id_table_page);
 130
 131        spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 132        hash_del(&kvm_svm->hnode);
 133        spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 134}
 135
 136int avic_vm_init(struct kvm *kvm)
 137{
 138        unsigned long flags;
 139        int err = -ENOMEM;
 140        struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
 141        struct kvm_svm *k2;
 142        struct page *p_page;
 143        struct page *l_page;
 144        u32 vm_id;
 145
 146        if (!enable_apicv)
 147                return 0;
 148
 149        /* Allocating physical APIC ID table (4KB) */
 150        p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 151        if (!p_page)
 152                goto free_avic;
 153
 154        kvm_svm->avic_physical_id_table_page = p_page;
 155
 156        /* Allocating logical APIC ID table (4KB) */
 157        l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 158        if (!l_page)
 159                goto free_avic;
 160
 161        kvm_svm->avic_logical_id_table_page = l_page;
 162
 163        spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 164 again:
 165        vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
 166        if (vm_id == 0) { /* id is 1-based, zero is not okay */
 167                next_vm_id_wrapped = 1;
 168                goto again;
 169        }
 170        /* Is it still in use? Only possible if wrapped at least once */
 171        if (next_vm_id_wrapped) {
 172                hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
 173                        if (k2->avic_vm_id == vm_id)
 174                                goto again;
 175                }
 176        }
 177        kvm_svm->avic_vm_id = vm_id;
 178        hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
 179        spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 180
 181        return 0;
 182
 183free_avic:
 184        avic_vm_destroy(kvm);
 185        return err;
 186}
 187
 188void avic_init_vmcb(struct vcpu_svm *svm)
 189{
 190        struct vmcb *vmcb = svm->vmcb;
 191        struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
 192        phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
 193        phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
 194        phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
 195
 196        vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
 197        vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
 198        vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
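             /*
              * Per the APM, the low bits of the physical ID table pointer
              * field hold the largest guest APIC ID index that hardware may
              * use when indexing the table.
              */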
 199        vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
 200        if (kvm_apicv_activated(svm->vcpu.kvm))
 201                vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
 202        else
 203                vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
 204}
 205
 206static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
 207                                       unsigned int index)
 208{
 209        u64 *avic_physical_id_table;
 210        struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 211
 212        if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
 213                return NULL;
 214
 215        avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
 216
 217        return &avic_physical_id_table[index];
 218}
 219
 220/*
 221 * Note:
 222 * AVIC hardware walks the nested page table to check permissions,
 223 * but does not use the SPA address specified in the leaf page
  224 * table entry since it uses the address in the AVIC_BACKING_PAGE pointer
 225 * field of the VMCB. Therefore, we set up the
 226 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
 227 */
 228static int avic_update_access_page(struct kvm *kvm, bool activate)
 229{
 230        void __user *ret;
 231        int r = 0;
 232
 233        mutex_lock(&kvm->slots_lock);
 234        /*
 235         * During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger
  236         * an APICv mode change, which updates the APIC_ACCESS_PAGE_PRIVATE_MEMSLOT
 237         * memory region. So, we need to ensure that kvm->mm == current->mm.
 238         */
 239        if ((kvm->arch.apic_access_memslot_enabled == activate) ||
 240            (kvm->mm != current->mm))
 241                goto out;
 242
 243        ret = __x86_set_memory_region(kvm,
 244                                      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
 245                                      APIC_DEFAULT_PHYS_BASE,
 246                                      activate ? PAGE_SIZE : 0);
 247        if (IS_ERR(ret)) {
 248                r = PTR_ERR(ret);
 249                goto out;
 250        }
 251
 252        kvm->arch.apic_access_memslot_enabled = activate;
 253out:
 254        mutex_unlock(&kvm->slots_lock);
 255        return r;
 256}
 257
 258static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 259{
 260        u64 *entry, new_entry;
 261        int id = vcpu->vcpu_id;
 262        struct vcpu_svm *svm = to_svm(vcpu);
 263
 264        if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
 265                return -EINVAL;
 266
 267        if (!vcpu->arch.apic->regs)
 268                return -EINVAL;
 269
 270        if (kvm_apicv_activated(vcpu->kvm)) {
 271                int ret;
 272
 273                ret = avic_update_access_page(vcpu->kvm, true);
 274                if (ret)
 275                        return ret;
 276        }
 277
 278        svm->avic_backing_page = virt_to_page(vcpu->arch.apic->regs);
 279
  280        /* Set the AVIC backing page address in the physical APIC ID table */
 281        entry = avic_get_physical_id_entry(vcpu, id);
 282        if (!entry)
 283                return -EINVAL;
 284
 285        new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
 286                              AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
 287                              AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
 288        WRITE_ONCE(*entry, new_entry);
 289
 290        svm->avic_physical_id_cache = entry;
 291
 292        return 0;
 293}
 294
 295static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
 296                                   u32 icrl, u32 icrh)
 297{
 298        struct kvm_vcpu *vcpu;
 299        int i;
 300
 301        kvm_for_each_vcpu(i, vcpu, kvm) {
 302                bool m = kvm_apic_match_dest(vcpu, source,
 303                                             icrl & APIC_SHORT_MASK,
 304                                             GET_APIC_DEST_FIELD(icrh),
 305                                             icrl & APIC_DEST_MASK);
 306
 307                if (m && !avic_vcpu_is_running(vcpu))
 308                        kvm_vcpu_wake_up(vcpu);
 309        }
 310}
 311
 312int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
 313{
 314        struct vcpu_svm *svm = to_svm(vcpu);
 315        u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
 316        u32 icrl = svm->vmcb->control.exit_info_1;
 317        u32 id = svm->vmcb->control.exit_info_2 >> 32;
 318        u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
 319        struct kvm_lapic *apic = vcpu->arch.apic;
 320
 321        trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
 322
 323        switch (id) {
 324        case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
 325                /*
 326                 * AVIC hardware handles the generation of
 327                 * IPIs when the specified Message Type is Fixed
 328                 * (also known as fixed delivery mode) and
 329                 * the Trigger Mode is edge-triggered. The hardware
 330                 * also supports self and broadcast delivery modes
  331                 * specified via the Destination Shorthand (DSH)
 332                 * field of the ICRL. Logical and physical APIC ID
 333                 * formats are supported. All other IPI types cause
  334                 * a #VMEXIT, which needs to be emulated.
 335                 */
 336                kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
 337                kvm_lapic_reg_write(apic, APIC_ICR, icrl);
 338                break;
 339        case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING:
 340                /*
 341                 * At this point, we expect that the AVIC HW has already
 342                 * set the appropriate IRR bits on the valid target
 343                 * vcpus. So, we just need to kick the appropriate vcpu.
 344                 */
 345                avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh);
 346                break;
 347        case AVIC_IPI_FAILURE_INVALID_TARGET:
 348                WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
 349                          index, vcpu->vcpu_id, icrh, icrl);
 350                break;
 351        case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
 352                WARN_ONCE(1, "Invalid backing page\n");
 353                break;
 354        default:
 355                pr_err("Unknown IPI interception\n");
 356        }
 357
 358        return 1;
 359}
 360
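     /*
      * Map the logical ID portion of a guest LDR value to its slot in the
      * AVIC logical ID table. Examples, derived from the code below: in flat
      * mode a logical ID of 0x08 (bit 3) maps to index 3; in cluster mode a
      * logical ID of 0x21 (cluster 2, bit 0) maps to index (2 << 2) + 0 = 8.
      */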
 361static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
 362{
 363        struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 364        int index;
 365        u32 *logical_apic_id_table;
 366        int dlid = GET_APIC_LOGICAL_ID(ldr);
 367
 368        if (!dlid)
 369                return NULL;
 370
 371        if (flat) { /* flat */
 372                index = ffs(dlid) - 1;
 373                if (index > 7)
 374                        return NULL;
 375        } else { /* cluster */
 376                int cluster = (dlid & 0xf0) >> 4;
 377                int apic = ffs(dlid & 0x0f) - 1;
 378
 379                if ((apic < 0) || (apic > 7) ||
 380                    (cluster >= 0xf))
 381                        return NULL;
 382                index = (cluster << 2) + apic;
 383        }
 384
 385        logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
 386
 387        return &logical_apic_id_table[index];
 388}
 389
 390static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
 391{
 392        bool flat;
 393        u32 *entry, new_entry;
 394
 395        flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
 396        entry = avic_get_logical_id_entry(vcpu, ldr, flat);
 397        if (!entry)
 398                return -EINVAL;
 399
 400        new_entry = READ_ONCE(*entry);
 401        new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
 402        new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
 403        new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
 404        WRITE_ONCE(*entry, new_entry);
 405
 406        return 0;
 407}
 408
 409static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
 410{
 411        struct vcpu_svm *svm = to_svm(vcpu);
 412        bool flat = svm->dfr_reg == APIC_DFR_FLAT;
 413        u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
 414
 415        if (entry)
 416                clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
 417}
 418
 419static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
 420{
 421        int ret = 0;
 422        struct vcpu_svm *svm = to_svm(vcpu);
 423        u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
 424        u32 id = kvm_xapic_id(vcpu->arch.apic);
 425
 426        if (ldr == svm->ldr_reg)
 427                return 0;
 428
 429        avic_invalidate_logical_id_entry(vcpu);
 430
 431        if (ldr)
 432                ret = avic_ldr_write(vcpu, id, ldr);
 433
 434        if (!ret)
 435                svm->ldr_reg = ldr;
 436
 437        return ret;
 438}
 439
 440static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
 441{
 442        u64 *old, *new;
 443        struct vcpu_svm *svm = to_svm(vcpu);
 444        u32 id = kvm_xapic_id(vcpu->arch.apic);
 445
 446        if (vcpu->vcpu_id == id)
 447                return 0;
 448
 449        old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
 450        new = avic_get_physical_id_entry(vcpu, id);
 451        if (!new || !old)
 452                return 1;
 453
  454        /* We need to move the physical_id_entry to the new offset */
 455        *new = *old;
 456        *old = 0ULL;
 457        to_svm(vcpu)->avic_physical_id_cache = new;
 458
 459        /*
 460         * Also update the guest physical APIC ID in the logical
  461         * APIC ID table entry if the LDR has already been set up.
 462         */
 463        if (svm->ldr_reg)
 464                avic_handle_ldr_update(vcpu);
 465
 466        return 0;
 467}
 468
 469static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
 470{
 471        struct vcpu_svm *svm = to_svm(vcpu);
 472        u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
 473
 474        if (svm->dfr_reg == dfr)
 475                return;
 476
 477        avic_invalidate_logical_id_entry(vcpu);
 478        svm->dfr_reg = dfr;
 479}
 480
 481static int avic_unaccel_trap_write(struct vcpu_svm *svm)
 482{
 483        struct kvm_lapic *apic = svm->vcpu.arch.apic;
 484        u32 offset = svm->vmcb->control.exit_info_1 &
 485                                AVIC_UNACCEL_ACCESS_OFFSET_MASK;
 486
 487        switch (offset) {
 488        case APIC_ID:
 489                if (avic_handle_apic_id_update(&svm->vcpu))
 490                        return 0;
 491                break;
 492        case APIC_LDR:
 493                if (avic_handle_ldr_update(&svm->vcpu))
 494                        return 0;
 495                break;
 496        case APIC_DFR:
 497                avic_handle_dfr_update(&svm->vcpu);
 498                break;
 499        default:
 500                break;
 501        }
 502
 503        kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
 504
 505        return 1;
 506}
 507
 508static bool is_avic_unaccelerated_access_trap(u32 offset)
 509{
 510        bool ret = false;
 511
 512        switch (offset) {
 513        case APIC_ID:
 514        case APIC_EOI:
 515        case APIC_RRR:
 516        case APIC_LDR:
 517        case APIC_DFR:
 518        case APIC_SPIV:
 519        case APIC_ESR:
 520        case APIC_ICR:
 521        case APIC_LVTT:
 522        case APIC_LVTTHMR:
 523        case APIC_LVTPC:
 524        case APIC_LVT0:
 525        case APIC_LVT1:
 526        case APIC_LVTERR:
 527        case APIC_TMICT:
 528        case APIC_TDCR:
 529                ret = true;
 530                break;
 531        default:
 532                break;
 533        }
 534        return ret;
 535}
 536
 537int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu)
 538{
 539        struct vcpu_svm *svm = to_svm(vcpu);
 540        int ret = 0;
 541        u32 offset = svm->vmcb->control.exit_info_1 &
 542                     AVIC_UNACCEL_ACCESS_OFFSET_MASK;
 543        u32 vector = svm->vmcb->control.exit_info_2 &
 544                     AVIC_UNACCEL_ACCESS_VECTOR_MASK;
 545        bool write = (svm->vmcb->control.exit_info_1 >> 32) &
 546                     AVIC_UNACCEL_ACCESS_WRITE_MASK;
 547        bool trap = is_avic_unaccelerated_access_trap(offset);
 548
 549        trace_kvm_avic_unaccelerated_access(vcpu->vcpu_id, offset,
 550                                            trap, write, vector);
 551        if (trap) {
 552                /* Handling Trap */
 553                WARN_ONCE(!write, "svm: Handling trap read.\n");
 554                ret = avic_unaccel_trap_write(svm);
 555        } else {
 556                /* Handling Fault */
 557                ret = kvm_emulate_instruction(vcpu, 0);
 558        }
 559
 560        return ret;
 561}
 562
 563int avic_init_vcpu(struct vcpu_svm *svm)
 564{
 565        int ret;
 566        struct kvm_vcpu *vcpu = &svm->vcpu;
 567
 568        if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm))
 569                return 0;
 570
 571        ret = avic_init_backing_page(vcpu);
 572        if (ret)
 573                return ret;
 574
 575        INIT_LIST_HEAD(&svm->ir_list);
 576        spin_lock_init(&svm->ir_list_lock);
 577        svm->dfr_reg = APIC_DFR_FLAT;
 578
 579        return ret;
 580}
 581
 582void avic_post_state_restore(struct kvm_vcpu *vcpu)
 583{
 584        if (avic_handle_apic_id_update(vcpu) != 0)
 585                return;
 586        avic_handle_dfr_update(vcpu);
 587        avic_handle_ldr_update(vcpu);
 588}
 589
 590void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate)
 591{
 592        if (!enable_apicv || !lapic_in_kernel(vcpu))
 593                return;
 594
 595        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 596        kvm_request_apicv_update(vcpu->kvm, activate,
 597                                 APICV_INHIBIT_REASON_IRQWIN);
 598        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 599}
 600
 601void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
 602{
 603        return;
 604}
 605
 606void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 607{
 608}
 609
 610void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
 611{
 612}
 613
 614static int svm_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
 615{
 616        int ret = 0;
 617        unsigned long flags;
 618        struct amd_svm_iommu_ir *ir;
 619        struct vcpu_svm *svm = to_svm(vcpu);
 620
 621        if (!kvm_arch_has_assigned_device(vcpu->kvm))
 622                return 0;
 623
 624        /*
 625         * Here, we go through the per-vcpu ir_list to update all existing
  626         * interrupt remapping table entries targeting this vcpu.
 627         */
 628        spin_lock_irqsave(&svm->ir_list_lock, flags);
 629
 630        if (list_empty(&svm->ir_list))
 631                goto out;
 632
 633        list_for_each_entry(ir, &svm->ir_list, node) {
 634                if (activate)
 635                        ret = amd_iommu_activate_guest_mode(ir->data);
 636                else
 637                        ret = amd_iommu_deactivate_guest_mode(ir->data);
 638                if (ret)
 639                        break;
 640        }
 641out:
 642        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 643        return ret;
 644}
 645
 646void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 647{
 648        struct vcpu_svm *svm = to_svm(vcpu);
 649        struct vmcb *vmcb = svm->vmcb01.ptr;
 650        bool activated = kvm_vcpu_apicv_active(vcpu);
 651
 652        if (!enable_apicv)
 653                return;
 654
 655        if (activated) {
 656                /**
  657                 * During temporary AVIC deactivation, the guest could update the
  658                 * APIC ID, DFR and LDR registers, which would not be trapped
 659                 * by avic_unaccelerated_access_interception(). In this case,
 660                 * we need to check and update the AVIC logical APIC ID table
 661                 * accordingly before re-activating.
 662                 */
 663                avic_post_state_restore(vcpu);
 664                vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
 665        } else {
 666                vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
 667        }
 668        vmcb_mark_dirty(vmcb, VMCB_AVIC);
 669
 670        svm_set_pi_irte_mode(vcpu, activated);
 671}
 672
 673void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 674{
 675        return;
 676}
 677
 678int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
 679{
 680        if (!vcpu->arch.apicv_active)
 681                return -1;
 682
 683        kvm_lapic_set_irr(vec, vcpu->arch.apic);
 684        smp_mb__after_atomic();
 685
 686        if (avic_vcpu_is_running(vcpu)) {
 687                int cpuid = vcpu->cpu;
 688
 689                if (cpuid != get_cpu())
 690                        wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
 691                put_cpu();
 692        } else
 693                kvm_vcpu_wake_up(vcpu);
 694
 695        return 0;
 696}
 697
 698bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
 699{
 700        return false;
 701}
 702
 703static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
 704{
 705        unsigned long flags;
 706        struct amd_svm_iommu_ir *cur;
 707
 708        spin_lock_irqsave(&svm->ir_list_lock, flags);
 709        list_for_each_entry(cur, &svm->ir_list, node) {
 710                if (cur->data != pi->ir_data)
 711                        continue;
 712                list_del(&cur->node);
 713                kfree(cur);
 714                break;
 715        }
 716        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 717}
 718
 719static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
 720{
 721        int ret = 0;
 722        unsigned long flags;
 723        struct amd_svm_iommu_ir *ir;
 724
 725        /**
 726         * In some cases, the existing irte is updated and re-set,
  727         * so we need to check here if it has already been added
 728         * to the ir_list.
 729         */
 730        if (pi->ir_data && (pi->prev_ga_tag != 0)) {
 731                struct kvm *kvm = svm->vcpu.kvm;
 732                u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
 733                struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
 734                struct vcpu_svm *prev_svm;
 735
 736                if (!prev_vcpu) {
 737                        ret = -EINVAL;
 738                        goto out;
 739                }
 740
 741                prev_svm = to_svm(prev_vcpu);
 742                svm_ir_list_del(prev_svm, pi);
 743        }
 744
 745        /**
  746         * Allocate a new struct amd_svm_iommu_ir, which will be
  747         * added to the per-vcpu ir_list.
 748         */
 749        ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
 750        if (!ir) {
 751                ret = -ENOMEM;
 752                goto out;
 753        }
 754        ir->data = pi->ir_data;
 755
 756        spin_lock_irqsave(&svm->ir_list_lock, flags);
 757        list_add(&ir->node, &svm->ir_list);
 758        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 759out:
 760        return ret;
 761}
 762
 763/*
 764 * Note:
 765 * The HW cannot support posting multicast/broadcast
 766 * interrupts to a vCPU. So, we still use legacy interrupt
  767 * remapping for these kinds of interrupts.
 768 *
 769 * For lowest-priority interrupts, we only support
  770 * those with a single CPU as the destination, e.g. the user
  771 * configures the interrupt affinity via /proc/irq or uses
  772 * irqbalance to make the interrupts single-CPU.
 773 */
 774static int
 775get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
 776                 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
 777{
 778        struct kvm_lapic_irq irq;
 779        struct kvm_vcpu *vcpu = NULL;
 780
 781        kvm_set_msi_irq(kvm, e, &irq);
 782
 783        if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
 784            !kvm_irq_is_postable(&irq)) {
 785                pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
 786                         __func__, irq.vector);
 787                return -1;
 788        }
 789
 790        pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
 791                 irq.vector);
 792        *svm = to_svm(vcpu);
 793        vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
 794        vcpu_info->vector = irq.vector;
 795
 796        return 0;
 797}
 798
 799/*
 800 * svm_update_pi_irte - set IRTE for Posted-Interrupts
 801 *
 802 * @kvm: kvm
 803 * @host_irq: host irq of the interrupt
 804 * @guest_irq: gsi of the interrupt
 805 * @set: set or unset PI
 806 * returns 0 on success, < 0 on failure
 807 */
 808int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 809                       uint32_t guest_irq, bool set)
 810{
 811        struct kvm_kernel_irq_routing_entry *e;
 812        struct kvm_irq_routing_table *irq_rt;
 813        int idx, ret = -EINVAL;
 814
 815        if (!kvm_arch_has_assigned_device(kvm) ||
 816            !irq_remapping_cap(IRQ_POSTING_CAP))
 817                return 0;
 818
 819        pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
 820                 __func__, host_irq, guest_irq, set);
 821
 822        idx = srcu_read_lock(&kvm->irq_srcu);
 823        irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 824        WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
 825
 826        hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
 827                struct vcpu_data vcpu_info;
 828                struct vcpu_svm *svm = NULL;
 829
 830                if (e->type != KVM_IRQ_ROUTING_MSI)
 831                        continue;
 832
 833                /**
  834                 * Here, we set up legacy mode in the following cases:
  835                 * 1. When the interrupt cannot be targeted to a specific vcpu.
 836                 * 2. Unsetting posted interrupt.
 837                 * 3. APIC virtualization is disabled for the vcpu.
  838                 * 4. IRQ has an incompatible delivery mode (SMI, INIT, etc.).
 839                 */
 840                if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
 841                    kvm_vcpu_apicv_active(&svm->vcpu)) {
 842                        struct amd_iommu_pi_data pi;
 843
 844                        /* Try to enable guest_mode in IRTE */
 845                        pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
 846                                            AVIC_HPA_MASK);
 847                        pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
 848                                                     svm->vcpu.vcpu_id);
 849                        pi.is_guest_mode = true;
 850                        pi.vcpu_data = &vcpu_info;
 851                        ret = irq_set_vcpu_affinity(host_irq, &pi);
 852
 853                        /**
  854                         * Here, we have successfully set up vcpu affinity in
 855                         * IOMMU guest mode. Now, we need to store the posted
 856                         * interrupt information in a per-vcpu ir_list so that
  857                         * we can reference it directly when we update the vcpu
  858                         * scheduling information in the IOMMU irte.
 859                         */
 860                        if (!ret && pi.is_guest_mode)
 861                                svm_ir_list_add(svm, &pi);
 862                } else {
 863                        /* Use legacy mode in IRTE */
 864                        struct amd_iommu_pi_data pi;
 865
 866                        /**
 867                         * Here, pi is used to:
  868                         * - Tell the IOMMU to use legacy mode for this interrupt.
  869                         * - Retrieve the ga_tag of the prior interrupt remapping data.
 870                         */
 871                        pi.prev_ga_tag = 0;
 872                        pi.is_guest_mode = false;
 873                        ret = irq_set_vcpu_affinity(host_irq, &pi);
 874
 875                        /**
 876                         * Check if the posted interrupt was previously
  877                         * set up in guest_mode by checking whether the ga_tag
 878                         * was cached. If so, we need to clean up the per-vcpu
 879                         * ir_list.
 880                         */
 881                        if (!ret && pi.prev_ga_tag) {
 882                                int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
 883                                struct kvm_vcpu *vcpu;
 884
 885                                vcpu = kvm_get_vcpu_by_id(kvm, id);
 886                                if (vcpu)
 887                                        svm_ir_list_del(to_svm(vcpu), &pi);
 888                        }
 889                }
 890
 891                if (!ret && svm) {
 892                        trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
 893                                                 e->gsi, vcpu_info.vector,
 894                                                 vcpu_info.pi_desc_addr, set);
 895                }
 896
 897                if (ret < 0) {
 898                        pr_err("%s: failed to update PI IRTE\n", __func__);
 899                        goto out;
 900                }
 901        }
 902
 903        ret = 0;
 904out:
 905        srcu_read_unlock(&kvm->irq_srcu, idx);
 906        return ret;
 907}
 908
 909bool svm_check_apicv_inhibit_reasons(ulong bit)
 910{
 911        ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
 912                          BIT(APICV_INHIBIT_REASON_HYPERV) |
 913                          BIT(APICV_INHIBIT_REASON_NESTED) |
 914                          BIT(APICV_INHIBIT_REASON_IRQWIN) |
 915                          BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
 916                          BIT(APICV_INHIBIT_REASON_X2APIC);
 917
 918        return supported & BIT(bit);
 919}
 920
 921void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate)
 922{
 923        avic_update_access_page(kvm, activate);
 924}
 925
 926static inline int
 927avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 928{
 929        int ret = 0;
 930        unsigned long flags;
 931        struct amd_svm_iommu_ir *ir;
 932        struct vcpu_svm *svm = to_svm(vcpu);
 933
 934        if (!kvm_arch_has_assigned_device(vcpu->kvm))
 935                return 0;
 936
 937        /*
 938         * Here, we go through the per-vcpu ir_list to update all existing
  939         * interrupt remapping table entries targeting this vcpu.
 940         */
 941        spin_lock_irqsave(&svm->ir_list_lock, flags);
 942
 943        if (list_empty(&svm->ir_list))
 944                goto out;
 945
 946        list_for_each_entry(ir, &svm->ir_list, node) {
 947                ret = amd_iommu_update_ga(cpu, r, ir->data);
 948                if (ret)
 949                        break;
 950        }
 951out:
 952        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 953        return ret;
 954}
 955
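     /*
      * avic_vcpu_load()/avic_vcpu_put() publish where (and whether) a vCPU is
      * running by updating the host physical APIC ID and IS_RUNNING bit in
      * its physical ID table entry, and mirror that state into any IOMMU
      * IRTEs via avic_update_iommu_vcpu_affinity(). Senders consult the same
      * state: svm_deliver_avic_intr() rings the doorbell only while the
      * target is running and wakes the vCPU otherwise.
      */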
 956void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 957{
 958        u64 entry;
 959        /* ID = 0xff (broadcast), ID > 0xff (reserved) */
 960        int h_physical_id = kvm_cpu_get_apicid(cpu);
 961        struct vcpu_svm *svm = to_svm(vcpu);
 962
 963        if (!kvm_vcpu_apicv_active(vcpu))
 964                return;
 965
 966        /*
 967         * Since the host physical APIC id is 8 bits,
  968         * we can support host APIC IDs up to 255.
 969         */
 970        if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
 971                return;
 972
 973        entry = READ_ONCE(*(svm->avic_physical_id_cache));
 974        WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
 975
 976        entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
 977        entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
 978
 979        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
 980        if (svm->avic_is_running)
 981                entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
 982
 983        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
 984        avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
 985                                        svm->avic_is_running);
 986}
 987
 988void avic_vcpu_put(struct kvm_vcpu *vcpu)
 989{
 990        u64 entry;
 991        struct vcpu_svm *svm = to_svm(vcpu);
 992
 993        if (!kvm_vcpu_apicv_active(vcpu))
 994                return;
 995
 996        entry = READ_ONCE(*(svm->avic_physical_id_cache));
 997        if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
 998                avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
 999
1000        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1001        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
1002}
1003
1004/*
1005 * This function is called during VCPU halt/unhalt.
1006 */
1007static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
1008{
1009        struct vcpu_svm *svm = to_svm(vcpu);
1010
1011        svm->avic_is_running = is_run;
1012        if (is_run)
1013                avic_vcpu_load(vcpu, vcpu->cpu);
1014        else
1015                avic_vcpu_put(vcpu);
1016}
1017
1018void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
1019{
1020        avic_set_running(vcpu, false);
1021}
1022
1023void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
1024{
1025        if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
1026                kvm_vcpu_update_apicv(vcpu);
1027        avic_set_running(vcpu, true);
1028}
1029