linux/arch/x86/kvm/svm/avic.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Kernel-based Virtual Machine driver for Linux
   4 *
   5 * AMD SVM support
   6 *
   7 * Copyright (C) 2006 Qumranet, Inc.
   8 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
   9 *
  10 * Authors:
  11 *   Yaniv Kamay  <yaniv@qumranet.com>
  12 *   Avi Kivity   <avi@qumranet.com>
  13 */
  14
  15#define pr_fmt(fmt) "SVM: " fmt
  16
  17#include <linux/kvm_types.h>
  18#include <linux/hashtable.h>
  19#include <linux/amd-iommu.h>
  20#include <linux/kvm_host.h>
  21
  22#include <asm/irq_remapping.h>
  23
  24#include "trace.h"
  25#include "lapic.h"
  26#include "x86.h"
  27#include "irq.h"
  28#include "svm.h"
  29
  30/* enable / disable AVIC */
  31int avic;
  32#ifdef CONFIG_X86_LOCAL_APIC
  33module_param(avic, int, S_IRUGO);
  34#endif
  35
  36#define SVM_AVIC_DOORBELL       0xc001011b
  37
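    /*
     * Mask that keeps the 4K-aligned host physical address bits [51:12] of a
     * table pointer and clears bits [63:52] and the page-offset bits [11:0].
     */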
  38#define AVIC_HPA_MASK   ~((0xFFFULL << 52) | 0xFFF)
  39
  40/*
  41 * 0xff is broadcast, so the max index allowed for physical APIC ID
  42 * table is 0xfe.  APIC IDs above 0xff are reserved.
  43 */
  44#define AVIC_MAX_PHYSICAL_ID_COUNT      255
  45
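    /*
     * Decode helpers for AVIC unaccelerated-access #VMEXITs (see
     * avic_unaccelerated_access_interception()): bit 32 of EXITINFO1 flags a
     * write, EXITINFO1 & 0xFF0 gives the APIC register offset, and the low
     * 32 bits of EXITINFO2 carry the vector.
     */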
  46#define AVIC_UNACCEL_ACCESS_WRITE_MASK          1
  47#define AVIC_UNACCEL_ACCESS_OFFSET_MASK         0xFF0
  48#define AVIC_UNACCEL_ACCESS_VECTOR_MASK         0xFFFFFFFF
  49
  50/* AVIC GATAG is encoded using VM and VCPU IDs */
  51#define AVIC_VCPU_ID_BITS               8
  52#define AVIC_VCPU_ID_MASK               ((1 << AVIC_VCPU_ID_BITS) - 1)
  53
  54#define AVIC_VM_ID_BITS                 24
  55#define AVIC_VM_ID_NR                   (1 << AVIC_VM_ID_BITS)
  56#define AVIC_VM_ID_MASK                 ((1 << AVIC_VM_ID_BITS) - 1)
  57
  58#define AVIC_GATAG(x, y)                (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
  59                                                (y & AVIC_VCPU_ID_MASK))
  60#define AVIC_GATAG_TO_VMID(x)           ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
  61#define AVIC_GATAG_TO_VCPUID(x)         (x & AVIC_VCPU_ID_MASK)
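    /*
     * The resulting 32-bit tag carries the VM ID in bits [31:8] and the vCPU
     * ID in bits [7:0].  For example, AVIC_GATAG(0x2, 0x5) == 0x205, which
     * AVIC_GATAG_TO_VMID() decodes back to 0x2 and AVIC_GATAG_TO_VCPUID()
     * to 0x5.
     */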
  62
  63/* Note:
  64 * This hash table maps a VM_ID to its struct kvm_svm when handling AMD
  65 * IOMMU GALOG notifications, so the targeted vCPU of that VM can be
  66 * scheduled in.
  67 */
  68#define SVM_VM_DATA_HASH_BITS   8
  69static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
  70static u32 next_vm_id = 0;
  71static bool next_vm_id_wrapped = 0;
  72static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
  73
  74/*
  75 * This is a wrapper around struct amd_ir_data.
  76 */
  77struct amd_svm_iommu_ir {
  78        struct list_head node;  /* Used by SVM for per-vcpu ir_list */
  79        void *data;             /* Storing pointer to struct amd_ir_data */
  80};
  81
  82enum avic_ipi_failure_cause {
  83        AVIC_IPI_FAILURE_INVALID_INT_TYPE,
  84        AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
  85        AVIC_IPI_FAILURE_INVALID_TARGET,
  86        AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
  87};
  88
  89/* Note:
  90 * This function is called from the IOMMU driver to notify
  91 * SVM to schedule in a particular vCPU of a particular VM.
  92 */
  93int avic_ga_log_notifier(u32 ga_tag)
  94{
  95        unsigned long flags;
  96        struct kvm_svm *kvm_svm;
  97        struct kvm_vcpu *vcpu = NULL;
  98        u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
  99        u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
 100
 101        pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
 102        trace_kvm_avic_ga_log(vm_id, vcpu_id);
 103
 104        spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 105        hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
 106                if (kvm_svm->avic_vm_id != vm_id)
 107                        continue;
 108                vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
 109                break;
 110        }
 111        spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 112
 113        /* Note:
 114         * At this point, the IOMMU should have already set the pending
 115         * bit in the vAPIC backing page. So, we just need to schedule
 116         * in the vcpu.
 117         */
 118        if (vcpu)
 119                kvm_vcpu_wake_up(vcpu);
 120
 121        return 0;
 122}
 123
 124void avic_vm_destroy(struct kvm *kvm)
 125{
 126        unsigned long flags;
 127        struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
 128
 129        if (!avic)
 130                return;
 131
 132        if (kvm_svm->avic_logical_id_table_page)
 133                __free_page(kvm_svm->avic_logical_id_table_page);
 134        if (kvm_svm->avic_physical_id_table_page)
 135                __free_page(kvm_svm->avic_physical_id_table_page);
 136
 137        spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 138        hash_del(&kvm_svm->hnode);
 139        spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 140}
 141
 142int avic_vm_init(struct kvm *kvm)
 143{
 144        unsigned long flags;
 145        int err = -ENOMEM;
 146        struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
 147        struct kvm_svm *k2;
 148        struct page *p_page;
 149        struct page *l_page;
 150        u32 vm_id;
 151
 152        if (!avic)
 153                return 0;
 154
 155        /* Allocating physical APIC ID table (4KB) */
 156        p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 157        if (!p_page)
 158                goto free_avic;
 159
 160        kvm_svm->avic_physical_id_table_page = p_page;
 161
 162        /* Allocating logical APIC ID table (4KB) */
 163        l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 164        if (!l_page)
 165                goto free_avic;
 166
 167        kvm_svm->avic_logical_id_table_page = l_page;
 168
 169        spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 170 again:
 171        vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
 172        if (vm_id == 0) { /* id is 1-based, zero is not okay */
 173                next_vm_id_wrapped = 1;
 174                goto again;
 175        }
 176        /* Is it still in use? Only possible if wrapped at least once */
 177        if (next_vm_id_wrapped) {
 178                hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
 179                        if (k2->avic_vm_id == vm_id)
 180                                goto again;
 181                }
 182        }
 183        kvm_svm->avic_vm_id = vm_id;
 184        hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
 185        spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 186
 187        return 0;
 188
 189free_avic:
 190        avic_vm_destroy(kvm);
 191        return err;
 192}
 193
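    /*
     * Program the vAPIC backing page and the logical/physical APIC ID table
     * addresses into the VMCB, along with the physical max index
     * (AVIC_MAX_PHYSICAL_ID_COUNT), and reflect the current APICv activation
     * state in AVIC_ENABLE_MASK.
     */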
 194void avic_init_vmcb(struct vcpu_svm *svm)
 195{
 196        struct vmcb *vmcb = svm->vmcb;
 197        struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
 198        phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
 199        phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
 200        phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
 201
 202        vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
 203        vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
 204        vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
 205        vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
 206        if (kvm_apicv_activated(svm->vcpu.kvm))
 207                vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
 208        else
 209                vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
 210}
 211
 212static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
 213                                       unsigned int index)
 214{
 215        u64 *avic_physical_id_table;
 216        struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 217
 218        if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
 219                return NULL;
 220
 221        avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
 222
 223        return &avic_physical_id_table[index];
 224}
 225
 226/**
 227 * Note:
 228 * AVIC hardware walks the nested page table to check permissions,
 229 * but does not use the SPA address specified in the leaf page
 230 * table entry since it uses the address in the AVIC_BACKING_PAGE pointer
 231 * field of the VMCB. Therefore, we set up the
 232 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
 233 */
 234static int avic_update_access_page(struct kvm *kvm, bool activate)
 235{
 236        void __user *ret;
 237        int r = 0;
 238
 239        mutex_lock(&kvm->slots_lock);
 240        /*
 241         * During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger
 242         * an APICv mode change, which updates the APIC_ACCESS_PAGE_PRIVATE_MEMSLOT
 243         * memory region. So, we need to ensure that kvm->mm == current->mm.
 244         */
 245        if ((kvm->arch.apic_access_page_done == activate) ||
 246            (kvm->mm != current->mm))
 247                goto out;
 248
 249        ret = __x86_set_memory_region(kvm,
 250                                      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
 251                                      APIC_DEFAULT_PHYS_BASE,
 252                                      activate ? PAGE_SIZE : 0);
 253        if (IS_ERR(ret)) {
 254                r = PTR_ERR(ret);
 255                goto out;
 256        }
 257
 258        kvm->arch.apic_access_page_done = activate;
 259out:
 260        mutex_unlock(&kvm->slots_lock);
 261        return r;
 262}
 263
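    /*
     * Each physical APIC ID table entry written here holds the __sme_set()
     * address of the vCPU's vAPIC backing page plus the VALID bit; the host
     * physical APIC ID and IS_RUNNING bit in the same entry are maintained
     * later by avic_vcpu_load() and avic_vcpu_put().
     */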
 264static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 265{
 266        u64 *entry, new_entry;
 267        int id = vcpu->vcpu_id;
 268        struct vcpu_svm *svm = to_svm(vcpu);
 269
 270        if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
 271                return -EINVAL;
 272
 273        if (!svm->vcpu.arch.apic->regs)
 274                return -EINVAL;
 275
 276        if (kvm_apicv_activated(vcpu->kvm)) {
 277                int ret;
 278
 279                ret = avic_update_access_page(vcpu->kvm, true);
 280                if (ret)
 281                        return ret;
 282        }
 283
 284        svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
 285
 286        /* Setting AVIC backing page address in the physical APIC ID table */
 287        entry = avic_get_physical_id_entry(vcpu, id);
 288        if (!entry)
 289                return -EINVAL;
 290
 291        new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
 292                              AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
 293                              AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
 294        WRITE_ONCE(*entry, new_entry);
 295
 296        svm->avic_physical_id_cache = entry;
 297
 298        return 0;
 299}
 300
 301static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
 302                                   u32 icrl, u32 icrh)
 303{
 304        struct kvm_vcpu *vcpu;
 305        int i;
 306
 307        kvm_for_each_vcpu(i, vcpu, kvm) {
 308                bool m = kvm_apic_match_dest(vcpu, source,
 309                                             icrl & APIC_SHORT_MASK,
 310                                             GET_APIC_DEST_FIELD(icrh),
 311                                             icrl & APIC_DEST_MASK);
 312
 313                if (m && !avic_vcpu_is_running(vcpu))
 314                        kvm_vcpu_wake_up(vcpu);
 315        }
 316}
 317
 318int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
 319{
 320        u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
 321        u32 icrl = svm->vmcb->control.exit_info_1;
 322        u32 id = svm->vmcb->control.exit_info_2 >> 32;
 323        u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
 324        struct kvm_lapic *apic = svm->vcpu.arch.apic;
 325
 326        trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
 327
 328        switch (id) {
 329        case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
 330                /*
 331                 * AVIC hardware handles the generation of
 332                 * IPIs when the specified Message Type is Fixed
 333                 * (also known as fixed delivery mode) and
 334                 * the Trigger Mode is edge-triggered. The hardware
 335                 * also supports self and broadcast delivery modes
 336                 * specified via the Destination Shorthand (DSH)
 337                 * field of the ICRL. Logical and physical APIC ID
 338                 * formats are supported. All other IPI types cause
 339                 * a #VMEXIT, which needs to be emulated.
 340                 */
 341                kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
 342                kvm_lapic_reg_write(apic, APIC_ICR, icrl);
 343                break;
 344        case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING:
 345                /*
 346                 * At this point, we expect that the AVIC HW has already
 347                 * set the appropriate IRR bits on the valid target
 348                 * vcpus. So, we just need to kick the appropriate vcpu.
 349                 */
 350                avic_kick_target_vcpus(svm->vcpu.kvm, apic, icrl, icrh);
 351                break;
 352        case AVIC_IPI_FAILURE_INVALID_TARGET:
 353                WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
 354                          index, svm->vcpu.vcpu_id, icrh, icrl);
 355                break;
 356        case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
 357                WARN_ONCE(1, "Invalid backing page\n");
 358                break;
 359        default:
 360                pr_err("Unknown IPI interception\n");
 361        }
 362
 363        return 1;
 364}
 365
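    /*
     * In flat mode the logical APIC ID is an 8-bit bitmap, so the table index
     * is the position of the (single) set bit, e.g. logical ID 0x08 maps to
     * index 3.  In cluster mode the high nibble selects the cluster and the
     * low nibble is a bitmap of up to four APICs within it, e.g. logical ID
     * 0x21 maps to index (2 << 2) + 0 = 8.
     */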
 366static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
 367{
 368        struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 369        int index;
 370        u32 *logical_apic_id_table;
 371        int dlid = GET_APIC_LOGICAL_ID(ldr);
 372
 373        if (!dlid)
 374                return NULL;
 375
 376        if (flat) { /* flat */
 377                index = ffs(dlid) - 1;
 378                if (index > 7)
 379                        return NULL;
 380        } else { /* cluster */
 381                int cluster = (dlid & 0xf0) >> 4;
 382                int apic = ffs(dlid & 0x0f) - 1;
 383
 384                if ((apic < 0) || (apic > 7) ||
 385                    (cluster >= 0xf))
 386                        return NULL;
 387                index = (cluster << 2) + apic;
 388        }
 389
 390        logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
 391
 392        return &logical_apic_id_table[index];
 393}
 394
 395static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
 396{
 397        bool flat;
 398        u32 *entry, new_entry;
 399
 400        flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
 401        entry = avic_get_logical_id_entry(vcpu, ldr, flat);
 402        if (!entry)
 403                return -EINVAL;
 404
 405        new_entry = READ_ONCE(*entry);
 406        new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
 407        new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
 408        new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
 409        WRITE_ONCE(*entry, new_entry);
 410
 411        return 0;
 412}
 413
 414static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
 415{
 416        struct vcpu_svm *svm = to_svm(vcpu);
 417        bool flat = svm->dfr_reg == APIC_DFR_FLAT;
 418        u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
 419
 420        if (entry)
 421                clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
 422}
 423
 424static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
 425{
 426        int ret = 0;
 427        struct vcpu_svm *svm = to_svm(vcpu);
 428        u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
 429        u32 id = kvm_xapic_id(vcpu->arch.apic);
 430
 431        if (ldr == svm->ldr_reg)
 432                return 0;
 433
 434        avic_invalidate_logical_id_entry(vcpu);
 435
 436        if (ldr)
 437                ret = avic_ldr_write(vcpu, id, ldr);
 438
 439        if (!ret)
 440                svm->ldr_reg = ldr;
 441
 442        return ret;
 443}
 444
 445static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
 446{
 447        u64 *old, *new;
 448        struct vcpu_svm *svm = to_svm(vcpu);
 449        u32 id = kvm_xapic_id(vcpu->arch.apic);
 450
 451        if (vcpu->vcpu_id == id)
 452                return 0;
 453
 454        old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
 455        new = avic_get_physical_id_entry(vcpu, id);
 456        if (!new || !old)
 457                return 1;
 458
 459        /* We need to move the physical_id_entry to its new offset */
 460        *new = *old;
 461        *old = 0ULL;
 462        to_svm(vcpu)->avic_physical_id_cache = new;
 463
 464        /*
 465         * Also update the guest physical APIC ID in the logical
 466         * APIC ID table entry if the LDR has already been set up.
 467         */
 468        if (svm->ldr_reg)
 469                avic_handle_ldr_update(vcpu);
 470
 471        return 0;
 472}
 473
 474static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
 475{
 476        struct vcpu_svm *svm = to_svm(vcpu);
 477        u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
 478
 479        if (svm->dfr_reg == dfr)
 480                return;
 481
 482        avic_invalidate_logical_id_entry(vcpu);
 483        svm->dfr_reg = dfr;
 484}
 485
 486static int avic_unaccel_trap_write(struct vcpu_svm *svm)
 487{
 488        struct kvm_lapic *apic = svm->vcpu.arch.apic;
 489        u32 offset = svm->vmcb->control.exit_info_1 &
 490                                AVIC_UNACCEL_ACCESS_OFFSET_MASK;
 491
 492        switch (offset) {
 493        case APIC_ID:
 494                if (avic_handle_apic_id_update(&svm->vcpu))
 495                        return 0;
 496                break;
 497        case APIC_LDR:
 498                if (avic_handle_ldr_update(&svm->vcpu))
 499                        return 0;
 500                break;
 501        case APIC_DFR:
 502                avic_handle_dfr_update(&svm->vcpu);
 503                break;
 504        default:
 505                break;
 506        }
 507
 508        kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
 509
 510        return 1;
 511}
 512
 513static bool is_avic_unaccelerated_access_trap(u32 offset)
 514{
 515        bool ret = false;
 516
 517        switch (offset) {
 518        case APIC_ID:
 519        case APIC_EOI:
 520        case APIC_RRR:
 521        case APIC_LDR:
 522        case APIC_DFR:
 523        case APIC_SPIV:
 524        case APIC_ESR:
 525        case APIC_ICR:
 526        case APIC_LVTT:
 527        case APIC_LVTTHMR:
 528        case APIC_LVTPC:
 529        case APIC_LVT0:
 530        case APIC_LVT1:
 531        case APIC_LVTERR:
 532        case APIC_TMICT:
 533        case APIC_TDCR:
 534                ret = true;
 535                break;
 536        default:
 537                break;
 538        }
 539        return ret;
 540}
 541
 542int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
 543{
 544        int ret = 0;
 545        u32 offset = svm->vmcb->control.exit_info_1 &
 546                     AVIC_UNACCEL_ACCESS_OFFSET_MASK;
 547        u32 vector = svm->vmcb->control.exit_info_2 &
 548                     AVIC_UNACCEL_ACCESS_VECTOR_MASK;
 549        bool write = (svm->vmcb->control.exit_info_1 >> 32) &
 550                     AVIC_UNACCEL_ACCESS_WRITE_MASK;
 551        bool trap = is_avic_unaccelerated_access_trap(offset);
 552
 553        trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
 554                                            trap, write, vector);
 555        if (trap) {
 556                /* Handling Trap */
 557                WARN_ONCE(!write, "svm: Handling trap read.\n");
 558                ret = avic_unaccel_trap_write(svm);
 559        } else {
 560                /* Handling Fault */
 561                ret = kvm_emulate_instruction(&svm->vcpu, 0);
 562        }
 563
 564        return ret;
 565}
 566
 567int avic_init_vcpu(struct vcpu_svm *svm)
 568{
 569        int ret;
 570        struct kvm_vcpu *vcpu = &svm->vcpu;
 571
 572        if (!avic || !irqchip_in_kernel(vcpu->kvm))
 573                return 0;
 574
 575        ret = avic_init_backing_page(&svm->vcpu);
 576        if (ret)
 577                return ret;
 578
 579        INIT_LIST_HEAD(&svm->ir_list);
 580        spin_lock_init(&svm->ir_list_lock);
 581        svm->dfr_reg = APIC_DFR_FLAT;
 582
 583        return ret;
 584}
 585
 586void avic_post_state_restore(struct kvm_vcpu *vcpu)
 587{
 588        if (avic_handle_apic_id_update(vcpu) != 0)
 589                return;
 590        avic_handle_dfr_update(vcpu);
 591        avic_handle_ldr_update(vcpu);
 592}
 593
 594void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate)
 595{
 596        if (!avic || !lapic_in_kernel(vcpu))
 597                return;
 598
 599        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 600        kvm_request_apicv_update(vcpu->kvm, activate,
 601                                 APICV_INHIBIT_REASON_IRQWIN);
 602        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 603}
 604
 605void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
 606{
 607        return;
 608}
 609
 610void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 611{
 612}
 613
 614void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
 615{
 616}
 617
 618static int svm_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
 619{
 620        int ret = 0;
 621        unsigned long flags;
 622        struct amd_svm_iommu_ir *ir;
 623        struct vcpu_svm *svm = to_svm(vcpu);
 624
 625        if (!kvm_arch_has_assigned_device(vcpu->kvm))
 626                return 0;
 627
 628        /*
 629         * Here, we go through the per-vcpu ir_list to update all existing
 630         * interrupt remapping table entries targeting this vcpu.
 631         */
 632        spin_lock_irqsave(&svm->ir_list_lock, flags);
 633
 634        if (list_empty(&svm->ir_list))
 635                goto out;
 636
 637        list_for_each_entry(ir, &svm->ir_list, node) {
 638                if (activate)
 639                        ret = amd_iommu_activate_guest_mode(ir->data);
 640                else
 641                        ret = amd_iommu_deactivate_guest_mode(ir->data);
 642                if (ret)
 643                        break;
 644        }
 645out:
 646        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 647        return ret;
 648}
 649
 650void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 651{
 652        struct vcpu_svm *svm = to_svm(vcpu);
 653        struct vmcb *vmcb = svm->vmcb;
 654        bool activated = kvm_vcpu_apicv_active(vcpu);
 655
 656        if (!avic)
 657                return;
 658
 659        if (activated) {
 660                /**
 661                 * During temporary AVIC deactivation, the guest could update
 662                 * APIC ID, DFR and LDR registers, which would not be trapped
 663                 * by avic_unaccelerated_access_interception(). In this case,
 664                 * we need to check and update the AVIC logical APIC ID table
 665                 * accordingly before re-activating.
 666                 */
 667                avic_post_state_restore(vcpu);
 668                vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
 669        } else {
 670                vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
 671        }
 672        vmcb_mark_dirty(vmcb, VMCB_AVIC);
 673
 674        svm_set_pi_irte_mode(vcpu, activated);
 675}
 676
 677void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 678{
 679        return;
 680}
 681
 682int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
 683{
 684        if (!vcpu->arch.apicv_active)
 685                return -1;
 686
 687        kvm_lapic_set_irr(vec, vcpu->arch.apic);
 688        smp_mb__after_atomic();
 689
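            /*
             * If the target vCPU is running in guest mode, ring the AVIC
             * doorbell MSR targeting the host CPU it runs on (unless that is
             * the current CPU) so the hardware notices the IRR bit set above;
             * otherwise wake the vCPU so the interrupt is evaluated on its
             * next VMRUN.
             */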
 690        if (avic_vcpu_is_running(vcpu)) {
 691                int cpuid = vcpu->cpu;
 692
 693                if (cpuid != get_cpu())
 694                        wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
 695                put_cpu();
 696        } else
 697                kvm_vcpu_wake_up(vcpu);
 698
 699        return 0;
 700}
 701
 702bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
 703{
 704        return false;
 705}
 706
 707static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
 708{
 709        unsigned long flags;
 710        struct amd_svm_iommu_ir *cur;
 711
 712        spin_lock_irqsave(&svm->ir_list_lock, flags);
 713        list_for_each_entry(cur, &svm->ir_list, node) {
 714                if (cur->data != pi->ir_data)
 715                        continue;
 716                list_del(&cur->node);
 717                kfree(cur);
 718                break;
 719        }
 720        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 721}
 722
 723static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
 724{
 725        int ret = 0;
 726        unsigned long flags;
 727        struct amd_svm_iommu_ir *ir;
 728
 729        /**
 730         * In some cases, the existing irte is updated and re-set,
 731         * so we need to check here if it's already been added
 732         * to the ir_list.
 733         */
 734        if (pi->ir_data && (pi->prev_ga_tag != 0)) {
 735                struct kvm *kvm = svm->vcpu.kvm;
 736                u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
 737                struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
 738                struct vcpu_svm *prev_svm;
 739
 740                if (!prev_vcpu) {
 741                        ret = -EINVAL;
 742                        goto out;
 743                }
 744
 745                prev_svm = to_svm(prev_vcpu);
 746                svm_ir_list_del(prev_svm, pi);
 747        }
 748
 749        /**
 750         * Allocate a new struct amd_svm_iommu_ir, which will get
 751         * added to the per-vcpu ir_list.
 752         */
 753        ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
 754        if (!ir) {
 755                ret = -ENOMEM;
 756                goto out;
 757        }
 758        ir->data = pi->ir_data;
 759
 760        spin_lock_irqsave(&svm->ir_list_lock, flags);
 761        list_add(&ir->node, &svm->ir_list);
 762        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 763out:
 764        return ret;
 765}
 766
 767/**
 768 * Note:
 769 * The HW cannot support posting multicast/broadcast
 770 * interrupts to a vCPU. So, we still use legacy interrupt
 771 * remapping for these kinds of interrupts.
 772 *
 773 * For lowest-priority interrupts, we only support
 774 * those with a single CPU as the destination, e.g. the user
 775 * configures the interrupts via /proc/irq or uses
 776 * irqbalance to make the interrupts single-CPU.
 777 */
 778static int
 779get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
 780                 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
 781{
 782        struct kvm_lapic_irq irq;
 783        struct kvm_vcpu *vcpu = NULL;
 784
 785        kvm_set_msi_irq(kvm, e, &irq);
 786
 787        if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
 788            !kvm_irq_is_postable(&irq)) {
 789                pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
 790                         __func__, irq.vector);
 791                return -1;
 792        }
 793
 794        pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
 795                 irq.vector);
 796        *svm = to_svm(vcpu);
 797        vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
 798        vcpu_info->vector = irq.vector;
 799
 800        return 0;
 801}
 802
 803/*
 804 * svm_update_pi_irte - set IRTE for Posted-Interrupts
 805 *
 806 * @kvm: kvm
 807 * @host_irq: host irq of the interrupt
 808 * @guest_irq: gsi of the interrupt
 809 * @set: set or unset PI
 810 * returns 0 on success, < 0 on failure
 811 */
 812int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 813                       uint32_t guest_irq, bool set)
 814{
 815        struct kvm_kernel_irq_routing_entry *e;
 816        struct kvm_irq_routing_table *irq_rt;
 817        int idx, ret = -EINVAL;
 818
 819        if (!kvm_arch_has_assigned_device(kvm) ||
 820            !irq_remapping_cap(IRQ_POSTING_CAP))
 821                return 0;
 822
 823        pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
 824                 __func__, host_irq, guest_irq, set);
 825
 826        idx = srcu_read_lock(&kvm->irq_srcu);
 827        irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 828        WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
 829
 830        hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
 831                struct vcpu_data vcpu_info;
 832                struct vcpu_svm *svm = NULL;
 833
 834                if (e->type != KVM_IRQ_ROUTING_MSI)
 835                        continue;
 836
 837                /**
 838                 * Here, we set up legacy mode in the following cases:
 839                 * 1. When the interrupt cannot be targeted to a specific vcpu.
 840                 * 2. Unsetting the posted interrupt.
 841                 * 3. APIC virtualization is disabled for the vcpu.
 842                 * 4. The IRQ has an incompatible delivery mode (SMI, INIT, etc).
 843                 */
 844                if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
 845                    kvm_vcpu_apicv_active(&svm->vcpu)) {
 846                        struct amd_iommu_pi_data pi;
 847
 848                        /* Try to enable guest_mode in IRTE */
 849                        pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
 850                                            AVIC_HPA_MASK);
 851                        pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
 852                                                     svm->vcpu.vcpu_id);
 853                        pi.is_guest_mode = true;
 854                        pi.vcpu_data = &vcpu_info;
 855                        ret = irq_set_vcpu_affinity(host_irq, &pi);
 856
 857                        /**
 858                         * Here, we have successfully set up vcpu affinity in
 859                         * IOMMU guest mode. Now, we need to store the posted
 860                         * interrupt information in the per-vcpu ir_list so that
 861                         * we can reference it directly when we update the vcpu
 862                         * scheduling information in the IOMMU irte.
 863                         */
 864                        if (!ret && pi.is_guest_mode)
 865                                svm_ir_list_add(svm, &pi);
 866                } else {
 867                        /* Use legacy mode in IRTE */
 868                        struct amd_iommu_pi_data pi;
 869
 870                        /**
 871                         * Here, pi is used to:
 872                         * - Tell the IOMMU to use legacy mode for this interrupt.
 873                         * - Retrieve the ga_tag of the prior interrupt remapping data.
 874                         */
 875                        pi.prev_ga_tag = 0;
 876                        pi.is_guest_mode = false;
 877                        ret = irq_set_vcpu_affinity(host_irq, &pi);
 878
 879                        /**
 880                         * Check if the posted interrupt was previously
 881                         * set up with guest_mode by checking whether the ga_tag
 882                         * was cached. If so, we need to clean up the per-vcpu
 883                         * ir_list.
 884                         */
 885                        if (!ret && pi.prev_ga_tag) {
 886                                int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
 887                                struct kvm_vcpu *vcpu;
 888
 889                                vcpu = kvm_get_vcpu_by_id(kvm, id);
 890                                if (vcpu)
 891                                        svm_ir_list_del(to_svm(vcpu), &pi);
 892                        }
 893                }
 894
 895                if (!ret && svm) {
 896                        trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
 897                                                 e->gsi, vcpu_info.vector,
 898                                                 vcpu_info.pi_desc_addr, set);
 899                }
 900
 901                if (ret < 0) {
 902                        pr_err("%s: failed to update PI IRTE\n", __func__);
 903                        goto out;
 904                }
 905        }
 906
 907        ret = 0;
 908out:
 909        srcu_read_unlock(&kvm->irq_srcu, idx);
 910        return ret;
 911}
 912
 913bool svm_check_apicv_inhibit_reasons(ulong bit)
 914{
 915        ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
 916                          BIT(APICV_INHIBIT_REASON_HYPERV) |
 917                          BIT(APICV_INHIBIT_REASON_NESTED) |
 918                          BIT(APICV_INHIBIT_REASON_IRQWIN) |
 919                          BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
 920                          BIT(APICV_INHIBIT_REASON_X2APIC);
 921
 922        return supported & BIT(bit);
 923}
 924
 925void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate)
 926{
 927        avic_update_access_page(kvm, activate);
 928}
 929
 930static inline int
 931avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 932{
 933        int ret = 0;
 934        unsigned long flags;
 935        struct amd_svm_iommu_ir *ir;
 936        struct vcpu_svm *svm = to_svm(vcpu);
 937
 938        if (!kvm_arch_has_assigned_device(vcpu->kvm))
 939                return 0;
 940
 941        /*
 942         * Here, we go through the per-vcpu ir_list to update all existing
 943         * interrupt remapping table entries targeting this vcpu.
 944         */
 945        spin_lock_irqsave(&svm->ir_list_lock, flags);
 946
 947        if (list_empty(&svm->ir_list))
 948                goto out;
 949
 950        list_for_each_entry(ir, &svm->ir_list, node) {
 951                ret = amd_iommu_update_ga(cpu, r, ir->data);
 952                if (ret)
 953                        break;
 954        }
 955out:
 956        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 957        return ret;
 958}
 959
 960void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 961{
 962        u64 entry;
 963        /* ID = 0xff (broadcast), ID > 0xff (reserved) */
 964        int h_physical_id = kvm_cpu_get_apicid(cpu);
 965        struct vcpu_svm *svm = to_svm(vcpu);
 966
 967        if (!kvm_vcpu_apicv_active(vcpu))
 968                return;
 969
 970        /*
 971         * Since the host physical APIC ID is 8 bits,
 972         * we can support host APIC IDs up to 255.
 973         */
 974        if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
 975                return;
 976
 977        entry = READ_ONCE(*(svm->avic_physical_id_cache));
 978        WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
 979
 980        entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
 981        entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
 982
 983        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
 984        if (svm->avic_is_running)
 985                entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
 986
 987        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
 988        avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
 989                                        svm->avic_is_running);
 990}
 991
 992void avic_vcpu_put(struct kvm_vcpu *vcpu)
 993{
 994        u64 entry;
 995        struct vcpu_svm *svm = to_svm(vcpu);
 996
 997        if (!kvm_vcpu_apicv_active(vcpu))
 998                return;
 999
1000        entry = READ_ONCE(*(svm->avic_physical_id_cache));
1001        if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
1002                avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
1003
1004        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1005        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
1006}
1007
1008/**
1009 * This function is called during VCPU halt/unhalt.
1010 */
1011static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
1012{
1013        struct vcpu_svm *svm = to_svm(vcpu);
1014
1015        svm->avic_is_running = is_run;
1016        if (is_run)
1017                avic_vcpu_load(vcpu, vcpu->cpu);
1018        else
1019                avic_vcpu_put(vcpu);
1020}
1021
1022void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
1023{
1024        avic_set_running(vcpu, false);
1025}
1026
1027void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
1028{
1029        if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
1030                kvm_vcpu_update_apicv(vcpu);
1031        avic_set_running(vcpu, true);
1032}
1033