linux/arch/x86/kvm/svm/avic.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Kernel-based Virtual Machine driver for Linux
   4 *
   5 * AMD SVM support
   6 *
   7 * Copyright (C) 2006 Qumranet, Inc.
   8 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
   9 *
  10 * Authors:
  11 *   Yaniv Kamay  <yaniv@qumranet.com>
  12 *   Avi Kivity   <avi@qumranet.com>
  13 */
  14
  15#define pr_fmt(fmt) "SVM: " fmt
  16
  17#include <linux/kvm_types.h>
  18#include <linux/hashtable.h>
  19#include <linux/amd-iommu.h>
  20#include <linux/kvm_host.h>
  21
  22#include <asm/irq_remapping.h>
  23
  24#include "trace.h"
  25#include "lapic.h"
  26#include "x86.h"
  27#include "irq.h"
  28#include "svm.h"
  29
  30/* enable / disable AVIC */
  31int avic;
  32#ifdef CONFIG_X86_LOCAL_APIC
  33module_param(avic, int, S_IRUGO);
  34#endif
  35
  36#define SVM_AVIC_DOORBELL       0xc001011b
  37
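/*
 * The AVIC_HPA_MASK below keeps bits 51:12 of a host physical address, i.e.
 * the page-aligned HPA expected by the VMCB's AVIC pointer fields; the page
 * offset (bits 11:0) and bits 63:52 are cleared.  For example,
 * 0x12345678 & AVIC_HPA_MASK == 0x12345000.
 */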
  38#define AVIC_HPA_MASK   ~((0xFFFULL << 52) | 0xFFF)
  39
  40/*
   41 * 0xff is broadcast, so the max index allowed for the physical APIC ID
  42 * table is 0xfe.  APIC IDs above 0xff are reserved.
  43 */
  44#define AVIC_MAX_PHYSICAL_ID_COUNT      255
  45
  46#define AVIC_UNACCEL_ACCESS_WRITE_MASK          1
  47#define AVIC_UNACCEL_ACCESS_OFFSET_MASK         0xFF0
  48#define AVIC_UNACCEL_ACCESS_VECTOR_MASK         0xFFFFFFFF
  49
  50/* AVIC GATAG is encoded using VM and VCPU IDs */
  51#define AVIC_VCPU_ID_BITS               8
  52#define AVIC_VCPU_ID_MASK               ((1 << AVIC_VCPU_ID_BITS) - 1)
  53
  54#define AVIC_VM_ID_BITS                 24
  55#define AVIC_VM_ID_NR                   (1 << AVIC_VM_ID_BITS)
  56#define AVIC_VM_ID_MASK                 ((1 << AVIC_VM_ID_BITS) - 1)
  57
  58#define AVIC_GATAG(x, y)                (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
  59                                                (y & AVIC_VCPU_ID_MASK))
  60#define AVIC_GATAG_TO_VMID(x)           ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
  61#define AVIC_GATAG_TO_VCPUID(x)         (x & AVIC_VCPU_ID_MASK)
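
/*
 * Worked example of the GATAG encoding above: for vm_id 0x123456 and
 * vcpu_id 0x78, AVIC_GATAG(0x123456, 0x78) == 0x12345678, and decoding
 * gives AVIC_GATAG_TO_VMID(0x12345678) == 0x123456 and
 * AVIC_GATAG_TO_VCPUID(0x12345678) == 0x78.
 */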
  62
  63/* Note:
   64 * This hash table is used to map VM_ID to a struct kvm_svm
   65 * when handling an AMD IOMMU GALOG notification to schedule in
  66 * a particular vCPU.
  67 */
  68#define SVM_VM_DATA_HASH_BITS   8
  69static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
  70static u32 next_vm_id = 0;
  71static bool next_vm_id_wrapped = 0;
  72static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
  73
  74/*
  75 * This is a wrapper of struct amd_iommu_ir_data.
  76 */
  77struct amd_svm_iommu_ir {
  78        struct list_head node;  /* Used by SVM for per-vcpu ir_list */
  79        void *data;             /* Storing pointer to struct amd_ir_data */
  80};
  81
  82enum avic_ipi_failure_cause {
  83        AVIC_IPI_FAILURE_INVALID_INT_TYPE,
  84        AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
  85        AVIC_IPI_FAILURE_INVALID_TARGET,
  86        AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
  87};
  88
  89/* Note:
   90 * This function is called from the IOMMU driver to notify
  91 * SVM to schedule in a particular vCPU of a particular VM.
  92 */
  93int avic_ga_log_notifier(u32 ga_tag)
  94{
  95        unsigned long flags;
  96        struct kvm_svm *kvm_svm;
  97        struct kvm_vcpu *vcpu = NULL;
  98        u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
  99        u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
 100
 101        pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
 102        trace_kvm_avic_ga_log(vm_id, vcpu_id);
 103
 104        spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 105        hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
 106                if (kvm_svm->avic_vm_id != vm_id)
 107                        continue;
 108                vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
 109                break;
 110        }
 111        spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 112
 113        /* Note:
 114         * At this point, the IOMMU should have already set the pending
 115         * bit in the vAPIC backing page. So, we just need to schedule
 116         * in the vcpu.
 117         */
 118        if (vcpu)
 119                kvm_vcpu_wake_up(vcpu);
 120
 121        return 0;
 122}
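
/*
 * For context: this notifier is not wired up in this file; it is presumably
 * registered with the AMD IOMMU driver by the SVM hardware setup code in
 * svm.c, roughly as:
 *
 *	amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
 */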
 123
 124void avic_vm_destroy(struct kvm *kvm)
 125{
 126        unsigned long flags;
 127        struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
 128
 129        if (!avic)
 130                return;
 131
 132        if (kvm_svm->avic_logical_id_table_page)
 133                __free_page(kvm_svm->avic_logical_id_table_page);
 134        if (kvm_svm->avic_physical_id_table_page)
 135                __free_page(kvm_svm->avic_physical_id_table_page);
 136
 137        spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 138        hash_del(&kvm_svm->hnode);
 139        spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 140}
 141
 142int avic_vm_init(struct kvm *kvm)
 143{
 144        unsigned long flags;
 145        int err = -ENOMEM;
 146        struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
 147        struct kvm_svm *k2;
 148        struct page *p_page;
 149        struct page *l_page;
 150        u32 vm_id;
 151
 152        if (!avic)
 153                return 0;
 154
 155        /* Allocating physical APIC ID table (4KB) */
 156        p_page = alloc_page(GFP_KERNEL_ACCOUNT);
 157        if (!p_page)
 158                goto free_avic;
 159
 160        kvm_svm->avic_physical_id_table_page = p_page;
 161        clear_page(page_address(p_page));
 162
 163        /* Allocating logical APIC ID table (4KB) */
 164        l_page = alloc_page(GFP_KERNEL_ACCOUNT);
 165        if (!l_page)
 166                goto free_avic;
 167
 168        kvm_svm->avic_logical_id_table_page = l_page;
 169        clear_page(page_address(l_page));
 170
 171        spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 172 again:
 173        vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
 174        if (vm_id == 0) { /* id is 1-based, zero is not okay */
 175                next_vm_id_wrapped = 1;
 176                goto again;
 177        }
 178        /* Is it still in use? Only possible if wrapped at least once */
 179        if (next_vm_id_wrapped) {
 180                hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
 181                        if (k2->avic_vm_id == vm_id)
 182                                goto again;
 183                }
 184        }
 185        kvm_svm->avic_vm_id = vm_id;
 186        hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
 187        spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 188
 189        return 0;
 190
 191free_avic:
 192        avic_vm_destroy(kvm);
 193        return err;
 194}
 195
 196void avic_init_vmcb(struct vcpu_svm *svm)
 197{
 198        struct vmcb *vmcb = svm->vmcb;
 199        struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
 200        phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
 201        phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
 202        phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
 203
 204        vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
 205        vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
 206        vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
 207        vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
 208        if (kvm_apicv_activated(svm->vcpu.kvm))
 209                vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
 210        else
 211                vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
 212}
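
/*
 * Illustrative value for the field set up in avic_init_vmcb() above: with the
 * physical APIC ID table at HPA 0x12345000, avic_physical_id becomes
 * (0x12345000 & AVIC_HPA_MASK) | AVIC_MAX_PHYSICAL_ID_COUNT == 0x123450ff,
 * i.e. the table address in the upper bits and the max index in the low byte.
 */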
 213
 214static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
 215                                       unsigned int index)
 216{
 217        u64 *avic_physical_id_table;
 218        struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 219
 220        if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
 221                return NULL;
 222
 223        avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
 224
 225        return &avic_physical_id_table[index];
 226}
 227
 228/**
 229 * Note:
 230 * AVIC hardware walks the nested page table to check permissions,
 231 * but does not use the SPA address specified in the leaf page
  232 * table entry since it uses the address in the AVIC_BACKING_PAGE pointer
 233 * field of the VMCB. Therefore, we set up the
 234 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
 235 */
 236static int avic_update_access_page(struct kvm *kvm, bool activate)
 237{
 238        int ret = 0;
 239
 240        mutex_lock(&kvm->slots_lock);
 241        /*
 242         * During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger
  243         * APICv mode change, which updates the APIC_ACCESS_PAGE_PRIVATE_MEMSLOT
 244         * memory region. So, we need to ensure that kvm->mm == current->mm.
 245         */
 246        if ((kvm->arch.apic_access_page_done == activate) ||
 247            (kvm->mm != current->mm))
 248                goto out;
 249
 250        ret = __x86_set_memory_region(kvm,
 251                                      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
 252                                      APIC_DEFAULT_PHYS_BASE,
 253                                      activate ? PAGE_SIZE : 0);
 254        if (ret)
 255                goto out;
 256
 257        kvm->arch.apic_access_page_done = activate;
 258out:
 259        mutex_unlock(&kvm->slots_lock);
 260        return ret;
 261}
 262
 263static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 264{
 265        u64 *entry, new_entry;
 266        int id = vcpu->vcpu_id;
 267        struct vcpu_svm *svm = to_svm(vcpu);
 268
 269        if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
 270                return -EINVAL;
 271
 272        if (!svm->vcpu.arch.apic->regs)
 273                return -EINVAL;
 274
 275        if (kvm_apicv_activated(vcpu->kvm)) {
 276                int ret;
 277
 278                ret = avic_update_access_page(vcpu->kvm, true);
 279                if (ret)
 280                        return ret;
 281        }
 282
 283        svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
 284
 285        /* Setting AVIC backing page address in the phy APIC ID table */
 286        entry = avic_get_physical_id_entry(vcpu, id);
 287        if (!entry)
 288                return -EINVAL;
 289
 290        new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
 291                              AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
 292                              AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
 293        WRITE_ONCE(*entry, new_entry);
 294
 295        svm->avic_physical_id_cache = entry;
 296
 297        return 0;
 298}
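
/*
 * Rough layout of the 64-bit physical APIC ID table entry written above, per
 * the AVIC_PHYSICAL_ID_ENTRY_* masks in svm.h:
 *   bits 51:12  vAPIC backing page host physical address
 *   bits  7:0   host physical APIC ID (filled in by avic_vcpu_load())
 *   bit  62     is-running
 *   bit  63     valid
 */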
 299
 300int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
 301{
 302        u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
 303        u32 icrl = svm->vmcb->control.exit_info_1;
 304        u32 id = svm->vmcb->control.exit_info_2 >> 32;
 305        u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
 306        struct kvm_lapic *apic = svm->vcpu.arch.apic;
 307
 308        trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
 309
 310        switch (id) {
 311        case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
 312                /*
 313                 * AVIC hardware handles the generation of
 314                 * IPIs when the specified Message Type is Fixed
 315                 * (also known as fixed delivery mode) and
 316                 * the Trigger Mode is edge-triggered. The hardware
 317                 * also supports self and broadcast delivery modes
  318                 * specified via the Destination Shorthand (DSH)
 319                 * field of the ICRL. Logical and physical APIC ID
 320                 * formats are supported. All other IPI types cause
  321                 * a #VMEXIT, which needs to be emulated.
 322                 */
 323                kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
 324                kvm_lapic_reg_write(apic, APIC_ICR, icrl);
 325                break;
 326        case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
 327                int i;
 328                struct kvm_vcpu *vcpu;
 329                struct kvm *kvm = svm->vcpu.kvm;
 330                struct kvm_lapic *apic = svm->vcpu.arch.apic;
 331
 332                /*
 333                 * At this point, we expect that the AVIC HW has already
 334                 * set the appropriate IRR bits on the valid target
 335                 * vcpus. So, we just need to kick the appropriate vcpu.
 336                 */
 337                kvm_for_each_vcpu(i, vcpu, kvm) {
 338                        bool m = kvm_apic_match_dest(vcpu, apic,
 339                                                     icrl & APIC_SHORT_MASK,
 340                                                     GET_APIC_DEST_FIELD(icrh),
 341                                                     icrl & APIC_DEST_MASK);
 342
 343                        if (m && !avic_vcpu_is_running(vcpu))
 344                                kvm_vcpu_wake_up(vcpu);
 345                }
 346                break;
 347        }
 348        case AVIC_IPI_FAILURE_INVALID_TARGET:
 349                WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
 350                          index, svm->vcpu.vcpu_id, icrh, icrl);
 351                break;
 352        case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
 353                WARN_ONCE(1, "Invalid backing page\n");
 354                break;
 355        default:
 356                pr_err("Unknown IPI interception\n");
 357        }
 358
 359        return 1;
 360}
 361
 362static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
 363{
 364        struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 365        int index;
 366        u32 *logical_apic_id_table;
 367        int dlid = GET_APIC_LOGICAL_ID(ldr);
 368
 369        if (!dlid)
 370                return NULL;
 371
 372        if (flat) { /* flat */
 373                index = ffs(dlid) - 1;
 374                if (index > 7)
 375                        return NULL;
 376        } else { /* cluster */
 377                int cluster = (dlid & 0xf0) >> 4;
 378                int apic = ffs(dlid & 0x0f) - 1;
 379
 380                if ((apic < 0) || (apic > 7) ||
 381                    (cluster >= 0xf))
 382                        return NULL;
 383                index = (cluster << 2) + apic;
 384        }
 385
 386        logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
 387
 388        return &logical_apic_id_table[index];
 389}
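
/*
 * Index examples for avic_get_logical_id_entry() above: in flat mode a
 * logical ID of 0x08 (bit 3 set) maps to index 3; in cluster mode a logical
 * ID of 0x21 (cluster 2, APIC bit 0) maps to index (2 << 2) + 0 == 8.
 */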
 390
 391static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
 392{
 393        bool flat;
 394        u32 *entry, new_entry;
 395
 396        flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
 397        entry = avic_get_logical_id_entry(vcpu, ldr, flat);
 398        if (!entry)
 399                return -EINVAL;
 400
 401        new_entry = READ_ONCE(*entry);
 402        new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
 403        new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
 404        new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
 405        WRITE_ONCE(*entry, new_entry);
 406
 407        return 0;
 408}
 409
 410static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
 411{
 412        struct vcpu_svm *svm = to_svm(vcpu);
 413        bool flat = svm->dfr_reg == APIC_DFR_FLAT;
 414        u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
 415
 416        if (entry)
 417                clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
 418}
 419
 420static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
 421{
 422        int ret = 0;
 423        struct vcpu_svm *svm = to_svm(vcpu);
 424        u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
 425        u32 id = kvm_xapic_id(vcpu->arch.apic);
 426
 427        if (ldr == svm->ldr_reg)
 428                return 0;
 429
 430        avic_invalidate_logical_id_entry(vcpu);
 431
 432        if (ldr)
 433                ret = avic_ldr_write(vcpu, id, ldr);
 434
 435        if (!ret)
 436                svm->ldr_reg = ldr;
 437
 438        return ret;
 439}
 440
 441static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
 442{
 443        u64 *old, *new;
 444        struct vcpu_svm *svm = to_svm(vcpu);
 445        u32 id = kvm_xapic_id(vcpu->arch.apic);
 446
 447        if (vcpu->vcpu_id == id)
 448                return 0;
 449
 450        old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
 451        new = avic_get_physical_id_entry(vcpu, id);
 452        if (!new || !old)
 453                return 1;
 454
 455        /* We need to move physical_id_entry to new offset */
 456        *new = *old;
 457        *old = 0ULL;
 458        to_svm(vcpu)->avic_physical_id_cache = new;
 459
 460        /*
 461         * Also update the guest physical APIC ID in the logical
  462         * APIC ID table entry if the LDR has already been set up.
 463         */
 464        if (svm->ldr_reg)
 465                avic_handle_ldr_update(vcpu);
 466
 467        return 0;
 468}
 469
 470static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
 471{
 472        struct vcpu_svm *svm = to_svm(vcpu);
 473        u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
 474
 475        if (svm->dfr_reg == dfr)
 476                return;
 477
 478        avic_invalidate_logical_id_entry(vcpu);
 479        svm->dfr_reg = dfr;
 480}
 481
 482static int avic_unaccel_trap_write(struct vcpu_svm *svm)
 483{
 484        struct kvm_lapic *apic = svm->vcpu.arch.apic;
 485        u32 offset = svm->vmcb->control.exit_info_1 &
 486                                AVIC_UNACCEL_ACCESS_OFFSET_MASK;
 487
 488        switch (offset) {
 489        case APIC_ID:
 490                if (avic_handle_apic_id_update(&svm->vcpu))
 491                        return 0;
 492                break;
 493        case APIC_LDR:
 494                if (avic_handle_ldr_update(&svm->vcpu))
 495                        return 0;
 496                break;
 497        case APIC_DFR:
 498                avic_handle_dfr_update(&svm->vcpu);
 499                break;
 500        default:
 501                break;
 502        }
 503
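        /*
         * AVIC has already updated the register value in the vAPIC backing
         * page, so re-drive that value through the common lapic emulation to
         * apply any side effects (e.g. recalculating the APIC destination
         * map for LDR/DFR updates).
         */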
 504        kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
 505
 506        return 1;
 507}
 508
 509static bool is_avic_unaccelerated_access_trap(u32 offset)
 510{
 511        bool ret = false;
 512
 513        switch (offset) {
 514        case APIC_ID:
 515        case APIC_EOI:
 516        case APIC_RRR:
 517        case APIC_LDR:
 518        case APIC_DFR:
 519        case APIC_SPIV:
 520        case APIC_ESR:
 521        case APIC_ICR:
 522        case APIC_LVTT:
 523        case APIC_LVTTHMR:
 524        case APIC_LVTPC:
 525        case APIC_LVT0:
 526        case APIC_LVT1:
 527        case APIC_LVTERR:
 528        case APIC_TMICT:
 529        case APIC_TDCR:
 530                ret = true;
 531                break;
 532        default:
 533                break;
 534        }
 535        return ret;
 536}
 537
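/*
 * Unaccelerated APIC accesses come in two flavours: trap-style exits, where
 * hardware has already completed the register write and only the side
 * effects need emulating (avic_unaccel_trap_write()), and fault-style exits,
 * where the access has not been performed and the whole instruction must be
 * emulated.
 */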
 538int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
 539{
 540        int ret = 0;
 541        u32 offset = svm->vmcb->control.exit_info_1 &
 542                     AVIC_UNACCEL_ACCESS_OFFSET_MASK;
 543        u32 vector = svm->vmcb->control.exit_info_2 &
 544                     AVIC_UNACCEL_ACCESS_VECTOR_MASK;
 545        bool write = (svm->vmcb->control.exit_info_1 >> 32) &
 546                     AVIC_UNACCEL_ACCESS_WRITE_MASK;
 547        bool trap = is_avic_unaccelerated_access_trap(offset);
 548
 549        trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
 550                                            trap, write, vector);
 551        if (trap) {
 552                /* Handling Trap */
 553                WARN_ONCE(!write, "svm: Handling trap read.\n");
 554                ret = avic_unaccel_trap_write(svm);
 555        } else {
 556                /* Handling Fault */
 557                ret = kvm_emulate_instruction(&svm->vcpu, 0);
 558        }
 559
 560        return ret;
 561}
 562
 563int avic_init_vcpu(struct vcpu_svm *svm)
 564{
 565        int ret;
 566        struct kvm_vcpu *vcpu = &svm->vcpu;
 567
 568        if (!avic || !irqchip_in_kernel(vcpu->kvm))
 569                return 0;
 570
 571        ret = avic_init_backing_page(&svm->vcpu);
 572        if (ret)
 573                return ret;
 574
 575        INIT_LIST_HEAD(&svm->ir_list);
 576        spin_lock_init(&svm->ir_list_lock);
 577        svm->dfr_reg = APIC_DFR_FLAT;
 578
 579        return ret;
 580}
 581
 582void avic_post_state_restore(struct kvm_vcpu *vcpu)
 583{
 584        if (avic_handle_apic_id_update(vcpu) != 0)
 585                return;
 586        avic_handle_dfr_update(vcpu);
 587        avic_handle_ldr_update(vcpu);
 588}
 589
 590void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate)
 591{
 592        if (!avic || !lapic_in_kernel(vcpu))
 593                return;
 594
 595        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 596        kvm_request_apicv_update(vcpu->kvm, activate,
 597                                 APICV_INHIBIT_REASON_IRQWIN);
 598        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 599}
 600
 601void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
 602{
 603        return;
 604}
 605
 606void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 607{
 608}
 609
 610void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
 611{
 612}
 613
 614static int svm_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
 615{
 616        int ret = 0;
 617        unsigned long flags;
 618        struct amd_svm_iommu_ir *ir;
 619        struct vcpu_svm *svm = to_svm(vcpu);
 620
 621        if (!kvm_arch_has_assigned_device(vcpu->kvm))
 622                return 0;
 623
 624        /*
 625         * Here, we go through the per-vcpu ir_list to update all existing
  626         * interrupt remapping table entries targeting this vcpu.
 627         */
 628        spin_lock_irqsave(&svm->ir_list_lock, flags);
 629
 630        if (list_empty(&svm->ir_list))
 631                goto out;
 632
 633        list_for_each_entry(ir, &svm->ir_list, node) {
 634                if (activate)
 635                        ret = amd_iommu_activate_guest_mode(ir->data);
 636                else
 637                        ret = amd_iommu_deactivate_guest_mode(ir->data);
 638                if (ret)
 639                        break;
 640        }
 641out:
 642        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 643        return ret;
 644}
 645
 646void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 647{
 648        struct vcpu_svm *svm = to_svm(vcpu);
 649        struct vmcb *vmcb = svm->vmcb;
 650        bool activated = kvm_vcpu_apicv_active(vcpu);
 651
 652        if (!avic)
 653                return;
 654
 655        if (activated) {
 656                /**
  657                 * During AVIC temporary deactivation, the guest could update
 658                 * APIC ID, DFR and LDR registers, which would not be trapped
 659                 * by avic_unaccelerated_access_interception(). In this case,
 660                 * we need to check and update the AVIC logical APIC ID table
 661                 * accordingly before re-activating.
 662                 */
 663                avic_post_state_restore(vcpu);
 664                vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
 665        } else {
 666                vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
 667        }
 668        vmcb_mark_dirty(vmcb, VMCB_AVIC);
 669
 670        svm_set_pi_irte_mode(vcpu, activated);
 671}
 672
 673void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 674{
 675        return;
 676}
 677
 678int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
 679{
 680        if (!vcpu->arch.apicv_active)
 681                return -1;
 682
 683        kvm_lapic_set_irr(vec, vcpu->arch.apic);
 684        smp_mb__after_atomic();
 685
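        /*
         * If the target vCPU is running in guest mode, writing its host
         * APIC ID to the AVIC doorbell MSR (SVM_AVIC_DOORBELL) makes the
         * hardware on that physical CPU evaluate the new IRR bit right away;
         * otherwise, wake the vCPU so it notices the interrupt on its next
         * VMRUN.
         */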
 686        if (avic_vcpu_is_running(vcpu)) {
 687                int cpuid = vcpu->cpu;
 688
 689                if (cpuid != get_cpu())
 690                        wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
 691                put_cpu();
 692        } else
 693                kvm_vcpu_wake_up(vcpu);
 694
 695        return 0;
 696}
 697
 698bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
 699{
 700        return false;
 701}
 702
 703static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
 704{
 705        unsigned long flags;
 706        struct amd_svm_iommu_ir *cur;
 707
 708        spin_lock_irqsave(&svm->ir_list_lock, flags);
 709        list_for_each_entry(cur, &svm->ir_list, node) {
 710                if (cur->data != pi->ir_data)
 711                        continue;
 712                list_del(&cur->node);
 713                kfree(cur);
 714                break;
 715        }
 716        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 717}
 718
 719static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
 720{
 721        int ret = 0;
 722        unsigned long flags;
 723        struct amd_svm_iommu_ir *ir;
 724
 725        /**
  726         * In some cases, the existing irte is updated and re-set,
  727         * so we need to check here if it's already been added
 728         * to the ir_list.
 729         */
 730        if (pi->ir_data && (pi->prev_ga_tag != 0)) {
 731                struct kvm *kvm = svm->vcpu.kvm;
 732                u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
 733                struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
 734                struct vcpu_svm *prev_svm;
 735
 736                if (!prev_vcpu) {
 737                        ret = -EINVAL;
 738                        goto out;
 739                }
 740
 741                prev_svm = to_svm(prev_vcpu);
 742                svm_ir_list_del(prev_svm, pi);
 743        }
 744
 745        /**
  746         * Allocating a new amd_svm_iommu_ir, which will get
  747         * added to the per-vcpu ir_list.
 748         */
 749        ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
 750        if (!ir) {
 751                ret = -ENOMEM;
 752                goto out;
 753        }
 754        ir->data = pi->ir_data;
 755
 756        spin_lock_irqsave(&svm->ir_list_lock, flags);
 757        list_add(&ir->node, &svm->ir_list);
 758        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 759out:
 760        return ret;
 761}
 762
 763/**
 764 * Note:
 765 * The HW cannot support posting multicast/broadcast
 766 * interrupts to a vCPU. So, we still use legacy interrupt
  767 * remapping for these kinds of interrupts.
 768 *
 769 * For lowest-priority interrupts, we only support
  770 * those with a single CPU as the destination, e.g. the user
 771 * configures the interrupts via /proc/irq or uses
 772 * irqbalance to make the interrupts single-CPU.
 773 */
 774static int
 775get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
 776                 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
 777{
 778        struct kvm_lapic_irq irq;
 779        struct kvm_vcpu *vcpu = NULL;
 780
 781        kvm_set_msi_irq(kvm, e, &irq);
 782
 783        if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
 784            !kvm_irq_is_postable(&irq)) {
 785                pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
 786                         __func__, irq.vector);
 787                return -1;
 788        }
 789
 790        pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
 791                 irq.vector);
 792        *svm = to_svm(vcpu);
 793        vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
 794        vcpu_info->vector = irq.vector;
 795
 796        return 0;
 797}
 798
 799/*
 800 * svm_update_pi_irte - set IRTE for Posted-Interrupts
 801 *
 802 * @kvm: kvm
 803 * @host_irq: host irq of the interrupt
 804 * @guest_irq: gsi of the interrupt
 805 * @set: set or unset PI
 806 * returns 0 on success, < 0 on failure
 807 */
 808int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 809                       uint32_t guest_irq, bool set)
 810{
 811        struct kvm_kernel_irq_routing_entry *e;
 812        struct kvm_irq_routing_table *irq_rt;
 813        int idx, ret = -EINVAL;
 814
 815        if (!kvm_arch_has_assigned_device(kvm) ||
 816            !irq_remapping_cap(IRQ_POSTING_CAP))
 817                return 0;
 818
 819        pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
 820                 __func__, host_irq, guest_irq, set);
 821
 822        idx = srcu_read_lock(&kvm->irq_srcu);
 823        irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 824        WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
 825
 826        hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
 827                struct vcpu_data vcpu_info;
 828                struct vcpu_svm *svm = NULL;
 829
 830                if (e->type != KVM_IRQ_ROUTING_MSI)
 831                        continue;
 832
 833                /**
  834                 * Here, we set up legacy mode in the following cases:
  835                 * 1. When the interrupt cannot be targeted to a specific vcpu.
  836                 * 2. Unsetting posted interrupt.
  837                 * 3. APIC virtualization is disabled for the vcpu.
  838                 * 4. IRQ has an incompatible delivery mode (SMI, INIT, etc).
 839                 */
 840                if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
 841                    kvm_vcpu_apicv_active(&svm->vcpu)) {
 842                        struct amd_iommu_pi_data pi;
 843
 844                        /* Try to enable guest_mode in IRTE */
 845                        pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
 846                                            AVIC_HPA_MASK);
 847                        pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
 848                                                     svm->vcpu.vcpu_id);
 849                        pi.is_guest_mode = true;
 850                        pi.vcpu_data = &vcpu_info;
 851                        ret = irq_set_vcpu_affinity(host_irq, &pi);
 852
 853                        /**
  854                         * Here, we have successfully set up vcpu affinity in
  855                         * IOMMU guest mode. Now, we need to store the posted
  856                         * interrupt information in a per-vcpu ir_list so that
  857                         * we can reference it directly when we update the vcpu
  858                         * scheduling information in the IOMMU irte.
 859                         */
 860                        if (!ret && pi.is_guest_mode)
 861                                svm_ir_list_add(svm, &pi);
 862                } else {
 863                        /* Use legacy mode in IRTE */
 864                        struct amd_iommu_pi_data pi;
 865
 866                        /**
 867                         * Here, pi is used to:
 868                         * - Tell IOMMU to use legacy mode for this interrupt.
 869                         * - Retrieve ga_tag of prior interrupt remapping data.
 870                         */
 871                        pi.is_guest_mode = false;
 872                        ret = irq_set_vcpu_affinity(host_irq, &pi);
 873
 874                        /**
  875                         * Check whether the posted interrupt was previously
  876                         * set up in guest_mode by checking whether the ga_tag
 877                         * was cached. If so, we need to clean up the per-vcpu
 878                         * ir_list.
 879                         */
 880                        if (!ret && pi.prev_ga_tag) {
 881                                int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
 882                                struct kvm_vcpu *vcpu;
 883
 884                                vcpu = kvm_get_vcpu_by_id(kvm, id);
 885                                if (vcpu)
 886                                        svm_ir_list_del(to_svm(vcpu), &pi);
 887                        }
 888                }
 889
 890                if (!ret && svm) {
 891                        trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
 892                                                 e->gsi, vcpu_info.vector,
 893                                                 vcpu_info.pi_desc_addr, set);
 894                }
 895
 896                if (ret < 0) {
 897                        pr_err("%s: failed to update PI IRTE\n", __func__);
 898                        goto out;
 899                }
 900        }
 901
 902        ret = 0;
 903out:
 904        srcu_read_unlock(&kvm->irq_srcu, idx);
 905        return ret;
 906}
 907
 908bool svm_check_apicv_inhibit_reasons(ulong bit)
 909{
 910        ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
 911                          BIT(APICV_INHIBIT_REASON_HYPERV) |
 912                          BIT(APICV_INHIBIT_REASON_NESTED) |
 913                          BIT(APICV_INHIBIT_REASON_IRQWIN) |
 914                          BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
 915                          BIT(APICV_INHIBIT_REASON_X2APIC);
 916
 917        return supported & BIT(bit);
 918}
 919
 920void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate)
 921{
 922        avic_update_access_page(kvm, activate);
 923}
 924
 925static inline int
 926avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 927{
 928        int ret = 0;
 929        unsigned long flags;
 930        struct amd_svm_iommu_ir *ir;
 931        struct vcpu_svm *svm = to_svm(vcpu);
 932
 933        if (!kvm_arch_has_assigned_device(vcpu->kvm))
 934                return 0;
 935
 936        /*
 937         * Here, we go through the per-vcpu ir_list to update all existing
  938         * interrupt remapping table entries targeting this vcpu.
 939         */
 940        spin_lock_irqsave(&svm->ir_list_lock, flags);
 941
 942        if (list_empty(&svm->ir_list))
 943                goto out;
 944
 945        list_for_each_entry(ir, &svm->ir_list, node) {
 946                ret = amd_iommu_update_ga(cpu, r, ir->data);
 947                if (ret)
 948                        break;
 949        }
 950out:
 951        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 952        return ret;
 953}
 954
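/*
 * avic_vcpu_load() refreshes this vCPU's physical APIC ID table entry with
 * the APIC ID of the CPU it is being loaded on.  For example, loading on a
 * CPU with physical APIC ID 0x12 while avic_is_running is true sets the low
 * byte of the entry to 0x12 and sets the is-running bit, leaving the
 * backing-page address and valid bit untouched.
 */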
 955void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 956{
 957        u64 entry;
 958        /* ID = 0xff (broadcast), ID > 0xff (reserved) */
 959        int h_physical_id = kvm_cpu_get_apicid(cpu);
 960        struct vcpu_svm *svm = to_svm(vcpu);
 961
 962        if (!kvm_vcpu_apicv_active(vcpu))
 963                return;
 964
 965        /*
 966         * Since the host physical APIC id is 8 bits,
  967         * we can support host APIC IDs up to 255.
 968         */
 969        if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
 970                return;
 971
 972        entry = READ_ONCE(*(svm->avic_physical_id_cache));
 973        WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
 974
 975        entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
 976        entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
 977
 978        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
 979        if (svm->avic_is_running)
 980                entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
 981
 982        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
 983        avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
 984                                        svm->avic_is_running);
 985}
 986
 987void avic_vcpu_put(struct kvm_vcpu *vcpu)
 988{
 989        u64 entry;
 990        struct vcpu_svm *svm = to_svm(vcpu);
 991
 992        if (!kvm_vcpu_apicv_active(vcpu))
 993                return;
 994
 995        entry = READ_ONCE(*(svm->avic_physical_id_cache));
 996        if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
 997                avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
 998
 999        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1000        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
1001}
1002
1003/**
1004 * This function is called during VCPU halt/unhalt.
1005 */
1006static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
1007{
1008        struct vcpu_svm *svm = to_svm(vcpu);
1009
1010        svm->avic_is_running = is_run;
1011        if (is_run)
1012                avic_vcpu_load(vcpu, vcpu->cpu);
1013        else
1014                avic_vcpu_put(vcpu);
1015}
1016
1017void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
1018{
1019        avic_set_running(vcpu, false);
1020}
1021
1022void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
1023{
1024        if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
1025                kvm_vcpu_update_apicv(vcpu);
1026        avic_set_running(vcpu, true);
1027}
1028