linux/arch/x86/kvm/svm/avic.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/hashtable.h>
#include <linux/amd-iommu.h>
#include <linux/kvm_host.h>

#include <asm/irq_remapping.h>

#include "trace.h"
#include "lapic.h"
#include "x86.h"
#include "irq.h"
#include "svm.h"

#define SVM_AVIC_DOORBELL       0xc001011b

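/*
 * The AVIC table pointers in the VMCB hold 4KB-aligned host physical
 * addresses; this mask clears bits 63:52 and 11:0, keeping only the
 * page address bits 51:12.
 */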
#define AVIC_HPA_MASK   ~((0xFFFULL << 52) | 0xFFF)

/*
 * 0xff is broadcast, so the max index allowed for physical APIC ID
 * table is 0xfe.  APIC IDs above 0xff are reserved.
 */
#define AVIC_MAX_PHYSICAL_ID_COUNT      255

#define AVIC_UNACCEL_ACCESS_WRITE_MASK          1
#define AVIC_UNACCEL_ACCESS_OFFSET_MASK         0xFF0
#define AVIC_UNACCEL_ACCESS_VECTOR_MASK         0xFFFFFFFF

/* AVIC GATAG is encoded using VM and VCPU IDs */
#define AVIC_VCPU_ID_BITS               8
#define AVIC_VCPU_ID_MASK               ((1 << AVIC_VCPU_ID_BITS) - 1)

#define AVIC_VM_ID_BITS                 24
#define AVIC_VM_ID_NR                   (1 << AVIC_VM_ID_BITS)
#define AVIC_VM_ID_MASK                 ((1 << AVIC_VM_ID_BITS) - 1)

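/*
 * A GA tag packs the VM ID into bits 31:8 and the vCPU ID into bits 7:0,
 * e.g. AVIC_GATAG(0x12, 0x3) == 0x1203.
 */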
#define AVIC_GATAG(x, y)                (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
                                                (y & AVIC_VCPU_ID_MASK))
#define AVIC_GATAG_TO_VMID(x)           ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x)         (x & AVIC_VCPU_ID_MASK)

/* Note:
 * This hash table is used to map VM_ID to a struct kvm_svm
 * when handling an AMD IOMMU GALOG notification to schedule in
 * a particular vCPU.
 */
#define SVM_VM_DATA_HASH_BITS   8
static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);

/*
 * This is a wrapper of struct amd_iommu_ir_data.
 */
struct amd_svm_iommu_ir {
        struct list_head node;  /* Used by SVM for per-vcpu ir_list */
        void *data;             /* Storing pointer to struct amd_ir_data */
};

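/*
 * Failure causes reported in the upper 32 bits of exit_info_2 on an
 * AVIC_INCOMPLETE_IPI #VMEXIT; see avic_incomplete_ipi_interception().
 */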
enum avic_ipi_failure_cause {
        AVIC_IPI_FAILURE_INVALID_INT_TYPE,
        AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
        AVIC_IPI_FAILURE_INVALID_TARGET,
        AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
};

/* Note:
 * This function is called from the IOMMU driver to notify
 * SVM to schedule in a particular vCPU of a particular VM.
 */
int avic_ga_log_notifier(u32 ga_tag)
{
        unsigned long flags;
        struct kvm_svm *kvm_svm;
        struct kvm_vcpu *vcpu = NULL;
        u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
        u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);

        pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
        trace_kvm_avic_ga_log(vm_id, vcpu_id);

        spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
        hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
                if (kvm_svm->avic_vm_id != vm_id)
                        continue;
                vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
                break;
        }
        spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);

        /* Note:
         * At this point, the IOMMU should have already set the pending
         * bit in the vAPIC backing page. So, we just need to schedule
         * in the vcpu.
         */
        if (vcpu)
                kvm_vcpu_wake_up(vcpu);

        return 0;
}

void avic_vm_destroy(struct kvm *kvm)
{
        unsigned long flags;
        struct kvm_svm *kvm_svm = to_kvm_svm(kvm);

        if (!enable_apicv)
                return;

        if (kvm_svm->avic_logical_id_table_page)
                __free_page(kvm_svm->avic_logical_id_table_page);
        if (kvm_svm->avic_physical_id_table_page)
                __free_page(kvm_svm->avic_physical_id_table_page);

        spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
        hash_del(&kvm_svm->hnode);
        spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}

int avic_vm_init(struct kvm *kvm)
{
        unsigned long flags;
        int err = -ENOMEM;
        struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
        struct kvm_svm *k2;
        struct page *p_page;
        struct page *l_page;
        u32 vm_id;

        if (!enable_apicv)
                return 0;

        /* Allocating physical APIC ID table (4KB) */
        p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
        if (!p_page)
                goto free_avic;

        kvm_svm->avic_physical_id_table_page = p_page;

        /* Allocating logical APIC ID table (4KB) */
        l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
        if (!l_page)
                goto free_avic;

        kvm_svm->avic_logical_id_table_page = l_page;

        spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 again:
        vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
        if (vm_id == 0) { /* id is 1-based, zero is not okay */
                next_vm_id_wrapped = 1;
                goto again;
        }
        /* Is it still in use? Only possible if wrapped at least once */
        if (next_vm_id_wrapped) {
                hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
                        if (k2->avic_vm_id == vm_id)
                                goto again;
                }
        }
        kvm_svm->avic_vm_id = vm_id;
        hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
        spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);

        return 0;

free_avic:
        avic_vm_destroy(kvm);
        return err;
}

void avic_init_vmcb(struct vcpu_svm *svm)
{
        struct vmcb *vmcb = svm->vmcb;
        struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
        phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
        phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
        phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));

        vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
        vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
        vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
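        /*
         * The low bits of avic_physical_id hold the highest valid index
         * of the physical APIC ID table.
         */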
        vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
        vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE & VMCB_AVIC_APIC_BAR_MASK;

        if (kvm_apicv_activated(svm->vcpu.kvm))
                vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
        else
                vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
}

static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
                                       unsigned int index)
{
        u64 *avic_physical_id_table;
        struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);

        if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
                return NULL;

        avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);

        return &avic_physical_id_table[index];
}

/*
 * Note:
 * AVIC hardware walks the nested page table to check permissions,
 * but does not use the SPA address specified in the leaf page
 * table entry since it uses the address in the AVIC_BACKING_PAGE pointer
 * field of the VMCB. Therefore, we set up the
 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
 */
static int avic_alloc_access_page(struct kvm *kvm)
{
        void __user *ret;
        int r = 0;

        mutex_lock(&kvm->slots_lock);

        if (kvm->arch.apic_access_memslot_enabled)
                goto out;

        ret = __x86_set_memory_region(kvm,
                                      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
                                      APIC_DEFAULT_PHYS_BASE,
                                      PAGE_SIZE);
        if (IS_ERR(ret)) {
                r = PTR_ERR(ret);
                goto out;
        }

        kvm->arch.apic_access_memslot_enabled = true;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

static int avic_init_backing_page(struct kvm_vcpu *vcpu)
{
        u64 *entry, new_entry;
        int id = vcpu->vcpu_id;
        struct vcpu_svm *svm = to_svm(vcpu);

        if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
                return -EINVAL;

        if (!vcpu->arch.apic->regs)
                return -EINVAL;

        if (kvm_apicv_activated(vcpu->kvm)) {
                int ret;

                ret = avic_alloc_access_page(vcpu->kvm);
                if (ret)
                        return ret;
        }

        svm->avic_backing_page = virt_to_page(vcpu->arch.apic->regs);

        /* Setting AVIC backing page address in the physical APIC ID table */
        entry = avic_get_physical_id_entry(vcpu, id);
        if (!entry)
                return -EINVAL;

        new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
                              AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
                              AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
        WRITE_ONCE(*entry, new_entry);

        svm->avic_physical_id_cache = entry;

        return 0;
}

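/*
 * The AVIC hardware has already set the IRR bits for the target vCPUs
 * (see AVIC_IPI_FAILURE_TARGET_NOT_RUNNING below); here we only wake up
 * any matching vCPU that is not currently running so that it notices
 * the pending interrupt.
 */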
static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
                                   u32 icrl, u32 icrh)
{
        struct kvm_vcpu *vcpu;
        int i;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                bool m = kvm_apic_match_dest(vcpu, source,
                                             icrl & APIC_SHORT_MASK,
                                             GET_APIC_DEST_FIELD(icrh),
                                             icrl & APIC_DEST_MASK);

                if (m && !avic_vcpu_is_running(vcpu))
                        kvm_vcpu_wake_up(vcpu);
        }
}

int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
{
        struct vcpu_svm *svm = to_svm(vcpu);
        u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
        u32 icrl = svm->vmcb->control.exit_info_1;
        u32 id = svm->vmcb->control.exit_info_2 >> 32;
        u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
        struct kvm_lapic *apic = vcpu->arch.apic;

        trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);

        switch (id) {
        case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
                /*
                 * AVIC hardware handles the generation of
                 * IPIs when the specified Message Type is Fixed
                 * (also known as fixed delivery mode) and
                 * the Trigger Mode is edge-triggered. The hardware
                 * also supports self and broadcast delivery modes
                 * specified via the Destination Shorthand (DSH)
                 * field of the ICRL. Logical and physical APIC ID
                 * formats are supported. All other IPI types cause
                 * a #VMEXIT, which needs to be emulated.
                 */
                kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
                kvm_lapic_reg_write(apic, APIC_ICR, icrl);
                break;
        case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING:
                /*
                 * At this point, we expect that the AVIC HW has already
                 * set the appropriate IRR bits on the valid target
                 * vcpus. So, we just need to kick the appropriate vcpu.
                 */
                avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh);
                break;
        case AVIC_IPI_FAILURE_INVALID_TARGET:
                WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
                          index, vcpu->vcpu_id, icrh, icrl);
                break;
        case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
                WARN_ONCE(1, "Invalid backing page\n");
                break;
        default:
                pr_err("Unknown IPI interception\n");
        }

        return 1;
}

static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
{
        struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
        int index;
        u32 *logical_apic_id_table;
        int dlid = GET_APIC_LOGICAL_ID(ldr);

        if (!dlid)
                return NULL;

        if (flat) { /* flat */
                index = ffs(dlid) - 1;
                if (index > 7)
                        return NULL;
        } else { /* cluster */
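                /*
                 * In cluster mode, bits 7:4 of the logical ID select the
                 * cluster and bits 3:0 are a bitmask of members; each
                 * cluster occupies four consecutive table entries.
                 */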
                int cluster = (dlid & 0xf0) >> 4;
                int apic = ffs(dlid & 0x0f) - 1;

                if ((apic < 0) || (apic > 7) ||
                    (cluster >= 0xf))
                        return NULL;
                index = (cluster << 2) + apic;
        }

        logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);

        return &logical_apic_id_table[index];
}

static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
{
        bool flat;
        u32 *entry, new_entry;

        flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
        entry = avic_get_logical_id_entry(vcpu, ldr, flat);
        if (!entry)
                return -EINVAL;

        new_entry = READ_ONCE(*entry);
        new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
        new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
        new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
        WRITE_ONCE(*entry, new_entry);

        return 0;
}

static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
{
        struct vcpu_svm *svm = to_svm(vcpu);
        bool flat = svm->dfr_reg == APIC_DFR_FLAT;
        u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);

        if (entry)
                clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
}

static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
{
        int ret = 0;
        struct vcpu_svm *svm = to_svm(vcpu);
        u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
        u32 id = kvm_xapic_id(vcpu->arch.apic);

        if (ldr == svm->ldr_reg)
                return 0;

        avic_invalidate_logical_id_entry(vcpu);

        if (ldr)
                ret = avic_ldr_write(vcpu, id, ldr);

        if (!ret)
                svm->ldr_reg = ldr;

        return ret;
}

static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
{
        u64 *old, *new;
        struct vcpu_svm *svm = to_svm(vcpu);
        u32 id = kvm_xapic_id(vcpu->arch.apic);

        if (vcpu->vcpu_id == id)
                return 0;

        old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
        new = avic_get_physical_id_entry(vcpu, id);
        if (!new || !old)
                return 1;

        /* We need to move the physical_id_entry to the new offset */
        *new = *old;
        *old = 0ULL;
        to_svm(vcpu)->avic_physical_id_cache = new;

        /*
         * Also update the guest physical APIC ID in the logical
         * APIC ID table entry if the LDR has already been set up.
         */
        if (svm->ldr_reg)
                avic_handle_ldr_update(vcpu);

        return 0;
}

static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
{
        struct vcpu_svm *svm = to_svm(vcpu);
        u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);

        if (svm->dfr_reg == dfr)
                return;

        avic_invalidate_logical_id_entry(vcpu);
        svm->dfr_reg = dfr;
}

static int avic_unaccel_trap_write(struct vcpu_svm *svm)
{
        struct kvm_lapic *apic = svm->vcpu.arch.apic;
        u32 offset = svm->vmcb->control.exit_info_1 &
                                AVIC_UNACCEL_ACCESS_OFFSET_MASK;

        switch (offset) {
        case APIC_ID:
                if (avic_handle_apic_id_update(&svm->vcpu))
                        return 0;
                break;
        case APIC_LDR:
                if (avic_handle_ldr_update(&svm->vcpu))
                        return 0;
                break;
        case APIC_DFR:
                avic_handle_dfr_update(&svm->vcpu);
                break;
        default:
                break;
        }

        kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));

        return 1;
}

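/*
 * Register offsets that AVIC reports as trap-style unaccelerated accesses:
 * the guest's write has already been committed to the vAPIC backing page,
 * so only the side effects need to be handled (see avic_unaccel_trap_write()).
 * All other offsets are reported as faults and must be fully emulated.
 */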
static bool is_avic_unaccelerated_access_trap(u32 offset)
{
        bool ret = false;

        switch (offset) {
        case APIC_ID:
        case APIC_EOI:
        case APIC_RRR:
        case APIC_LDR:
        case APIC_DFR:
        case APIC_SPIV:
        case APIC_ESR:
        case APIC_ICR:
        case APIC_LVTT:
        case APIC_LVTTHMR:
        case APIC_LVTPC:
        case APIC_LVT0:
        case APIC_LVT1:
        case APIC_LVTERR:
        case APIC_TMICT:
        case APIC_TDCR:
                ret = true;
                break;
        default:
                break;
        }
        return ret;
}

int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu)
{
        struct vcpu_svm *svm = to_svm(vcpu);
        int ret = 0;
        u32 offset = svm->vmcb->control.exit_info_1 &
                     AVIC_UNACCEL_ACCESS_OFFSET_MASK;
        u32 vector = svm->vmcb->control.exit_info_2 &
                     AVIC_UNACCEL_ACCESS_VECTOR_MASK;
        bool write = (svm->vmcb->control.exit_info_1 >> 32) &
                     AVIC_UNACCEL_ACCESS_WRITE_MASK;
        bool trap = is_avic_unaccelerated_access_trap(offset);

        trace_kvm_avic_unaccelerated_access(vcpu->vcpu_id, offset,
                                            trap, write, vector);
        if (trap) {
                /* Handling Trap */
                WARN_ONCE(!write, "svm: Handling trap read.\n");
                ret = avic_unaccel_trap_write(svm);
        } else {
                /* Handling Fault */
                ret = kvm_emulate_instruction(vcpu, 0);
        }

        return ret;
}

int avic_init_vcpu(struct vcpu_svm *svm)
{
        int ret;
        struct kvm_vcpu *vcpu = &svm->vcpu;

        if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm))
                return 0;

        ret = avic_init_backing_page(vcpu);
        if (ret)
                return ret;

        INIT_LIST_HEAD(&svm->ir_list);
        spin_lock_init(&svm->ir_list_lock);
        svm->dfr_reg = APIC_DFR_FLAT;

        return ret;
}

void avic_post_state_restore(struct kvm_vcpu *vcpu)
{
        if (avic_handle_apic_id_update(vcpu) != 0)
                return;
        avic_handle_dfr_update(vcpu);
        avic_handle_ldr_update(vcpu);
}

void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
        return;
}

void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{
}

void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
{
}

static int svm_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
{
        int ret = 0;
        unsigned long flags;
        struct amd_svm_iommu_ir *ir;
        struct vcpu_svm *svm = to_svm(vcpu);

        if (!kvm_arch_has_assigned_device(vcpu->kvm))
                return 0;

        /*
         * Here, we go through the per-vcpu ir_list to update all existing
         * interrupt remapping table entries targeting this vcpu.
         */
        spin_lock_irqsave(&svm->ir_list_lock, flags);

        if (list_empty(&svm->ir_list))
                goto out;

        list_for_each_entry(ir, &svm->ir_list, node) {
                if (activate)
                        ret = amd_iommu_activate_guest_mode(ir->data);
                else
                        ret = amd_iommu_deactivate_guest_mode(ir->data);
                if (ret)
                        break;
        }
out:
        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
        return ret;
}

void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
        struct vcpu_svm *svm = to_svm(vcpu);
        struct vmcb *vmcb = svm->vmcb01.ptr;
        bool activated = kvm_vcpu_apicv_active(vcpu);

        if (!enable_apicv)
                return;

        if (activated) {
                /**
                 * During AVIC temporary deactivation, the guest could update
                 * APIC ID, DFR and LDR registers, which would not be trapped
                 * by avic_unaccelerated_access_interception(). In this case,
                 * we need to check and update the AVIC logical APIC ID table
                 * accordingly before re-activating.
                 */
                avic_post_state_restore(vcpu);
                vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
        } else {
                vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
        }
        vmcb_mark_dirty(vmcb, VMCB_AVIC);

        if (activated)
                avic_vcpu_load(vcpu, vcpu->cpu);
        else
                avic_vcpu_put(vcpu);

        svm_set_pi_irte_mode(vcpu, activated);
}

void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
        return;
}

int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
{
        if (!vcpu->arch.apicv_active)
                return -1;

        kvm_lapic_set_irr(vec, vcpu->arch.apic);
        smp_mb__after_atomic();

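        /*
         * If the target vCPU is running in guest mode, ring the AVIC
         * doorbell MSR on its physical CPU so the interrupt is delivered
         * without a #VMEXIT; otherwise wake the vCPU so it picks up the
         * IRR bit set above on its next entry.
         */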
        if (avic_vcpu_is_running(vcpu)) {
                int cpuid = vcpu->cpu;

                if (cpuid != get_cpu())
                        wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
                put_cpu();
        } else
                kvm_vcpu_wake_up(vcpu);

        return 0;
}

bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
        return false;
}

static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
        unsigned long flags;
        struct amd_svm_iommu_ir *cur;

        spin_lock_irqsave(&svm->ir_list_lock, flags);
        list_for_each_entry(cur, &svm->ir_list, node) {
                if (cur->data != pi->ir_data)
                        continue;
                list_del(&cur->node);
                kfree(cur);
                break;
        }
        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}

static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
        int ret = 0;
        unsigned long flags;
        struct amd_svm_iommu_ir *ir;

        /**
         * In some cases, the existing irte is updated and re-set,
         * so we need to check here if it's already been added
         * to the ir_list.
         */
        if (pi->ir_data && (pi->prev_ga_tag != 0)) {
                struct kvm *kvm = svm->vcpu.kvm;
                u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
                struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
                struct vcpu_svm *prev_svm;

                if (!prev_vcpu) {
                        ret = -EINVAL;
                        goto out;
                }

                prev_svm = to_svm(prev_vcpu);
                svm_ir_list_del(prev_svm, pi);
        }

        /**
         * Allocating new amd_iommu_pi_data, which will get
         * added to the per-vcpu ir_list.
         */
        ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
        if (!ir) {
                ret = -ENOMEM;
                goto out;
        }
        ir->data = pi->ir_data;

        spin_lock_irqsave(&svm->ir_list_lock, flags);
        list_add(&ir->node, &svm->ir_list);
        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
        return ret;
}

/*
 * Note:
 * The HW cannot support posting multicast/broadcast
 * interrupts to a vCPU. So, we still use legacy interrupt
 * remapping for these kinds of interrupts.
 *
 * For lowest-priority interrupts, we only support
 * those with a single CPU as the destination, e.g. the user
 * configures the interrupts via /proc/irq or uses
 * irqbalance to make the interrupts single-CPU.
 */
static int
get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
                 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
{
        struct kvm_lapic_irq irq;
        struct kvm_vcpu *vcpu = NULL;

        kvm_set_msi_irq(kvm, e, &irq);

        if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
            !kvm_irq_is_postable(&irq)) {
                pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
                         __func__, irq.vector);
                return -1;
        }

        pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
                 irq.vector);
        *svm = to_svm(vcpu);
        vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
        vcpu_info->vector = irq.vector;

        return 0;
}

/*
 * svm_update_pi_irte - set IRTE for Posted-Interrupts
 *
 * @kvm: kvm
 * @host_irq: host irq of the interrupt
 * @guest_irq: gsi of the interrupt
 * @set: set or unset PI
 * returns 0 on success, < 0 on failure
 */
int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
                       uint32_t guest_irq, bool set)
{
        struct kvm_kernel_irq_routing_entry *e;
        struct kvm_irq_routing_table *irq_rt;
        int idx, ret = -EINVAL;

        if (!kvm_arch_has_assigned_device(kvm) ||
            !irq_remapping_cap(IRQ_POSTING_CAP))
                return 0;

        pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
                 __func__, host_irq, guest_irq, set);

        idx = srcu_read_lock(&kvm->irq_srcu);
        irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
        WARN_ON(guest_irq >= irq_rt->nr_rt_entries);

        hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
                struct vcpu_data vcpu_info;
                struct vcpu_svm *svm = NULL;

                if (e->type != KVM_IRQ_ROUTING_MSI)
                        continue;

                /**
                 * Here, we set up legacy mode in the following cases:
                 * 1. When the interrupt cannot be targeted to a specific vcpu.
                 * 2. Unsetting the posted interrupt.
                 * 3. APIC virtualization is disabled for the vcpu.
                 * 4. IRQ has an incompatible delivery mode (SMI, INIT, etc.).
                 */
                if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
                    kvm_vcpu_apicv_active(&svm->vcpu)) {
                        struct amd_iommu_pi_data pi;

                        /* Try to enable guest_mode in IRTE */
                        pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
                                            AVIC_HPA_MASK);
                        pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
                                                     svm->vcpu.vcpu_id);
                        pi.is_guest_mode = true;
                        pi.vcpu_data = &vcpu_info;
                        ret = irq_set_vcpu_affinity(host_irq, &pi);

                        /**
                         * Here, we have successfully set up vcpu affinity in
                         * IOMMU guest mode. Now, we need to store the posted
                         * interrupt information in a per-vcpu ir_list so that
                         * we can refer to it directly when we update the vcpu
                         * scheduling information in the IOMMU irte.
                         */
                        if (!ret && pi.is_guest_mode)
                                svm_ir_list_add(svm, &pi);
                } else {
                        /* Use legacy mode in IRTE */
                        struct amd_iommu_pi_data pi;

                        /**
                         * Here, pi is used to:
                         * - Tell the IOMMU to use legacy mode for this interrupt.
                         * - Retrieve the ga_tag of the prior interrupt remapping data.
                         */
                        pi.prev_ga_tag = 0;
                        pi.is_guest_mode = false;
                        ret = irq_set_vcpu_affinity(host_irq, &pi);

                        /**
                         * Check if the posted interrupt was previously
                         * set up in guest_mode by checking if the ga_tag
                         * was cached. If so, we need to clean up the per-vcpu
                         * ir_list.
                         */
                        if (!ret && pi.prev_ga_tag) {
                                int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
                                struct kvm_vcpu *vcpu;

                                vcpu = kvm_get_vcpu_by_id(kvm, id);
                                if (vcpu)
                                        svm_ir_list_del(to_svm(vcpu), &pi);
                        }
                }

                if (!ret && svm) {
                        trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
                                                 e->gsi, vcpu_info.vector,
                                                 vcpu_info.pi_desc_addr, set);
                }

                if (ret < 0) {
                        pr_err("%s: failed to update PI IRTE\n", __func__);
                        goto out;
                }
        }

        ret = 0;
out:
        srcu_read_unlock(&kvm->irq_srcu, idx);
        return ret;
}

bool svm_check_apicv_inhibit_reasons(ulong bit)
{
        ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
                          BIT(APICV_INHIBIT_REASON_HYPERV) |
                          BIT(APICV_INHIBIT_REASON_NESTED) |
                          BIT(APICV_INHIBIT_REASON_IRQWIN) |
                          BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
                          BIT(APICV_INHIBIT_REASON_X2APIC) |
                          BIT(APICV_INHIBIT_REASON_BLOCKIRQ);

        return supported & BIT(bit);
}

static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
        int ret = 0;
        unsigned long flags;
        struct amd_svm_iommu_ir *ir;
        struct vcpu_svm *svm = to_svm(vcpu);

        if (!kvm_arch_has_assigned_device(vcpu->kvm))
                return 0;

        /*
         * Here, we go through the per-vcpu ir_list to update all existing
         * interrupt remapping table entries targeting this vcpu.
         */
        spin_lock_irqsave(&svm->ir_list_lock, flags);

        if (list_empty(&svm->ir_list))
                goto out;

        list_for_each_entry(ir, &svm->ir_list, node) {
                ret = amd_iommu_update_ga(cpu, r, ir->data);
                if (ret)
                        break;
        }
out:
        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
        return ret;
}

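/*
 * Record in the physical APIC ID table which physical CPU this vCPU is
 * now running on, and set the IS_RUNNING bit, so that the CPU and the
 * IOMMU can post interrupts to it directly.  The IOMMU IRTEs on the
 * per-vcpu ir_list are updated to match.
 */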
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        u64 entry;

        int h_physical_id = kvm_cpu_get_apicid(cpu);
        struct vcpu_svm *svm = to_svm(vcpu);

        if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
                return;

        entry = READ_ONCE(*(svm->avic_physical_id_cache));
        WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);

        entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
        entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);

        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
        if (svm->avic_is_running)
                entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;

        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
        avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
                                        svm->avic_is_running);
}

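/*
 * Clear the IS_RUNNING bit (and tell the IOMMU) when the vCPU is
 * scheduled out or blocks, so that interrupt delivery falls back to
 * waking the vCPU (via svm_deliver_avic_intr() or the IOMMU GA log)
 * instead of ringing a doorbell on a CPU the vCPU no longer runs on.
 */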
void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
        u64 entry;
        struct vcpu_svm *svm = to_svm(vcpu);

        entry = READ_ONCE(*(svm->avic_physical_id_cache));
        if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
                avic_update_iommu_vcpu_affinity(vcpu, -1, 0);

        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}

/*
 * This function is called during VCPU halt/unhalt.
 */
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
{
        struct vcpu_svm *svm = to_svm(vcpu);

        svm->avic_is_running = is_run;

        if (!kvm_vcpu_apicv_active(vcpu))
                return;

        if (is_run)
                avic_vcpu_load(vcpu, vcpu->cpu);
        else
                avic_vcpu_put(vcpu);
}

void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
{
        avic_set_running(vcpu, false);
}

void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
        if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
                kvm_vcpu_update_apicv(vcpu);
        avic_set_running(vcpu, true);
}