linux/arch/x86/kvm/lapic.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2
   3/*
   4 * Local APIC virtualization
   5 *
   6 * Copyright (C) 2006 Qumranet, Inc.
   7 * Copyright (C) 2007 Novell
   8 * Copyright (C) 2007 Intel
   9 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
  10 *
  11 * Authors:
  12 *   Dor Laor <dor.laor@qumranet.com>
  13 *   Gregory Haskins <ghaskins@novell.com>
  14 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
  15 *
  16 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
  17 */
  18
  19#include <linux/kvm_host.h>
  20#include <linux/kvm.h>
  21#include <linux/mm.h>
  22#include <linux/highmem.h>
  23#include <linux/smp.h>
  24#include <linux/hrtimer.h>
  25#include <linux/io.h>
  26#include <linux/export.h>
  27#include <linux/math64.h>
  28#include <linux/slab.h>
  29#include <asm/processor.h>
  30#include <asm/msr.h>
  31#include <asm/page.h>
  32#include <asm/current.h>
  33#include <asm/apicdef.h>
  34#include <asm/delay.h>
  35#include <linux/atomic.h>
  36#include <linux/jump_label.h>
  37#include "kvm_cache_regs.h"
  38#include "irq.h"
  39#include "ioapic.h"
  40#include "trace.h"
  41#include "x86.h"
  42#include "cpuid.h"
  43#include "hyperv.h"
  44
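     /*
      * On 32-bit builds a 64-bit "%" would need a libgcc helper, so the modulo
      * is open-coded via div64_u64(); 64-bit builds can use "%" directly.
      */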
  45#ifndef CONFIG_X86_64
  46#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
  47#else
  48#define mod_64(x, y) ((x) % (y))
  49#endif
  50
  51#define PRId64 "d"
  52#define PRIx64 "llx"
  53#define PRIu64 "u"
  54#define PRIo64 "o"
  55
   56/* 14 is the version for Xeon and Pentium 8.4.8 */
  57#define APIC_VERSION                    (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
  58#define LAPIC_MMIO_LENGTH               (1 << 12)
   59/* The following defines are not in apicdef.h */
  60#define MAX_APIC_VECTOR                 256
  61#define APIC_VECTORS_PER_REG            32
  62
  63static bool lapic_timer_advance_dynamic __read_mostly;
  64#define LAPIC_TIMER_ADVANCE_ADJUST_MIN  100     /* clock cycles */
  65#define LAPIC_TIMER_ADVANCE_ADJUST_MAX  10000   /* clock cycles */
  66#define LAPIC_TIMER_ADVANCE_NS_INIT     1000
  67#define LAPIC_TIMER_ADVANCE_NS_MAX     5000
  68/* step-by-step approximation to mitigate fluctuation */
  69#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
  70
  71static inline int apic_test_vector(int vec, void *bitmap)
  72{
  73        return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  74}
  75
  76bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
  77{
  78        struct kvm_lapic *apic = vcpu->arch.apic;
  79
  80        return apic_test_vector(vector, apic->regs + APIC_ISR) ||
  81                apic_test_vector(vector, apic->regs + APIC_IRR);
  82}
  83
  84static inline int __apic_test_and_set_vector(int vec, void *bitmap)
  85{
  86        return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  87}
  88
  89static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
  90{
  91        return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  92}
  93
  94struct static_key_deferred apic_hw_disabled __read_mostly;
  95struct static_key_deferred apic_sw_disabled __read_mostly;
  96
  97static inline int apic_enabled(struct kvm_lapic *apic)
  98{
   99        return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
 100}
 101
 102#define LVT_MASK        \
 103        (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
 104
 105#define LINT_MASK       \
 106        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 107         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 108
 109static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
 110{
 111        return apic->vcpu->vcpu_id;
 112}
 113
 114static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
 115{
 116        return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
 117}
 118
 119bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
 120{
 121        return kvm_x86_ops.set_hv_timer
 122               && !(kvm_mwait_in_guest(vcpu->kvm) ||
 123                    kvm_can_post_timer_interrupt(vcpu));
 124}
 125EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);
 126
 127static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
 128{
 129        return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
 130}
 131
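     /*
      * Resolve a logical destination to a cluster of up to 16 LAPIC pointers
      * plus a bitmask of targets within it.  In x2APIC mode the logical ID is
      * (cluster << 16) | (1 << (id & 0xf)) and cluster N covers x2APIC IDs
      * N*16..N*16+15, so the cluster can be indexed directly in phys_map.
      */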
 132static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
 133                u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
 134        switch (map->mode) {
 135        case KVM_APIC_MODE_X2APIC: {
 136                u32 offset = (dest_id >> 16) * 16;
 137                u32 max_apic_id = map->max_apic_id;
 138
 139                if (offset <= max_apic_id) {
 140                        u8 cluster_size = min(max_apic_id - offset + 1, 16U);
 141
 142                        offset = array_index_nospec(offset, map->max_apic_id + 1);
 143                        *cluster = &map->phys_map[offset];
 144                        *mask = dest_id & (0xffff >> (16 - cluster_size));
 145                } else {
 146                        *mask = 0;
 147                }
 148
 149                return true;
 150                }
 151        case KVM_APIC_MODE_XAPIC_FLAT:
 152                *cluster = map->xapic_flat_map;
 153                *mask = dest_id & 0xff;
 154                return true;
 155        case KVM_APIC_MODE_XAPIC_CLUSTER:
 156                *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
 157                *mask = dest_id & 0xf;
 158                return true;
 159        default:
 160                /* Not optimized. */
 161                return false;
 162        }
 163}
 164
 165static void kvm_apic_map_free(struct rcu_head *rcu)
 166{
 167        struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
 168
 169        kvfree(map);
 170}
 171
 172/*
 173 * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
 174 *
 175 * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
  176 * apic_map_lock held.
 177 */
 178enum {
 179        CLEAN,
 180        UPDATE_IN_PROGRESS,
 181        DIRTY
 182};
 183
 184void kvm_recalculate_apic_map(struct kvm *kvm)
 185{
 186        struct kvm_apic_map *new, *old = NULL;
 187        struct kvm_vcpu *vcpu;
 188        int i;
 189        u32 max_id = 255; /* enough space for any xAPIC ID */
 190
 191        /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
 192        if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
 193                return;
 194
 195        mutex_lock(&kvm->arch.apic_map_lock);
 196        /*
 197         * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
 198         * (if clean) or the APIC registers (if dirty).
 199         */
 200        if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
 201                                   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
 202                /* Someone else has updated the map. */
 203                mutex_unlock(&kvm->arch.apic_map_lock);
 204                return;
 205        }
 206
 207        kvm_for_each_vcpu(i, vcpu, kvm)
 208                if (kvm_apic_present(vcpu))
 209                        max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
 210
 211        new = kvzalloc(sizeof(struct kvm_apic_map) +
 212                           sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
 213                           GFP_KERNEL_ACCOUNT);
 214
 215        if (!new)
 216                goto out;
 217
 218        new->max_apic_id = max_id;
 219
 220        kvm_for_each_vcpu(i, vcpu, kvm) {
 221                struct kvm_lapic *apic = vcpu->arch.apic;
 222                struct kvm_lapic **cluster;
 223                u16 mask;
 224                u32 ldr;
 225                u8 xapic_id;
 226                u32 x2apic_id;
 227
 228                if (!kvm_apic_present(vcpu))
 229                        continue;
 230
 231                xapic_id = kvm_xapic_id(apic);
 232                x2apic_id = kvm_x2apic_id(apic);
 233
 234                /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
 235                if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
 236                                x2apic_id <= new->max_apic_id)
 237                        new->phys_map[x2apic_id] = apic;
 238                /*
  239                 * ... the xAPIC ID of VCPUs with APIC ID > 0xff will wrap around,
  240                 * so prevent them from masking VCPUs with APIC ID <= 0xff.
 241                 */
 242                if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
 243                        new->phys_map[xapic_id] = apic;
 244
 245                if (!kvm_apic_sw_enabled(apic))
 246                        continue;
 247
 248                ldr = kvm_lapic_get_reg(apic, APIC_LDR);
 249
 250                if (apic_x2apic_mode(apic)) {
 251                        new->mode |= KVM_APIC_MODE_X2APIC;
 252                } else if (ldr) {
 253                        ldr = GET_APIC_LOGICAL_ID(ldr);
 254                        if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
 255                                new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
 256                        else
 257                                new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
 258                }
 259
 260                if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
 261                        continue;
 262
 263                if (mask)
 264                        cluster[ffs(mask) - 1] = apic;
 265        }
 266out:
 267        old = rcu_dereference_protected(kvm->arch.apic_map,
 268                        lockdep_is_held(&kvm->arch.apic_map_lock));
 269        rcu_assign_pointer(kvm->arch.apic_map, new);
 270        /*
 271         * Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
 272         * If another update has come in, leave it DIRTY.
 273         */
 274        atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
 275                               UPDATE_IN_PROGRESS, CLEAN);
 276        mutex_unlock(&kvm->arch.apic_map_lock);
 277
 278        if (old)
 279                call_rcu(&old->rcu, kvm_apic_map_free);
 280
 281        kvm_make_scan_ioapic_request(kvm);
 282}
 283
 284static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 285{
 286        bool enabled = val & APIC_SPIV_APIC_ENABLED;
 287
 288        kvm_lapic_set_reg(apic, APIC_SPIV, val);
 289
 290        if (enabled != apic->sw_enabled) {
 291                apic->sw_enabled = enabled;
 292                if (enabled)
 293                        static_key_slow_dec_deferred(&apic_sw_disabled);
 294                else
 295                        static_key_slow_inc(&apic_sw_disabled.key);
 296
 297                atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 298        }
 299}
 300
 301static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
 302{
 303        kvm_lapic_set_reg(apic, APIC_ID, id << 24);
 304        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 305}
 306
 307static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
 308{
 309        kvm_lapic_set_reg(apic, APIC_LDR, id);
 310        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 311}
 312
 313static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
 314{
 315        kvm_lapic_set_reg(apic, APIC_DFR, val);
 316        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 317}
 318
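     /*
      * In x2APIC mode the logical APIC ID is derived from the physical one:
      * bits 31:16 hold the cluster (id >> 4) and bits 15:0 hold a one-hot
      * position within the cluster (1 << (id & 0xf)).
      */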
 319static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
 320{
 321        return ((id >> 4) << 16) | (1 << (id & 0xf));
 322}
 323
 324static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
 325{
 326        u32 ldr = kvm_apic_calc_x2apic_ldr(id);
 327
 328        WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
 329
 330        kvm_lapic_set_reg(apic, APIC_ID, id);
 331        kvm_lapic_set_reg(apic, APIC_LDR, ldr);
 332        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 333}
 334
 335static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 336{
 337        return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
 338}
 339
 340static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 341{
 342        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
 343}
 344
 345static inline int apic_lvtt_period(struct kvm_lapic *apic)
 346{
 347        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
 348}
 349
 350static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 351{
 352        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
 353}
 354
 355static inline int apic_lvt_nmi_mode(u32 lvt_val)
 356{
 357        return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
 358}
 359
 360void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 361{
 362        struct kvm_lapic *apic = vcpu->arch.apic;
 363        u32 v = APIC_VERSION;
 364
 365        if (!lapic_in_kernel(vcpu))
 366                return;
 367
 368        /*
  369         * KVM's in-kernel IOAPIC emulates the 82093AA datasheet, which has no
  370         * EOI register.  Some buggy OSes (e.g. Windows with the Hyper-V role)
  371         * disable EOI broadcast in the LAPIC without first checking the IOAPIC
  372         * version, so level-triggered interrupts would never get EOIed by the
  373         * in-kernel IOAPIC.
 374         */
 375        if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
 376            !ioapic_in_kernel(vcpu->kvm))
 377                v |= APIC_LVR_DIRECTED_EOI;
 378        kvm_lapic_set_reg(apic, APIC_LVR, v);
 379}
 380
 381static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
  382        LVT_MASK,       /* part LVTT mask, timer mode mask added at runtime */
 383        LVT_MASK | APIC_MODE_MASK,      /* LVTTHMR */
 384        LVT_MASK | APIC_MODE_MASK,      /* LVTPC */
 385        LINT_MASK, LINT_MASK,   /* LVT0-1 */
 386        LVT_MASK                /* LVTERR */
 387};
 388
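     /*
      * IRR, ISR and TMR each hold 256 vector bits spread across eight 32-bit
      * registers spaced 0x10 bytes apart; REG_POS()/VEC_POS() translate a
      * vector number into that layout.
      */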
 389static int find_highest_vector(void *bitmap)
 390{
 391        int vec;
 392        u32 *reg;
 393
 394        for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
 395             vec >= 0; vec -= APIC_VECTORS_PER_REG) {
 396                reg = bitmap + REG_POS(vec);
 397                if (*reg)
 398                        return __fls(*reg) + vec;
 399        }
 400
 401        return -1;
 402}
 403
 404static u8 count_vectors(void *bitmap)
 405{
 406        int vec;
 407        u32 *reg;
 408        u8 count = 0;
 409
 410        for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
 411                reg = bitmap + REG_POS(vec);
 412                count += hweight32(*reg);
 413        }
 414
 415        return count;
 416}
 417
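     /*
      * Merge the Posted-Interrupt Request bitmap into the vAPIC page's IRR:
      * each of the eight 32-bit PIR chunks is atomically cleared and OR'd into
      * the matching IRR register.  *max_irr returns the highest vector now
      * pending; the return value indicates whether that vector was set by this
      * transfer.
      */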
 418bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
 419{
 420        u32 i, vec;
 421        u32 pir_val, irr_val, prev_irr_val;
 422        int max_updated_irr;
 423
 424        max_updated_irr = -1;
 425        *max_irr = -1;
 426
 427        for (i = vec = 0; i <= 7; i++, vec += 32) {
 428                pir_val = READ_ONCE(pir[i]);
 429                irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
 430                if (pir_val) {
 431                        prev_irr_val = irr_val;
 432                        irr_val |= xchg(&pir[i], 0);
 433                        *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
 434                        if (prev_irr_val != irr_val) {
 435                                max_updated_irr =
 436                                        __fls(irr_val ^ prev_irr_val) + vec;
 437                        }
 438                }
 439                if (irr_val)
 440                        *max_irr = __fls(irr_val) + vec;
 441        }
 442
 443        return ((max_updated_irr != -1) &&
 444                (max_updated_irr == *max_irr));
 445}
 446EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
 447
 448bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
 449{
 450        struct kvm_lapic *apic = vcpu->arch.apic;
 451
 452        return __kvm_apic_update_irr(pir, apic->regs, max_irr);
 453}
 454EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 455
 456static inline int apic_search_irr(struct kvm_lapic *apic)
 457{
 458        return find_highest_vector(apic->regs + APIC_IRR);
 459}
 460
 461static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 462{
 463        int result;
 464
 465        /*
  466         * Note that irr_pending is just a hint.  It will always be
  467         * true when virtual interrupt delivery is enabled.
 468         */
 469        if (!apic->irr_pending)
 470                return -1;
 471
 472        result = apic_search_irr(apic);
 473        ASSERT(result == -1 || result >= 16);
 474
 475        return result;
 476}
 477
 478static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 479{
 480        struct kvm_vcpu *vcpu;
 481
 482        vcpu = apic->vcpu;
 483
 484        if (unlikely(vcpu->arch.apicv_active)) {
 485                /* need to update RVI */
 486                kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
 487                kvm_x86_ops.hwapic_irr_update(vcpu,
 488                                apic_find_highest_irr(apic));
 489        } else {
 490                apic->irr_pending = false;
 491                kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
 492                if (apic_search_irr(apic) != -1)
 493                        apic->irr_pending = true;
 494        }
 495}
 496
 497void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
 498{
 499        apic_clear_irr(vec, vcpu->arch.apic);
 500}
 501EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
 502
 503static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 504{
 505        struct kvm_vcpu *vcpu;
 506
 507        if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
 508                return;
 509
 510        vcpu = apic->vcpu;
 511
 512        /*
 513         * With APIC virtualization enabled, all caching is disabled
 514         * because the processor can modify ISR under the hood.  Instead
 515         * just set SVI.
 516         */
 517        if (unlikely(vcpu->arch.apicv_active))
 518                kvm_x86_ops.hwapic_isr_update(vcpu, vec);
 519        else {
 520                ++apic->isr_count;
 521                BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
 522                /*
  523                 * The ISR (in-service register) bit is set when an interrupt is
  524                 * injected.  Only the highest-priority vector is injected, so the
  525                 * most recently set bit matches the highest bit in the ISR.
 526                 */
 527                apic->highest_isr_cache = vec;
 528        }
 529}
 530
 531static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 532{
 533        int result;
 534
 535        /*
 536         * Note that isr_count is always 1, and highest_isr_cache
 537         * is always -1, with APIC virtualization enabled.
 538         */
 539        if (!apic->isr_count)
 540                return -1;
 541        if (likely(apic->highest_isr_cache != -1))
 542                return apic->highest_isr_cache;
 543
 544        result = find_highest_vector(apic->regs + APIC_ISR);
 545        ASSERT(result == -1 || result >= 16);
 546
 547        return result;
 548}
 549
 550static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 551{
 552        struct kvm_vcpu *vcpu;
 553        if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
 554                return;
 555
 556        vcpu = apic->vcpu;
 557
 558        /*
 559         * We do get here for APIC virtualization enabled if the guest
 560         * uses the Hyper-V APIC enlightenment.  In this case we may need
 561         * to trigger a new interrupt delivery by writing the SVI field;
 562         * on the other hand isr_count and highest_isr_cache are unused
 563         * and must be left alone.
 564         */
 565        if (unlikely(vcpu->arch.apicv_active))
 566                kvm_x86_ops.hwapic_isr_update(vcpu,
 567                                               apic_find_highest_isr(apic));
 568        else {
 569                --apic->isr_count;
 570                BUG_ON(apic->isr_count < 0);
 571                apic->highest_isr_cache = -1;
 572        }
 573}
 574
 575int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 576{
 577        /* This may race with setting of irr in __apic_accept_irq() and
  578         * the returned value may be stale, but kvm_vcpu_kick() in __apic_accept_irq
  579         * will cause an immediate vmexit and the value will be recalculated
 580         * on the next vmentry.
 581         */
 582        return apic_find_highest_irr(vcpu->arch.apic);
 583}
 584EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 585
 586static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 587                             int vector, int level, int trig_mode,
 588                             struct dest_map *dest_map);
 589
 590int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 591                     struct dest_map *dest_map)
 592{
 593        struct kvm_lapic *apic = vcpu->arch.apic;
 594
 595        return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
 596                        irq->level, irq->trig_mode, dest_map);
 597}
 598
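     /*
      * Helper for the PV send-IPI hypercall (KVM_HC_SEND_IPI): bit i of
      * *ipi_bitmap targets the vCPU whose APIC ID is (min + i).  Returns the
      * number of interrupts that were successfully injected.
      */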
 599static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
 600                         struct kvm_lapic_irq *irq, u32 min)
 601{
 602        int i, count = 0;
 603        struct kvm_vcpu *vcpu;
 604
 605        if (min > map->max_apic_id)
 606                return 0;
 607
 608        for_each_set_bit(i, ipi_bitmap,
 609                min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
 610                if (map->phys_map[min + i]) {
 611                        vcpu = map->phys_map[min + i]->vcpu;
 612                        count += kvm_apic_set_irq(vcpu, irq, NULL);
 613                }
 614        }
 615
 616        return count;
 617}
 618
 619int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 620                    unsigned long ipi_bitmap_high, u32 min,
 621                    unsigned long icr, int op_64_bit)
 622{
 623        struct kvm_apic_map *map;
 624        struct kvm_lapic_irq irq = {0};
 625        int cluster_size = op_64_bit ? 64 : 32;
 626        int count;
 627
 628        if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
 629                return -KVM_EINVAL;
 630
 631        irq.vector = icr & APIC_VECTOR_MASK;
 632        irq.delivery_mode = icr & APIC_MODE_MASK;
 633        irq.level = (icr & APIC_INT_ASSERT) != 0;
 634        irq.trig_mode = icr & APIC_INT_LEVELTRIG;
 635
 636        rcu_read_lock();
 637        map = rcu_dereference(kvm->arch.apic_map);
 638
 639        count = -EOPNOTSUPP;
 640        if (likely(map)) {
 641                count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
 642                min += cluster_size;
 643                count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
 644        }
 645
 646        rcu_read_unlock();
 647        return count;
 648}
 649
 650static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
 651{
 652
 653        return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
 654                                      sizeof(val));
 655}
 656
 657static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
 658{
 659
 660        return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
 661                                      sizeof(*val));
 662}
 663
 664static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
 665{
 666        return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
 667}
 668
 669static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 670{
 671        u8 val;
 672        if (pv_eoi_get_user(vcpu, &val) < 0) {
 673                printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
 674                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 675                return false;
 676        }
 677        return val & KVM_PV_EOI_ENABLED;
 678}
 679
 680static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 681{
 682        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
 683                printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
 684                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 685                return;
 686        }
 687        __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 688}
 689
 690static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 691{
 692        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
 693                printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
 694                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 695                return;
 696        }
 697        __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 698}
 699
 700static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
 701{
 702        int highest_irr;
 703        if (apic->vcpu->arch.apicv_active)
 704                highest_irr = kvm_x86_ops.sync_pir_to_irr(apic->vcpu);
 705        else
 706                highest_irr = apic_find_highest_irr(apic);
 707        if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
 708                return -1;
 709        return highest_irr;
 710}
 711
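     /*
      * Recompute the Processor Priority Register: PPR is the TPR if the TPR's
      * priority class (bits 7:4) is at least that of the highest in-service
      * vector, otherwise it is that vector's class with the sub-class bits
      * cleared.  Returns true if the priority dropped, i.e. an interrupt may
      * now be deliverable.
      */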
 712static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
 713{
 714        u32 tpr, isrv, ppr, old_ppr;
 715        int isr;
 716
 717        old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
 718        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
 719        isr = apic_find_highest_isr(apic);
 720        isrv = (isr != -1) ? isr : 0;
 721
 722        if ((tpr & 0xf0) >= (isrv & 0xf0))
 723                ppr = tpr & 0xff;
 724        else
 725                ppr = isrv & 0xf0;
 726
 727        *new_ppr = ppr;
 728        if (old_ppr != ppr)
 729                kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
 730
 731        return ppr < old_ppr;
 732}
 733
 734static void apic_update_ppr(struct kvm_lapic *apic)
 735{
 736        u32 ppr;
 737
 738        if (__apic_update_ppr(apic, &ppr) &&
 739            apic_has_interrupt_for_ppr(apic, ppr) != -1)
 740                kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 741}
 742
 743void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
 744{
 745        apic_update_ppr(vcpu->arch.apic);
 746}
 747EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
 748
 749static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 750{
 751        kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
 752        apic_update_ppr(apic);
 753}
 754
 755static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
 756{
 757        return mda == (apic_x2apic_mode(apic) ?
 758                        X2APIC_BROADCAST : APIC_BROADCAST);
 759}
 760
 761static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
 762{
 763        if (kvm_apic_broadcast(apic, mda))
 764                return true;
 765
 766        if (apic_x2apic_mode(apic))
 767                return mda == kvm_x2apic_id(apic);
 768
 769        /*
 770         * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
 771         * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
 772         * this allows unique addressing of VCPUs with APIC ID over 0xff.
  773         * The 0xff check is needed because the xAPIC ID is writeable.
 774         */
 775        if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
 776                return true;
 777
 778        return mda == kvm_xapic_id(apic);
 779}
 780
 781static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 782{
 783        u32 logical_id;
 784
 785        if (kvm_apic_broadcast(apic, mda))
 786                return true;
 787
 788        logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
 789
 790        if (apic_x2apic_mode(apic))
 791                return ((logical_id >> 16) == (mda >> 16))
 792                       && (logical_id & mda & 0xffff) != 0;
 793
 794        logical_id = GET_APIC_LOGICAL_ID(logical_id);
 795
 796        switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
 797        case APIC_DFR_FLAT:
 798                return (logical_id & mda) != 0;
 799        case APIC_DFR_CLUSTER:
 800                return ((logical_id >> 4) == (mda >> 4))
 801                       && (logical_id & mda & 0xf) != 0;
 802        default:
 803                return false;
 804        }
 805}
 806
 807/* The KVM local APIC implementation has two quirks:
 808 *
 809 *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 810 *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 811 *    KVM doesn't do that aliasing.
 812 *
 813 *  - in-kernel IOAPIC messages have to be delivered directly to
 814 *    x2APIC, because the kernel does not support interrupt remapping.
 815 *    In order to support broadcast without interrupt remapping, x2APIC
 816 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
 817 *    to X2APIC_BROADCAST.
 818 *
 819 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
 820 * important when userspace wants to use x2APIC-format MSIs, because
 821 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
 822 */
 823static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
 824                struct kvm_lapic *source, struct kvm_lapic *target)
 825{
 826        bool ipi = source != NULL;
 827
 828        if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
 829            !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
 830                return X2APIC_BROADCAST;
 831
 832        return dest_id;
 833}
 834
 835bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 836                           int shorthand, unsigned int dest, int dest_mode)
 837{
 838        struct kvm_lapic *target = vcpu->arch.apic;
 839        u32 mda = kvm_apic_mda(vcpu, dest, source, target);
 840
 841        ASSERT(target);
 842        switch (shorthand) {
 843        case APIC_DEST_NOSHORT:
 844                if (dest_mode == APIC_DEST_PHYSICAL)
 845                        return kvm_apic_match_physical_addr(target, mda);
 846                else
 847                        return kvm_apic_match_logical_addr(target, mda);
 848        case APIC_DEST_SELF:
 849                return target == source;
 850        case APIC_DEST_ALLINC:
 851                return true;
 852        case APIC_DEST_ALLBUT:
 853                return target != source;
 854        default:
 855                return false;
 856        }
 857}
 858EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
 859
 860int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
 861                       const unsigned long *bitmap, u32 bitmap_size)
 862{
 863        u32 mod;
 864        int i, idx = -1;
 865
 866        mod = vector % dest_vcpus;
 867
 868        for (i = 0; i <= mod; i++) {
 869                idx = find_next_bit(bitmap, bitmap_size, idx + 1);
 870                BUG_ON(idx == bitmap_size);
 871        }
 872
 873        return idx;
 874}
 875
 876static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
 877{
 878        if (!kvm->arch.disabled_lapic_found) {
 879                kvm->arch.disabled_lapic_found = true;
 880                printk(KERN_INFO
 881                       "Disabled LAPIC found during irq injection\n");
 882        }
 883}
 884
 885static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
 886                struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
 887{
 888        if (kvm->arch.x2apic_broadcast_quirk_disabled) {
 889                if ((irq->dest_id == APIC_BROADCAST &&
 890                                map->mode != KVM_APIC_MODE_X2APIC))
 891                        return true;
 892                if (irq->dest_id == X2APIC_BROADCAST)
 893                        return true;
 894        } else {
 895                bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
 896                if (irq->dest_id == (x2apic_ipi ?
 897                                     X2APIC_BROADCAST : APIC_BROADCAST))
 898                        return true;
 899        }
 900
 901        return false;
 902}
 903
  904/* Return true if the interrupt can be handled by using *bitmap as an index mask
  905 * for valid destinations in the *dst array.
 906 * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
 907 * Note: we may have zero kvm_lapic destinations when we return true, which
 908 * means that the interrupt should be dropped.  In this case, *bitmap would be
 909 * zero and *dst undefined.
 910 */
 911static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
 912                struct kvm_lapic **src, struct kvm_lapic_irq *irq,
 913                struct kvm_apic_map *map, struct kvm_lapic ***dst,
 914                unsigned long *bitmap)
 915{
 916        int i, lowest;
 917
 918        if (irq->shorthand == APIC_DEST_SELF && src) {
 919                *dst = src;
 920                *bitmap = 1;
 921                return true;
 922        } else if (irq->shorthand)
 923                return false;
 924
 925        if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
 926                return false;
 927
 928        if (irq->dest_mode == APIC_DEST_PHYSICAL) {
 929                if (irq->dest_id > map->max_apic_id) {
 930                        *bitmap = 0;
 931                } else {
 932                        u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
 933                        *dst = &map->phys_map[dest_id];
 934                        *bitmap = 1;
 935                }
 936                return true;
 937        }
 938
 939        *bitmap = 0;
 940        if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
 941                                (u16 *)bitmap))
 942                return false;
 943
 944        if (!kvm_lowest_prio_delivery(irq))
 945                return true;
 946
 947        if (!kvm_vector_hashing_enabled()) {
 948                lowest = -1;
 949                for_each_set_bit(i, bitmap, 16) {
 950                        if (!(*dst)[i])
 951                                continue;
 952                        if (lowest < 0)
 953                                lowest = i;
 954                        else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
 955                                                (*dst)[lowest]->vcpu) < 0)
 956                                lowest = i;
 957                }
 958        } else {
 959                if (!*bitmap)
 960                        return true;
 961
 962                lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
 963                                bitmap, 16);
 964
 965                if (!(*dst)[lowest]) {
 966                        kvm_apic_disabled_lapic_found(kvm);
 967                        *bitmap = 0;
 968                        return true;
 969                }
 970        }
 971
 972        *bitmap = (lowest >= 0) ? 1 << lowest : 0;
 973
 974        return true;
 975}
 976
 977bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 978                struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
 979{
 980        struct kvm_apic_map *map;
 981        unsigned long bitmap;
 982        struct kvm_lapic **dst = NULL;
 983        int i;
 984        bool ret;
 985
 986        *r = -1;
 987
 988        if (irq->shorthand == APIC_DEST_SELF) {
 989                *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
 990                return true;
 991        }
 992
 993        rcu_read_lock();
 994        map = rcu_dereference(kvm->arch.apic_map);
 995
 996        ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
 997        if (ret) {
 998                *r = 0;
 999                for_each_set_bit(i, &bitmap, 16) {
1000                        if (!dst[i])
1001                                continue;
1002                        *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
1003                }
1004        }
1005
1006        rcu_read_unlock();
1007        return ret;
1008}
1009
1010/*
1011 * This routine tries to handle interrupts in posted mode, here is how
1012 * it deals with different cases:
1013 * - For single-destination interrupts, handle it in posted mode
1014 * - Else if vector hashing is enabled and it is a lowest-priority
1015 *   interrupt, handle it in posted mode and use the following mechanism
1016 *   to find the destination vCPU.
1017 *      1. For lowest-priority interrupts, store all the possible
1018 *         destination vCPUs in an array.
1019 *      2. Use "guest vector % max number of destination vCPUs" to find
1020 *         the right destination vCPU in the array for the lowest-priority
1021 *         interrupt.
1022 * - Otherwise, use remapped mode to inject the interrupt.
1023 */
1024bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
1025                        struct kvm_vcpu **dest_vcpu)
1026{
1027        struct kvm_apic_map *map;
1028        unsigned long bitmap;
1029        struct kvm_lapic **dst = NULL;
1030        bool ret = false;
1031
1032        if (irq->shorthand)
1033                return false;
1034
1035        rcu_read_lock();
1036        map = rcu_dereference(kvm->arch.apic_map);
1037
1038        if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
1039                        hweight16(bitmap) == 1) {
1040                unsigned long i = find_first_bit(&bitmap, 16);
1041
1042                if (dst[i]) {
1043                        *dest_vcpu = dst[i]->vcpu;
1044                        ret = true;
1045                }
1046        }
1047
1048        rcu_read_unlock();
1049        return ret;
1050}
1051
1052/*
1053 * Add a pending IRQ into lapic.
1054 * Return 1 if successfully added and 0 if discarded.
1055 */
1056static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1057                             int vector, int level, int trig_mode,
1058                             struct dest_map *dest_map)
1059{
1060        int result = 0;
1061        struct kvm_vcpu *vcpu = apic->vcpu;
1062
1063        trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
1064                                  trig_mode, vector);
1065        switch (delivery_mode) {
1066        case APIC_DM_LOWEST:
1067                vcpu->arch.apic_arb_prio++;
1068                fallthrough;
1069        case APIC_DM_FIXED:
1070                if (unlikely(trig_mode && !level))
1071                        break;
1072
1073                /* FIXME add logic for vcpu on reset */
1074                if (unlikely(!apic_enabled(apic)))
1075                        break;
1076
1077                result = 1;
1078
1079                if (dest_map) {
1080                        __set_bit(vcpu->vcpu_id, dest_map->map);
1081                        dest_map->vectors[vcpu->vcpu_id] = vector;
1082                }
1083
1084                if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
1085                        if (trig_mode)
1086                                kvm_lapic_set_vector(vector,
1087                                                     apic->regs + APIC_TMR);
1088                        else
1089                                kvm_lapic_clear_vector(vector,
1090                                                       apic->regs + APIC_TMR);
1091                }
1092
1093                if (kvm_x86_ops.deliver_posted_interrupt(vcpu, vector)) {
1094                        kvm_lapic_set_irr(vector, apic);
1095                        kvm_make_request(KVM_REQ_EVENT, vcpu);
1096                        kvm_vcpu_kick(vcpu);
1097                }
1098                break;
1099
1100        case APIC_DM_REMRD:
1101                result = 1;
1102                vcpu->arch.pv.pv_unhalted = 1;
1103                kvm_make_request(KVM_REQ_EVENT, vcpu);
1104                kvm_vcpu_kick(vcpu);
1105                break;
1106
1107        case APIC_DM_SMI:
1108                result = 1;
1109                kvm_make_request(KVM_REQ_SMI, vcpu);
1110                kvm_vcpu_kick(vcpu);
1111                break;
1112
1113        case APIC_DM_NMI:
1114                result = 1;
1115                kvm_inject_nmi(vcpu);
1116                kvm_vcpu_kick(vcpu);
1117                break;
1118
1119        case APIC_DM_INIT:
1120                if (!trig_mode || level) {
1121                        result = 1;
1122                        /* assumes that there are only KVM_APIC_INIT/SIPI */
1123                        apic->pending_events = (1UL << KVM_APIC_INIT);
1124                        kvm_make_request(KVM_REQ_EVENT, vcpu);
1125                        kvm_vcpu_kick(vcpu);
1126                }
1127                break;
1128
1129        case APIC_DM_STARTUP:
1130                result = 1;
1131                apic->sipi_vector = vector;
1132                /* make sure sipi_vector is visible for the receiver */
1133                smp_wmb();
1134                set_bit(KVM_APIC_SIPI, &apic->pending_events);
1135                kvm_make_request(KVM_REQ_EVENT, vcpu);
1136                kvm_vcpu_kick(vcpu);
1137                break;
1138
1139        case APIC_DM_EXTINT:
1140                /*
1141                 * Should only be called by kvm_apic_local_deliver() with LVT0,
1142                 * before NMI watchdog was enabled. Already handled by
1143                 * kvm_apic_accept_pic_intr().
1144                 */
1145                break;
1146
1147        default:
1148                printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1149                       delivery_mode);
1150                break;
1151        }
1152        return result;
1153}
1154
1155/*
 1156 * This routine identifies the mask of destination vCPUs meant to receive an
 1157 * IOAPIC interrupt.  It either uses kvm_apic_map_get_dest_lapic() to find the
 1158 * destination vCPU array and set the bitmap, or it traverses every available
 1159 * vCPU and matches the destination individually.
1160 */
1161void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
1162                              unsigned long *vcpu_bitmap)
1163{
1164        struct kvm_lapic **dest_vcpu = NULL;
1165        struct kvm_lapic *src = NULL;
1166        struct kvm_apic_map *map;
1167        struct kvm_vcpu *vcpu;
1168        unsigned long bitmap;
1169        int i, vcpu_idx;
1170        bool ret;
1171
1172        rcu_read_lock();
1173        map = rcu_dereference(kvm->arch.apic_map);
1174
1175        ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
1176                                          &bitmap);
1177        if (ret) {
1178                for_each_set_bit(i, &bitmap, 16) {
1179                        if (!dest_vcpu[i])
1180                                continue;
1181                        vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
1182                        __set_bit(vcpu_idx, vcpu_bitmap);
1183                }
1184        } else {
1185                kvm_for_each_vcpu(i, vcpu, kvm) {
1186                        if (!kvm_apic_present(vcpu))
1187                                continue;
1188                        if (!kvm_apic_match_dest(vcpu, NULL,
1189                                                 irq->shorthand,
1190                                                 irq->dest_id,
1191                                                 irq->dest_mode))
1192                                continue;
1193                        __set_bit(i, vcpu_bitmap);
1194                }
1195        }
1196        rcu_read_unlock();
1197}
1198
1199int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1200{
1201        return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1202}
1203
1204static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1205{
1206        return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1207}
1208
1209static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1210{
1211        int trigger_mode;
1212
 1213        /* EOI the IOAPIC only if the IOAPIC handles the vector. */
1214        if (!kvm_ioapic_handles_vector(apic, vector))
1215                return;
1216
1217        /* Request a KVM exit to inform the userspace IOAPIC. */
1218        if (irqchip_split(apic->vcpu->kvm)) {
1219                apic->vcpu->arch.pending_ioapic_eoi = vector;
1220                kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1221                return;
1222        }
1223
1224        if (apic_test_vector(vector, apic->regs + APIC_TMR))
1225                trigger_mode = IOAPIC_LEVEL_TRIG;
1226        else
1227                trigger_mode = IOAPIC_EDGE_TRIG;
1228
1229        kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1230}
1231
1232static int apic_set_eoi(struct kvm_lapic *apic)
1233{
1234        int vector = apic_find_highest_isr(apic);
1235
1236        trace_kvm_eoi(apic, vector);
1237
1238        /*
 1239         * Not every EOI write has a corresponding ISR bit; one example is
 1240         * when the kernel checks the timer in setup_IO_APIC().
1241         */
1242        if (vector == -1)
1243                return vector;
1244
1245        apic_clear_isr(vector, apic);
1246        apic_update_ppr(apic);
1247
1248        if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
1249                kvm_hv_synic_send_eoi(apic->vcpu, vector);
1250
1251        kvm_ioapic_send_eoi(apic, vector);
1252        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1253        return vector;
1254}
1255
1256/*
 1257 * This interface assumes a trap-like exit, which has already finished the
 1258 * desired side effects, including the vISR and vPPR updates.
1259 */
1260void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1261{
1262        struct kvm_lapic *apic = vcpu->arch.apic;
1263
1264        trace_kvm_eoi(apic, vector);
1265
1266        kvm_ioapic_send_eoi(apic, vector);
1267        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1268}
1269EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1270
1271void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
1272{
1273        struct kvm_lapic_irq irq;
1274
1275        irq.vector = icr_low & APIC_VECTOR_MASK;
1276        irq.delivery_mode = icr_low & APIC_MODE_MASK;
1277        irq.dest_mode = icr_low & APIC_DEST_MASK;
1278        irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1279        irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1280        irq.shorthand = icr_low & APIC_SHORT_MASK;
1281        irq.msi_redir_hint = false;
1282        if (apic_x2apic_mode(apic))
1283                irq.dest_id = icr_high;
1284        else
1285                irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
1286
1287        trace_kvm_apic_ipi(icr_low, irq.dest_id);
1288
1289        kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1290}
1291
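     /*
      * Compute the timer Current Count register: the time remaining until the
      * next expiration, expressed in ticks of (APIC_BUS_CYCLE_NS *
      * divide_count) nanoseconds each.
      */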
1292static u32 apic_get_tmcct(struct kvm_lapic *apic)
1293{
1294        ktime_t remaining, now;
1295        s64 ns;
1296        u32 tmcct;
1297
1298        ASSERT(apic != NULL);
1299
1300        /* if initial count is 0, current count should also be 0 */
1301        if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1302                apic->lapic_timer.period == 0)
1303                return 0;
1304
1305        now = ktime_get();
1306        remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1307        if (ktime_to_ns(remaining) < 0)
1308                remaining = 0;
1309
1310        ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1311        tmcct = div64_u64(ns,
1312                         (APIC_BUS_CYCLE_NS * apic->divide_count));
1313
1314        return tmcct;
1315}
1316
1317static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1318{
1319        struct kvm_vcpu *vcpu = apic->vcpu;
1320        struct kvm_run *run = vcpu->run;
1321
1322        kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1323        run->tpr_access.rip = kvm_rip_read(vcpu);
1324        run->tpr_access.is_write = write;
1325}
1326
1327static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1328{
1329        if (apic->vcpu->arch.tpr_access_reporting)
1330                __report_tpr_access(apic, write);
1331}
1332
1333static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1334{
1335        u32 val = 0;
1336
1337        if (offset >= LAPIC_MMIO_LENGTH)
1338                return 0;
1339
1340        switch (offset) {
1341        case APIC_ARBPRI:
1342                break;
1343
1344        case APIC_TMCCT:        /* Timer CCR */
1345                if (apic_lvtt_tscdeadline(apic))
1346                        return 0;
1347
1348                val = apic_get_tmcct(apic);
1349                break;
1350        case APIC_PROCPRI:
1351                apic_update_ppr(apic);
1352                val = kvm_lapic_get_reg(apic, offset);
1353                break;
1354        case APIC_TASKPRI:
1355                report_tpr_access(apic, false);
1356                fallthrough;
1357        default:
1358                val = kvm_lapic_get_reg(apic, offset);
1359                break;
1360        }
1361
1362        return val;
1363}
1364
1365static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1366{
1367        return container_of(dev, struct kvm_lapic, dev);
1368}
1369
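     /*
      * Every architectural APIC register sits at a 16-byte-aligned offset, so
      * (offset >> 4) is a unique bit index and a single u64 mask can describe
      * which registers in 0x000-0x3f0 are readable.
      */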
1370#define APIC_REG_MASK(reg)      (1ull << ((reg) >> 4))
1371#define APIC_REGS_MASK(first, count) \
1372        (APIC_REG_MASK(first) * ((1ull << (count)) - 1))
1373
1374int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1375                void *data)
1376{
1377        unsigned char alignment = offset & 0xf;
1378        u32 result;
1379        /* this bitmask has a bit cleared for each reserved register */
1380        u64 valid_reg_mask =
1381                APIC_REG_MASK(APIC_ID) |
1382                APIC_REG_MASK(APIC_LVR) |
1383                APIC_REG_MASK(APIC_TASKPRI) |
1384                APIC_REG_MASK(APIC_PROCPRI) |
1385                APIC_REG_MASK(APIC_LDR) |
1386                APIC_REG_MASK(APIC_DFR) |
1387                APIC_REG_MASK(APIC_SPIV) |
1388                APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1389                APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1390                APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1391                APIC_REG_MASK(APIC_ESR) |
1392                APIC_REG_MASK(APIC_ICR) |
1393                APIC_REG_MASK(APIC_ICR2) |
1394                APIC_REG_MASK(APIC_LVTT) |
1395                APIC_REG_MASK(APIC_LVTTHMR) |
1396                APIC_REG_MASK(APIC_LVTPC) |
1397                APIC_REG_MASK(APIC_LVT0) |
1398                APIC_REG_MASK(APIC_LVT1) |
1399                APIC_REG_MASK(APIC_LVTERR) |
1400                APIC_REG_MASK(APIC_TMICT) |
1401                APIC_REG_MASK(APIC_TMCCT) |
1402                APIC_REG_MASK(APIC_TDCR);
1403
1404        /* ARBPRI is not valid on x2APIC */
1405        if (!apic_x2apic_mode(apic))
1406                valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
1407
1408        if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
1409                return 1;
1410
1411        result = __apic_read(apic, offset & ~0xf);
1412
1413        trace_kvm_apic_read(offset, result);
1414
1415        switch (len) {
1416        case 1:
1417        case 2:
1418        case 4:
1419                memcpy(data, (char *)&result + alignment, len);
1420                break;
1421        default:
1422                printk(KERN_ERR "Local APIC read with len = %x, "
 1423                       "should be 1, 2, or 4 instead\n", len);
1424                break;
1425        }
1426        return 0;
1427}
1428EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
1429
1430static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1431{
1432        return addr >= apic->base_address &&
1433                addr < apic->base_address + LAPIC_MMIO_LENGTH;
1434}
1435
1436static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1437                           gpa_t address, int len, void *data)
1438{
1439        struct kvm_lapic *apic = to_lapic(this);
1440        u32 offset = address - apic->base_address;
1441
1442        if (!apic_mmio_in_range(apic, address))
1443                return -EOPNOTSUPP;
1444
1445        if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1446                if (!kvm_check_has_quirk(vcpu->kvm,
1447                                         KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1448                        return -EOPNOTSUPP;
1449
1450                memset(data, 0xff, len);
1451                return 0;
1452        }
1453
1454        kvm_lapic_reg_read(apic, offset, len, data);
1455
1456        return 0;
1457}
1458
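     /*
      * The timer divide configuration is encoded in TDCR bits 0, 1 and 3; the
      * divisor is 2^(encoding + 1), except that the all-ones encoding means
      * divide by 1 (the "& 0x7" below handles that wrap-around).
      */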
1459static void update_divide_count(struct kvm_lapic *apic)
1460{
1461        u32 tmp1, tmp2, tdcr;
1462
1463        tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1464        tmp1 = tdcr & 0xf;
1465        tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1466        apic->divide_count = 0x1 << (tmp2 & 0x7);
1467}
1468
1469static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1470{
1471        /*
1472         * Do not allow the guest to program periodic timers with small
1473         * interval, since the hrtimers are not throttled by the host
1474         * scheduler.
1475         */
1476        if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1477                s64 min_period = min_timer_period_us * 1000LL;
1478
1479                if (apic->lapic_timer.period < min_period) {
1480                        pr_info_ratelimited(
1481                            "kvm: vcpu %i: requested %lld ns "
1482                            "lapic timer period limited to %lld ns\n",
1483                            apic->vcpu->vcpu_id,
1484                            apic->lapic_timer.period, min_period);
1485                        apic->lapic_timer.period = min_period;
1486                }
1487        }
1488}
1489
1490static void cancel_hv_timer(struct kvm_lapic *apic);
1491
1492static void apic_update_lvtt(struct kvm_lapic *apic)
1493{
1494        u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1495                        apic->lapic_timer.timer_mode_mask;
1496
1497        if (apic->lapic_timer.timer_mode != timer_mode) {
1498                if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1499                                APIC_LVT_TIMER_TSCDEADLINE)) {
1500                        hrtimer_cancel(&apic->lapic_timer.timer);
1501                        preempt_disable();
1502                        if (apic->lapic_timer.hv_timer_in_use)
1503                                cancel_hv_timer(apic);
1504                        preempt_enable();
1505                        kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1506                        apic->lapic_timer.period = 0;
1507                        apic->lapic_timer.tscdeadline = 0;
1508                }
1509                apic->lapic_timer.timer_mode = timer_mode;
1510                limit_periodic_timer_frequency(apic);
1511        }
1512}
1513
1514/*
1515 * On APICv, this test will cause a busy wait
1516 * during a higher-priority task.
1517 */
1518
1519static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1520{
1521        struct kvm_lapic *apic = vcpu->arch.apic;
1522        u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1523
1524        if (kvm_apic_hw_enabled(apic)) {
1525                int vec = reg & APIC_VECTOR_MASK;
1526                void *bitmap = apic->regs + APIC_ISR;
1527
1528                if (vcpu->arch.apicv_active)
1529                        bitmap = apic->regs + APIC_IRR;
1530
1531                if (apic_test_vector(vec, bitmap))
1532                        return true;
1533        }
1534        return false;
1535}
1536
1537static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1538{
1539        u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1540
1541        /*
1542         * If the guest TSC is running at a different ratio than the host, then
1543         * convert the delay to nanoseconds to achieve an accurate delay.  Note
1544         * that __delay() uses delay_tsc whenever the hardware has TSC, thus
 1545         * always for VMX-enabled hardware.
1546         */
1547        if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
1548                __delay(min(guest_cycles,
1549                        nsec_to_cycles(vcpu, timer_advance_ns)));
1550        } else {
1551                u64 delay_ns = guest_cycles * 1000000ULL;
1552                do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1553                ndelay(min_t(u32, delay_ns, timer_advance_ns));
1554        }
1555}
1556
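/*
 * Tune timer_advance_ns from the measured error of the previous timer
 * interrupt: a negative @advance_expire_delta means the interrupt was
 * early, a positive one means it was late.  The error (in guest TSC
 * cycles) is converted to nanoseconds and only a
 * 1/LAPIC_TIMER_ADVANCE_ADJUST_STEP fraction is applied per sample to
 * damp jitter; an advance that drifts past LAPIC_TIMER_ADVANCE_NS_MAX is
 * reset to the initial value.
 */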
1557static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1558                                              s64 advance_expire_delta)
1559{
1560        struct kvm_lapic *apic = vcpu->arch.apic;
1561        u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1562        u64 ns;
1563
1564        /* Do not adjust for tiny fluctuations or large random spikes. */
1565        if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
1566            abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
1567                return;
1568
1569        /* too early */
1570        if (advance_expire_delta < 0) {
1571                ns = -advance_expire_delta * 1000000ULL;
1572                do_div(ns, vcpu->arch.virtual_tsc_khz);
1573                timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1574        } else {
1575        /* too late */
1576                ns = advance_expire_delta * 1000000ULL;
1577                do_div(ns, vcpu->arch.virtual_tsc_khz);
1578                timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1579        }
1580
1581        if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1582                timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
1583        apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1584}
1585
1586static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1587{
1588        struct kvm_lapic *apic = vcpu->arch.apic;
1589        u64 guest_tsc, tsc_deadline;
1590
1591        tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1592        apic->lapic_timer.expired_tscdeadline = 0;
1593        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1594        apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
1595
1596        if (guest_tsc < tsc_deadline)
1597                __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1598
1599        if (lapic_timer_advance_dynamic)
1600                adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
1601}
1602
1603void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1604{
1605        if (lapic_in_kernel(vcpu) &&
1606            vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1607            vcpu->arch.apic->lapic_timer.timer_advance_ns &&
1608            lapic_timer_int_injected(vcpu))
1609                __kvm_wait_lapic_expire(vcpu);
1610}
1611EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
1612
1613static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1614{
1615        struct kvm_timer *ktimer = &apic->lapic_timer;
1616
1617        kvm_apic_local_deliver(apic, APIC_LVTT);
1618        if (apic_lvtt_tscdeadline(apic)) {
1619                ktimer->tscdeadline = 0;
1620        } else if (apic_lvtt_oneshot(apic)) {
1621                ktimer->tscdeadline = 0;
1622                ktimer->target_expiration = 0;
1623        }
1624}
1625
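/*
 * Handle expiry of the emulated APIC timer.  If APICv is active and this
 * is called from the owning vCPU (not from the hrtimer callback), the
 * timer interrupt is injected directly.  If posted timer interrupts can
 * be used, the remaining advance window is waited out before injecting.
 * Otherwise the expiry is recorded in lapic_timer.pending and
 * KVM_REQ_PENDING_TIMER is raised, kicking the vCPU when invoked from
 * the hrtimer callback.
 */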
1626static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
1627{
1628        struct kvm_vcpu *vcpu = apic->vcpu;
1629        struct kvm_timer *ktimer = &apic->lapic_timer;
1630
1631        if (atomic_read(&apic->lapic_timer.pending))
1632                return;
1633
1634        if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1635                ktimer->expired_tscdeadline = ktimer->tscdeadline;
1636
1637        if (!from_timer_fn && vcpu->arch.apicv_active) {
1638                WARN_ON(kvm_get_running_vcpu() != vcpu);
1639                kvm_apic_inject_pending_timer_irqs(apic);
1640                return;
1641        }
1642
1643        if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1644                kvm_wait_lapic_expire(vcpu);
1645                kvm_apic_inject_pending_timer_irqs(apic);
1646                return;
1647        }
1648
1649        atomic_inc(&apic->lapic_timer.pending);
1650        kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
1651        if (from_timer_fn)
1652                kvm_vcpu_kick(vcpu);
1653}
1654
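/*
 * Arm the hrtimer for TSC-deadline mode: the remaining guest TSC cycles
 * are converted to nanoseconds (ns = delta * 10^6 / virtual_tsc_khz) and
 * the timer is programmed timer_advance_ns early.  If the deadline has
 * already passed, or falls inside the advance window, the timer expires
 * immediately instead.
 */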
1655static void start_sw_tscdeadline(struct kvm_lapic *apic)
1656{
1657        struct kvm_timer *ktimer = &apic->lapic_timer;
1658        u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
1659        u64 ns = 0;
1660        ktime_t expire;
1661        struct kvm_vcpu *vcpu = apic->vcpu;
1662        unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1663        unsigned long flags;
1664        ktime_t now;
1665
1666        if (unlikely(!tscdeadline || !this_tsc_khz))
1667                return;
1668
1669        local_irq_save(flags);
1670
1671        now = ktime_get();
1672        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1673
1674        ns = (tscdeadline - guest_tsc) * 1000000ULL;
1675        do_div(ns, this_tsc_khz);
1676
1677        if (likely(tscdeadline > guest_tsc) &&
1678            likely(ns > apic->lapic_timer.timer_advance_ns)) {
1679                expire = ktime_add_ns(now, ns);
1680                expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
1681                hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
1682        } else
1683                apic_timer_expired(apic, false);
1684
1685        local_irq_restore(flags);
1686}
1687
1688static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
1689{
1690        return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
1691}
1692
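/*
 * Called when the divide configuration changes while a count-based timer
 * is running: rescale the time remaining until expiration by
 * new_divisor / old_divisor and move both the TSC deadline and the
 * target expiration to match the new rate.
 */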
1693static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1694{
1695        ktime_t now, remaining;
1696        u64 ns_remaining_old, ns_remaining_new;
1697
1698        apic->lapic_timer.period =
1699                        tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1700        limit_periodic_timer_frequency(apic);
1701
1702        now = ktime_get();
1703        remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1704        if (ktime_to_ns(remaining) < 0)
1705                remaining = 0;
1706
1707        ns_remaining_old = ktime_to_ns(remaining);
1708        ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1709                                           apic->divide_count, old_divisor);
1710
1711        apic->lapic_timer.tscdeadline +=
1712                nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1713                nsec_to_cycles(apic->vcpu, ns_remaining_old);
1714        apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
1715}
1716
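/*
 * Program a one-shot or periodic timer.  The period comes from TMICT;
 * when restoring saved state, @count_reg may be APIC_TMCCT so that the
 * first expiration is based on the remaining count rather than the full
 * initial count.  Returns false, with the TSC deadline cleared, if the
 * initial count is zero and the timer is therefore not running.
 */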
1717static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
1718{
1719        ktime_t now;
1720        u64 tscl = rdtsc();
1721        s64 deadline;
1722
1723        now = ktime_get();
1724        apic->lapic_timer.period =
1725                        tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1726
1727        if (!apic->lapic_timer.period) {
1728                apic->lapic_timer.tscdeadline = 0;
1729                return false;
1730        }
1731
1732        limit_periodic_timer_frequency(apic);
1733        deadline = apic->lapic_timer.period;
1734
1735        if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
1736                if (unlikely(count_reg != APIC_TMICT)) {
1737                        deadline = tmict_to_ns(apic,
1738                                     kvm_lapic_get_reg(apic, count_reg));
1739                        if (unlikely(deadline <= 0))
1740                                deadline = apic->lapic_timer.period;
1741                        else if (unlikely(deadline > apic->lapic_timer.period)) {
1742                                pr_info_ratelimited(
1743                                    "kvm: vcpu %i: requested lapic timer restore with "
1744                                    "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
1745                                    "Using initial count to start timer.\n",
1746                                    apic->vcpu->vcpu_id,
1747                                    count_reg,
1748                                    kvm_lapic_get_reg(apic, count_reg),
1749                                    deadline, apic->lapic_timer.period);
1750                                kvm_lapic_set_reg(apic, count_reg, 0);
1751                                deadline = apic->lapic_timer.period;
1752                        }
1753                }
1754        }
1755
1756        apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1757                nsec_to_cycles(apic->vcpu, deadline);
1758        apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
1759
1760        return true;
1761}
1762
1763static void advance_periodic_target_expiration(struct kvm_lapic *apic)
1764{
1765        ktime_t now = ktime_get();
1766        u64 tscl = rdtsc();
1767        ktime_t delta;
1768
1769        /*
1770         * Synchronize both deadlines to the same time source or
1771         * differences in the periods (caused by differences in the
1772         * underlying clocks or numerical approximation errors) will
1773         * cause the two to drift apart over time as the errors
1774         * accumulate.
1775         */
1776        apic->lapic_timer.target_expiration =
1777                ktime_add_ns(apic->lapic_timer.target_expiration,
1778                                apic->lapic_timer.period);
1779        delta = ktime_sub(apic->lapic_timer.target_expiration, now);
1780        apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1781                nsec_to_cycles(apic->vcpu, delta);
1782}
1783
1784static void start_sw_period(struct kvm_lapic *apic)
1785{
1786        if (!apic->lapic_timer.period)
1787                return;
1788
1789        if (ktime_after(ktime_get(),
1790                        apic->lapic_timer.target_expiration)) {
1791                apic_timer_expired(apic, false);
1792
1793                if (apic_lvtt_oneshot(apic))
1794                        return;
1795
1796                advance_periodic_target_expiration(apic);
1797        }
1798
1799        hrtimer_start(&apic->lapic_timer.timer,
1800                apic->lapic_timer.target_expiration,
1801                HRTIMER_MODE_ABS_HARD);
1802}
1803
1804bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1805{
1806        if (!lapic_in_kernel(vcpu))
1807                return false;
1808
1809        return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1810}
1811EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1812
1813static void cancel_hv_timer(struct kvm_lapic *apic)
1814{
1815        WARN_ON(preemptible());
1816        WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1817        kvm_x86_ops.cancel_hv_timer(apic->vcpu);
1818        apic->lapic_timer.hv_timer_in_use = false;
1819}
1820
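/*
 * Try to program the hardware-assisted timer (e.g. the VMX preemption
 * timer) for the current TSC deadline, returning false if it cannot be
 * used so that the caller falls back to the hrtimer-based sw timer.  A
 * non-periodic timer that has already fired is delivered and the hv
 * timer cancelled again.
 */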
1821static bool start_hv_timer(struct kvm_lapic *apic)
1822{
1823        struct kvm_timer *ktimer = &apic->lapic_timer;
1824        struct kvm_vcpu *vcpu = apic->vcpu;
1825        bool expired;
1826
1827        WARN_ON(preemptible());
1828        if (!kvm_can_use_hv_timer(vcpu))
1829                return false;
1830
1831        if (!ktimer->tscdeadline)
1832                return false;
1833
1834        if (kvm_x86_ops.set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
1835                return false;
1836
1837        ktimer->hv_timer_in_use = true;
1838        hrtimer_cancel(&ktimer->timer);
1839
1840        /*
1841         * To simplify handling the periodic timer, leave the hv timer running
1842         * even if the deadline timer has expired, i.e. rely on the resulting
1843         * VM-Exit to recompute the periodic timer's target expiration.
1844         */
1845        if (!apic_lvtt_period(apic)) {
1846                /*
1847                 * Cancel the hv timer if the sw timer fired while the hv timer
1848                 * was being programmed, or if the hv timer itself expired.
1849                 */
1850                if (atomic_read(&ktimer->pending)) {
1851                        cancel_hv_timer(apic);
1852                } else if (expired) {
1853                        apic_timer_expired(apic, false);
1854                        cancel_hv_timer(apic);
1855                }
1856        }
1857
1858        trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
1859
1860        return true;
1861}
1862
1863static void start_sw_timer(struct kvm_lapic *apic)
1864{
1865        struct kvm_timer *ktimer = &apic->lapic_timer;
1866
1867        WARN_ON(preemptible());
1868        if (apic->lapic_timer.hv_timer_in_use)
1869                cancel_hv_timer(apic);
1870        if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1871                return;
1872
1873        if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1874                start_sw_period(apic);
1875        else if (apic_lvtt_tscdeadline(apic))
1876                start_sw_tscdeadline(apic);
1877        trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
1878}
1879
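/*
 * (Re)arm whichever timer backend is available, preferring the
 * hardware-assisted timer and falling back to the software hrtimer.
 * Nothing is done for a non-periodic timer whose expiry is still
 * pending delivery.
 */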
1880static void restart_apic_timer(struct kvm_lapic *apic)
1881{
1882        preempt_disable();
1883
1884        if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
1885                goto out;
1886
1887        if (!start_hv_timer(apic))
1888                start_sw_timer(apic);
1889out:
1890        preempt_enable();
1891}
1892
1893void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1894{
1895        struct kvm_lapic *apic = vcpu->arch.apic;
1896
1897        preempt_disable();
1898        /* If the preempt notifier has already run, it also called apic_timer_expired */
1899        if (!apic->lapic_timer.hv_timer_in_use)
1900                goto out;
1901        WARN_ON(rcuwait_active(&vcpu->wait));
1902        cancel_hv_timer(apic);
1903        apic_timer_expired(apic, false);
1904
1905        if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1906                advance_periodic_target_expiration(apic);
1907                restart_apic_timer(apic);
1908        }
1909out:
1910        preempt_enable();
1911}
1912EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
1913
1914void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
1915{
1916        restart_apic_timer(vcpu->arch.apic);
1917}
1918EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
1919
1920void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
1921{
1922        struct kvm_lapic *apic = vcpu->arch.apic;
1923
1924        preempt_disable();
1925        /* Possibly the TSC deadline timer is not enabled yet */
1926        if (apic->lapic_timer.hv_timer_in_use)
1927                start_sw_timer(apic);
1928        preempt_enable();
1929}
1930EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
1931
1932void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
1933{
1934        struct kvm_lapic *apic = vcpu->arch.apic;
1935
1936        WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1937        restart_apic_timer(apic);
1938}
1939
1940static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
1941{
1942        atomic_set(&apic->lapic_timer.pending, 0);
1943
1944        if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1945            && !set_target_expiration(apic, count_reg))
1946                return;
1947
1948        restart_apic_timer(apic);
1949}
1950
1951static void start_apic_timer(struct kvm_lapic *apic)
1952{
1953        __start_apic_timer(apic, APIC_TMICT);
1954}
1955
1956static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1957{
1958        bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
1959
1960        if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
1961                apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
1962                if (lvt0_in_nmi_mode) {
1963                        atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1964                } else
1965                        atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1966        }
1967}
1968
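/*
 * Emulate a 32-bit write to an APIC register.  Returns 0 on success and
 * a non-zero value for registers that are reserved, read-only or invalid
 * in the current xAPIC/x2APIC mode, so that callers can report the
 * failure (e.g. as a #GP for x2APIC MSR accesses).
 */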
1969int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1970{
1971        int ret = 0;
1972
1973        trace_kvm_apic_write(reg, val);
1974
1975        switch (reg) {
1976        case APIC_ID:           /* Local APIC ID */
1977                if (!apic_x2apic_mode(apic))
1978                        kvm_apic_set_xapic_id(apic, val >> 24);
1979                else
1980                        ret = 1;
1981                break;
1982
1983        case APIC_TASKPRI:
1984                report_tpr_access(apic, true);
1985                apic_set_tpr(apic, val & 0xff);
1986                break;
1987
1988        case APIC_EOI:
1989                apic_set_eoi(apic);
1990                break;
1991
1992        case APIC_LDR:
1993                if (!apic_x2apic_mode(apic))
1994                        kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
1995                else
1996                        ret = 1;
1997                break;
1998
1999        case APIC_DFR:
2000                if (!apic_x2apic_mode(apic))
2001                        kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
2002                else
2003                        ret = 1;
2004                break;
2005
2006        case APIC_SPIV: {
2007                u32 mask = 0x3ff;
2008                if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
2009                        mask |= APIC_SPIV_DIRECTED_EOI;
2010                apic_set_spiv(apic, val & mask);
2011                if (!(val & APIC_SPIV_APIC_ENABLED)) {
2012                        int i;
2013                        u32 lvt_val;
2014
2015                        for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
2016                                lvt_val = kvm_lapic_get_reg(apic,
2017                                                       APIC_LVTT + 0x10 * i);
2018                                kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
2019                                             lvt_val | APIC_LVT_MASKED);
2020                        }
2021                        apic_update_lvtt(apic);
2022                        atomic_set(&apic->lapic_timer.pending, 0);
2023
2024                }
2025                break;
2026        }
2027        case APIC_ICR:
2028                /* No delay here, so we always clear the pending bit */
2029                val &= ~(1 << 12);
2030                kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
2031                kvm_lapic_set_reg(apic, APIC_ICR, val);
2032                break;
2033
2034        case APIC_ICR2:
2035                if (!apic_x2apic_mode(apic))
2036                        val &= 0xff000000;
2037                kvm_lapic_set_reg(apic, APIC_ICR2, val);
2038                break;
2039
2040        case APIC_LVT0:
2041                apic_manage_nmi_watchdog(apic, val);
2042                fallthrough;
2043        case APIC_LVTTHMR:
2044        case APIC_LVTPC:
2045        case APIC_LVT1:
2046        case APIC_LVTERR: {
2047                /* TODO: Check vector */
2048                size_t size;
2049                u32 index;
2050
2051                if (!kvm_apic_sw_enabled(apic))
2052                        val |= APIC_LVT_MASKED;
2053                size = ARRAY_SIZE(apic_lvt_mask);
2054                index = array_index_nospec(
2055                                (reg - APIC_LVTT) >> 4, size);
2056                val &= apic_lvt_mask[index];
2057                kvm_lapic_set_reg(apic, reg, val);
2058                break;
2059        }
2060
2061        case APIC_LVTT:
2062                if (!kvm_apic_sw_enabled(apic))
2063                        val |= APIC_LVT_MASKED;
2064                val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
2065                kvm_lapic_set_reg(apic, APIC_LVTT, val);
2066                apic_update_lvtt(apic);
2067                break;
2068
2069        case APIC_TMICT:
2070                if (apic_lvtt_tscdeadline(apic))
2071                        break;
2072
2073                hrtimer_cancel(&apic->lapic_timer.timer);
2074                kvm_lapic_set_reg(apic, APIC_TMICT, val);
2075                start_apic_timer(apic);
2076                break;
2077
2078        case APIC_TDCR: {
2079                uint32_t old_divisor = apic->divide_count;
2080
2081                kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
2082                update_divide_count(apic);
2083                if (apic->divide_count != old_divisor &&
2084                                apic->lapic_timer.period) {
2085                        hrtimer_cancel(&apic->lapic_timer.timer);
2086                        update_target_expiration(apic, old_divisor);
2087                        restart_apic_timer(apic);
2088                }
2089                break;
2090        }
2091        case APIC_ESR:
2092                if (apic_x2apic_mode(apic) && val != 0)
2093                        ret = 1;
2094                break;
2095
2096        case APIC_SELF_IPI:
2097                if (apic_x2apic_mode(apic)) {
2098                        kvm_lapic_reg_write(apic, APIC_ICR,
2099                                            APIC_DEST_SELF | (val & APIC_VECTOR_MASK));
2100                } else
2101                        ret = 1;
2102                break;
2103        default:
2104                ret = 1;
2105                break;
2106        }
2107
2108        kvm_recalculate_apic_map(apic->vcpu->kvm);
2109
2110        return ret;
2111}
2112EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
2113
2114static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
2115                            gpa_t address, int len, const void *data)
2116{
2117        struct kvm_lapic *apic = to_lapic(this);
2118        unsigned int offset = address - apic->base_address;
2119        u32 val;
2120
2121        if (!apic_mmio_in_range(apic, address))
2122                return -EOPNOTSUPP;
2123
2124        if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2125                if (!kvm_check_has_quirk(vcpu->kvm,
2126                                         KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2127                        return -EOPNOTSUPP;
2128
2129                return 0;
2130        }
2131
2132        /*
2133         * APIC registers must be aligned on a 128-bit boundary.
2134         * 32/64/128-bit registers must be accessed through 32-bit accesses.
2135         * Refer to SDM 8.4.1.
2136         */
2137        if (len != 4 || (offset & 0xf))
2138                return 0;
2139
2140        val = *(u32*)data;
2141
2142        kvm_lapic_reg_write(apic, offset & 0xff0, val);
2143
2144        return 0;
2145}
2146
2147void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2148{
2149        kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
2150}
2151EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2152
2153/* emulate APIC access in a trap manner */
2154void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2155{
2156        u32 val = 0;
2157
2158        /* hardware has already done the conditional check and instruction decode */
2159        offset &= 0xff0;
2160
2161        kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
2162
2163        /* TODO: optimize to just emulate side effect w/o one more write */
2164        kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
2165}
2166EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
2167
2168void kvm_free_lapic(struct kvm_vcpu *vcpu)
2169{
2170        struct kvm_lapic *apic = vcpu->arch.apic;
2171
2172        if (!vcpu->arch.apic)
2173                return;
2174
2175        hrtimer_cancel(&apic->lapic_timer.timer);
2176
2177        if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2178                static_key_slow_dec_deferred(&apic_hw_disabled);
2179
2180        if (!apic->sw_enabled)
2181                static_key_slow_dec_deferred(&apic_sw_disabled);
2182
2183        if (apic->regs)
2184                free_page((unsigned long)apic->regs);
2185
2186        kfree(apic);
2187}
2188
2189/*
2190 *----------------------------------------------------------------------
2191 * LAPIC interface
2192 *----------------------------------------------------------------------
2193 */
2194u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2195{
2196        struct kvm_lapic *apic = vcpu->arch.apic;
2197
2198        if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2199                return 0;
2200
2201        return apic->lapic_timer.tscdeadline;
2202}
2203
2204void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2205{
2206        struct kvm_lapic *apic = vcpu->arch.apic;
2207
2208        if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2209                return;
2210
2211        hrtimer_cancel(&apic->lapic_timer.timer);
2212        apic->lapic_timer.tscdeadline = data;
2213        start_apic_timer(apic);
2214}
2215
2216void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2217{
2218        struct kvm_lapic *apic = vcpu->arch.apic;
2219
2220        apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
2221                     | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
2222}
2223
2224u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2225{
2226        u64 tpr;
2227
2228        tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2229
2230        return (tpr & 0xf0) >> 4;
2231}
2232
2233void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2234{
2235        u64 old_value = vcpu->arch.apic_base;
2236        struct kvm_lapic *apic = vcpu->arch.apic;
2237
2238        if (!apic)
2239                value |= MSR_IA32_APICBASE_BSP;
2240
2241        vcpu->arch.apic_base = value;
2242
2243        if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2244                kvm_update_cpuid_runtime(vcpu);
2245
2246        if (!apic)
2247                return;
2248
2249        /* update jump label if enable bit changes */
2250        if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2251                if (value & MSR_IA32_APICBASE_ENABLE) {
2252                        kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2253                        static_key_slow_dec_deferred(&apic_hw_disabled);
2254                } else {
2255                        static_key_slow_inc(&apic_hw_disabled.key);
2256                        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2257                }
2258        }
2259
2260        if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
2261                kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2262
2263        if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
2264                kvm_x86_ops.set_virtual_apic_mode(vcpu);
2265
2266        apic->base_address = apic->vcpu->arch.apic_base &
2267                             MSR_IA32_APICBASE_BASE;
2268
2269        if ((value & MSR_IA32_APICBASE_ENABLE) &&
2270             apic->base_address != APIC_DEFAULT_PHYS_BASE)
2271                pr_warn_once("APIC base relocation is unsupported by KVM");
2272}
2273
2274void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
2275{
2276        struct kvm_lapic *apic = vcpu->arch.apic;
2277
2278        if (vcpu->arch.apicv_active) {
2279                /* irr_pending is always true when apicv is activated. */
2280                apic->irr_pending = true;
2281                apic->isr_count = 1;
2282        } else {
2283                apic->irr_pending = (apic_search_irr(apic) != -1);
2284                apic->isr_count = count_vectors(apic->regs + APIC_ISR);
2285        }
2286}
2287EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
2288
2289void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2290{
2291        struct kvm_lapic *apic = vcpu->arch.apic;
2292        int i;
2293
2294        if (!apic)
2295                return;
2296
2297        /* Stop the timer in case it's a reset to an active apic */
2298        hrtimer_cancel(&apic->lapic_timer.timer);
2299
2300        if (!init_event) {
2301                kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
2302                                         MSR_IA32_APICBASE_ENABLE);
2303                kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2304        }
2305        kvm_apic_set_version(apic->vcpu);
2306
2307        for (i = 0; i < KVM_APIC_LVT_NUM; i++)
2308                kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
2309        apic_update_lvtt(apic);
2310        if (kvm_vcpu_is_reset_bsp(vcpu) &&
2311            kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2312                kvm_lapic_set_reg(apic, APIC_LVT0,
2313                             SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2314        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2315
2316        kvm_apic_set_dfr(apic, 0xffffffffU);
2317        apic_set_spiv(apic, 0xff);
2318        kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2319        if (!apic_x2apic_mode(apic))
2320                kvm_apic_set_ldr(apic, 0);
2321        kvm_lapic_set_reg(apic, APIC_ESR, 0);
2322        kvm_lapic_set_reg(apic, APIC_ICR, 0);
2323        kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2324        kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2325        kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2326        for (i = 0; i < 8; i++) {
2327                kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2328                kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2329                kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2330        }
2331        kvm_apic_update_apicv(vcpu);
2332        apic->highest_isr_cache = -1;
2333        update_divide_count(apic);
2334        atomic_set(&apic->lapic_timer.pending, 0);
2335        if (kvm_vcpu_is_bsp(vcpu))
2336                kvm_lapic_set_base(vcpu,
2337                                vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
2338        vcpu->arch.pv_eoi.msr_val = 0;
2339        apic_update_ppr(apic);
2340        if (vcpu->arch.apicv_active) {
2341                kvm_x86_ops.apicv_post_state_restore(vcpu);
2342                kvm_x86_ops.hwapic_irr_update(vcpu, -1);
2343                kvm_x86_ops.hwapic_isr_update(vcpu, -1);
2344        }
2345
2346        vcpu->arch.apic_arb_prio = 0;
2347        vcpu->arch.apic_attention = 0;
2348
2349        kvm_recalculate_apic_map(vcpu->kvm);
2350}
2351
2352/*
2353 *----------------------------------------------------------------------
2354 * timer interface
2355 *----------------------------------------------------------------------
2356 */
2357
2358static bool lapic_is_periodic(struct kvm_lapic *apic)
2359{
2360        return apic_lvtt_period(apic);
2361}
2362
2363int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2364{
2365        struct kvm_lapic *apic = vcpu->arch.apic;
2366
2367        if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2368                return atomic_read(&apic->lapic_timer.pending);
2369
2370        return 0;
2371}
2372
2373int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2374{
2375        u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2376        int vector, mode, trig_mode;
2377
2378        if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2379                vector = reg & APIC_VECTOR_MASK;
2380                mode = reg & APIC_MODE_MASK;
2381                trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2382                return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
2383                                        NULL);
2384        }
2385        return 0;
2386}
2387
2388void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2389{
2390        struct kvm_lapic *apic = vcpu->arch.apic;
2391
2392        if (apic)
2393                kvm_apic_local_deliver(apic, APIC_LVT0);
2394}
2395
2396static const struct kvm_io_device_ops apic_mmio_ops = {
2397        .read     = apic_mmio_read,
2398        .write    = apic_mmio_write,
2399};
2400
2401static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2402{
2403        struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2404        struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2405
2406        apic_timer_expired(apic, true);
2407
2408        if (lapic_is_periodic(apic)) {
2409                advance_periodic_target_expiration(apic);
2410                hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2411                return HRTIMER_RESTART;
2412        } else
2413                return HRTIMER_NORESTART;
2414}
2415
2416int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
2417{
2418        struct kvm_lapic *apic;
2419
2420        ASSERT(vcpu != NULL);
2421
2422        apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2423        if (!apic)
2424                goto nomem;
2425
2426        vcpu->arch.apic = apic;
2427
2428        apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
2429        if (!apic->regs) {
2430                printk(KERN_ERR "kvm: failed to allocate APIC register page for vcpu %x\n",
2431                       vcpu->vcpu_id);
2432                goto nomem_free_apic;
2433        }
2434        apic->vcpu = vcpu;
2435
2436        hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2437                     HRTIMER_MODE_ABS_HARD);
2438        apic->lapic_timer.timer.function = apic_timer_fn;
2439        if (timer_advance_ns == -1) {
2440                apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
2441                lapic_timer_advance_dynamic = true;
2442        } else {
2443                apic->lapic_timer.timer_advance_ns = timer_advance_ns;
2444                lapic_timer_advance_dynamic = false;
2445        }
2446
2447        /*
2448         * APIC is created enabled. This will prevent kvm_lapic_set_base from
2449         * thinking that APIC state has changed.
2450         */
2451        vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2452        static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2453        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2454
2455        return 0;
2456nomem_free_apic:
2457        kfree(apic);
2458        vcpu->arch.apic = NULL;
2459nomem:
2460        return -ENOMEM;
2461}
2462
2463int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2464{
2465        struct kvm_lapic *apic = vcpu->arch.apic;
2466        u32 ppr;
2467
2468        if (!kvm_apic_present(vcpu))
2469                return -1;
2470
2471        __apic_update_ppr(apic, &ppr);
2472        return apic_has_interrupt_for_ppr(apic, ppr);
2473}
2474EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
2475
2476int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2477{
2478        u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2479
2480        if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2481                return 1;
2482        if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2483            GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2484                return 1;
2485        return 0;
2486}
2487
2488void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2489{
2490        struct kvm_lapic *apic = vcpu->arch.apic;
2491
2492        if (atomic_read(&apic->lapic_timer.pending) > 0) {
2493                kvm_apic_inject_pending_timer_irqs(apic);
2494                atomic_set(&apic->lapic_timer.pending, 0);
2495        }
2496}
2497
2498int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2499{
2500        int vector = kvm_apic_has_interrupt(vcpu);
2501        struct kvm_lapic *apic = vcpu->arch.apic;
2502        u32 ppr;
2503
2504        if (vector == -1)
2505                return -1;
2506
2507        /*
2508         * We get here even with APIC virtualization enabled, if doing
2509         * nested virtualization and L1 runs with the "acknowledge interrupt
2510         * on exit" mode.  Then we cannot inject the interrupt via RVI,
2511         * because the process would deliver it through the IDT.
2512         */
2513
2514        apic_clear_irr(vector, apic);
2515        if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
2516                /*
2517                 * For auto-EOI interrupts, there might be another pending
2518                 * interrupt above PPR, so check whether to raise another
2519                 * KVM_REQ_EVENT.
2520                 */
2521                apic_update_ppr(apic);
2522        } else {
2523                /*
2524                 * For normal interrupts, PPR has been raised and there cannot
2525                 * be a higher-priority pending interrupt---except if there was
2526                 * a concurrent interrupt injection, but that would have
2527                 * triggered KVM_REQ_EVENT already.
2528                 */
2529                apic_set_isr(vector, apic);
2530                __apic_update_ppr(apic, &ppr);
2531        }
2532
2533        return vector;
2534}
2535
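/*
 * Convert the APIC ID in a saved register set between the xAPIC layout
 * (ID in bits 31:24) and the x2APIC layout (full 32-bit ID) when the VM
 * does not use the x2APIC format, and recompute the LDR from the ID when
 * state is being set.  With the x2APIC format enabled, the ID must match
 * the vCPU ID.
 */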
2536static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2537                struct kvm_lapic_state *s, bool set)
2538{
2539        if (apic_x2apic_mode(vcpu->arch.apic)) {
2540                u32 *id = (u32 *)(s->regs + APIC_ID);
2541                u32 *ldr = (u32 *)(s->regs + APIC_LDR);
2542
2543                if (vcpu->kvm->arch.x2apic_format) {
2544                        if (*id != vcpu->vcpu_id)
2545                                return -EINVAL;
2546                } else {
2547                        if (set)
2548                                *id >>= 24;
2549                        else
2550                                *id <<= 24;
2551                }
2552
2553                /* In x2APIC mode, the LDR is fixed and based on the id */
2554                if (set)
2555                        *ldr = kvm_apic_calc_x2apic_ldr(*id);
2556        }
2557
2558        return 0;
2559}
2560
2561int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2562{
2563        memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2564
2565        /*
2566         * Get the calculated current timer count for the remaining timer
2567         * period (if any) and store it in the returned register set.
2568         */
2569        __kvm_lapic_set_reg(s->regs, APIC_TMCCT,
2570                            __apic_read(vcpu->arch.apic, APIC_TMCCT));
2571
2572        return kvm_apic_state_fixup(vcpu, s, false);
2573}
2574
2575int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2576{
2577        struct kvm_lapic *apic = vcpu->arch.apic;
2578        int r;
2579
2580        kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2581        /* set SPIV separately to get count of SW disabled APICs right */
2582        apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2583
2584        r = kvm_apic_state_fixup(vcpu, s, true);
2585        if (r) {
2586                kvm_recalculate_apic_map(vcpu->kvm);
2587                return r;
2588        }
2589        memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
2590
2591        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2592        kvm_recalculate_apic_map(vcpu->kvm);
2593        kvm_apic_set_version(vcpu);
2594
2595        apic_update_ppr(apic);
2596        hrtimer_cancel(&apic->lapic_timer.timer);
2597        apic_update_lvtt(apic);
2598        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2599        update_divide_count(apic);
2600        __start_apic_timer(apic, APIC_TMCCT);
2601        kvm_apic_update_apicv(vcpu);
2602        apic->highest_isr_cache = -1;
2603        if (vcpu->arch.apicv_active) {
2604                kvm_x86_ops.apicv_post_state_restore(vcpu);
2605                kvm_x86_ops.hwapic_irr_update(vcpu,
2606                                apic_find_highest_irr(apic));
2607                kvm_x86_ops.hwapic_isr_update(vcpu,
2608                                apic_find_highest_isr(apic));
2609        }
2610        kvm_make_request(KVM_REQ_EVENT, vcpu);
2611        if (ioapic_in_kernel(vcpu->kvm))
2612                kvm_rtc_eoi_tracking_restore_one(vcpu);
2613
2614        vcpu->arch.apic_arb_prio = 0;
2615
2616        return 0;
2617}
2618
2619void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2620{
2621        struct hrtimer *timer;
2622
2623        if (!lapic_in_kernel(vcpu) ||
2624                kvm_can_post_timer_interrupt(vcpu))
2625                return;
2626
2627        timer = &vcpu->arch.apic->lapic_timer.timer;
2628        if (hrtimer_cancel(timer))
2629                hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
2630}
2631
2632/*
2633 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2634 *
2635 * Detect whether the guest triggered PV EOI since the
2636 * last entry. If yes, set EOI on the guest's behalf.
2637 * Clear PV EOI in guest memory in any case.
2638 */
2639static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2640                                        struct kvm_lapic *apic)
2641{
2642        bool pending;
2643        int vector;
2644        /*
2645         * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2646         * and KVM_PV_EOI_ENABLED in guest memory as follows:
2647         *
2648         * KVM_APIC_PV_EOI_PENDING is unset:
2649         *      -> host disabled PV EOI.
2650         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2651         *      -> host enabled PV EOI, guest did not execute EOI yet.
2652         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2653         *      -> host enabled PV EOI, guest executed EOI.
2654         */
2655        BUG_ON(!pv_eoi_enabled(vcpu));
2656        pending = pv_eoi_get_pending(vcpu);
2657        /*
2658         * Clear pending bit in any case: it will be set again on vmentry.
2659         * While this might not be ideal from performance point of view,
2660         * this makes sure pv eoi is only enabled when we know it's safe.
2661         */
2662        pv_eoi_clr_pending(vcpu);
2663        if (pending)
2664                return;
2665        vector = apic_set_eoi(apic);
2666        trace_kvm_pv_eoi(apic, vector);
2667}
2668
2669void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2670{
2671        u32 data;
2672
2673        if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2674                apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2675
2676        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2677                return;
2678
2679        if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2680                                  sizeof(u32)))
2681                return;
2682
2683        apic_set_tpr(vcpu->arch.apic, data & 0xff);
2684}
2685
2686/*
2687 * apic_sync_pv_eoi_to_guest - called before vmentry
2688 *
2689 * Detect whether it's safe to enable PV EOI and
2690 * if yes do so.
2691 */
2692static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2693                                        struct kvm_lapic *apic)
2694{
2695        if (!pv_eoi_enabled(vcpu) ||
2696            /* IRR set or many bits in ISR: could be nested. */
2697            apic->irr_pending ||
2698            /* Cache not set: could be safe but we don't bother. */
2699            apic->highest_isr_cache == -1 ||
2700            /* Need EOI to update ioapic. */
2701            kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2702                /*
2703                 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2704                 * so we need not do anything here.
2705                 */
2706                return;
2707        }
2708
2709        pv_eoi_set_pending(apic->vcpu);
2710}
2711
2712void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2713{
2714        u32 data, tpr;
2715        int max_irr, max_isr;
2716        struct kvm_lapic *apic = vcpu->arch.apic;
2717
2718        apic_sync_pv_eoi_to_guest(vcpu, apic);
2719
2720        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2721                return;
2722
2723        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2724        max_irr = apic_find_highest_irr(apic);
2725        if (max_irr < 0)
2726                max_irr = 0;
2727        max_isr = apic_find_highest_isr(apic);
2728        if (max_isr < 0)
2729                max_isr = 0;
2730        data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2731
2732        kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2733                                sizeof(u32));
2734}
2735
2736int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2737{
2738        if (vapic_addr) {
2739                if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2740                                        &vcpu->arch.apic->vapic_cache,
2741                                        vapic_addr, sizeof(u32)))
2742                        return -EINVAL;
2743                __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2744        } else {
2745                __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2746        }
2747
2748        vcpu->arch.apic->vapic_addr = vapic_addr;
2749        return 0;
2750}
2751
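/*
 * Handle a write to an x2APIC MSR: the MSR index maps onto the MMIO
 * register offset, one register every 16 bytes.  ICR is a single 64-bit
 * MSR, so its high half is stored via APIC_ICR2 before the low half is
 * written as the command; a direct write to the ICR2 offset is rejected.
 */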
2752int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2753{
2754        struct kvm_lapic *apic = vcpu->arch.apic;
2755        u32 reg = (msr - APIC_BASE_MSR) << 4;
2756
2757        if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2758                return 1;
2759
2760        if (reg == APIC_ICR2)
2761                return 1;
2762
2763        /* if this is an ICR write, set the destination (ICR2) before the command */
2764        if (reg == APIC_ICR)
2765                kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2766        return kvm_lapic_reg_write(apic, reg, (u32)data);
2767}
2768
2769int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2770{
2771        struct kvm_lapic *apic = vcpu->arch.apic;
2772        u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
2773
2774        if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2775                return 1;
2776
2777        if (reg == APIC_DFR || reg == APIC_ICR2)
2778                return 1;
2779
2780        if (kvm_lapic_reg_read(apic, reg, 4, &low))
2781                return 1;
2782        if (reg == APIC_ICR)
2783                kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2784
2785        *data = (((u64)high) << 32) | low;
2786
2787        return 0;
2788}
2789
2790int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2791{
2792        struct kvm_lapic *apic = vcpu->arch.apic;
2793
2794        if (!lapic_in_kernel(vcpu))
2795                return 1;
2796
2797        /* if this is an ICR write, set the destination (ICR2) before the command */
2798        if (reg == APIC_ICR)
2799                kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2800        return kvm_lapic_reg_write(apic, reg, (u32)data);
2801}
2802
2803int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2804{
2805        struct kvm_lapic *apic = vcpu->arch.apic;
2806        u32 low, high = 0;
2807
2808        if (!lapic_in_kernel(vcpu))
2809                return 1;
2810
2811        if (kvm_lapic_reg_read(apic, reg, 4, &low))
2812                return 1;
2813        if (reg == APIC_ICR)
2814                kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2815
2816        *data = (((u64)high) << 32) | low;
2817
2818        return 0;
2819}
2820
2821int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
2822{
2823        u64 addr = data & ~KVM_MSR_ENABLED;
2824        struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
2825        unsigned long new_len;
2826
2827        if (!IS_ALIGNED(addr, 4))
2828                return 1;
2829
2830        vcpu->arch.pv_eoi.msr_val = data;
2831        if (!pv_eoi_enabled(vcpu))
2832                return 0;
2833
2834        if (addr == ghc->gpa && len <= ghc->len)
2835                new_len = ghc->len;
2836        else
2837                new_len = len;
2838
2839        return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
2840}
2841
2842void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2843{
2844        struct kvm_lapic *apic = vcpu->arch.apic;
2845        u8 sipi_vector;
2846        int r;
2847        unsigned long pe;
2848
2849        if (!lapic_in_kernel(vcpu))
2850                return;
2851
2852        /*
2853         * Read pending events before calling the check_events
2854         * callback.
2855         */
2856        pe = smp_load_acquire(&apic->pending_events);
2857        if (!pe)
2858                return;
2859
2860        if (is_guest_mode(vcpu)) {
2861                r = kvm_x86_ops.nested_ops->check_events(vcpu);
2862                if (r < 0)
2863                        return;
2864                /*
2865                 * If an event has happened and caused a vmexit,
2866                 * we know INITs are latched and therefore
2867                 * we will not incorrectly deliver an APIC
2868                 * event instead of a vmexit.
2869                 */
2870        }
2871
2872        /*
2873         * INITs are latched while the CPU is in specific states
2874         * (SMM, VMX root mode, SVM with GIF=0).
2875         * Because a CPU cannot be in these states immediately
2876         * after it has processed an INIT signal (and thus in
2877         * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
2878         * and leave the INIT pending.
2879         */
2880        if (kvm_vcpu_latch_init(vcpu)) {
2881                WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
2882                if (test_bit(KVM_APIC_SIPI, &pe))
2883                        clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2884                return;
2885        }
2886
2887        if (test_bit(KVM_APIC_INIT, &pe)) {
2888                clear_bit(KVM_APIC_INIT, &apic->pending_events);
2889                kvm_vcpu_reset(vcpu, true);
2890                if (kvm_vcpu_is_bsp(apic->vcpu))
2891                        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2892                else
2893                        vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
2894        }
2895        if (test_bit(KVM_APIC_SIPI, &pe)) {
2896                clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2897                if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
2898                        /* evaluate pending_events before reading the vector */
2899                        smp_rmb();
2900                        sipi_vector = apic->sipi_vector;
2901                        kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, sipi_vector);
2902                        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2903                }
2904        }
2905}
2906
2907void kvm_lapic_init(void)
2908{
2909        /* do not patch jump label more than once per second */
2910        jump_label_rate_limit(&apic_hw_disabled, HZ);
2911        jump_label_rate_limit(&apic_sw_disabled, HZ);
2912}
2913
2914void kvm_lapic_exit(void)
2915{
2916        static_key_deferred_flush(&apic_hw_disabled);
2917        static_key_deferred_flush(&apic_sw_disabled);
2918}
2919