linux/arch/x86/kvm/lapic.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2
   3/*
   4 * Local APIC virtualization
   5 *
   6 * Copyright (C) 2006 Qumranet, Inc.
   7 * Copyright (C) 2007 Novell
   8 * Copyright (C) 2007 Intel
   9 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
  10 *
  11 * Authors:
  12 *   Dor Laor <dor.laor@qumranet.com>
  13 *   Gregory Haskins <ghaskins@novell.com>
  14 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
  15 *
  16 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
  17 */
  18
  19#include <linux/kvm_host.h>
  20#include <linux/kvm.h>
  21#include <linux/mm.h>
  22#include <linux/highmem.h>
  23#include <linux/smp.h>
  24#include <linux/hrtimer.h>
  25#include <linux/io.h>
  26#include <linux/export.h>
  27#include <linux/math64.h>
  28#include <linux/slab.h>
  29#include <asm/processor.h>
  30#include <asm/msr.h>
  31#include <asm/page.h>
  32#include <asm/current.h>
  33#include <asm/apicdef.h>
  34#include <asm/delay.h>
  35#include <linux/atomic.h>
  36#include <linux/jump_label.h>
  37#include "kvm_cache_regs.h"
  38#include "irq.h"
  39#include "ioapic.h"
  40#include "trace.h"
  41#include "x86.h"
  42#include "cpuid.h"
  43#include "hyperv.h"
  44
/*
 * mod_64(x, y): remainder of x divided by y.  Builds without CONFIG_X86_64
 * derive it from div64_u64() instead of using the '%' operator directly.
 */
#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif
  50
  51#define PRId64 "d"
  52#define PRIx64 "llx"
  53#define PRIu64 "u"
  54#define PRIo64 "o"
  55
/* 14 is the version for Xeon and Pentium 8.4.8 */
/* Low bits hold 0x14; (KVM_APIC_LVT_NUM - 1) is placed from bit 16 upwards. */
#define APIC_VERSION                    (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
/* 1 << 12 == 4096 */
#define LAPIC_MMIO_LENGTH               (1 << 12)
/* the following define is not in apicdef.h */
#define MAX_APIC_VECTOR                 256
/* Each 32-bit register word covers this many vectors (256 / 32 == 8 words). */
#define APIC_VECTORS_PER_REG            32
  62
/*
 * Tunables for the LAPIC timer-advance logic.
 * NOTE(review): their consumers are outside the visible part of the file;
 * the units below are the ones stated by the names and inline comments.
 */
static bool lapic_timer_advance_dynamic __read_mostly;
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN  100     /* clock cycles */
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX  10000   /* clock cycles */
#define LAPIC_TIMER_ADVANCE_NS_INIT     1000
#define LAPIC_TIMER_ADVANCE_NS_MAX     5000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
  70
  71static inline int apic_test_vector(int vec, void *bitmap)
  72{
  73        return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  74}
  75
  76bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
  77{
  78        struct kvm_lapic *apic = vcpu->arch.apic;
  79
  80        return apic_test_vector(vector, apic->regs + APIC_ISR) ||
  81                apic_test_vector(vector, apic->regs + APIC_IRR);
  82}
  83
  84static inline int __apic_test_and_set_vector(int vec, void *bitmap)
  85{
  86        return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  87}
  88
  89static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
  90{
  91        return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  92}
  93
/*
 * Deferred static keys, both initially false, declared with a timeout
 * argument of HZ.  apic_sw_disabled is incremented when an APIC becomes
 * software-disabled and decremented (deferred) when it is re-enabled, see
 * apic_set_spiv().
 * NOTE(review): apic_hw_disabled is presumably the hardware-enable
 * counterpart; its users are outside the visible part of the file.
 */
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
  96
  97static inline int apic_enabled(struct kvm_lapic *apic)
  98{
  99        return kvm_apic_sw_enabled(apic) &&     kvm_apic_hw_enabled(apic);
 100}
 101
/* Bits common to the LVT entries: mask bit, send-pending bit, vector field. */
#define LVT_MASK        \
        (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

/*
 * LVT_MASK plus the delivery-mode, input-polarity, remote-IRR and
 * level-trigger bits; used for the LVT0/LVT1 entries of apic_lvt_mask[].
 */
#define LINT_MASK       \
        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 108
 109static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
 110{
 111        return apic->vcpu->vcpu_id;
 112}
 113
 114static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
 115{
 116        return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
 117}
 118
 119bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
 120{
 121        return kvm_x86_ops.set_hv_timer
 122               && !(kvm_mwait_in_guest(vcpu->kvm) ||
 123                    kvm_can_post_timer_interrupt(vcpu));
 124}
 125EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);
 126
 127static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
 128{
 129        return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
 130}
 131
 132static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
 133                u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
 134        switch (map->mode) {
 135        case KVM_APIC_MODE_X2APIC: {
 136                u32 offset = (dest_id >> 16) * 16;
 137                u32 max_apic_id = map->max_apic_id;
 138
 139                if (offset <= max_apic_id) {
 140                        u8 cluster_size = min(max_apic_id - offset + 1, 16U);
 141
 142                        offset = array_index_nospec(offset, map->max_apic_id + 1);
 143                        *cluster = &map->phys_map[offset];
 144                        *mask = dest_id & (0xffff >> (16 - cluster_size));
 145                } else {
 146                        *mask = 0;
 147                }
 148
 149                return true;
 150                }
 151        case KVM_APIC_MODE_XAPIC_FLAT:
 152                *cluster = map->xapic_flat_map;
 153                *mask = dest_id & 0xff;
 154                return true;
 155        case KVM_APIC_MODE_XAPIC_CLUSTER:
 156                *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
 157                *mask = dest_id & 0xf;
 158                return true;
 159        default:
 160                /* Not optimized. */
 161                return false;
 162        }
 163}
 164
 165static void kvm_apic_map_free(struct rcu_head *rcu)
 166{
 167        struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
 168
 169        kvfree(map);
 170}
 171
/*
 * States of kvm->arch.apic_map_dirty.
 *
 * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
 *
 * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
 * apic_map_lock held.
 */
enum {
        CLEAN,
        UPDATE_IN_PROGRESS,
        DIRTY
};
 183
 184void kvm_recalculate_apic_map(struct kvm *kvm)
 185{
 186        struct kvm_apic_map *new, *old = NULL;
 187        struct kvm_vcpu *vcpu;
 188        int i;
 189        u32 max_id = 255; /* enough space for any xAPIC ID */
 190
 191        /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
 192        if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
 193                return;
 194
 195        WARN_ONCE(!irqchip_in_kernel(kvm),
 196                  "Dirty APIC map without an in-kernel local APIC");
 197
 198        mutex_lock(&kvm->arch.apic_map_lock);
 199        /*
 200         * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
 201         * (if clean) or the APIC registers (if dirty).
 202         */
 203        if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
 204                                   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
 205                /* Someone else has updated the map. */
 206                mutex_unlock(&kvm->arch.apic_map_lock);
 207                return;
 208        }
 209
 210        kvm_for_each_vcpu(i, vcpu, kvm)
 211                if (kvm_apic_present(vcpu))
 212                        max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
 213
 214        new = kvzalloc(sizeof(struct kvm_apic_map) +
 215                           sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
 216                           GFP_KERNEL_ACCOUNT);
 217
 218        if (!new)
 219                goto out;
 220
 221        new->max_apic_id = max_id;
 222
 223        kvm_for_each_vcpu(i, vcpu, kvm) {
 224                struct kvm_lapic *apic = vcpu->arch.apic;
 225                struct kvm_lapic **cluster;
 226                u16 mask;
 227                u32 ldr;
 228                u8 xapic_id;
 229                u32 x2apic_id;
 230
 231                if (!kvm_apic_present(vcpu))
 232                        continue;
 233
 234                xapic_id = kvm_xapic_id(apic);
 235                x2apic_id = kvm_x2apic_id(apic);
 236
 237                /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
 238                if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
 239                                x2apic_id <= new->max_apic_id)
 240                        new->phys_map[x2apic_id] = apic;
 241                /*
 242                 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
 243                 * prevent them from masking VCPUs with APIC ID <= 0xff.
 244                 */
 245                if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
 246                        new->phys_map[xapic_id] = apic;
 247
 248                if (!kvm_apic_sw_enabled(apic))
 249                        continue;
 250
 251                ldr = kvm_lapic_get_reg(apic, APIC_LDR);
 252
 253                if (apic_x2apic_mode(apic)) {
 254                        new->mode |= KVM_APIC_MODE_X2APIC;
 255                } else if (ldr) {
 256                        ldr = GET_APIC_LOGICAL_ID(ldr);
 257                        if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
 258                                new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
 259                        else
 260                                new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
 261                }
 262
 263                if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
 264                        continue;
 265
 266                if (mask)
 267                        cluster[ffs(mask) - 1] = apic;
 268        }
 269out:
 270        old = rcu_dereference_protected(kvm->arch.apic_map,
 271                        lockdep_is_held(&kvm->arch.apic_map_lock));
 272        rcu_assign_pointer(kvm->arch.apic_map, new);
 273        /*
 274         * Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
 275         * If another update has come in, leave it DIRTY.
 276         */
 277        atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
 278                               UPDATE_IN_PROGRESS, CLEAN);
 279        mutex_unlock(&kvm->arch.apic_map_lock);
 280
 281        if (old)
 282                call_rcu(&old->rcu, kvm_apic_map_free);
 283
 284        kvm_make_scan_ioapic_request(kvm);
 285}
 286
 287static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 288{
 289        bool enabled = val & APIC_SPIV_APIC_ENABLED;
 290
 291        kvm_lapic_set_reg(apic, APIC_SPIV, val);
 292
 293        if (enabled != apic->sw_enabled) {
 294                apic->sw_enabled = enabled;
 295                if (enabled)
 296                        static_branch_slow_dec_deferred(&apic_sw_disabled);
 297                else
 298                        static_branch_inc(&apic_sw_disabled.key);
 299
 300                atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 301        }
 302
 303        /* Check if there are APF page ready requests pending */
 304        if (enabled)
 305                kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
 306}
 307
 308static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
 309{
 310        kvm_lapic_set_reg(apic, APIC_ID, id << 24);
 311        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 312}
 313
 314static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
 315{
 316        kvm_lapic_set_reg(apic, APIC_LDR, id);
 317        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 318}
 319
 320static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
 321{
 322        kvm_lapic_set_reg(apic, APIC_DFR, val);
 323        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 324}
 325
 326static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
 327{
 328        return ((id >> 4) << 16) | (1 << (id & 0xf));
 329}
 330
 331static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
 332{
 333        u32 ldr = kvm_apic_calc_x2apic_ldr(id);
 334
 335        WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
 336
 337        kvm_lapic_set_reg(apic, APIC_ID, id);
 338        kvm_lapic_set_reg(apic, APIC_LDR, ldr);
 339        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 340}
 341
 342static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 343{
 344        return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
 345}
 346
 347static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 348{
 349        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
 350}
 351
 352static inline int apic_lvtt_period(struct kvm_lapic *apic)
 353{
 354        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
 355}
 356
 357static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 358{
 359        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
 360}
 361
 362static inline int apic_lvt_nmi_mode(u32 lvt_val)
 363{
 364        return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
 365}
 366
 367void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 368{
 369        struct kvm_lapic *apic = vcpu->arch.apic;
 370        u32 v = APIC_VERSION;
 371
 372        if (!lapic_in_kernel(vcpu))
 373                return;
 374
 375        /*
 376         * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation)
 377         * which doesn't have EOI register; Some buggy OSes (e.g. Windows with
 378         * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC
 379         * version first and level-triggered interrupts never get EOIed in
 380         * IOAPIC.
 381         */
 382        if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
 383            !ioapic_in_kernel(vcpu->kvm))
 384                v |= APIC_LVR_DIRECTED_EOI;
 385        kvm_lapic_set_reg(apic, APIC_LVR, v);
 386}
 387
/*
 * Per-LVT-entry bit masks, KVM_APIC_LVT_NUM entries in the order given by
 * the inline comments.
 * NOTE(review): presumably the set of guest-writable bits for each entry;
 * the users of this table are outside the visible part of the file.
 */
static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
        LVT_MASK ,      /* part LVTT mask, timer mode mask added at runtime */
        LVT_MASK | APIC_MODE_MASK,      /* LVTTHMR */
        LVT_MASK | APIC_MODE_MASK,      /* LVTPC */
        LINT_MASK, LINT_MASK,   /* LVT0-1 */
        LVT_MASK                /* LVTERR */
};
 395
 396static int find_highest_vector(void *bitmap)
 397{
 398        int vec;
 399        u32 *reg;
 400
 401        for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
 402             vec >= 0; vec -= APIC_VECTORS_PER_REG) {
 403                reg = bitmap + REG_POS(vec);
 404                if (*reg)
 405                        return __fls(*reg) + vec;
 406        }
 407
 408        return -1;
 409}
 410
 411static u8 count_vectors(void *bitmap)
 412{
 413        int vec;
 414        u32 *reg;
 415        u8 count = 0;
 416
 417        for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
 418                reg = bitmap + REG_POS(vec);
 419                count += hweight32(*reg);
 420        }
 421
 422        return count;
 423}
 424
 425bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
 426{
 427        u32 i, vec;
 428        u32 pir_val, irr_val, prev_irr_val;
 429        int max_updated_irr;
 430
 431        max_updated_irr = -1;
 432        *max_irr = -1;
 433
 434        for (i = vec = 0; i <= 7; i++, vec += 32) {
 435                pir_val = READ_ONCE(pir[i]);
 436                irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
 437                if (pir_val) {
 438                        prev_irr_val = irr_val;
 439                        irr_val |= xchg(&pir[i], 0);
 440                        *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
 441                        if (prev_irr_val != irr_val) {
 442                                max_updated_irr =
 443                                        __fls(irr_val ^ prev_irr_val) + vec;
 444                        }
 445                }
 446                if (irr_val)
 447                        *max_irr = __fls(irr_val) + vec;
 448        }
 449
 450        return ((max_updated_irr != -1) &&
 451                (max_updated_irr == *max_irr));
 452}
 453EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
 454
 455bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
 456{
 457        struct kvm_lapic *apic = vcpu->arch.apic;
 458
 459        return __kvm_apic_update_irr(pir, apic->regs, max_irr);
 460}
 461EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 462
 463static inline int apic_search_irr(struct kvm_lapic *apic)
 464{
 465        return find_highest_vector(apic->regs + APIC_IRR);
 466}
 467
 468static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 469{
 470        int result;
 471
 472        /*
 473         * Note that irr_pending is just a hint. It will be always
 474         * true with virtual interrupt delivery enabled.
 475         */
 476        if (!apic->irr_pending)
 477                return -1;
 478
 479        result = apic_search_irr(apic);
 480        ASSERT(result == -1 || result >= 16);
 481
 482        return result;
 483}
 484
 485static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 486{
 487        struct kvm_vcpu *vcpu;
 488
 489        vcpu = apic->vcpu;
 490
 491        if (unlikely(vcpu->arch.apicv_active)) {
 492                /* need to update RVI */
 493                kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
 494                static_call(kvm_x86_hwapic_irr_update)(vcpu,
 495                                apic_find_highest_irr(apic));
 496        } else {
 497                apic->irr_pending = false;
 498                kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
 499                if (apic_search_irr(apic) != -1)
 500                        apic->irr_pending = true;
 501        }
 502}
 503
 504void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
 505{
 506        apic_clear_irr(vec, vcpu->arch.apic);
 507}
 508EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
 509
 510static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 511{
 512        struct kvm_vcpu *vcpu;
 513
 514        if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
 515                return;
 516
 517        vcpu = apic->vcpu;
 518
 519        /*
 520         * With APIC virtualization enabled, all caching is disabled
 521         * because the processor can modify ISR under the hood.  Instead
 522         * just set SVI.
 523         */
 524        if (unlikely(vcpu->arch.apicv_active))
 525                static_call(kvm_x86_hwapic_isr_update)(vcpu, vec);
 526        else {
 527                ++apic->isr_count;
 528                BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
 529                /*
 530                 * ISR (in service register) bit is set when injecting an interrupt.
 531                 * The highest vector is injected. Thus the latest bit set matches
 532                 * the highest bit in ISR.
 533                 */
 534                apic->highest_isr_cache = vec;
 535        }
 536}
 537
 538static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 539{
 540        int result;
 541
 542        /*
 543         * Note that isr_count is always 1, and highest_isr_cache
 544         * is always -1, with APIC virtualization enabled.
 545         */
 546        if (!apic->isr_count)
 547                return -1;
 548        if (likely(apic->highest_isr_cache != -1))
 549                return apic->highest_isr_cache;
 550
 551        result = find_highest_vector(apic->regs + APIC_ISR);
 552        ASSERT(result == -1 || result >= 16);
 553
 554        return result;
 555}
 556
 557static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 558{
 559        struct kvm_vcpu *vcpu;
 560        if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
 561                return;
 562
 563        vcpu = apic->vcpu;
 564
 565        /*
 566         * We do get here for APIC virtualization enabled if the guest
 567         * uses the Hyper-V APIC enlightenment.  In this case we may need
 568         * to trigger a new interrupt delivery by writing the SVI field;
 569         * on the other hand isr_count and highest_isr_cache are unused
 570         * and must be left alone.
 571         */
 572        if (unlikely(vcpu->arch.apicv_active))
 573                static_call(kvm_x86_hwapic_isr_update)(vcpu,
 574                                                apic_find_highest_isr(apic));
 575        else {
 576                --apic->isr_count;
 577                BUG_ON(apic->isr_count < 0);
 578                apic->highest_isr_cache = -1;
 579        }
 580}
 581
 582int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 583{
 584        /* This may race with setting of irr in __apic_accept_irq() and
 585         * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
 586         * will cause vmexit immediately and the value will be recalculated
 587         * on the next vmentry.
 588         */
 589        return apic_find_highest_irr(vcpu->arch.apic);
 590}
 591EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 592
/*
 * Forward declaration: kvm_apic_set_irq() below calls this before its
 * definition appears later in the file.
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                             int vector, int level, int trig_mode,
                             struct dest_map *dest_map);
 596
 597int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 598                     struct dest_map *dest_map)
 599{
 600        struct kvm_lapic *apic = vcpu->arch.apic;
 601
 602        return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
 603                        irq->level, irq->trig_mode, dest_map);
 604}
 605
 606static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
 607                         struct kvm_lapic_irq *irq, u32 min)
 608{
 609        int i, count = 0;
 610        struct kvm_vcpu *vcpu;
 611
 612        if (min > map->max_apic_id)
 613                return 0;
 614
 615        for_each_set_bit(i, ipi_bitmap,
 616                min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
 617                if (map->phys_map[min + i]) {
 618                        vcpu = map->phys_map[min + i]->vcpu;
 619                        count += kvm_apic_set_irq(vcpu, irq, NULL);
 620                }
 621        }
 622
 623        return count;
 624}
 625
 626int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 627                    unsigned long ipi_bitmap_high, u32 min,
 628                    unsigned long icr, int op_64_bit)
 629{
 630        struct kvm_apic_map *map;
 631        struct kvm_lapic_irq irq = {0};
 632        int cluster_size = op_64_bit ? 64 : 32;
 633        int count;
 634
 635        if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
 636                return -KVM_EINVAL;
 637
 638        irq.vector = icr & APIC_VECTOR_MASK;
 639        irq.delivery_mode = icr & APIC_MODE_MASK;
 640        irq.level = (icr & APIC_INT_ASSERT) != 0;
 641        irq.trig_mode = icr & APIC_INT_LEVELTRIG;
 642
 643        rcu_read_lock();
 644        map = rcu_dereference(kvm->arch.apic_map);
 645
 646        count = -EOPNOTSUPP;
 647        if (likely(map)) {
 648                count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
 649                min += cluster_size;
 650                count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
 651        }
 652
 653        rcu_read_unlock();
 654        return count;
 655}
 656
 657static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
 658{
 659
 660        return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
 661                                      sizeof(val));
 662}
 663
 664static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
 665{
 666
 667        return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
 668                                      sizeof(*val));
 669}
 670
 671static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
 672{
 673        return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
 674}
 675
 676static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 677{
 678        u8 val;
 679        if (pv_eoi_get_user(vcpu, &val) < 0) {
 680                printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
 681                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 682                return false;
 683        }
 684        return val & KVM_PV_EOI_ENABLED;
 685}
 686
 687static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 688{
 689        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
 690                printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
 691                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 692                return;
 693        }
 694        __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 695}
 696
 697static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 698{
 699        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
 700                printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
 701                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 702                return;
 703        }
 704        __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 705}
 706
 707static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
 708{
 709        int highest_irr;
 710        if (apic->vcpu->arch.apicv_active)
 711                highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
 712        else
 713                highest_irr = apic_find_highest_irr(apic);
 714        if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
 715                return -1;
 716        return highest_irr;
 717}
 718
 719static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
 720{
 721        u32 tpr, isrv, ppr, old_ppr;
 722        int isr;
 723
 724        old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
 725        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
 726        isr = apic_find_highest_isr(apic);
 727        isrv = (isr != -1) ? isr : 0;
 728
 729        if ((tpr & 0xf0) >= (isrv & 0xf0))
 730                ppr = tpr & 0xff;
 731        else
 732                ppr = isrv & 0xf0;
 733
 734        *new_ppr = ppr;
 735        if (old_ppr != ppr)
 736                kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
 737
 738        return ppr < old_ppr;
 739}
 740
 741static void apic_update_ppr(struct kvm_lapic *apic)
 742{
 743        u32 ppr;
 744
 745        if (__apic_update_ppr(apic, &ppr) &&
 746            apic_has_interrupt_for_ppr(apic, ppr) != -1)
 747                kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 748}
 749
 750void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
 751{
 752        apic_update_ppr(vcpu->arch.apic);
 753}
 754EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
 755
 756static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 757{
 758        kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
 759        apic_update_ppr(apic);
 760}
 761
 762static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
 763{
 764        return mda == (apic_x2apic_mode(apic) ?
 765                        X2APIC_BROADCAST : APIC_BROADCAST);
 766}
 767
 768static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
 769{
 770        if (kvm_apic_broadcast(apic, mda))
 771                return true;
 772
 773        if (apic_x2apic_mode(apic))
 774                return mda == kvm_x2apic_id(apic);
 775
 776        /*
 777         * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
 778         * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
 779         * this allows unique addressing of VCPUs with APIC ID over 0xff.
 780         * The 0xff condition is needed because writeable xAPIC ID.
 781         */
 782        if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
 783                return true;
 784
 785        return mda == kvm_xapic_id(apic);
 786}
 787
 788static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 789{
 790        u32 logical_id;
 791
 792        if (kvm_apic_broadcast(apic, mda))
 793                return true;
 794
 795        logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
 796
 797        if (apic_x2apic_mode(apic))
 798                return ((logical_id >> 16) == (mda >> 16))
 799                       && (logical_id & mda & 0xffff) != 0;
 800
 801        logical_id = GET_APIC_LOGICAL_ID(logical_id);
 802
 803        switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
 804        case APIC_DFR_FLAT:
 805                return (logical_id & mda) != 0;
 806        case APIC_DFR_CLUSTER:
 807                return ((logical_id >> 4) == (mda >> 4))
 808                       && (logical_id & mda & 0xf) != 0;
 809        default:
 810                return false;
 811        }
 812}
 813
 814/* The KVM local APIC implementation has two quirks:
 815 *
 816 *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 817 *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 818 *    KVM doesn't do that aliasing.
 819 *
 820 *  - in-kernel IOAPIC messages have to be delivered directly to
 821 *    x2APIC, because the kernel does not support interrupt remapping.
 822 *    In order to support broadcast without interrupt remapping, x2APIC
 823 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
 824 *    to X2APIC_BROADCAST.
 825 *
 826 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
 827 * important when userspace wants to use x2APIC-format MSIs, because
 828 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
 829 */
 830static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
 831                struct kvm_lapic *source, struct kvm_lapic *target)
 832{
 833        bool ipi = source != NULL;
 834
 835        if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
 836            !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
 837                return X2APIC_BROADCAST;
 838
 839        return dest_id;
 840}
 841
 842bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 843                           int shorthand, unsigned int dest, int dest_mode)
 844{
 845        struct kvm_lapic *target = vcpu->arch.apic;
 846        u32 mda = kvm_apic_mda(vcpu, dest, source, target);
 847
 848        ASSERT(target);
 849        switch (shorthand) {
 850        case APIC_DEST_NOSHORT:
 851                if (dest_mode == APIC_DEST_PHYSICAL)
 852                        return kvm_apic_match_physical_addr(target, mda);
 853                else
 854                        return kvm_apic_match_logical_addr(target, mda);
 855        case APIC_DEST_SELF:
 856                return target == source;
 857        case APIC_DEST_ALLINC:
 858                return true;
 859        case APIC_DEST_ALLBUT:
 860                return target != source;
 861        default:
 862                return false;
 863        }
 864}
 865EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
 866
 867int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
 868                       const unsigned long *bitmap, u32 bitmap_size)
 869{
 870        u32 mod;
 871        int i, idx = -1;
 872
 873        mod = vector % dest_vcpus;
 874
 875        for (i = 0; i <= mod; i++) {
 876                idx = find_next_bit(bitmap, bitmap_size, idx + 1);
 877                BUG_ON(idx == bitmap_size);
 878        }
 879
 880        return idx;
 881}
 882
 883static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
 884{
 885        if (!kvm->arch.disabled_lapic_found) {
 886                kvm->arch.disabled_lapic_found = true;
 887                printk(KERN_INFO
 888                       "Disabled LAPIC found during irq injection\n");
 889        }
 890}
 891
 892static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
 893                struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
 894{
 895        if (kvm->arch.x2apic_broadcast_quirk_disabled) {
 896                if ((irq->dest_id == APIC_BROADCAST &&
 897                                map->mode != KVM_APIC_MODE_X2APIC))
 898                        return true;
 899                if (irq->dest_id == X2APIC_BROADCAST)
 900                        return true;
 901        } else {
 902                bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
 903                if (irq->dest_id == (x2apic_ipi ?
 904                                     X2APIC_BROADCAST : APIC_BROADCAST))
 905                        return true;
 906        }
 907
 908        return false;
 909}
 910
 911/* Return true if the interrupt can be handled by using *bitmap as index mask
 912 * for valid destinations in *dst array.
 913 * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
 914 * Note: we may have zero kvm_lapic destinations when we return true, which
 915 * means that the interrupt should be dropped.  In this case, *bitmap would be
 916 * zero and *dst undefined.
 917 */
 918static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
 919                struct kvm_lapic **src, struct kvm_lapic_irq *irq,
 920                struct kvm_apic_map *map, struct kvm_lapic ***dst,
 921                unsigned long *bitmap)
 922{
 923        int i, lowest;
 924
 925        if (irq->shorthand == APIC_DEST_SELF && src) {
 926                *dst = src;
 927                *bitmap = 1;
 928                return true;
 929        } else if (irq->shorthand)
 930                return false;
 931
 932        if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
 933                return false;
 934
 935        if (irq->dest_mode == APIC_DEST_PHYSICAL) {
 936                if (irq->dest_id > map->max_apic_id) {
 937                        *bitmap = 0;
 938                } else {
 939                        u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
 940                        *dst = &map->phys_map[dest_id];
 941                        *bitmap = 1;
 942                }
 943                return true;
 944        }
 945
 946        *bitmap = 0;
 947        if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
 948                                (u16 *)bitmap))
 949                return false;
 950
 951        if (!kvm_lowest_prio_delivery(irq))
 952                return true;
 953
 954        if (!kvm_vector_hashing_enabled()) {
 955                lowest = -1;
 956                for_each_set_bit(i, bitmap, 16) {
 957                        if (!(*dst)[i])
 958                                continue;
 959                        if (lowest < 0)
 960                                lowest = i;
 961                        else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
 962                                                (*dst)[lowest]->vcpu) < 0)
 963                                lowest = i;
 964                }
 965        } else {
 966                if (!*bitmap)
 967                        return true;
 968
 969                lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
 970                                bitmap, 16);
 971
 972                if (!(*dst)[lowest]) {
 973                        kvm_apic_disabled_lapic_found(kvm);
 974                        *bitmap = 0;
 975                        return true;
 976                }
 977        }
 978
 979        *bitmap = (lowest >= 0) ? 1 << lowest : 0;
 980
 981        return true;
 982}
 983
 984bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 985                struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
 986{
 987        struct kvm_apic_map *map;
 988        unsigned long bitmap;
 989        struct kvm_lapic **dst = NULL;
 990        int i;
 991        bool ret;
 992
 993        *r = -1;
 994
 995        if (irq->shorthand == APIC_DEST_SELF) {
 996                *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
 997                return true;
 998        }
 999
1000        rcu_read_lock();
1001        map = rcu_dereference(kvm->arch.apic_map);
1002
1003        ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
1004        if (ret) {
1005                *r = 0;
1006                for_each_set_bit(i, &bitmap, 16) {
1007                        if (!dst[i])
1008                                continue;
1009                        *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
1010                }
1011        }
1012
1013        rcu_read_unlock();
1014        return ret;
1015}
1016
1017/*
1018 * This routine tries to handle interrupts in posted mode, here is how
1019 * it deals with different cases:
1020 * - For single-destination interrupts, handle it in posted mode
1021 * - Else if vector hashing is enabled and it is a lowest-priority
1022 *   interrupt, handle it in posted mode and use the following mechanism
1023 *   to find the destination vCPU.
1024 *      1. For lowest-priority interrupts, store all the possible
1025 *         destination vCPUs in an array.
1026 *      2. Use "guest vector % max number of destination vCPUs" to find
1027 *         the right destination vCPU in the array for the lowest-priority
1028 *         interrupt.
1029 * - Otherwise, use remapped mode to inject the interrupt.
1030 */
1031bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
1032                        struct kvm_vcpu **dest_vcpu)
1033{
1034        struct kvm_apic_map *map;
1035        unsigned long bitmap;
1036        struct kvm_lapic **dst = NULL;
1037        bool ret = false;
1038
1039        if (irq->shorthand)
1040                return false;
1041
1042        rcu_read_lock();
1043        map = rcu_dereference(kvm->arch.apic_map);
1044
1045        if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
1046                        hweight16(bitmap) == 1) {
1047                unsigned long i = find_first_bit(&bitmap, 16);
1048
1049                if (dst[i]) {
1050                        *dest_vcpu = dst[i]->vcpu;
1051                        ret = true;
1052                }
1053        }
1054
1055        rcu_read_unlock();
1056        return ret;
1057}
1058
1059/*
1060 * Add a pending IRQ into lapic.
1061 * Return 1 if successfully added and 0 if discarded.
1062 */
1063static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1064                             int vector, int level, int trig_mode,
1065                             struct dest_map *dest_map)
1066{
1067        int result = 0;
1068        struct kvm_vcpu *vcpu = apic->vcpu;
1069
1070        trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
1071                                  trig_mode, vector);
1072        switch (delivery_mode) {
1073        case APIC_DM_LOWEST:
1074                vcpu->arch.apic_arb_prio++;
1075                fallthrough;
1076        case APIC_DM_FIXED:
1077                if (unlikely(trig_mode && !level))
1078                        break;
1079
1080                /* FIXME add logic for vcpu on reset */
1081                if (unlikely(!apic_enabled(apic)))
1082                        break;
1083
1084                result = 1;
1085
1086                if (dest_map) {
1087                        __set_bit(vcpu->vcpu_id, dest_map->map);
1088                        dest_map->vectors[vcpu->vcpu_id] = vector;
1089                }
1090
1091                if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
1092                        if (trig_mode)
1093                                kvm_lapic_set_vector(vector,
1094                                                     apic->regs + APIC_TMR);
1095                        else
1096                                kvm_lapic_clear_vector(vector,
1097                                                       apic->regs + APIC_TMR);
1098                }
1099
1100                if (static_call(kvm_x86_deliver_posted_interrupt)(vcpu, vector)) {
1101                        kvm_lapic_set_irr(vector, apic);
1102                        kvm_make_request(KVM_REQ_EVENT, vcpu);
1103                        kvm_vcpu_kick(vcpu);
1104                }
1105                break;
1106
1107        case APIC_DM_REMRD:
1108                result = 1;
1109                vcpu->arch.pv.pv_unhalted = 1;
1110                kvm_make_request(KVM_REQ_EVENT, vcpu);
1111                kvm_vcpu_kick(vcpu);
1112                break;
1113
1114        case APIC_DM_SMI:
1115                result = 1;
1116                kvm_make_request(KVM_REQ_SMI, vcpu);
1117                kvm_vcpu_kick(vcpu);
1118                break;
1119
1120        case APIC_DM_NMI:
1121                result = 1;
1122                kvm_inject_nmi(vcpu);
1123                kvm_vcpu_kick(vcpu);
1124                break;
1125
1126        case APIC_DM_INIT:
1127                if (!trig_mode || level) {
1128                        result = 1;
1129                        /* assumes that there are only KVM_APIC_INIT/SIPI */
1130                        apic->pending_events = (1UL << KVM_APIC_INIT);
1131                        kvm_make_request(KVM_REQ_EVENT, vcpu);
1132                        kvm_vcpu_kick(vcpu);
1133                }
1134                break;
1135
1136        case APIC_DM_STARTUP:
1137                result = 1;
1138                apic->sipi_vector = vector;
1139                /* make sure sipi_vector is visible for the receiver */
1140                smp_wmb();
1141                set_bit(KVM_APIC_SIPI, &apic->pending_events);
1142                kvm_make_request(KVM_REQ_EVENT, vcpu);
1143                kvm_vcpu_kick(vcpu);
1144                break;
1145
1146        case APIC_DM_EXTINT:
1147                /*
1148                 * Should only be called by kvm_apic_local_deliver() with LVT0,
1149                 * before NMI watchdog was enabled. Already handled by
1150                 * kvm_apic_accept_pic_intr().
1151                 */
1152                break;
1153
1154        default:
1155                printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1156                       delivery_mode);
1157                break;
1158        }
1159        return result;
1160}
1161
1162/*
1163 * This routine identifies the destination vcpus mask meant to receive the
1164 * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find
1165 * out the destination vcpus array and set the bitmap or it traverses to
1166 * each available vcpu to identify the same.
1167 */
1168void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
1169                              unsigned long *vcpu_bitmap)
1170{
1171        struct kvm_lapic **dest_vcpu = NULL;
1172        struct kvm_lapic *src = NULL;
1173        struct kvm_apic_map *map;
1174        struct kvm_vcpu *vcpu;
1175        unsigned long bitmap;
1176        int i, vcpu_idx;
1177        bool ret;
1178
1179        rcu_read_lock();
1180        map = rcu_dereference(kvm->arch.apic_map);
1181
1182        ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
1183                                          &bitmap);
1184        if (ret) {
1185                for_each_set_bit(i, &bitmap, 16) {
1186                        if (!dest_vcpu[i])
1187                                continue;
1188                        vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
1189                        __set_bit(vcpu_idx, vcpu_bitmap);
1190                }
1191        } else {
1192                kvm_for_each_vcpu(i, vcpu, kvm) {
1193                        if (!kvm_apic_present(vcpu))
1194                                continue;
1195                        if (!kvm_apic_match_dest(vcpu, NULL,
1196                                                 irq->shorthand,
1197                                                 irq->dest_id,
1198                                                 irq->dest_mode))
1199                                continue;
1200                        __set_bit(i, vcpu_bitmap);
1201                }
1202        }
1203        rcu_read_unlock();
1204}
1205
1206int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1207{
1208        return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1209}
1210
1211static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1212{
1213        return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1214}
1215
1216static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1217{
1218        int trigger_mode;
1219
1220        /* Eoi the ioapic only if the ioapic doesn't own the vector. */
1221        if (!kvm_ioapic_handles_vector(apic, vector))
1222                return;
1223
1224        /* Request a KVM exit to inform the userspace IOAPIC. */
1225        if (irqchip_split(apic->vcpu->kvm)) {
1226                apic->vcpu->arch.pending_ioapic_eoi = vector;
1227                kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1228                return;
1229        }
1230
1231        if (apic_test_vector(vector, apic->regs + APIC_TMR))
1232                trigger_mode = IOAPIC_LEVEL_TRIG;
1233        else
1234                trigger_mode = IOAPIC_EDGE_TRIG;
1235
1236        kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1237}
1238
1239static int apic_set_eoi(struct kvm_lapic *apic)
1240{
1241        int vector = apic_find_highest_isr(apic);
1242
1243        trace_kvm_eoi(apic, vector);
1244
1245        /*
1246         * Not every write EOI will has corresponding ISR,
1247         * one example is when Kernel check timer on setup_IO_APIC
1248         */
1249        if (vector == -1)
1250                return vector;
1251
1252        apic_clear_isr(vector, apic);
1253        apic_update_ppr(apic);
1254
1255        if (to_hv_vcpu(apic->vcpu) &&
1256            test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap))
1257                kvm_hv_synic_send_eoi(apic->vcpu, vector);
1258
1259        kvm_ioapic_send_eoi(apic, vector);
1260        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1261        return vector;
1262}
1263
1264/*
1265 * this interface assumes a trap-like exit, which has already finished
1266 * desired side effect including vISR and vPPR update.
1267 */
1268void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1269{
1270        struct kvm_lapic *apic = vcpu->arch.apic;
1271
1272        trace_kvm_eoi(apic, vector);
1273
1274        kvm_ioapic_send_eoi(apic, vector);
1275        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1276}
1277EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1278
1279void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
1280{
1281        struct kvm_lapic_irq irq;
1282
1283        irq.vector = icr_low & APIC_VECTOR_MASK;
1284        irq.delivery_mode = icr_low & APIC_MODE_MASK;
1285        irq.dest_mode = icr_low & APIC_DEST_MASK;
1286        irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1287        irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1288        irq.shorthand = icr_low & APIC_SHORT_MASK;
1289        irq.msi_redir_hint = false;
1290        if (apic_x2apic_mode(apic))
1291                irq.dest_id = icr_high;
1292        else
1293                irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
1294
1295        trace_kvm_apic_ipi(icr_low, irq.dest_id);
1296
1297        kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1298}
1299
1300static u32 apic_get_tmcct(struct kvm_lapic *apic)
1301{
1302        ktime_t remaining, now;
1303        s64 ns;
1304        u32 tmcct;
1305
1306        ASSERT(apic != NULL);
1307
1308        /* if initial count is 0, current count should also be 0 */
1309        if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1310                apic->lapic_timer.period == 0)
1311                return 0;
1312
1313        now = ktime_get();
1314        remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1315        if (ktime_to_ns(remaining) < 0)
1316                remaining = 0;
1317
1318        ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1319        tmcct = div64_u64(ns,
1320                         (APIC_BUS_CYCLE_NS * apic->divide_count));
1321
1322        return tmcct;
1323}
1324
1325static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1326{
1327        struct kvm_vcpu *vcpu = apic->vcpu;
1328        struct kvm_run *run = vcpu->run;
1329
1330        kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1331        run->tpr_access.rip = kvm_rip_read(vcpu);
1332        run->tpr_access.is_write = write;
1333}
1334
1335static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1336{
1337        if (apic->vcpu->arch.tpr_access_reporting)
1338                __report_tpr_access(apic, write);
1339}
1340
1341static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1342{
1343        u32 val = 0;
1344
1345        if (offset >= LAPIC_MMIO_LENGTH)
1346                return 0;
1347
1348        switch (offset) {
1349        case APIC_ARBPRI:
1350                break;
1351
1352        case APIC_TMCCT:        /* Timer CCR */
1353                if (apic_lvtt_tscdeadline(apic))
1354                        return 0;
1355
1356                val = apic_get_tmcct(apic);
1357                break;
1358        case APIC_PROCPRI:
1359                apic_update_ppr(apic);
1360                val = kvm_lapic_get_reg(apic, offset);
1361                break;
1362        case APIC_TASKPRI:
1363                report_tpr_access(apic, false);
1364                fallthrough;
1365        default:
1366                val = kvm_lapic_get_reg(apic, offset);
1367                break;
1368        }
1369
1370        return val;
1371}
1372
1373static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1374{
1375        return container_of(dev, struct kvm_lapic, dev);
1376}
1377
/*
 * Helpers for building a 64-bit mask with one bit per 16-byte APIC register
 * slot: bit (reg >> 4) stands for the register at offset @reg.
 * APIC_REGS_MASK() covers @count consecutive slots starting at @first.
 */
#define APIC_REG_MASK(reg)      (1ull << ((reg) >> 4))
#define APIC_REGS_MASK(first, count) \
        (((1ull << (count)) - 1) << ((first) >> 4))
1381
1382int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1383                void *data)
1384{
1385        unsigned char alignment = offset & 0xf;
1386        u32 result;
1387        /* this bitmask has a bit cleared for each reserved register */
1388        u64 valid_reg_mask =
1389                APIC_REG_MASK(APIC_ID) |
1390                APIC_REG_MASK(APIC_LVR) |
1391                APIC_REG_MASK(APIC_TASKPRI) |
1392                APIC_REG_MASK(APIC_PROCPRI) |
1393                APIC_REG_MASK(APIC_LDR) |
1394                APIC_REG_MASK(APIC_DFR) |
1395                APIC_REG_MASK(APIC_SPIV) |
1396                APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1397                APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1398                APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1399                APIC_REG_MASK(APIC_ESR) |
1400                APIC_REG_MASK(APIC_ICR) |
1401                APIC_REG_MASK(APIC_ICR2) |
1402                APIC_REG_MASK(APIC_LVTT) |
1403                APIC_REG_MASK(APIC_LVTTHMR) |
1404                APIC_REG_MASK(APIC_LVTPC) |
1405                APIC_REG_MASK(APIC_LVT0) |
1406                APIC_REG_MASK(APIC_LVT1) |
1407                APIC_REG_MASK(APIC_LVTERR) |
1408                APIC_REG_MASK(APIC_TMICT) |
1409                APIC_REG_MASK(APIC_TMCCT) |
1410                APIC_REG_MASK(APIC_TDCR);
1411
1412        /* ARBPRI is not valid on x2APIC */
1413        if (!apic_x2apic_mode(apic))
1414                valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
1415
1416        if (alignment + len > 4)
1417                return 1;
1418
1419        if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
1420                return 1;
1421
1422        result = __apic_read(apic, offset & ~0xf);
1423
1424        trace_kvm_apic_read(offset, result);
1425
1426        switch (len) {
1427        case 1:
1428        case 2:
1429        case 4:
1430                memcpy(data, (char *)&result + alignment, len);
1431                break;
1432        default:
1433                printk(KERN_ERR "Local APIC read with len = %x, "
1434                       "should be 1,2, or 4 instead\n", len);
1435                break;
1436        }
1437        return 0;
1438}
1439EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
1440
1441static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1442{
1443        return addr >= apic->base_address &&
1444                addr < apic->base_address + LAPIC_MMIO_LENGTH;
1445}
1446
1447static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1448                           gpa_t address, int len, void *data)
1449{
1450        struct kvm_lapic *apic = to_lapic(this);
1451        u32 offset = address - apic->base_address;
1452
1453        if (!apic_mmio_in_range(apic, address))
1454                return -EOPNOTSUPP;
1455
1456        if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1457                if (!kvm_check_has_quirk(vcpu->kvm,
1458                                         KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1459                        return -EOPNOTSUPP;
1460
1461                memset(data, 0xff, len);
1462                return 0;
1463        }
1464
1465        kvm_lapic_reg_read(apic, offset, len, data);
1466
1467        return 0;
1468}
1469
1470static void update_divide_count(struct kvm_lapic *apic)
1471{
1472        u32 tmp1, tmp2, tdcr;
1473
1474        tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1475        tmp1 = tdcr & 0xf;
1476        tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1477        apic->divide_count = 0x1 << (tmp2 & 0x7);
1478}
1479
1480static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1481{
1482        /*
1483         * Do not allow the guest to program periodic timers with small
1484         * interval, since the hrtimers are not throttled by the host
1485         * scheduler.
1486         */
1487        if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1488                s64 min_period = min_timer_period_us * 1000LL;
1489
1490                if (apic->lapic_timer.period < min_period) {
1491                        pr_info_ratelimited(
1492                            "kvm: vcpu %i: requested %lld ns "
1493                            "lapic timer period limited to %lld ns\n",
1494                            apic->vcpu->vcpu_id,
1495                            apic->lapic_timer.period, min_period);
1496                        apic->lapic_timer.period = min_period;
1497                }
1498        }
1499}
1500
1501static void cancel_hv_timer(struct kvm_lapic *apic);
1502
1503static void cancel_apic_timer(struct kvm_lapic *apic)
1504{
1505        hrtimer_cancel(&apic->lapic_timer.timer);
1506        preempt_disable();
1507        if (apic->lapic_timer.hv_timer_in_use)
1508                cancel_hv_timer(apic);
1509        preempt_enable();
1510}
1511
1512static void apic_update_lvtt(struct kvm_lapic *apic)
1513{
1514        u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1515                        apic->lapic_timer.timer_mode_mask;
1516
1517        if (apic->lapic_timer.timer_mode != timer_mode) {
1518                if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1519                                APIC_LVT_TIMER_TSCDEADLINE)) {
1520                        cancel_apic_timer(apic);
1521                        kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1522                        apic->lapic_timer.period = 0;
1523                        apic->lapic_timer.tscdeadline = 0;
1524                }
1525                apic->lapic_timer.timer_mode = timer_mode;
1526                limit_periodic_timer_frequency(apic);
1527        }
1528}
1529
1530/*
1531 * On APICv, this test will cause a busy wait
1532 * during a higher-priority task.
1533 */
1534
1535static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1536{
1537        struct kvm_lapic *apic = vcpu->arch.apic;
1538        u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1539
1540        if (kvm_apic_hw_enabled(apic)) {
1541                int vec = reg & APIC_VECTOR_MASK;
1542                void *bitmap = apic->regs + APIC_ISR;
1543
1544                if (vcpu->arch.apicv_active)
1545                        bitmap = apic->regs + APIC_IRR;
1546
1547                if (apic_test_vector(vec, bitmap))
1548                        return true;
1549        }
1550        return false;
1551}
1552
1553static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1554{
1555        u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1556
1557        /*
1558         * If the guest TSC is running at a different ratio than the host, then
1559         * convert the delay to nanoseconds to achieve an accurate delay.  Note
1560         * that __delay() uses delay_tsc whenever the hardware has TSC, thus
1561         * always for VMX enabled hardware.
1562         */
1563        if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
1564                __delay(min(guest_cycles,
1565                        nsec_to_cycles(vcpu, timer_advance_ns)));
1566        } else {
1567                u64 delay_ns = guest_cycles * 1000000ULL;
1568                do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1569                ndelay(min_t(u32, delay_ns, timer_advance_ns));
1570        }
1571}
1572
1573static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1574                                              s64 advance_expire_delta)
1575{
1576        struct kvm_lapic *apic = vcpu->arch.apic;
1577        u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1578        u64 ns;
1579
1580        /* Do not adjust for tiny fluctuations or large random spikes. */
1581        if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
1582            abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
1583                return;
1584
1585        /* too early */
1586        if (advance_expire_delta < 0) {
1587                ns = -advance_expire_delta * 1000000ULL;
1588                do_div(ns, vcpu->arch.virtual_tsc_khz);
1589                timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1590        } else {
1591        /* too late */
1592                ns = advance_expire_delta * 1000000ULL;
1593                do_div(ns, vcpu->arch.virtual_tsc_khz);
1594                timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1595        }
1596
1597        if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1598                timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
1599        apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1600}
1601
1602static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1603{
1604        struct kvm_lapic *apic = vcpu->arch.apic;
1605        u64 guest_tsc, tsc_deadline;
1606
1607        tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1608        apic->lapic_timer.expired_tscdeadline = 0;
1609        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1610        apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
1611
1612        if (lapic_timer_advance_dynamic) {
1613                adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
1614                /*
1615                 * If the timer fired early, reread the TSC to account for the
1616                 * overhead of the above adjustment to avoid waiting longer
1617                 * than is necessary.
1618                 */
1619                if (guest_tsc < tsc_deadline)
1620                        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1621        }
1622
1623        if (guest_tsc < tsc_deadline)
1624                __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1625}
1626
1627void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1628{
1629        if (lapic_in_kernel(vcpu) &&
1630            vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1631            vcpu->arch.apic->lapic_timer.timer_advance_ns &&
1632            lapic_timer_int_injected(vcpu))
1633                __kvm_wait_lapic_expire(vcpu);
1634}
1635EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
1636
1637static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1638{
1639        struct kvm_timer *ktimer = &apic->lapic_timer;
1640
1641        kvm_apic_local_deliver(apic, APIC_LVTT);
1642        if (apic_lvtt_tscdeadline(apic)) {
1643                ktimer->tscdeadline = 0;
1644        } else if (apic_lvtt_oneshot(apic)) {
1645                ktimer->tscdeadline = 0;
1646                ktimer->target_expiration = 0;
1647        }
1648}
1649
1650static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
1651{
1652        struct kvm_vcpu *vcpu = apic->vcpu;
1653        struct kvm_timer *ktimer = &apic->lapic_timer;
1654
1655        if (atomic_read(&apic->lapic_timer.pending))
1656                return;
1657
1658        if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1659                ktimer->expired_tscdeadline = ktimer->tscdeadline;
1660
1661        if (!from_timer_fn && vcpu->arch.apicv_active) {
1662                WARN_ON(kvm_get_running_vcpu() != vcpu);
1663                kvm_apic_inject_pending_timer_irqs(apic);
1664                return;
1665        }
1666
1667        if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1668                /*
1669                 * Ensure the guest's timer has truly expired before posting an
1670                 * interrupt.  Open code the relevant checks to avoid querying
1671                 * lapic_timer_int_injected(), which will be false since the
1672                 * interrupt isn't yet injected.  Waiting until after injecting
1673                 * is not an option since that won't help a posted interrupt.
1674                 */
1675                if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1676                    vcpu->arch.apic->lapic_timer.timer_advance_ns)
1677                        __kvm_wait_lapic_expire(vcpu);
1678                kvm_apic_inject_pending_timer_irqs(apic);
1679                return;
1680        }
1681
1682        atomic_inc(&apic->lapic_timer.pending);
1683        kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
1684        if (from_timer_fn)
1685                kvm_vcpu_kick(vcpu);
1686}
1687
1688static void start_sw_tscdeadline(struct kvm_lapic *apic)
1689{
1690        struct kvm_timer *ktimer = &apic->lapic_timer;
1691        u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
1692        u64 ns = 0;
1693        ktime_t expire;
1694        struct kvm_vcpu *vcpu = apic->vcpu;
1695        unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1696        unsigned long flags;
1697        ktime_t now;
1698
1699        if (unlikely(!tscdeadline || !this_tsc_khz))
1700                return;
1701
1702        local_irq_save(flags);
1703
1704        now = ktime_get();
1705        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1706
1707        ns = (tscdeadline - guest_tsc) * 1000000ULL;
1708        do_div(ns, this_tsc_khz);
1709
1710        if (likely(tscdeadline > guest_tsc) &&
1711            likely(ns > apic->lapic_timer.timer_advance_ns)) {
1712                expire = ktime_add_ns(now, ns);
1713                expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
1714                hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
1715        } else
1716                apic_timer_expired(apic, false);
1717
1718        local_irq_restore(flags);
1719}
1720
1721static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
1722{
1723        return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
1724}
1725
1726static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1727{
1728        ktime_t now, remaining;
1729        u64 ns_remaining_old, ns_remaining_new;
1730
1731        apic->lapic_timer.period =
1732                        tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1733        limit_periodic_timer_frequency(apic);
1734
1735        now = ktime_get();
1736        remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1737        if (ktime_to_ns(remaining) < 0)
1738                remaining = 0;
1739
1740        ns_remaining_old = ktime_to_ns(remaining);
1741        ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1742                                           apic->divide_count, old_divisor);
1743
1744        apic->lapic_timer.tscdeadline +=
1745                nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1746                nsec_to_cycles(apic->vcpu, ns_remaining_old);
1747        apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
1748}
1749
1750static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
1751{
1752        ktime_t now;
1753        u64 tscl = rdtsc();
1754        s64 deadline;
1755
1756        now = ktime_get();
1757        apic->lapic_timer.period =
1758                        tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1759
1760        if (!apic->lapic_timer.period) {
1761                apic->lapic_timer.tscdeadline = 0;
1762                return false;
1763        }
1764
1765        limit_periodic_timer_frequency(apic);
1766        deadline = apic->lapic_timer.period;
1767
1768        if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
1769                if (unlikely(count_reg != APIC_TMICT)) {
1770                        deadline = tmict_to_ns(apic,
1771                                     kvm_lapic_get_reg(apic, count_reg));
1772                        if (unlikely(deadline <= 0))
1773                                deadline = apic->lapic_timer.period;
1774                        else if (unlikely(deadline > apic->lapic_timer.period)) {
1775                                pr_info_ratelimited(
1776                                    "kvm: vcpu %i: requested lapic timer restore with "
1777                                    "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
1778                                    "Using initial count to start timer.\n",
1779                                    apic->vcpu->vcpu_id,
1780                                    count_reg,
1781                                    kvm_lapic_get_reg(apic, count_reg),
1782                                    deadline, apic->lapic_timer.period);
1783                                kvm_lapic_set_reg(apic, count_reg, 0);
1784                                deadline = apic->lapic_timer.period;
1785                        }
1786                }
1787        }
1788
1789        apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1790                nsec_to_cycles(apic->vcpu, deadline);
1791        apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
1792
1793        return true;
1794}
1795
1796static void advance_periodic_target_expiration(struct kvm_lapic *apic)
1797{
1798        ktime_t now = ktime_get();
1799        u64 tscl = rdtsc();
1800        ktime_t delta;
1801
1802        /*
1803         * Synchronize both deadlines to the same time source or
1804         * differences in the periods (caused by differences in the
1805         * underlying clocks or numerical approximation errors) will
1806         * cause the two to drift apart over time as the errors
1807         * accumulate.
1808         */
1809        apic->lapic_timer.target_expiration =
1810                ktime_add_ns(apic->lapic_timer.target_expiration,
1811                                apic->lapic_timer.period);
1812        delta = ktime_sub(apic->lapic_timer.target_expiration, now);
1813        apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1814                nsec_to_cycles(apic->vcpu, delta);
1815}
1816
1817static void start_sw_period(struct kvm_lapic *apic)
1818{
1819        if (!apic->lapic_timer.period)
1820                return;
1821
1822        if (ktime_after(ktime_get(),
1823                        apic->lapic_timer.target_expiration)) {
1824                apic_timer_expired(apic, false);
1825
1826                if (apic_lvtt_oneshot(apic))
1827                        return;
1828
1829                advance_periodic_target_expiration(apic);
1830        }
1831
1832        hrtimer_start(&apic->lapic_timer.timer,
1833                apic->lapic_timer.target_expiration,
1834                HRTIMER_MODE_ABS_HARD);
1835}
1836
1837bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1838{
1839        if (!lapic_in_kernel(vcpu))
1840                return false;
1841
1842        return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1843}
1844EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1845
1846static void cancel_hv_timer(struct kvm_lapic *apic)
1847{
1848        WARN_ON(preemptible());
1849        WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1850        static_call(kvm_x86_cancel_hv_timer)(apic->vcpu);
1851        apic->lapic_timer.hv_timer_in_use = false;
1852}
1853
1854static bool start_hv_timer(struct kvm_lapic *apic)
1855{
1856        struct kvm_timer *ktimer = &apic->lapic_timer;
1857        struct kvm_vcpu *vcpu = apic->vcpu;
1858        bool expired;
1859
1860        WARN_ON(preemptible());
1861        if (!kvm_can_use_hv_timer(vcpu))
1862                return false;
1863
1864        if (!ktimer->tscdeadline)
1865                return false;
1866
1867        if (static_call(kvm_x86_set_hv_timer)(vcpu, ktimer->tscdeadline, &expired))
1868                return false;
1869
1870        ktimer->hv_timer_in_use = true;
1871        hrtimer_cancel(&ktimer->timer);
1872
1873        /*
1874         * To simplify handling the periodic timer, leave the hv timer running
1875         * even if the deadline timer has expired, i.e. rely on the resulting
1876         * VM-Exit to recompute the periodic timer's target expiration.
1877         */
1878        if (!apic_lvtt_period(apic)) {
1879                /*
1880                 * Cancel the hv timer if the sw timer fired while the hv timer
1881                 * was being programmed, or if the hv timer itself expired.
1882                 */
1883                if (atomic_read(&ktimer->pending)) {
1884                        cancel_hv_timer(apic);
1885                } else if (expired) {
1886                        apic_timer_expired(apic, false);
1887                        cancel_hv_timer(apic);
1888                }
1889        }
1890
1891        trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
1892
1893        return true;
1894}
1895
1896static void start_sw_timer(struct kvm_lapic *apic)
1897{
1898        struct kvm_timer *ktimer = &apic->lapic_timer;
1899
1900        WARN_ON(preemptible());
1901        if (apic->lapic_timer.hv_timer_in_use)
1902                cancel_hv_timer(apic);
1903        if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1904                return;
1905
1906        if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1907                start_sw_period(apic);
1908        else if (apic_lvtt_tscdeadline(apic))
1909                start_sw_tscdeadline(apic);
1910        trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
1911}
1912
1913static void restart_apic_timer(struct kvm_lapic *apic)
1914{
1915        preempt_disable();
1916
1917        if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
1918                goto out;
1919
1920        if (!start_hv_timer(apic))
1921                start_sw_timer(apic);
1922out:
1923        preempt_enable();
1924}
1925
1926void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1927{
1928        struct kvm_lapic *apic = vcpu->arch.apic;
1929
1930        preempt_disable();
1931        /* If the preempt notifier has already run, it also called apic_timer_expired */
1932        if (!apic->lapic_timer.hv_timer_in_use)
1933                goto out;
1934        WARN_ON(rcuwait_active(&vcpu->wait));
1935        apic_timer_expired(apic, false);
1936        cancel_hv_timer(apic);
1937
1938        if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1939                advance_periodic_target_expiration(apic);
1940                restart_apic_timer(apic);
1941        }
1942out:
1943        preempt_enable();
1944}
1945EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
1946
1947void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
1948{
1949        restart_apic_timer(vcpu->arch.apic);
1950}
1951EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
1952
1953void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
1954{
1955        struct kvm_lapic *apic = vcpu->arch.apic;
1956
1957        preempt_disable();
1958        /* Possibly the TSC deadline timer is not enabled yet */
1959        if (apic->lapic_timer.hv_timer_in_use)
1960                start_sw_timer(apic);
1961        preempt_enable();
1962}
1963EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
1964
1965void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
1966{
1967        struct kvm_lapic *apic = vcpu->arch.apic;
1968
1969        WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1970        restart_apic_timer(apic);
1971}
1972
1973static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
1974{
1975        atomic_set(&apic->lapic_timer.pending, 0);
1976
1977        if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1978            && !set_target_expiration(apic, count_reg))
1979                return;
1980
1981        restart_apic_timer(apic);
1982}
1983
1984static void start_apic_timer(struct kvm_lapic *apic)
1985{
1986        __start_apic_timer(apic, APIC_TMICT);
1987}
1988
1989static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1990{
1991        bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
1992
1993        if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
1994                apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
1995                if (lvt0_in_nmi_mode) {
1996                        atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1997                } else
1998                        atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1999        }
2000}
2001
2002int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
2003{
2004        int ret = 0;
2005
2006        trace_kvm_apic_write(reg, val);
2007
2008        switch (reg) {
2009        case APIC_ID:           /* Local APIC ID */
2010                if (!apic_x2apic_mode(apic))
2011                        kvm_apic_set_xapic_id(apic, val >> 24);
2012                else
2013                        ret = 1;
2014                break;
2015
2016        case APIC_TASKPRI:
2017                report_tpr_access(apic, true);
2018                apic_set_tpr(apic, val & 0xff);
2019                break;
2020
2021        case APIC_EOI:
2022                apic_set_eoi(apic);
2023                break;
2024
2025        case APIC_LDR:
2026                if (!apic_x2apic_mode(apic))
2027                        kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
2028                else
2029                        ret = 1;
2030                break;
2031
2032        case APIC_DFR:
2033                if (!apic_x2apic_mode(apic))
2034                        kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
2035                else
2036                        ret = 1;
2037                break;
2038
2039        case APIC_SPIV: {
2040                u32 mask = 0x3ff;
2041                if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
2042                        mask |= APIC_SPIV_DIRECTED_EOI;
2043                apic_set_spiv(apic, val & mask);
2044                if (!(val & APIC_SPIV_APIC_ENABLED)) {
2045                        int i;
2046                        u32 lvt_val;
2047
2048                        for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
2049                                lvt_val = kvm_lapic_get_reg(apic,
2050                                                       APIC_LVTT + 0x10 * i);
2051                                kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
2052                                             lvt_val | APIC_LVT_MASKED);
2053                        }
2054                        apic_update_lvtt(apic);
2055                        atomic_set(&apic->lapic_timer.pending, 0);
2056
2057                }
2058                break;
2059        }
2060        case APIC_ICR:
2061                /* No delay here, so we always clear the pending bit */
2062                val &= ~(1 << 12);
2063                kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
2064                kvm_lapic_set_reg(apic, APIC_ICR, val);
2065                break;
2066
2067        case APIC_ICR2:
2068                if (!apic_x2apic_mode(apic))
2069                        val &= 0xff000000;
2070                kvm_lapic_set_reg(apic, APIC_ICR2, val);
2071                break;
2072
2073        case APIC_LVT0:
2074                apic_manage_nmi_watchdog(apic, val);
2075                fallthrough;
2076        case APIC_LVTTHMR:
2077        case APIC_LVTPC:
2078        case APIC_LVT1:
2079        case APIC_LVTERR: {
2080                /* TODO: Check vector */
2081                size_t size;
2082                u32 index;
2083
2084                if (!kvm_apic_sw_enabled(apic))
2085                        val |= APIC_LVT_MASKED;
2086                size = ARRAY_SIZE(apic_lvt_mask);
2087                index = array_index_nospec(
2088                                (reg - APIC_LVTT) >> 4, size);
2089                val &= apic_lvt_mask[index];
2090                kvm_lapic_set_reg(apic, reg, val);
2091                break;
2092        }
2093
2094        case APIC_LVTT:
2095                if (!kvm_apic_sw_enabled(apic))
2096                        val |= APIC_LVT_MASKED;
2097                val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
2098                kvm_lapic_set_reg(apic, APIC_LVTT, val);
2099                apic_update_lvtt(apic);
2100                break;
2101
2102        case APIC_TMICT:
2103                if (apic_lvtt_tscdeadline(apic))
2104                        break;
2105
2106                cancel_apic_timer(apic);
2107                kvm_lapic_set_reg(apic, APIC_TMICT, val);
2108                start_apic_timer(apic);
2109                break;
2110
2111        case APIC_TDCR: {
2112                uint32_t old_divisor = apic->divide_count;
2113
2114                kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
2115                update_divide_count(apic);
2116                if (apic->divide_count != old_divisor &&
2117                                apic->lapic_timer.period) {
2118                        hrtimer_cancel(&apic->lapic_timer.timer);
2119                        update_target_expiration(apic, old_divisor);
2120                        restart_apic_timer(apic);
2121                }
2122                break;
2123        }
2124        case APIC_ESR:
2125                if (apic_x2apic_mode(apic) && val != 0)
2126                        ret = 1;
2127                break;
2128
2129        case APIC_SELF_IPI:
2130                if (apic_x2apic_mode(apic)) {
2131                        kvm_lapic_reg_write(apic, APIC_ICR,
2132                                            APIC_DEST_SELF | (val & APIC_VECTOR_MASK));
2133                } else
2134                        ret = 1;
2135                break;
2136        default:
2137                ret = 1;
2138                break;
2139        }
2140
2141        kvm_recalculate_apic_map(apic->vcpu->kvm);
2142
2143        return ret;
2144}
2145EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
2146
2147static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
2148                            gpa_t address, int len, const void *data)
2149{
2150        struct kvm_lapic *apic = to_lapic(this);
2151        unsigned int offset = address - apic->base_address;
2152        u32 val;
2153
2154        if (!apic_mmio_in_range(apic, address))
2155                return -EOPNOTSUPP;
2156
2157        if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2158                if (!kvm_check_has_quirk(vcpu->kvm,
2159                                         KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2160                        return -EOPNOTSUPP;
2161
2162                return 0;
2163        }
2164
2165        /*
2166         * APIC register must be aligned on 128-bits boundary.
2167         * 32/64/128 bits registers must be accessed thru 32 bits.
2168         * Refer SDM 8.4.1
2169         */
2170        if (len != 4 || (offset & 0xf))
2171                return 0;
2172
2173        val = *(u32*)data;
2174
2175        kvm_lapic_reg_write(apic, offset & 0xff0, val);
2176
2177        return 0;
2178}
2179
2180void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2181{
2182        kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
2183}
2184EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2185
2186/* emulate APIC access in a trap manner */
2187void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2188{
2189        u32 val = 0;
2190
2191        /* hw has done the conditional check and inst decode */
2192        offset &= 0xff0;
2193
2194        kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
2195
2196        /* TODO: optimize to just emulate side effect w/o one more write */
2197        kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
2198}
2199EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
2200
2201void kvm_free_lapic(struct kvm_vcpu *vcpu)
2202{
2203        struct kvm_lapic *apic = vcpu->arch.apic;
2204
2205        if (!vcpu->arch.apic)
2206                return;
2207
2208        hrtimer_cancel(&apic->lapic_timer.timer);
2209
2210        if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2211                static_branch_slow_dec_deferred(&apic_hw_disabled);
2212
2213        if (!apic->sw_enabled)
2214                static_branch_slow_dec_deferred(&apic_sw_disabled);
2215
2216        if (apic->regs)
2217                free_page((unsigned long)apic->regs);
2218
2219        kfree(apic);
2220}
2221
2222/*
2223 *----------------------------------------------------------------------
2224 * LAPIC interface
2225 *----------------------------------------------------------------------
2226 */
2227u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2228{
2229        struct kvm_lapic *apic = vcpu->arch.apic;
2230
2231        if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2232                return 0;
2233
2234        return apic->lapic_timer.tscdeadline;
2235}
2236
2237void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2238{
2239        struct kvm_lapic *apic = vcpu->arch.apic;
2240
2241        if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2242                return;
2243
2244        hrtimer_cancel(&apic->lapic_timer.timer);
2245        apic->lapic_timer.tscdeadline = data;
2246        start_apic_timer(apic);
2247}
2248
2249void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2250{
2251        struct kvm_lapic *apic = vcpu->arch.apic;
2252
2253        apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
2254                     | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
2255}
2256
2257u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2258{
2259        u64 tpr;
2260
2261        tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2262
2263        return (tpr & 0xf0) >> 4;
2264}
2265
2266void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2267{
2268        u64 old_value = vcpu->arch.apic_base;
2269        struct kvm_lapic *apic = vcpu->arch.apic;
2270
2271        vcpu->arch.apic_base = value;
2272
2273        if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2274                kvm_update_cpuid_runtime(vcpu);
2275
2276        if (!apic)
2277                return;
2278
2279        /* update jump label if enable bit changes */
2280        if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2281                if (value & MSR_IA32_APICBASE_ENABLE) {
2282                        kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2283                        static_branch_slow_dec_deferred(&apic_hw_disabled);
2284                        /* Check if there are APF page ready requests pending */
2285                        kvm_make_request(KVM_REQ_APF_READY, vcpu);
2286                } else {
2287                        static_branch_inc(&apic_hw_disabled.key);
2288                        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2289                }
2290        }
2291
2292        if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
2293                kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2294
2295        if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
2296                static_call(kvm_x86_set_virtual_apic_mode)(vcpu);
2297
2298        apic->base_address = apic->vcpu->arch.apic_base &
2299                             MSR_IA32_APICBASE_BASE;
2300
2301        if ((value & MSR_IA32_APICBASE_ENABLE) &&
2302             apic->base_address != APIC_DEFAULT_PHYS_BASE)
2303                pr_warn_once("APIC base relocation is unsupported by KVM");
2304}
2305
2306void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
2307{
2308        struct kvm_lapic *apic = vcpu->arch.apic;
2309
2310        if (vcpu->arch.apicv_active) {
2311                /* irr_pending is always true when apicv is activated. */
2312                apic->irr_pending = true;
2313                apic->isr_count = 1;
2314        } else {
2315                apic->irr_pending = (apic_search_irr(apic) != -1);
2316                apic->isr_count = count_vectors(apic->regs + APIC_ISR);
2317        }
2318}
2319EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
2320
2321void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2322{
2323        struct kvm_lapic *apic = vcpu->arch.apic;
2324        u64 msr_val;
2325        int i;
2326
2327        if (!init_event) {
2328                msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
2329                if (kvm_vcpu_is_reset_bsp(vcpu))
2330                        msr_val |= MSR_IA32_APICBASE_BSP;
2331                kvm_lapic_set_base(vcpu, msr_val);
2332        }
2333
2334        if (!apic)
2335                return;
2336
2337        /* Stop the timer in case it's a reset to an active apic */
2338        hrtimer_cancel(&apic->lapic_timer.timer);
2339
2340        /* The xAPIC ID is set at RESET even if the APIC was already enabled. */
2341        if (!init_event)
2342                kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2343        kvm_apic_set_version(apic->vcpu);
2344
2345        for (i = 0; i < KVM_APIC_LVT_NUM; i++)
2346                kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
2347        apic_update_lvtt(apic);
2348        if (kvm_vcpu_is_reset_bsp(vcpu) &&
2349            kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2350                kvm_lapic_set_reg(apic, APIC_LVT0,
2351                             SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2352        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2353
2354        kvm_apic_set_dfr(apic, 0xffffffffU);
2355        apic_set_spiv(apic, 0xff);
2356        kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2357        if (!apic_x2apic_mode(apic))
2358                kvm_apic_set_ldr(apic, 0);
2359        kvm_lapic_set_reg(apic, APIC_ESR, 0);
2360        kvm_lapic_set_reg(apic, APIC_ICR, 0);
2361        kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2362        kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2363        kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2364        for (i = 0; i < 8; i++) {
2365                kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2366                kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2367                kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2368        }
2369        kvm_apic_update_apicv(vcpu);
2370        apic->highest_isr_cache = -1;
2371        update_divide_count(apic);
2372        atomic_set(&apic->lapic_timer.pending, 0);
2373
2374        vcpu->arch.pv_eoi.msr_val = 0;
2375        apic_update_ppr(apic);
2376        if (vcpu->arch.apicv_active) {
2377                static_call(kvm_x86_apicv_post_state_restore)(vcpu);
2378                static_call(kvm_x86_hwapic_irr_update)(vcpu, -1);
2379                static_call(kvm_x86_hwapic_isr_update)(vcpu, -1);
2380        }
2381
2382        vcpu->arch.apic_arb_prio = 0;
2383        vcpu->arch.apic_attention = 0;
2384
2385        kvm_recalculate_apic_map(vcpu->kvm);
2386}
2387
2388/*
2389 *----------------------------------------------------------------------
2390 * timer interface
2391 *----------------------------------------------------------------------
2392 */
2393
2394static bool lapic_is_periodic(struct kvm_lapic *apic)
2395{
2396        return apic_lvtt_period(apic);
2397}
2398
2399int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2400{
2401        struct kvm_lapic *apic = vcpu->arch.apic;
2402
2403        if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2404                return atomic_read(&apic->lapic_timer.pending);
2405
2406        return 0;
2407}
2408
2409int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2410{
2411        u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2412        int vector, mode, trig_mode;
2413
2414        if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2415                vector = reg & APIC_VECTOR_MASK;
2416                mode = reg & APIC_MODE_MASK;
2417                trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2418                return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
2419                                        NULL);
2420        }
2421        return 0;
2422}
2423
2424void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2425{
2426        struct kvm_lapic *apic = vcpu->arch.apic;
2427
2428        if (apic)
2429                kvm_apic_local_deliver(apic, APIC_LVT0);
2430}
2431
2432static const struct kvm_io_device_ops apic_mmio_ops = {
2433        .read     = apic_mmio_read,
2434        .write    = apic_mmio_write,
2435};
2436
2437static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2438{
2439        struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2440        struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2441
2442        apic_timer_expired(apic, true);
2443
2444        if (lapic_is_periodic(apic)) {
2445                advance_periodic_target_expiration(apic);
2446                hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2447                return HRTIMER_RESTART;
2448        } else
2449                return HRTIMER_NORESTART;
2450}
2451
2452int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
2453{
2454        struct kvm_lapic *apic;
2455
2456        ASSERT(vcpu != NULL);
2457
2458        apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2459        if (!apic)
2460                goto nomem;
2461
2462        vcpu->arch.apic = apic;
2463
2464        apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
2465        if (!apic->regs) {
2466                printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
2467                       vcpu->vcpu_id);
2468                goto nomem_free_apic;
2469        }
2470        apic->vcpu = vcpu;
2471
2472        hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2473                     HRTIMER_MODE_ABS_HARD);
2474        apic->lapic_timer.timer.function = apic_timer_fn;
2475        if (timer_advance_ns == -1) {
2476                apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
2477                lapic_timer_advance_dynamic = true;
2478        } else {
2479                apic->lapic_timer.timer_advance_ns = timer_advance_ns;
2480                lapic_timer_advance_dynamic = false;
2481        }
2482
2483        /*
2484         * Stuff the APIC ENABLE bit in lieu of temporarily incrementing
2485         * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset().
2486         */
2487        vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2488        static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2489        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2490
2491        return 0;
2492nomem_free_apic:
2493        kfree(apic);
2494        vcpu->arch.apic = NULL;
2495nomem:
2496        return -ENOMEM;
2497}
2498
2499int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2500{
2501        struct kvm_lapic *apic = vcpu->arch.apic;
2502        u32 ppr;
2503
2504        if (!kvm_apic_present(vcpu))
2505                return -1;
2506
2507        __apic_update_ppr(apic, &ppr);
2508        return apic_has_interrupt_for_ppr(apic, ppr);
2509}
2510EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
2511
2512int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2513{
2514        u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2515
2516        if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2517                return 1;
2518        if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2519            GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2520                return 1;
2521        return 0;
2522}
2523
2524void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2525{
2526        struct kvm_lapic *apic = vcpu->arch.apic;
2527
2528        if (atomic_read(&apic->lapic_timer.pending) > 0) {
2529                kvm_apic_inject_pending_timer_irqs(apic);
2530                atomic_set(&apic->lapic_timer.pending, 0);
2531        }
2532}
2533
2534int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2535{
2536        int vector = kvm_apic_has_interrupt(vcpu);
2537        struct kvm_lapic *apic = vcpu->arch.apic;
2538        u32 ppr;
2539
2540        if (vector == -1)
2541                return -1;
2542
2543        /*
2544         * We get here even with APIC virtualization enabled, if doing
2545         * nested virtualization and L1 runs with the "acknowledge interrupt
2546         * on exit" mode.  Then we cannot inject the interrupt via RVI,
2547         * because the process would deliver it through the IDT.
2548         */
2549
2550        apic_clear_irr(vector, apic);
2551        if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) {
2552                /*
2553                 * For auto-EOI interrupts, there might be another pending
2554                 * interrupt above PPR, so check whether to raise another
2555                 * KVM_REQ_EVENT.
2556                 */
2557                apic_update_ppr(apic);
2558        } else {
2559                /*
2560                 * For normal interrupts, PPR has been raised and there cannot
2561                 * be a higher-priority pending interrupt---except if there was
2562                 * a concurrent interrupt injection, but that would have
2563                 * triggered KVM_REQ_EVENT already.
2564                 */
2565                apic_set_isr(vector, apic);
2566                __apic_update_ppr(apic, &ppr);
2567        }
2568
2569        return vector;
2570}
2571
2572static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2573                struct kvm_lapic_state *s, bool set)
2574{
2575        if (apic_x2apic_mode(vcpu->arch.apic)) {
2576                u32 *id = (u32 *)(s->regs + APIC_ID);
2577                u32 *ldr = (u32 *)(s->regs + APIC_LDR);
2578
2579                if (vcpu->kvm->arch.x2apic_format) {
2580                        if (*id != vcpu->vcpu_id)
2581                                return -EINVAL;
2582                } else {
2583                        if (set)
2584                                *id >>= 24;
2585                        else
2586                                *id <<= 24;
2587                }
2588
2589                /* In x2APIC mode, the LDR is fixed and based on the id */
2590                if (set)
2591                        *ldr = kvm_apic_calc_x2apic_ldr(*id);
2592        }
2593
2594        return 0;
2595}
2596
2597int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2598{
2599        memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2600
2601        /*
2602         * Get calculated timer current count for remaining timer period (if
2603         * any) and store it in the returned register set.
2604         */
2605        __kvm_lapic_set_reg(s->regs, APIC_TMCCT,
2606                            __apic_read(vcpu->arch.apic, APIC_TMCCT));
2607
2608        return kvm_apic_state_fixup(vcpu, s, false);
2609}
2610
2611int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2612{
2613        struct kvm_lapic *apic = vcpu->arch.apic;
2614        int r;
2615
2616        kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2617        /* set SPIV separately to get count of SW disabled APICs right */
2618        apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2619
2620        r = kvm_apic_state_fixup(vcpu, s, true);
2621        if (r) {
2622                kvm_recalculate_apic_map(vcpu->kvm);
2623                return r;
2624        }
2625        memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
2626
2627        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2628        kvm_recalculate_apic_map(vcpu->kvm);
2629        kvm_apic_set_version(vcpu);
2630
2631        apic_update_ppr(apic);
2632        hrtimer_cancel(&apic->lapic_timer.timer);
2633        apic->lapic_timer.expired_tscdeadline = 0;
2634        apic_update_lvtt(apic);
2635        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2636        update_divide_count(apic);
2637        __start_apic_timer(apic, APIC_TMCCT);
2638        kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
2639        kvm_apic_update_apicv(vcpu);
2640        apic->highest_isr_cache = -1;
2641        if (vcpu->arch.apicv_active) {
2642                static_call(kvm_x86_apicv_post_state_restore)(vcpu);
2643                static_call(kvm_x86_hwapic_irr_update)(vcpu,
2644                                apic_find_highest_irr(apic));
2645                static_call(kvm_x86_hwapic_isr_update)(vcpu,
2646                                apic_find_highest_isr(apic));
2647        }
2648        kvm_make_request(KVM_REQ_EVENT, vcpu);
2649        if (ioapic_in_kernel(vcpu->kvm))
2650                kvm_rtc_eoi_tracking_restore_one(vcpu);
2651
2652        vcpu->arch.apic_arb_prio = 0;
2653
2654        return 0;
2655}
2656
2657void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2658{
2659        struct hrtimer *timer;
2660
2661        if (!lapic_in_kernel(vcpu) ||
2662                kvm_can_post_timer_interrupt(vcpu))
2663                return;
2664
2665        timer = &vcpu->arch.apic->lapic_timer.timer;
2666        if (hrtimer_cancel(timer))
2667                hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
2668}
2669
2670/*
2671 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2672 *
2673 * Detect whether guest triggered PV EOI since the
2674 * last entry. If yes, set EOI on guests's behalf.
2675 * Clear PV EOI in guest memory in any case.
2676 */
2677static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2678                                        struct kvm_lapic *apic)
2679{
2680        bool pending;
2681        int vector;
2682        /*
2683         * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2684         * and KVM_PV_EOI_ENABLED in guest memory as follows:
2685         *
2686         * KVM_APIC_PV_EOI_PENDING is unset:
2687         *      -> host disabled PV EOI.
2688         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2689         *      -> host enabled PV EOI, guest did not execute EOI yet.
2690         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2691         *      -> host enabled PV EOI, guest executed EOI.
2692         */
2693        BUG_ON(!pv_eoi_enabled(vcpu));
2694        pending = pv_eoi_get_pending(vcpu);
2695        /*
2696         * Clear pending bit in any case: it will be set again on vmentry.
2697         * While this might not be ideal from performance point of view,
2698         * this makes sure pv eoi is only enabled when we know it's safe.
2699         */
2700        pv_eoi_clr_pending(vcpu);
2701        if (pending)
2702                return;
2703        vector = apic_set_eoi(apic);
2704        trace_kvm_pv_eoi(apic, vector);
2705}
2706
2707void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2708{
2709        u32 data;
2710
2711        if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2712                apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2713
2714        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2715                return;
2716
2717        if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2718                                  sizeof(u32)))
2719                return;
2720
2721        apic_set_tpr(vcpu->arch.apic, data & 0xff);
2722}
2723
2724/*
2725 * apic_sync_pv_eoi_to_guest - called before vmentry
2726 *
2727 * Detect whether it's safe to enable PV EOI and
2728 * if yes do so.
2729 */
2730static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2731                                        struct kvm_lapic *apic)
2732{
2733        if (!pv_eoi_enabled(vcpu) ||
2734            /* IRR set or many bits in ISR: could be nested. */
2735            apic->irr_pending ||
2736            /* Cache not set: could be safe but we don't bother. */
2737            apic->highest_isr_cache == -1 ||
2738            /* Need EOI to update ioapic. */
2739            kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2740                /*
2741                 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2742                 * so we need not do anything here.
2743                 */
2744                return;
2745        }
2746
2747        pv_eoi_set_pending(apic->vcpu);
2748}
2749
2750void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2751{
2752        u32 data, tpr;
2753        int max_irr, max_isr;
2754        struct kvm_lapic *apic = vcpu->arch.apic;
2755
2756        apic_sync_pv_eoi_to_guest(vcpu, apic);
2757
2758        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2759                return;
2760
2761        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2762        max_irr = apic_find_highest_irr(apic);
2763        if (max_irr < 0)
2764                max_irr = 0;
2765        max_isr = apic_find_highest_isr(apic);
2766        if (max_isr < 0)
2767                max_isr = 0;
2768        data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2769
2770        kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2771                                sizeof(u32));
2772}
2773
2774int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2775{
2776        if (vapic_addr) {
2777                if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2778                                        &vcpu->arch.apic->vapic_cache,
2779                                        vapic_addr, sizeof(u32)))
2780                        return -EINVAL;
2781                __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2782        } else {
2783                __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2784        }
2785
2786        vcpu->arch.apic->vapic_addr = vapic_addr;
2787        return 0;
2788}
2789
2790int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2791{
2792        struct kvm_lapic *apic = vcpu->arch.apic;
2793        u32 reg = (msr - APIC_BASE_MSR) << 4;
2794
2795        if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2796                return 1;
2797
2798        if (reg == APIC_ICR2)
2799                return 1;
2800
2801        /* if this is ICR write vector before command */
2802        if (reg == APIC_ICR)
2803                kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2804        return kvm_lapic_reg_write(apic, reg, (u32)data);
2805}
2806
2807int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2808{
2809        struct kvm_lapic *apic = vcpu->arch.apic;
2810        u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
2811
2812        if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2813                return 1;
2814
2815        if (reg == APIC_DFR || reg == APIC_ICR2)
2816                return 1;
2817
2818        if (kvm_lapic_reg_read(apic, reg, 4, &low))
2819                return 1;
2820        if (reg == APIC_ICR)
2821                kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2822
2823        *data = (((u64)high) << 32) | low;
2824
2825        return 0;
2826}
2827
2828int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2829{
2830        struct kvm_lapic *apic = vcpu->arch.apic;
2831
2832        if (!lapic_in_kernel(vcpu))
2833                return 1;
2834
2835        /* if this is ICR write vector before command */
2836        if (reg == APIC_ICR)
2837                kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2838        return kvm_lapic_reg_write(apic, reg, (u32)data);
2839}
2840
2841int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2842{
2843        struct kvm_lapic *apic = vcpu->arch.apic;
2844        u32 low, high = 0;
2845
2846        if (!lapic_in_kernel(vcpu))
2847                return 1;
2848
2849        if (kvm_lapic_reg_read(apic, reg, 4, &low))
2850                return 1;
2851        if (reg == APIC_ICR)
2852                kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2853
2854        *data = (((u64)high) << 32) | low;
2855
2856        return 0;
2857}
2858
2859int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
2860{
2861        u64 addr = data & ~KVM_MSR_ENABLED;
2862        struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
2863        unsigned long new_len;
2864
2865        if (!IS_ALIGNED(addr, 4))
2866                return 1;
2867
2868        vcpu->arch.pv_eoi.msr_val = data;
2869        if (!pv_eoi_enabled(vcpu))
2870                return 0;
2871
2872        if (addr == ghc->gpa && len <= ghc->len)
2873                new_len = ghc->len;
2874        else
2875                new_len = len;
2876
2877        return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
2878}
2879
2880int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2881{
2882        struct kvm_lapic *apic = vcpu->arch.apic;
2883        u8 sipi_vector;
2884        int r;
2885        unsigned long pe;
2886
2887        if (!lapic_in_kernel(vcpu))
2888                return 0;
2889
2890        /*
2891         * Read pending events before calling the check_events
2892         * callback.
2893         */
2894        pe = smp_load_acquire(&apic->pending_events);
2895        if (!pe)
2896                return 0;
2897
2898        if (is_guest_mode(vcpu)) {
2899                r = kvm_check_nested_events(vcpu);
2900                if (r < 0)
2901                        return r == -EBUSY ? 0 : r;
2902                /*
2903                 * If an event has happened and caused a vmexit,
2904                 * we know INITs are latched and therefore
2905                 * we will not incorrectly deliver an APIC
2906                 * event instead of a vmexit.
2907                 */
2908        }
2909
2910        /*
2911         * INITs are latched while CPU is in specific states
2912         * (SMM, VMX root mode, SVM with GIF=0).
2913         * Because a CPU cannot be in these states immediately
2914         * after it has processed an INIT signal (and thus in
2915         * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
2916         * and leave the INIT pending.
2917         */
2918        if (kvm_vcpu_latch_init(vcpu)) {
2919                WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
2920                if (test_bit(KVM_APIC_SIPI, &pe))
2921                        clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2922                return 0;
2923        }
2924
2925        if (test_bit(KVM_APIC_INIT, &pe)) {
2926                clear_bit(KVM_APIC_INIT, &apic->pending_events);
2927                kvm_vcpu_reset(vcpu, true);
2928                if (kvm_vcpu_is_bsp(apic->vcpu))
2929                        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2930                else
2931                        vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
2932        }
2933        if (test_bit(KVM_APIC_SIPI, &pe)) {
2934                clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2935                if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
2936                        /* evaluate pending_events before reading the vector */
2937                        smp_rmb();
2938                        sipi_vector = apic->sipi_vector;
2939                        kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, sipi_vector);
2940                        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2941                }
2942        }
2943        return 0;
2944}
2945
2946void kvm_lapic_exit(void)
2947{
2948        static_key_deferred_flush(&apic_hw_disabled);
2949        WARN_ON(static_branch_unlikely(&apic_hw_disabled.key));
2950        static_key_deferred_flush(&apic_sw_disabled);
2951        WARN_ON(static_branch_unlikely(&apic_sw_disabled.key));
2952}
2953