linux/arch/x86/kvm/lapic.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2
   3/*
   4 * Local APIC virtualization
   5 *
   6 * Copyright (C) 2006 Qumranet, Inc.
   7 * Copyright (C) 2007 Novell
   8 * Copyright (C) 2007 Intel
   9 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
  10 *
  11 * Authors:
  12 *   Dor Laor <dor.laor@qumranet.com>
  13 *   Gregory Haskins <ghaskins@novell.com>
  14 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
  15 *
  16 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
  17 */
  18
  19#include <linux/kvm_host.h>
  20#include <linux/kvm.h>
  21#include <linux/mm.h>
  22#include <linux/highmem.h>
  23#include <linux/smp.h>
  24#include <linux/hrtimer.h>
  25#include <linux/io.h>
  26#include <linux/export.h>
  27#include <linux/math64.h>
  28#include <linux/slab.h>
  29#include <asm/processor.h>
  30#include <asm/msr.h>
  31#include <asm/page.h>
  32#include <asm/current.h>
  33#include <asm/apicdef.h>
  34#include <asm/delay.h>
  35#include <linux/atomic.h>
  36#include <linux/jump_label.h>
  37#include "kvm_cache_regs.h"
  38#include "irq.h"
  39#include "trace.h"
  40#include "x86.h"
  41#include "cpuid.h"
  42#include "hyperv.h"
  43
  44#ifndef CONFIG_X86_64
  45#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
  46#else
  47#define mod_64(x, y) ((x) % (y))
  48#endif
  49
  50#define PRId64 "d"
  51#define PRIx64 "llx"
  52#define PRIu64 "u"
  53#define PRIo64 "o"
  54
   55/* 14 is the version for Xeon and Pentium 8.4.8 */
  56#define APIC_VERSION                    (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
  57#define LAPIC_MMIO_LENGTH               (1 << 12)
   58/* the following defines are not in apicdef.h */
  59#define APIC_SHORT_MASK                 0xc0000
  60#define APIC_DEST_NOSHORT               0x0
  61#define APIC_DEST_MASK                  0x800
  62#define MAX_APIC_VECTOR                 256
  63#define APIC_VECTORS_PER_REG            32
  64
  65#define APIC_BROADCAST                  0xFF
  66#define X2APIC_BROADCAST                0xFFFFFFFFul
  67
  68#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
  69#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000
  70/* step-by-step approximation to mitigate fluctuation */
  71#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
  72
  73static inline int apic_test_vector(int vec, void *bitmap)
  74{
  75        return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  76}
  77
  78bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
  79{
  80        struct kvm_lapic *apic = vcpu->arch.apic;
  81
  82        return apic_test_vector(vector, apic->regs + APIC_ISR) ||
  83                apic_test_vector(vector, apic->regs + APIC_IRR);
  84}
  85
  86static inline int __apic_test_and_set_vector(int vec, void *bitmap)
  87{
  88        return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  89}
  90
  91static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
  92{
  93        return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  94}
  95
  96struct static_key_deferred apic_hw_disabled __read_mostly;
  97struct static_key_deferred apic_sw_disabled __read_mostly;
  98
  99static inline int apic_enabled(struct kvm_lapic *apic)
 100{
  101        return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
 102}
 103
 104#define LVT_MASK        \
 105        (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
 106
 107#define LINT_MASK       \
 108        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 109         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 110
 111static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
 112{
 113        return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
 114}
 115
 116static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
 117{
 118        return apic->vcpu->vcpu_id;
 119}
 120
 121bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
 122{
 123        return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
 124}
 125EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);
 126
 127static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
 128{
 129        return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
 130}
 131
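/*
 * Translate a logical destination ID into a pointer to the matching cluster
 * of the APIC map and a bitmask of the cluster members that @dest_id
 * addresses.  Returns false when the map mode is not one this fast path can
 * handle.
 */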
 132static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
 133                u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
 134        switch (map->mode) {
 135        case KVM_APIC_MODE_X2APIC: {
 136                u32 offset = (dest_id >> 16) * 16;
 137                u32 max_apic_id = map->max_apic_id;
 138
 139                if (offset <= max_apic_id) {
 140                        u8 cluster_size = min(max_apic_id - offset + 1, 16U);
 141
 142                        offset = array_index_nospec(offset, map->max_apic_id + 1);
 143                        *cluster = &map->phys_map[offset];
 144                        *mask = dest_id & (0xffff >> (16 - cluster_size));
 145                } else {
 146                        *mask = 0;
 147                }
 148
 149                return true;
 150                }
 151        case KVM_APIC_MODE_XAPIC_FLAT:
 152                *cluster = map->xapic_flat_map;
 153                *mask = dest_id & 0xff;
 154                return true;
 155        case KVM_APIC_MODE_XAPIC_CLUSTER:
 156                *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
 157                *mask = dest_id & 0xf;
 158                return true;
 159        default:
 160                /* Not optimized. */
 161                return false;
 162        }
 163}
 164
 165static void kvm_apic_map_free(struct rcu_head *rcu)
 166{
 167        struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
 168
 169        kvfree(map);
 170}
 171
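/*
 * Rebuild kvm->arch.apic_map, the lookup table used for fast interrupt
 * delivery: size it for the largest APIC ID, fill in the physical map
 * (including the hotplug wrap-around handling below) and the logical
 * clusters, then publish the new map via RCU and free the old one after a
 * grace period.
 */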
 172static void recalculate_apic_map(struct kvm *kvm)
 173{
 174        struct kvm_apic_map *new, *old = NULL;
 175        struct kvm_vcpu *vcpu;
 176        int i;
 177        u32 max_id = 255; /* enough space for any xAPIC ID */
 178
 179        mutex_lock(&kvm->arch.apic_map_lock);
 180
 181        kvm_for_each_vcpu(i, vcpu, kvm)
 182                if (kvm_apic_present(vcpu))
 183                        max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
 184
 185        new = kvzalloc(sizeof(struct kvm_apic_map) +
 186                           sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
 187                           GFP_KERNEL_ACCOUNT);
 188
 189        if (!new)
 190                goto out;
 191
 192        new->max_apic_id = max_id;
 193
 194        kvm_for_each_vcpu(i, vcpu, kvm) {
 195                struct kvm_lapic *apic = vcpu->arch.apic;
 196                struct kvm_lapic **cluster;
 197                u16 mask;
 198                u32 ldr;
 199                u8 xapic_id;
 200                u32 x2apic_id;
 201
 202                if (!kvm_apic_present(vcpu))
 203                        continue;
 204
 205                xapic_id = kvm_xapic_id(apic);
 206                x2apic_id = kvm_x2apic_id(apic);
 207
 208                /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
 209                if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
 210                                x2apic_id <= new->max_apic_id)
 211                        new->phys_map[x2apic_id] = apic;
 212                /*
 213                 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
 214                 * prevent them from masking VCPUs with APIC ID <= 0xff.
 215                 */
 216                if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
 217                        new->phys_map[xapic_id] = apic;
 218
 219                if (!kvm_apic_sw_enabled(apic))
 220                        continue;
 221
 222                ldr = kvm_lapic_get_reg(apic, APIC_LDR);
 223
 224                if (apic_x2apic_mode(apic)) {
 225                        new->mode |= KVM_APIC_MODE_X2APIC;
 226                } else if (ldr) {
 227                        ldr = GET_APIC_LOGICAL_ID(ldr);
 228                        if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
 229                                new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
 230                        else
 231                                new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
 232                }
 233
 234                if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
 235                        continue;
 236
 237                if (mask)
 238                        cluster[ffs(mask) - 1] = apic;
 239        }
 240out:
 241        old = rcu_dereference_protected(kvm->arch.apic_map,
 242                        lockdep_is_held(&kvm->arch.apic_map_lock));
 243        rcu_assign_pointer(kvm->arch.apic_map, new);
 244        mutex_unlock(&kvm->arch.apic_map_lock);
 245
 246        if (old)
 247                call_rcu(&old->rcu, kvm_apic_map_free);
 248
 249        kvm_make_scan_ioapic_request(kvm);
 250}
 251
 252static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 253{
 254        bool enabled = val & APIC_SPIV_APIC_ENABLED;
 255
 256        kvm_lapic_set_reg(apic, APIC_SPIV, val);
 257
 258        if (enabled != apic->sw_enabled) {
 259                apic->sw_enabled = enabled;
 260                if (enabled)
 261                        static_key_slow_dec_deferred(&apic_sw_disabled);
 262                else
 263                        static_key_slow_inc(&apic_sw_disabled.key);
 264
 265                recalculate_apic_map(apic->vcpu->kvm);
 266        }
 267}
 268
 269static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
 270{
 271        kvm_lapic_set_reg(apic, APIC_ID, id << 24);
 272        recalculate_apic_map(apic->vcpu->kvm);
 273}
 274
 275static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
 276{
 277        kvm_lapic_set_reg(apic, APIC_LDR, id);
 278        recalculate_apic_map(apic->vcpu->kvm);
 279}
 280
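/*
 * In x2APIC mode the logical ID is derived from the APIC ID: the cluster
 * (id >> 4) goes in bits 31:16 and a single bit (id & 0xf) selects the
 * member within the 16-CPU cluster.  For example, id 0x23 yields LDR
 * 0x00020008.
 */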
 281static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
 282{
 283        return ((id >> 4) << 16) | (1 << (id & 0xf));
 284}
 285
 286static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
 287{
 288        u32 ldr = kvm_apic_calc_x2apic_ldr(id);
 289
 290        WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
 291
 292        kvm_lapic_set_reg(apic, APIC_ID, id);
 293        kvm_lapic_set_reg(apic, APIC_LDR, ldr);
 294        recalculate_apic_map(apic->vcpu->kvm);
 295}
 296
 297static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 298{
 299        return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
 300}
 301
 302static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 303{
 304        return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
 305}
 306
 307static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 308{
 309        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
 310}
 311
 312static inline int apic_lvtt_period(struct kvm_lapic *apic)
 313{
 314        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
 315}
 316
 317static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 318{
 319        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
 320}
 321
 322static inline int apic_lvt_nmi_mode(u32 lvt_val)
 323{
 324        return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
 325}
 326
 327void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 328{
 329        struct kvm_lapic *apic = vcpu->arch.apic;
 330        struct kvm_cpuid_entry2 *feat;
 331        u32 v = APIC_VERSION;
 332
 333        if (!lapic_in_kernel(vcpu))
 334                return;
 335
 336        /*
  337         * KVM emulates the 82093AA datasheet (with the in-kernel IOAPIC
  338         * implementation), which doesn't have an EOI register.  Some buggy
  339         * OSes (e.g. Windows with the Hyper-V role) disable EOI broadcast in
  340         * the LAPIC without checking the IOAPIC version first, so
  341         * level-triggered interrupts never get EOIed in the IOAPIC.
 342         */
 343        feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
 344        if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
 345            !ioapic_in_kernel(vcpu->kvm))
 346                v |= APIC_LVR_DIRECTED_EOI;
 347        kvm_lapic_set_reg(apic, APIC_LVR, v);
 348}
 349
 350static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
  351        LVT_MASK,       /* partial LVTT mask; timer mode mask added at runtime */
 352        LVT_MASK | APIC_MODE_MASK,      /* LVTTHMR */
 353        LVT_MASK | APIC_MODE_MASK,      /* LVTPC */
 354        LINT_MASK, LINT_MASK,   /* LVT0-1 */
 355        LVT_MASK                /* LVTERR */
 356};
 357
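/*
 * Scan a 256-bit IRR/ISR-style bitmap (eight 32-bit registers spaced 0x10
 * apart) from the top down and return the highest vector that is set, or -1
 * if the bitmap is empty.
 */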
 358static int find_highest_vector(void *bitmap)
 359{
 360        int vec;
 361        u32 *reg;
 362
 363        for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
 364             vec >= 0; vec -= APIC_VECTORS_PER_REG) {
 365                reg = bitmap + REG_POS(vec);
 366                if (*reg)
 367                        return __fls(*reg) + vec;
 368        }
 369
 370        return -1;
 371}
 372
 373static u8 count_vectors(void *bitmap)
 374{
 375        int vec;
 376        u32 *reg;
 377        u8 count = 0;
 378
 379        for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
 380                reg = bitmap + REG_POS(vec);
 381                count += hweight32(*reg);
 382        }
 383
 384        return count;
 385}
 386
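/*
 * Move pending vectors from the posted-interrupt request bitmap into the IRR
 * of the virtual APIC page.  *max_irr returns the highest vector now pending
 * in the IRR; the function returns true if that vector is one that was just
 * transferred from the PIR.
 */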
 387bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
 388{
 389        u32 i, vec;
 390        u32 pir_val, irr_val, prev_irr_val;
 391        int max_updated_irr;
 392
 393        max_updated_irr = -1;
 394        *max_irr = -1;
 395
 396        for (i = vec = 0; i <= 7; i++, vec += 32) {
 397                pir_val = READ_ONCE(pir[i]);
 398                irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
 399                if (pir_val) {
 400                        prev_irr_val = irr_val;
 401                        irr_val |= xchg(&pir[i], 0);
 402                        *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
 403                        if (prev_irr_val != irr_val) {
 404                                max_updated_irr =
 405                                        __fls(irr_val ^ prev_irr_val) + vec;
 406                        }
 407                }
 408                if (irr_val)
 409                        *max_irr = __fls(irr_val) + vec;
 410        }
 411
 412        return ((max_updated_irr != -1) &&
 413                (max_updated_irr == *max_irr));
 414}
 415EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
 416
 417bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
 418{
 419        struct kvm_lapic *apic = vcpu->arch.apic;
 420
 421        return __kvm_apic_update_irr(pir, apic->regs, max_irr);
 422}
 423EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 424
 425static inline int apic_search_irr(struct kvm_lapic *apic)
 426{
 427        return find_highest_vector(apic->regs + APIC_IRR);
 428}
 429
 430static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 431{
 432        int result;
 433
 434        /*
  435         * Note that irr_pending is just a hint. It will always be
  436         * true with virtual interrupt delivery enabled.
 437         */
 438        if (!apic->irr_pending)
 439                return -1;
 440
 441        result = apic_search_irr(apic);
 442        ASSERT(result == -1 || result >= 16);
 443
 444        return result;
 445}
 446
 447static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 448{
 449        struct kvm_vcpu *vcpu;
 450
 451        vcpu = apic->vcpu;
 452
 453        if (unlikely(vcpu->arch.apicv_active)) {
 454                /* need to update RVI */
 455                kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
 456                kvm_x86_ops->hwapic_irr_update(vcpu,
 457                                apic_find_highest_irr(apic));
 458        } else {
 459                apic->irr_pending = false;
 460                kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
 461                if (apic_search_irr(apic) != -1)
 462                        apic->irr_pending = true;
 463        }
 464}
 465
 466static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 467{
 468        struct kvm_vcpu *vcpu;
 469
 470        if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
 471                return;
 472
 473        vcpu = apic->vcpu;
 474
 475        /*
 476         * With APIC virtualization enabled, all caching is disabled
 477         * because the processor can modify ISR under the hood.  Instead
 478         * just set SVI.
 479         */
 480        if (unlikely(vcpu->arch.apicv_active))
 481                kvm_x86_ops->hwapic_isr_update(vcpu, vec);
 482        else {
 483                ++apic->isr_count;
 484                BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
 485                /*
  486                 * The ISR (in-service register) bit is set when an interrupt is
  487                 * injected.  Only the highest-priority vector is injected, so the
  488                 * most recently set bit matches the highest bit in the ISR.
 489                 */
 490                apic->highest_isr_cache = vec;
 491        }
 492}
 493
 494static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 495{
 496        int result;
 497
 498        /*
 499         * Note that isr_count is always 1, and highest_isr_cache
 500         * is always -1, with APIC virtualization enabled.
 501         */
 502        if (!apic->isr_count)
 503                return -1;
 504        if (likely(apic->highest_isr_cache != -1))
 505                return apic->highest_isr_cache;
 506
 507        result = find_highest_vector(apic->regs + APIC_ISR);
 508        ASSERT(result == -1 || result >= 16);
 509
 510        return result;
 511}
 512
 513static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 514{
 515        struct kvm_vcpu *vcpu;
 516        if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
 517                return;
 518
 519        vcpu = apic->vcpu;
 520
 521        /*
 522         * We do get here for APIC virtualization enabled if the guest
 523         * uses the Hyper-V APIC enlightenment.  In this case we may need
 524         * to trigger a new interrupt delivery by writing the SVI field;
 525         * on the other hand isr_count and highest_isr_cache are unused
 526         * and must be left alone.
 527         */
 528        if (unlikely(vcpu->arch.apicv_active))
 529                kvm_x86_ops->hwapic_isr_update(vcpu,
 530                                               apic_find_highest_isr(apic));
 531        else {
 532                --apic->isr_count;
 533                BUG_ON(apic->isr_count < 0);
 534                apic->highest_isr_cache = -1;
 535        }
 536}
 537
 538int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 539{
  540        /* This may race with setting of irr in __apic_accept_irq() and the
  541         * value returned may be stale, but kvm_vcpu_kick() in __apic_accept_irq
  542         * will cause an immediate vmexit and the value will be recalculated
  543         * on the next vmentry.
 544         */
 545        return apic_find_highest_irr(vcpu->arch.apic);
 546}
 547EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 548
 549static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 550                             int vector, int level, int trig_mode,
 551                             struct dest_map *dest_map);
 552
 553int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 554                     struct dest_map *dest_map)
 555{
 556        struct kvm_lapic *apic = vcpu->arch.apic;
 557
 558        return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
 559                        irq->level, irq->trig_mode, dest_map);
 560}
 561
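/*
 * Send the IPI described by @icr to the vCPUs selected by the two bitmaps:
 * @min is the APIC ID matching bit 0 of @ipi_bitmap_low, and
 * @ipi_bitmap_high covers the next 32 or 64 IDs depending on @op_64_bit.
 * Returns the number of interrupts delivered, or a negative error code.
 */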
 562int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 563                    unsigned long ipi_bitmap_high, u32 min,
 564                    unsigned long icr, int op_64_bit)
 565{
 566        int i;
 567        struct kvm_apic_map *map;
 568        struct kvm_vcpu *vcpu;
 569        struct kvm_lapic_irq irq = {0};
 570        int cluster_size = op_64_bit ? 64 : 32;
 571        int count = 0;
 572
 573        irq.vector = icr & APIC_VECTOR_MASK;
 574        irq.delivery_mode = icr & APIC_MODE_MASK;
 575        irq.level = (icr & APIC_INT_ASSERT) != 0;
 576        irq.trig_mode = icr & APIC_INT_LEVELTRIG;
 577
 578        if (icr & APIC_DEST_MASK)
 579                return -KVM_EINVAL;
 580        if (icr & APIC_SHORT_MASK)
 581                return -KVM_EINVAL;
 582
 583        rcu_read_lock();
 584        map = rcu_dereference(kvm->arch.apic_map);
 585
 586        if (unlikely(!map)) {
 587                count = -EOPNOTSUPP;
 588                goto out;
 589        }
 590
 591        if (min > map->max_apic_id)
 592                goto out;
 593        /* Bits above cluster_size are masked in the caller.  */
 594        for_each_set_bit(i, &ipi_bitmap_low,
 595                min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
 596                if (map->phys_map[min + i]) {
 597                        vcpu = map->phys_map[min + i]->vcpu;
 598                        count += kvm_apic_set_irq(vcpu, &irq, NULL);
 599                }
 600        }
 601
 602        min += cluster_size;
 603
 604        if (min > map->max_apic_id)
 605                goto out;
 606
 607        for_each_set_bit(i, &ipi_bitmap_high,
 608                min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
 609                if (map->phys_map[min + i]) {
 610                        vcpu = map->phys_map[min + i]->vcpu;
 611                        count += kvm_apic_set_irq(vcpu, &irq, NULL);
 612                }
 613        }
 614
 615out:
 616        rcu_read_unlock();
 617        return count;
 618}
 619
 620static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
 621{
 622
 623        return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
 624                                      sizeof(val));
 625}
 626
 627static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
 628{
 629
 630        return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
 631                                      sizeof(*val));
 632}
 633
 634static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
 635{
 636        return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
 637}
 638
 639static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 640{
 641        u8 val;
 642        if (pv_eoi_get_user(vcpu, &val) < 0)
 643                printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
 644                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 645        return val & 0x1;
 646}
 647
 648static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 649{
 650        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
 651                printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
 652                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 653                return;
 654        }
 655        __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 656}
 657
 658static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 659{
 660        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
 661                printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
 662                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 663                return;
 664        }
 665        __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 666}
 667
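/*
 * Return the highest pending vector in the IRR if its priority class is
 * above @ppr, otherwise -1.  With APICv active the PIR is synced into the
 * IRR first.
 */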
 668static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
 669{
 670        int highest_irr;
 671        if (apic->vcpu->arch.apicv_active)
 672                highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
 673        else
 674                highest_irr = apic_find_highest_irr(apic);
 675        if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
 676                return -1;
 677        return highest_irr;
 678}
 679
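/*
 * Recompute the processor priority: PPR is the TPR when the TPR's priority
 * class is at least that of the highest in-service vector, otherwise it is
 * that vector's class.  Returns true if the PPR dropped, which may unmask a
 * pending interrupt.
 */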
 680static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
 681{
 682        u32 tpr, isrv, ppr, old_ppr;
 683        int isr;
 684
 685        old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
 686        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
 687        isr = apic_find_highest_isr(apic);
 688        isrv = (isr != -1) ? isr : 0;
 689
 690        if ((tpr & 0xf0) >= (isrv & 0xf0))
 691                ppr = tpr & 0xff;
 692        else
 693                ppr = isrv & 0xf0;
 694
 695        *new_ppr = ppr;
 696        if (old_ppr != ppr)
 697                kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
 698
 699        return ppr < old_ppr;
 700}
 701
 702static void apic_update_ppr(struct kvm_lapic *apic)
 703{
 704        u32 ppr;
 705
 706        if (__apic_update_ppr(apic, &ppr) &&
 707            apic_has_interrupt_for_ppr(apic, ppr) != -1)
 708                kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 709}
 710
 711void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
 712{
 713        apic_update_ppr(vcpu->arch.apic);
 714}
 715EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
 716
 717static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 718{
 719        kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
 720        apic_update_ppr(apic);
 721}
 722
 723static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
 724{
 725        return mda == (apic_x2apic_mode(apic) ?
 726                        X2APIC_BROADCAST : APIC_BROADCAST);
 727}
 728
 729static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
 730{
 731        if (kvm_apic_broadcast(apic, mda))
 732                return true;
 733
 734        if (apic_x2apic_mode(apic))
 735                return mda == kvm_x2apic_id(apic);
 736
 737        /*
 738         * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
 739         * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
 740         * this allows unique addressing of VCPUs with APIC ID over 0xff.
  741         * The 0xff condition is needed because the xAPIC ID is writeable.
 742         */
 743        if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
 744                return true;
 745
 746        return mda == kvm_xapic_id(apic);
 747}
 748
 749static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 750{
 751        u32 logical_id;
 752
 753        if (kvm_apic_broadcast(apic, mda))
 754                return true;
 755
 756        logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
 757
 758        if (apic_x2apic_mode(apic))
 759                return ((logical_id >> 16) == (mda >> 16))
 760                       && (logical_id & mda & 0xffff) != 0;
 761
 762        logical_id = GET_APIC_LOGICAL_ID(logical_id);
 763
 764        switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
 765        case APIC_DFR_FLAT:
 766                return (logical_id & mda) != 0;
 767        case APIC_DFR_CLUSTER:
 768                return ((logical_id >> 4) == (mda >> 4))
 769                       && (logical_id & mda & 0xf) != 0;
 770        default:
 771                return false;
 772        }
 773}
 774
 775/* The KVM local APIC implementation has two quirks:
 776 *
 777 *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 778 *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 779 *    KVM doesn't do that aliasing.
 780 *
 781 *  - in-kernel IOAPIC messages have to be delivered directly to
 782 *    x2APIC, because the kernel does not support interrupt remapping.
 783 *    In order to support broadcast without interrupt remapping, x2APIC
 784 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
 785 *    to X2APIC_BROADCAST.
 786 *
 787 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
 788 * important when userspace wants to use x2APIC-format MSIs, because
 789 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
 790 */
 791static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
 792                struct kvm_lapic *source, struct kvm_lapic *target)
 793{
 794        bool ipi = source != NULL;
 795
 796        if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
 797            !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
 798                return X2APIC_BROADCAST;
 799
 800        return dest_id;
 801}
 802
 803bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 804                           int short_hand, unsigned int dest, int dest_mode)
 805{
 806        struct kvm_lapic *target = vcpu->arch.apic;
 807        u32 mda = kvm_apic_mda(vcpu, dest, source, target);
 808
 809        ASSERT(target);
 810        switch (short_hand) {
 811        case APIC_DEST_NOSHORT:
 812                if (dest_mode == APIC_DEST_PHYSICAL)
 813                        return kvm_apic_match_physical_addr(target, mda);
 814                else
 815                        return kvm_apic_match_logical_addr(target, mda);
 816        case APIC_DEST_SELF:
 817                return target == source;
 818        case APIC_DEST_ALLINC:
 819                return true;
 820        case APIC_DEST_ALLBUT:
 821                return target != source;
 822        default:
 823                return false;
 824        }
 825}
 826EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
 827
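/*
 * Pick the (vector % dest_vcpus)-th set bit of @bitmap (counting from zero);
 * used by the vector hashing code to spread lowest-priority interrupts
 * deterministically across the candidate destinations.
 */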
 828int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
 829                       const unsigned long *bitmap, u32 bitmap_size)
 830{
 831        u32 mod;
 832        int i, idx = -1;
 833
 834        mod = vector % dest_vcpus;
 835
 836        for (i = 0; i <= mod; i++) {
 837                idx = find_next_bit(bitmap, bitmap_size, idx + 1);
 838                BUG_ON(idx == bitmap_size);
 839        }
 840
 841        return idx;
 842}
 843
 844static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
 845{
 846        if (!kvm->arch.disabled_lapic_found) {
 847                kvm->arch.disabled_lapic_found = true;
 848                printk(KERN_INFO
 849                       "Disabled LAPIC found during irq injection\n");
 850        }
 851}
 852
 853static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
 854                struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
 855{
 856        if (kvm->arch.x2apic_broadcast_quirk_disabled) {
 857                if ((irq->dest_id == APIC_BROADCAST &&
 858                                map->mode != KVM_APIC_MODE_X2APIC))
 859                        return true;
 860                if (irq->dest_id == X2APIC_BROADCAST)
 861                        return true;
 862        } else {
 863                bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
 864                if (irq->dest_id == (x2apic_ipi ?
 865                                     X2APIC_BROADCAST : APIC_BROADCAST))
 866                        return true;
 867        }
 868
 869        return false;
 870}
 871
  872/* Return true if the interrupt can be handled by using *bitmap as an index
  873 * mask for valid destinations in the *dst array.
 874 * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
 875 * Note: we may have zero kvm_lapic destinations when we return true, which
 876 * means that the interrupt should be dropped.  In this case, *bitmap would be
 877 * zero and *dst undefined.
 878 */
 879static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
 880                struct kvm_lapic **src, struct kvm_lapic_irq *irq,
 881                struct kvm_apic_map *map, struct kvm_lapic ***dst,
 882                unsigned long *bitmap)
 883{
 884        int i, lowest;
 885
 886        if (irq->shorthand == APIC_DEST_SELF && src) {
 887                *dst = src;
 888                *bitmap = 1;
 889                return true;
 890        } else if (irq->shorthand)
 891                return false;
 892
 893        if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
 894                return false;
 895
 896        if (irq->dest_mode == APIC_DEST_PHYSICAL) {
 897                if (irq->dest_id > map->max_apic_id) {
 898                        *bitmap = 0;
 899                } else {
 900                        u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
 901                        *dst = &map->phys_map[dest_id];
 902                        *bitmap = 1;
 903                }
 904                return true;
 905        }
 906
 907        *bitmap = 0;
 908        if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
 909                                (u16 *)bitmap))
 910                return false;
 911
 912        if (!kvm_lowest_prio_delivery(irq))
 913                return true;
 914
 915        if (!kvm_vector_hashing_enabled()) {
 916                lowest = -1;
 917                for_each_set_bit(i, bitmap, 16) {
 918                        if (!(*dst)[i])
 919                                continue;
 920                        if (lowest < 0)
 921                                lowest = i;
 922                        else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
 923                                                (*dst)[lowest]->vcpu) < 0)
 924                                lowest = i;
 925                }
 926        } else {
 927                if (!*bitmap)
 928                        return true;
 929
 930                lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
 931                                bitmap, 16);
 932
 933                if (!(*dst)[lowest]) {
 934                        kvm_apic_disabled_lapic_found(kvm);
 935                        *bitmap = 0;
 936                        return true;
 937                }
 938        }
 939
 940        *bitmap = (lowest >= 0) ? 1 << lowest : 0;
 941
 942        return true;
 943}
 944
 945bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 946                struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
 947{
 948        struct kvm_apic_map *map;
 949        unsigned long bitmap;
 950        struct kvm_lapic **dst = NULL;
 951        int i;
 952        bool ret;
 953
 954        *r = -1;
 955
 956        if (irq->shorthand == APIC_DEST_SELF) {
 957                *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
 958                return true;
 959        }
 960
 961        rcu_read_lock();
 962        map = rcu_dereference(kvm->arch.apic_map);
 963
 964        ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
 965        if (ret) {
 966                *r = 0;
 967                for_each_set_bit(i, &bitmap, 16) {
 968                        if (!dst[i])
 969                                continue;
 970                        *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 971                }
 972        }
 973
 974        rcu_read_unlock();
 975        return ret;
 976}
 977
 978/*
  979 * This routine tries to handle interrupts in posted mode; here is how
 980 * it deals with different cases:
 981 * - For single-destination interrupts, handle it in posted mode
 982 * - Else if vector hashing is enabled and it is a lowest-priority
 983 *   interrupt, handle it in posted mode and use the following mechanism
  984 *   to find the destination vCPU.
 985 *      1. For lowest-priority interrupts, store all the possible
 986 *         destination vCPUs in an array.
 987 *      2. Use "guest vector % max number of destination vCPUs" to find
 988 *         the right destination vCPU in the array for the lowest-priority
 989 *         interrupt.
 990 * - Otherwise, use remapped mode to inject the interrupt.
 991 */
 992bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 993                        struct kvm_vcpu **dest_vcpu)
 994{
 995        struct kvm_apic_map *map;
 996        unsigned long bitmap;
 997        struct kvm_lapic **dst = NULL;
 998        bool ret = false;
 999
1000        if (irq->shorthand)
1001                return false;
1002
1003        rcu_read_lock();
1004        map = rcu_dereference(kvm->arch.apic_map);
1005
1006        if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
1007                        hweight16(bitmap) == 1) {
1008                unsigned long i = find_first_bit(&bitmap, 16);
1009
1010                if (dst[i]) {
1011                        *dest_vcpu = dst[i]->vcpu;
1012                        ret = true;
1013                }
1014        }
1015
1016        rcu_read_unlock();
1017        return ret;
1018}
1019
1020/*
1021 * Add a pending IRQ into lapic.
1022 * Return 1 if successfully added and 0 if discarded.
1023 */
1024static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1025                             int vector, int level, int trig_mode,
1026                             struct dest_map *dest_map)
1027{
1028        int result = 0;
1029        struct kvm_vcpu *vcpu = apic->vcpu;
1030
1031        trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
1032                                  trig_mode, vector);
1033        switch (delivery_mode) {
1034        case APIC_DM_LOWEST:
1035                vcpu->arch.apic_arb_prio++;
1036                /* fall through */
1037        case APIC_DM_FIXED:
1038                if (unlikely(trig_mode && !level))
1039                        break;
1040
1041                /* FIXME add logic for vcpu on reset */
1042                if (unlikely(!apic_enabled(apic)))
1043                        break;
1044
1045                result = 1;
1046
1047                if (dest_map) {
1048                        __set_bit(vcpu->vcpu_id, dest_map->map);
1049                        dest_map->vectors[vcpu->vcpu_id] = vector;
1050                }
1051
1052                if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
1053                        if (trig_mode)
1054                                kvm_lapic_set_vector(vector,
1055                                                     apic->regs + APIC_TMR);
1056                        else
1057                                kvm_lapic_clear_vector(vector,
1058                                                       apic->regs + APIC_TMR);
1059                }
1060
1061                if (vcpu->arch.apicv_active)
1062                        kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
1063                else {
1064                        kvm_lapic_set_irr(vector, apic);
1065
1066                        kvm_make_request(KVM_REQ_EVENT, vcpu);
1067                        kvm_vcpu_kick(vcpu);
1068                }
1069                break;
1070
1071        case APIC_DM_REMRD:
1072                result = 1;
1073                vcpu->arch.pv.pv_unhalted = 1;
1074                kvm_make_request(KVM_REQ_EVENT, vcpu);
1075                kvm_vcpu_kick(vcpu);
1076                break;
1077
1078        case APIC_DM_SMI:
1079                result = 1;
1080                kvm_make_request(KVM_REQ_SMI, vcpu);
1081                kvm_vcpu_kick(vcpu);
1082                break;
1083
1084        case APIC_DM_NMI:
1085                result = 1;
1086                kvm_inject_nmi(vcpu);
1087                kvm_vcpu_kick(vcpu);
1088                break;
1089
1090        case APIC_DM_INIT:
1091                if (!trig_mode || level) {
1092                        result = 1;
1093                        /* assumes that there are only KVM_APIC_INIT/SIPI */
1094                        apic->pending_events = (1UL << KVM_APIC_INIT);
1095                        /* make sure pending_events is visible before sending
1096                         * the request */
1097                        smp_wmb();
1098                        kvm_make_request(KVM_REQ_EVENT, vcpu);
1099                        kvm_vcpu_kick(vcpu);
1100                }
1101                break;
1102
1103        case APIC_DM_STARTUP:
1104                result = 1;
1105                apic->sipi_vector = vector;
1106                /* make sure sipi_vector is visible for the receiver */
1107                smp_wmb();
1108                set_bit(KVM_APIC_SIPI, &apic->pending_events);
1109                kvm_make_request(KVM_REQ_EVENT, vcpu);
1110                kvm_vcpu_kick(vcpu);
1111                break;
1112
1113        case APIC_DM_EXTINT:
1114                /*
1115                 * Should only be called by kvm_apic_local_deliver() with LVT0,
1116                 * before NMI watchdog was enabled. Already handled by
1117                 * kvm_apic_accept_pic_intr().
1118                 */
1119                break;
1120
1121        default:
1122                printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1123                       delivery_mode);
1124                break;
1125        }
1126        return result;
1127}
1128
1129int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1130{
1131        return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1132}
1133
1134static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1135{
1136        return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1137}
1138
1139static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1140{
1141        int trigger_mode;
1142
 1143        /* EOI the ioapic only if the ioapic actually handles the vector. */
1144        if (!kvm_ioapic_handles_vector(apic, vector))
1145                return;
1146
1147        /* Request a KVM exit to inform the userspace IOAPIC. */
1148        if (irqchip_split(apic->vcpu->kvm)) {
1149                apic->vcpu->arch.pending_ioapic_eoi = vector;
1150                kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1151                return;
1152        }
1153
1154        if (apic_test_vector(vector, apic->regs + APIC_TMR))
1155                trigger_mode = IOAPIC_LEVEL_TRIG;
1156        else
1157                trigger_mode = IOAPIC_EDGE_TRIG;
1158
1159        kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1160}
1161
1162static int apic_set_eoi(struct kvm_lapic *apic)
1163{
1164        int vector = apic_find_highest_isr(apic);
1165
1166        trace_kvm_eoi(apic, vector);
1167
1168        /*
 1169         * Not every EOI write has a corresponding ISR bit set; one example
 1170         * is when the kernel checks the timer in setup_IO_APIC.
1171         */
1172        if (vector == -1)
1173                return vector;
1174
1175        apic_clear_isr(vector, apic);
1176        apic_update_ppr(apic);
1177
1178        if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
1179                kvm_hv_synic_send_eoi(apic->vcpu, vector);
1180
1181        kvm_ioapic_send_eoi(apic, vector);
1182        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1183        return vector;
1184}
1185
1186/*
 1187 * This interface assumes a trap-like exit, which has already performed the
 1188 * desired side effects, including the vISR and vPPR updates.
1189 */
1190void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1191{
1192        struct kvm_lapic *apic = vcpu->arch.apic;
1193
1194        trace_kvm_eoi(apic, vector);
1195
1196        kvm_ioapic_send_eoi(apic, vector);
1197        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1198}
1199EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1200
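/*
 * Decode ICR/ICR2 into a kvm_lapic_irq and hand it to the common delivery
 * path.  In x2APIC mode ICR2 holds the full 32-bit destination ID; in xAPIC
 * mode only its top byte is used.
 */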
1201static void apic_send_ipi(struct kvm_lapic *apic)
1202{
1203        u32 icr_low = kvm_lapic_get_reg(apic, APIC_ICR);
1204        u32 icr_high = kvm_lapic_get_reg(apic, APIC_ICR2);
1205        struct kvm_lapic_irq irq;
1206
1207        irq.vector = icr_low & APIC_VECTOR_MASK;
1208        irq.delivery_mode = icr_low & APIC_MODE_MASK;
1209        irq.dest_mode = icr_low & APIC_DEST_MASK;
1210        irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1211        irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1212        irq.shorthand = icr_low & APIC_SHORT_MASK;
1213        irq.msi_redir_hint = false;
1214        if (apic_x2apic_mode(apic))
1215                irq.dest_id = icr_high;
1216        else
1217                irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
1218
1219        trace_kvm_apic_ipi(icr_low, irq.dest_id);
1220
1221        kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1222}
1223
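/*
 * Derive the current-count register from the time remaining until
 * target_expiration: TMCCT = remaining_ns / (bus cycle * divide count),
 * with the remainder taken modulo the period so periodic timers wrap
 * correctly.
 */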
1224static u32 apic_get_tmcct(struct kvm_lapic *apic)
1225{
1226        ktime_t remaining, now;
1227        s64 ns;
1228        u32 tmcct;
1229
1230        ASSERT(apic != NULL);
1231
1232        /* if initial count is 0, current count should also be 0 */
1233        if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1234                apic->lapic_timer.period == 0)
1235                return 0;
1236
1237        now = ktime_get();
1238        remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1239        if (ktime_to_ns(remaining) < 0)
1240                remaining = 0;
1241
1242        ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1243        tmcct = div64_u64(ns,
1244                         (APIC_BUS_CYCLE_NS * apic->divide_count));
1245
1246        return tmcct;
1247}
1248
1249static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1250{
1251        struct kvm_vcpu *vcpu = apic->vcpu;
1252        struct kvm_run *run = vcpu->run;
1253
1254        kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1255        run->tpr_access.rip = kvm_rip_read(vcpu);
1256        run->tpr_access.is_write = write;
1257}
1258
1259static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1260{
1261        if (apic->vcpu->arch.tpr_access_reporting)
1262                __report_tpr_access(apic, write);
1263}
1264
1265static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1266{
1267        u32 val = 0;
1268
1269        if (offset >= LAPIC_MMIO_LENGTH)
1270                return 0;
1271
1272        switch (offset) {
1273        case APIC_ARBPRI:
1274                break;
1275
1276        case APIC_TMCCT:        /* Timer CCR */
1277                if (apic_lvtt_tscdeadline(apic))
1278                        return 0;
1279
1280                val = apic_get_tmcct(apic);
1281                break;
1282        case APIC_PROCPRI:
1283                apic_update_ppr(apic);
1284                val = kvm_lapic_get_reg(apic, offset);
1285                break;
1286        case APIC_TASKPRI:
1287                report_tpr_access(apic, false);
1288                /* fall thru */
1289        default:
1290                val = kvm_lapic_get_reg(apic, offset);
1291                break;
1292        }
1293
1294        return val;
1295}
1296
1297static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1298{
1299        return container_of(dev, struct kvm_lapic, dev);
1300}
1301
1302#define APIC_REG_MASK(reg)      (1ull << ((reg) >> 4))
1303#define APIC_REGS_MASK(first, count) \
1304        (APIC_REG_MASK(first) * ((1ull << (count)) - 1))
1305
1306int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1307                void *data)
1308{
1309        unsigned char alignment = offset & 0xf;
1310        u32 result;
1311        /* this bitmask has a bit cleared for each reserved register */
1312        u64 valid_reg_mask =
1313                APIC_REG_MASK(APIC_ID) |
1314                APIC_REG_MASK(APIC_LVR) |
1315                APIC_REG_MASK(APIC_TASKPRI) |
1316                APIC_REG_MASK(APIC_PROCPRI) |
1317                APIC_REG_MASK(APIC_LDR) |
1318                APIC_REG_MASK(APIC_DFR) |
1319                APIC_REG_MASK(APIC_SPIV) |
1320                APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1321                APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1322                APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1323                APIC_REG_MASK(APIC_ESR) |
1324                APIC_REG_MASK(APIC_ICR) |
1325                APIC_REG_MASK(APIC_ICR2) |
1326                APIC_REG_MASK(APIC_LVTT) |
1327                APIC_REG_MASK(APIC_LVTTHMR) |
1328                APIC_REG_MASK(APIC_LVTPC) |
1329                APIC_REG_MASK(APIC_LVT0) |
1330                APIC_REG_MASK(APIC_LVT1) |
1331                APIC_REG_MASK(APIC_LVTERR) |
1332                APIC_REG_MASK(APIC_TMICT) |
1333                APIC_REG_MASK(APIC_TMCCT) |
1334                APIC_REG_MASK(APIC_TDCR);
1335
1336        /* ARBPRI is not valid on x2APIC */
1337        if (!apic_x2apic_mode(apic))
1338                valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
1339
1340        if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
1341                return 1;
1342
1343        result = __apic_read(apic, offset & ~0xf);
1344
1345        trace_kvm_apic_read(offset, result);
1346
1347        switch (len) {
1348        case 1:
1349        case 2:
1350        case 4:
1351                memcpy(data, (char *)&result + alignment, len);
1352                break;
1353        default:
1354                printk(KERN_ERR "Local APIC read with len = %x, "
1355                       "should be 1,2, or 4 instead\n", len);
1356                break;
1357        }
1358        return 0;
1359}
1360EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
1361
1362static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1363{
1364        return addr >= apic->base_address &&
1365                addr < apic->base_address + LAPIC_MMIO_LENGTH;
1366}
1367
1368static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1369                           gpa_t address, int len, void *data)
1370{
1371        struct kvm_lapic *apic = to_lapic(this);
1372        u32 offset = address - apic->base_address;
1373
1374        if (!apic_mmio_in_range(apic, address))
1375                return -EOPNOTSUPP;
1376
1377        if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1378                if (!kvm_check_has_quirk(vcpu->kvm,
1379                                         KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1380                        return -EOPNOTSUPP;
1381
1382                memset(data, 0xff, len);
1383                return 0;
1384        }
1385
1386        kvm_lapic_reg_read(apic, offset, len, data);
1387
1388        return 0;
1389}
1390
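/*
 * Decode the divide configuration register: bits 0, 1 and 3 of TDCR encode a
 * power-of-two divider, with the all-ones pattern meaning divide by 1
 * (e.g. TDCR 0x0 -> divide by 2, TDCR 0xb -> divide by 1).
 */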
1391static void update_divide_count(struct kvm_lapic *apic)
1392{
1393        u32 tmp1, tmp2, tdcr;
1394
1395        tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1396        tmp1 = tdcr & 0xf;
1397        tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1398        apic->divide_count = 0x1 << (tmp2 & 0x7);
1399}
1400
1401static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1402{
1403        /*
1404         * Do not allow the guest to program periodic timers with small
1405         * interval, since the hrtimers are not throttled by the host
1406         * scheduler.
1407         */
1408        if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1409                s64 min_period = min_timer_period_us * 1000LL;
1410
1411                if (apic->lapic_timer.period < min_period) {
1412                        pr_info_ratelimited(
1413                            "kvm: vcpu %i: requested %lld ns "
1414                            "lapic timer period limited to %lld ns\n",
1415                            apic->vcpu->vcpu_id,
1416                            apic->lapic_timer.period, min_period);
1417                        apic->lapic_timer.period = min_period;
1418                }
1419        }
1420}
1421
1422static void apic_update_lvtt(struct kvm_lapic *apic)
1423{
1424        u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1425                        apic->lapic_timer.timer_mode_mask;
1426
1427        if (apic->lapic_timer.timer_mode != timer_mode) {
1428                if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1429                                APIC_LVT_TIMER_TSCDEADLINE)) {
1430                        hrtimer_cancel(&apic->lapic_timer.timer);
1431                        kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1432                        apic->lapic_timer.period = 0;
1433                        apic->lapic_timer.tscdeadline = 0;
1434                }
1435                apic->lapic_timer.timer_mode = timer_mode;
1436                limit_periodic_timer_frequency(apic);
1437        }
1438}
1439
1440/*
1441 * On APICv, this test will cause a busy wait
1442 * during a higher-priority task.
1443 */
1444
1445static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1446{
1447        struct kvm_lapic *apic = vcpu->arch.apic;
1448        u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1449
1450        if (kvm_apic_hw_enabled(apic)) {
1451                int vec = reg & APIC_VECTOR_MASK;
1452                void *bitmap = apic->regs + APIC_ISR;
1453
1454                if (vcpu->arch.apicv_active)
1455                        bitmap = apic->regs + APIC_IRR;
1456
1457                if (apic_test_vector(vec, bitmap))
1458                        return true;
1459        }
1460        return false;
1461}
1462
1463static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1464{
1465        u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1466
1467        /*
1468         * If the guest TSC is running at a different ratio than the host, then
1469         * convert the delay to nanoseconds to achieve an accurate delay.  Note
1470         * that __delay() uses delay_tsc whenever the hardware has TSC, thus
1471         * always for VMX enabled hardware.
1472         */
1473        if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
1474                __delay(min(guest_cycles,
1475                        nsec_to_cycles(vcpu, timer_advance_ns)));
1476        } else {
1477                u64 delay_ns = guest_cycles * 1000000ULL;
1478                do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1479                ndelay(min_t(u32, delay_ns, timer_advance_ns));
1480        }
1481}
1482
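/*
 * Adaptively tune timer_advance_ns: convert the observed early/late error
 * (in guest TSC cycles) to nanoseconds and move the advance value toward it,
 * by at most 1/LAPIC_TIMER_ADVANCE_ADJUST_STEP of its current value per
 * expiry.  Tuning stops once the error is small and restarts from the
 * initial value if the advance grows unreasonably large.
 */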
1483static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1484                                              s64 advance_expire_delta)
1485{
1486        struct kvm_lapic *apic = vcpu->arch.apic;
1487        u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1488        u64 ns;
1489
1490        /* too early */
1491        if (advance_expire_delta < 0) {
1492                ns = -advance_expire_delta * 1000000ULL;
1493                do_div(ns, vcpu->arch.virtual_tsc_khz);
1494                timer_advance_ns -= min((u32)ns,
1495                        timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
1496        } else {
 1497                /* too late */
1498                ns = advance_expire_delta * 1000000ULL;
1499                do_div(ns, vcpu->arch.virtual_tsc_khz);
1500                timer_advance_ns += min((u32)ns,
1501                        timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
1502        }
1503
1504        if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
1505                apic->lapic_timer.timer_advance_adjust_done = true;
1506        if (unlikely(timer_advance_ns > 5000)) {
1507                timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
1508                apic->lapic_timer.timer_advance_adjust_done = false;
1509        }
1510        apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1511}
1512
1513static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1514{
1515        struct kvm_lapic *apic = vcpu->arch.apic;
1516        u64 guest_tsc, tsc_deadline;
1517
1518        if (apic->lapic_timer.expired_tscdeadline == 0)
1519                return;
1520
1521        tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1522        apic->lapic_timer.expired_tscdeadline = 0;
1523        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1524        apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
1525
1526        if (guest_tsc < tsc_deadline)
1527                __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1528
1529        if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
1530                adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
1531}
1532
1533void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1534{
1535        if (lapic_timer_int_injected(vcpu))
1536                __kvm_wait_lapic_expire(vcpu);
1537}
1538EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
1539
1540static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1541{
1542        struct kvm_timer *ktimer = &apic->lapic_timer;
1543
1544        kvm_apic_local_deliver(apic, APIC_LVTT);
1545        if (apic_lvtt_tscdeadline(apic))
1546                ktimer->tscdeadline = 0;
1547        if (apic_lvtt_oneshot(apic)) {
1548                ktimer->tscdeadline = 0;
1549                ktimer->target_expiration = 0;
1550        }
1551}
1552
1553static void apic_timer_expired(struct kvm_lapic *apic)
1554{
1555        struct kvm_vcpu *vcpu = apic->vcpu;
1556        struct kvm_timer *ktimer = &apic->lapic_timer;
1557
1558        if (atomic_read(&apic->lapic_timer.pending))
1559                return;
1560
1561        if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1562                ktimer->expired_tscdeadline = ktimer->tscdeadline;
1563
1564        if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1565                if (apic->lapic_timer.timer_advance_ns)
1566                        __kvm_wait_lapic_expire(vcpu);
1567                kvm_apic_inject_pending_timer_irqs(apic);
1568                return;
1569        }
1570
1571        atomic_inc(&apic->lapic_timer.pending);
1572        kvm_set_pending_timer(vcpu);
1573}
1574
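/*
 * Arm the hrtimer for TSC-deadline mode.  The distance to the deadline is
 * converted from guest TSC cycles to nanoseconds,
 * ns = (tscdeadline - guest_tsc) * 1000000 / virtual_tsc_khz, and the
 * timer is started timer_advance_ns early.  A deadline that has already
 * passed, or that lies inside the advance window, is treated as expired
 * right away.
 */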
1575static void start_sw_tscdeadline(struct kvm_lapic *apic)
1576{
1577        struct kvm_timer *ktimer = &apic->lapic_timer;
1578        u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
1579        u64 ns = 0;
1580        ktime_t expire;
1581        struct kvm_vcpu *vcpu = apic->vcpu;
1582        unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1583        unsigned long flags;
1584        ktime_t now;
1585
1586        if (unlikely(!tscdeadline || !this_tsc_khz))
1587                return;
1588
1589        local_irq_save(flags);
1590
1591        now = ktime_get();
1592        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1593
1594        ns = (tscdeadline - guest_tsc) * 1000000ULL;
1595        do_div(ns, this_tsc_khz);
1596
1597        if (likely(tscdeadline > guest_tsc) &&
1598            likely(ns > apic->lapic_timer.timer_advance_ns)) {
1599                expire = ktime_add_ns(now, ns);
1600                expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
1601                hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS);
1602        } else
1603                apic_timer_expired(apic);
1604
1605        local_irq_restore(flags);
1606}
1607
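/*
 * The guest changed the divide configuration (APIC_TDCR) while a
 * oneshot/periodic countdown was in flight: the remaining time scales by
 * new_divisor / old_divisor (e.g. 1 ms left with the divisor doubled
 * becomes 2 ms left), and both target_expiration and tscdeadline are
 * shifted accordingly.
 */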
1608static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1609{
1610        ktime_t now, remaining;
1611        u64 ns_remaining_old, ns_remaining_new;
1612
1613        apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
1614                * APIC_BUS_CYCLE_NS * apic->divide_count;
1615        limit_periodic_timer_frequency(apic);
1616
1617        now = ktime_get();
1618        remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1619        if (ktime_to_ns(remaining) < 0)
1620                remaining = 0;
1621
1622        ns_remaining_old = ktime_to_ns(remaining);
1623        ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1624                                           apic->divide_count, old_divisor);
1625
1626        apic->lapic_timer.tscdeadline +=
1627                nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1628                nsec_to_cycles(apic->vcpu, ns_remaining_old);
1629        apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
1630}
1631
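/*
 * Program a fresh oneshot/periodic countdown from APIC_TMICT.  The period
 * in nanoseconds is TMICT * APIC_BUS_CYCLE_NS * divide_count; an initial
 * count of zero simply leaves the timer disarmed.  Both an absolute ktime
 * target and the equivalent guest TSC deadline are recorded so that either
 * the hrtimer or the hypervisor timer can be armed later.
 */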
1632static bool set_target_expiration(struct kvm_lapic *apic)
1633{
1634        ktime_t now;
1635        u64 tscl = rdtsc();
1636
1637        now = ktime_get();
1638        apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
1639                * APIC_BUS_CYCLE_NS * apic->divide_count;
1640
1641        if (!apic->lapic_timer.period) {
1642                apic->lapic_timer.tscdeadline = 0;
1643                return false;
1644        }
1645
1646        limit_periodic_timer_frequency(apic);
1647
1648        apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1649                nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
1650        apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
1651
1652        return true;
1653}
1654
1655static void advance_periodic_target_expiration(struct kvm_lapic *apic)
1656{
1657        ktime_t now = ktime_get();
1658        u64 tscl = rdtsc();
1659        ktime_t delta;
1660
1661        /*
1662         * Synchronize both deadlines to the same time source or
1663         * differences in the periods (caused by differences in the
1664         * underlying clocks or numerical approximation errors) will
1665         * cause the two to drift apart over time as the errors
1666         * accumulate.
1667         */
1668        apic->lapic_timer.target_expiration =
1669                ktime_add_ns(apic->lapic_timer.target_expiration,
1670                                apic->lapic_timer.period);
1671        delta = ktime_sub(apic->lapic_timer.target_expiration, now);
1672        apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1673                nsec_to_cycles(apic->vcpu, delta);
1674}
1675
1676static void start_sw_period(struct kvm_lapic *apic)
1677{
1678        if (!apic->lapic_timer.period)
1679                return;
1680
1681        if (ktime_after(ktime_get(),
1682                        apic->lapic_timer.target_expiration)) {
1683                apic_timer_expired(apic);
1684
1685                if (apic_lvtt_oneshot(apic))
1686                        return;
1687
1688                advance_periodic_target_expiration(apic);
1689        }
1690
1691        hrtimer_start(&apic->lapic_timer.timer,
1692                apic->lapic_timer.target_expiration,
1693                HRTIMER_MODE_ABS);
1694}
1695
1696bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1697{
1698        if (!lapic_in_kernel(vcpu))
1699                return false;
1700
1701        return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1702}
1703EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1704
1705static void cancel_hv_timer(struct kvm_lapic *apic)
1706{
1707        WARN_ON(preemptible());
1708        WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1709        kvm_x86_ops->cancel_hv_timer(apic->vcpu);
1710        apic->lapic_timer.hv_timer_in_use = false;
1711}
1712
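/*
 * Try to offload the emulated timer to the "hypervisor timer" (on Intel
 * this is typically the VMX preemption timer).  This fails if the vendor
 * module provides no set_hv_timer hook or there is no pending TSC
 * deadline, in which case the caller falls back to the hrtimer-based
 * software timer.  For non-periodic timers an already-expired deadline is
 * delivered right away and the hv timer is cancelled again.
 */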
1713static bool start_hv_timer(struct kvm_lapic *apic)
1714{
1715        struct kvm_timer *ktimer = &apic->lapic_timer;
1716        struct kvm_vcpu *vcpu = apic->vcpu;
1717        bool expired;
1718
1719        WARN_ON(preemptible());
1720        if (!kvm_x86_ops->set_hv_timer)
1721                return false;
1722
1723        if (!ktimer->tscdeadline)
1724                return false;
1725
1726        if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
1727                return false;
1728
1729        ktimer->hv_timer_in_use = true;
1730        hrtimer_cancel(&ktimer->timer);
1731
1732        /*
1733         * To simplify handling the periodic timer, leave the hv timer running
1734         * even if the deadline timer has expired, i.e. rely on the resulting
1735         * VM-Exit to recompute the periodic timer's target expiration.
1736         */
1737        if (!apic_lvtt_period(apic)) {
1738                /*
1739                 * Cancel the hv timer if the sw timer fired while the hv timer
1740                 * was being programmed, or if the hv timer itself expired.
1741                 */
1742                if (atomic_read(&ktimer->pending)) {
1743                        cancel_hv_timer(apic);
1744                } else if (expired) {
1745                        apic_timer_expired(apic);
1746                        cancel_hv_timer(apic);
1747                }
1748        }
1749
1750        trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
1751
1752        return true;
1753}
1754
1755static void start_sw_timer(struct kvm_lapic *apic)
1756{
1757        struct kvm_timer *ktimer = &apic->lapic_timer;
1758
1759        WARN_ON(preemptible());
1760        if (apic->lapic_timer.hv_timer_in_use)
1761                cancel_hv_timer(apic);
1762        if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1763                return;
1764
1765        if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1766                start_sw_period(apic);
1767        else if (apic_lvtt_tscdeadline(apic))
1768                start_sw_tscdeadline(apic);
1769        trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
1770}
1771
1772static void restart_apic_timer(struct kvm_lapic *apic)
1773{
1774        preempt_disable();
1775
1776        if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
1777                goto out;
1778
1779        if (!start_hv_timer(apic))
1780                start_sw_timer(apic);
1781out:
1782        preempt_enable();
1783}
1784
1785void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1786{
1787        struct kvm_lapic *apic = vcpu->arch.apic;
1788
1789        preempt_disable();
1790        /* If the preempt notifier has already run, it also called apic_timer_expired */
1791        if (!apic->lapic_timer.hv_timer_in_use)
1792                goto out;
1793        WARN_ON(swait_active(&vcpu->wq));
1794        cancel_hv_timer(apic);
1795        apic_timer_expired(apic);
1796
1797        if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1798                advance_periodic_target_expiration(apic);
1799                restart_apic_timer(apic);
1800        }
1801out:
1802        preempt_enable();
1803}
1804EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
1805
1806void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
1807{
1808        restart_apic_timer(vcpu->arch.apic);
1809}
1810EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
1811
1812void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
1813{
1814        struct kvm_lapic *apic = vcpu->arch.apic;
1815
1816        preempt_disable();
1817        /* Possibly the TSC deadline timer is not enabled yet */
1818        if (apic->lapic_timer.hv_timer_in_use)
1819                start_sw_timer(apic);
1820        preempt_enable();
1821}
1822EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
1823
1824void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
1825{
1826        struct kvm_lapic *apic = vcpu->arch.apic;
1827
1828        WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1829        restart_apic_timer(apic);
1830}
1831
1832static void start_apic_timer(struct kvm_lapic *apic)
1833{
1834        atomic_set(&apic->lapic_timer.pending, 0);
1835
1836        if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1837            && !set_target_expiration(apic))
1838                return;
1839
1840        restart_apic_timer(apic);
1841}
1842
1843static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1844{
1845        bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
1846
1847        if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
1848                apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
1849                if (lvt0_in_nmi_mode)
1850                        atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1851                else
1852                        atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1853        }
1854}
1855
1856int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1857{
1858        int ret = 0;
1859
1860        trace_kvm_apic_write(reg, val);
1861
1862        switch (reg) {
1863        case APIC_ID:           /* Local APIC ID */
1864                if (!apic_x2apic_mode(apic))
1865                        kvm_apic_set_xapic_id(apic, val >> 24);
1866                else
1867                        ret = 1;
1868                break;
1869
1870        case APIC_TASKPRI:
1871                report_tpr_access(apic, true);
1872                apic_set_tpr(apic, val & 0xff);
1873                break;
1874
1875        case APIC_EOI:
1876                apic_set_eoi(apic);
1877                break;
1878
1879        case APIC_LDR:
1880                if (!apic_x2apic_mode(apic))
1881                        kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
1882                else
1883                        ret = 1;
1884                break;
1885
1886        case APIC_DFR:
1887                if (!apic_x2apic_mode(apic)) {
1888                        kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
1889                        recalculate_apic_map(apic->vcpu->kvm);
1890                } else
1891                        ret = 1;
1892                break;
1893
1894        case APIC_SPIV: {
1895                u32 mask = 0x3ff;
1896                if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
1897                        mask |= APIC_SPIV_DIRECTED_EOI;
1898                apic_set_spiv(apic, val & mask);
1899                if (!(val & APIC_SPIV_APIC_ENABLED)) {
1900                        int i;
1901                        u32 lvt_val;
1902
1903                        for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
1904                                lvt_val = kvm_lapic_get_reg(apic,
1905                                                       APIC_LVTT + 0x10 * i);
1906                                kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
1907                                             lvt_val | APIC_LVT_MASKED);
1908                        }
1909                        apic_update_lvtt(apic);
1910                        atomic_set(&apic->lapic_timer.pending, 0);
1911
1912                }
1913                break;
1914        }
1915        case APIC_ICR:
1916                /* No delay here, so we always clear the pending bit */
1917                kvm_lapic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
1918                apic_send_ipi(apic);
1919                break;
1920
1921        case APIC_ICR2:
1922                if (!apic_x2apic_mode(apic))
1923                        val &= 0xff000000;
1924                kvm_lapic_set_reg(apic, APIC_ICR2, val);
1925                break;
1926
1927        case APIC_LVT0:
1928                apic_manage_nmi_watchdog(apic, val);
1929                /* fall through */
1930        case APIC_LVTTHMR:
1931        case APIC_LVTPC:
1932        case APIC_LVT1:
1933        case APIC_LVTERR:
1934                /* TODO: Check vector */
1935                if (!kvm_apic_sw_enabled(apic))
1936                        val |= APIC_LVT_MASKED;
1937
1938                val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
1939                kvm_lapic_set_reg(apic, reg, val);
1940
1941                break;
1942
1943        case APIC_LVTT:
1944                if (!kvm_apic_sw_enabled(apic))
1945                        val |= APIC_LVT_MASKED;
1946                val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
1947                kvm_lapic_set_reg(apic, APIC_LVTT, val);
1948                apic_update_lvtt(apic);
1949                break;
1950
1951        case APIC_TMICT:
1952                if (apic_lvtt_tscdeadline(apic))
1953                        break;
1954
1955                hrtimer_cancel(&apic->lapic_timer.timer);
1956                kvm_lapic_set_reg(apic, APIC_TMICT, val);
1957                start_apic_timer(apic);
1958                break;
1959
1960        case APIC_TDCR: {
1961                uint32_t old_divisor = apic->divide_count;
1962
1963                kvm_lapic_set_reg(apic, APIC_TDCR, val);
1964                update_divide_count(apic);
1965                if (apic->divide_count != old_divisor &&
1966                                apic->lapic_timer.period) {
1967                        hrtimer_cancel(&apic->lapic_timer.timer);
1968                        update_target_expiration(apic, old_divisor);
1969                        restart_apic_timer(apic);
1970                }
1971                break;
1972        }
1973        case APIC_ESR:
1974                if (apic_x2apic_mode(apic) && val != 0)
1975                        ret = 1;
1976                break;
1977
1978        case APIC_SELF_IPI:
1979                if (apic_x2apic_mode(apic)) {
1980                        kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
1981                } else
1982                        ret = 1;
1983                break;
1984        default:
1985                ret = 1;
1986                break;
1987        }
1988
1989        return ret;
1990}
1991EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
1992
1993static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1994                            gpa_t address, int len, const void *data)
1995{
1996        struct kvm_lapic *apic = to_lapic(this);
1997        unsigned int offset = address - apic->base_address;
1998        u32 val;
1999
2000        if (!apic_mmio_in_range(apic, address))
2001                return -EOPNOTSUPP;
2002
2003        if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2004                if (!kvm_check_has_quirk(vcpu->kvm,
2005                                         KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2006                        return -EOPNOTSUPP;
2007
2008                return 0;
2009        }
2010
2011        /*
2012         * APIC registers must be aligned on a 128-bit boundary.
2013         * 32/64/128-bit registers must be accessed through 32-bit accesses.
2014         * Refer to SDM 8.4.1.
2015         */
2016        if (len != 4 || (offset & 0xf))
2017                return 0;
2018
2019        val = *(u32*)data;
2020
2021        kvm_lapic_reg_write(apic, offset & 0xff0, val);
2022
2023        return 0;
2024}
2025
2026void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2027{
2028        kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
2029}
2030EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2031
2032/* emulate APIC access in a trap manner */
2033void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2034{
2035        u32 val = 0;
2036
2037        /* hardware has already done the conditional check and instruction decode */
2038        offset &= 0xff0;
2039
2040        kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
2041
2042        /* TODO: optimize to just emulate side effect w/o one more write */
2043        kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
2044}
2045EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
2046
2047void kvm_free_lapic(struct kvm_vcpu *vcpu)
2048{
2049        struct kvm_lapic *apic = vcpu->arch.apic;
2050
2051        if (!vcpu->arch.apic)
2052                return;
2053
2054        hrtimer_cancel(&apic->lapic_timer.timer);
2055
2056        if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2057                static_key_slow_dec_deferred(&apic_hw_disabled);
2058
2059        if (!apic->sw_enabled)
2060                static_key_slow_dec_deferred(&apic_sw_disabled);
2061
2062        if (apic->regs)
2063                free_page((unsigned long)apic->regs);
2064
2065        kfree(apic);
2066}
2067
2068/*
2069 *----------------------------------------------------------------------
2070 * LAPIC interface
2071 *----------------------------------------------------------------------
2072 */
2073u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2074{
2075        struct kvm_lapic *apic = vcpu->arch.apic;
2076
2077        if (!lapic_in_kernel(vcpu) ||
2078                !apic_lvtt_tscdeadline(apic))
2079                return 0;
2080
2081        return apic->lapic_timer.tscdeadline;
2082}
2083
2084void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2085{
2086        struct kvm_lapic *apic = vcpu->arch.apic;
2087
2088        if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
2089                        apic_lvtt_period(apic))
2090                return;
2091
2092        hrtimer_cancel(&apic->lapic_timer.timer);
2093        apic->lapic_timer.tscdeadline = data;
2094        start_apic_timer(apic);
2095}
2096
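/*
 * CR8 is an alias for the upper nibble of the TPR: kvm_lapic_set_tpr()
 * folds the low four bits of CR8 into the task-priority class field, and
 * kvm_lapic_get_cr8() extracts them again.
 */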
2097void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2098{
2099        struct kvm_lapic *apic = vcpu->arch.apic;
2100
2101        apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
2102                     | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
2103}
2104
2105u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2106{
2107        u64 tpr;
2108
2109        tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2110
2111        return (tpr & 0xf0) >> 4;
2112}
2113
2114void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2115{
2116        u64 old_value = vcpu->arch.apic_base;
2117        struct kvm_lapic *apic = vcpu->arch.apic;
2118
2119        if (!apic)
2120                value |= MSR_IA32_APICBASE_BSP;
2121
2122        vcpu->arch.apic_base = value;
2123
2124        if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2125                kvm_update_cpuid(vcpu);
2126
2127        if (!apic)
2128                return;
2129
2130        /* update jump label if enable bit changes */
2131        if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2132                if (value & MSR_IA32_APICBASE_ENABLE) {
2133                        kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2134                        static_key_slow_dec_deferred(&apic_hw_disabled);
2135                } else {
2136                        static_key_slow_inc(&apic_hw_disabled.key);
2137                        recalculate_apic_map(vcpu->kvm);
2138                }
2139        }
2140
2141        if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
2142                kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2143
2144        if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
2145                kvm_x86_ops->set_virtual_apic_mode(vcpu);
2146
2147        apic->base_address = apic->vcpu->arch.apic_base &
2148                             MSR_IA32_APICBASE_BASE;
2149
2150        if ((value & MSR_IA32_APICBASE_ENABLE) &&
2151             apic->base_address != APIC_DEFAULT_PHYS_BASE)
2152                pr_warn_once("APIC base relocation is unsupported by KVM");
2153}
2154
2155void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2156{
2157        struct kvm_lapic *apic = vcpu->arch.apic;
2158        int i;
2159
2160        if (!apic)
2161                return;
2162
2163        /* Stop the timer in case it's a reset to an active apic */
2164        hrtimer_cancel(&apic->lapic_timer.timer);
2165
2166        if (!init_event) {
2167                kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
2168                                         MSR_IA32_APICBASE_ENABLE);
2169                kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2170        }
2171        kvm_apic_set_version(apic->vcpu);
2172
2173        for (i = 0; i < KVM_APIC_LVT_NUM; i++)
2174                kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
2175        apic_update_lvtt(apic);
2176        if (kvm_vcpu_is_reset_bsp(vcpu) &&
2177            kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2178                kvm_lapic_set_reg(apic, APIC_LVT0,
2179                             SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2180        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2181
2182        kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
2183        apic_set_spiv(apic, 0xff);
2184        kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2185        if (!apic_x2apic_mode(apic))
2186                kvm_apic_set_ldr(apic, 0);
2187        kvm_lapic_set_reg(apic, APIC_ESR, 0);
2188        kvm_lapic_set_reg(apic, APIC_ICR, 0);
2189        kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2190        kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2191        kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2192        for (i = 0; i < 8; i++) {
2193                kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2194                kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2195                kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2196        }
2197        apic->irr_pending = vcpu->arch.apicv_active;
2198        apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
2199        apic->highest_isr_cache = -1;
2200        update_divide_count(apic);
2201        atomic_set(&apic->lapic_timer.pending, 0);
2202        if (kvm_vcpu_is_bsp(vcpu))
2203                kvm_lapic_set_base(vcpu,
2204                                vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
2205        vcpu->arch.pv_eoi.msr_val = 0;
2206        apic_update_ppr(apic);
2207        if (vcpu->arch.apicv_active) {
2208                kvm_x86_ops->apicv_post_state_restore(vcpu);
2209                kvm_x86_ops->hwapic_irr_update(vcpu, -1);
2210                kvm_x86_ops->hwapic_isr_update(vcpu, -1);
2211        }
2212
2213        vcpu->arch.apic_arb_prio = 0;
2214        vcpu->arch.apic_attention = 0;
2215}
2216
2217/*
2218 *----------------------------------------------------------------------
2219 * timer interface
2220 *----------------------------------------------------------------------
2221 */
2222
2223static bool lapic_is_periodic(struct kvm_lapic *apic)
2224{
2225        return apic_lvtt_period(apic);
2226}
2227
2228int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2229{
2230        struct kvm_lapic *apic = vcpu->arch.apic;
2231
2232        if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2233                return atomic_read(&apic->lapic_timer.pending);
2234
2235        return 0;
2236}
2237
2238int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2239{
2240        u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2241        int vector, mode, trig_mode;
2242
2243        if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2244                vector = reg & APIC_VECTOR_MASK;
2245                mode = reg & APIC_MODE_MASK;
2246                trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2247                return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
2248                                        NULL);
2249        }
2250        return 0;
2251}
2252
2253void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2254{
2255        struct kvm_lapic *apic = vcpu->arch.apic;
2256
2257        if (apic)
2258                kvm_apic_local_deliver(apic, APIC_LVT0);
2259}
2260
2261static const struct kvm_io_device_ops apic_mmio_ops = {
2262        .read     = apic_mmio_read,
2263        .write    = apic_mmio_write,
2264};
2265
2266static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2267{
2268        struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2269        struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2270
2271        apic_timer_expired(apic);
2272
2273        if (lapic_is_periodic(apic)) {
2274                advance_periodic_target_expiration(apic);
2275                hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2276                return HRTIMER_RESTART;
2277        } else
2278                return HRTIMER_NORESTART;
2279}
2280
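/*
 * Allocate and initialize the in-kernel local APIC for a vCPU.  A
 * timer_advance_ns argument of -1 selects automatic tuning starting from
 * LAPIC_TIMER_ADVANCE_ADJUST_INIT; any other value is used as-is and
 * tuning is disabled.  The APIC starts out hardware-enabled (so the first
 * kvm_lapic_set_base() does not see a spurious state change) but software
 * disabled.
 */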
2281int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
2282{
2283        struct kvm_lapic *apic;
2284
2285        ASSERT(vcpu != NULL);
2286
2287        apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2288        if (!apic)
2289                goto nomem;
2290
2291        vcpu->arch.apic = apic;
2292
2293        apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
2294        if (!apic->regs) {
2295                printk(KERN_ERR "failed to allocate APIC register page for vcpu %x\n",
2296                       vcpu->vcpu_id);
2297                goto nomem_free_apic;
2298        }
2299        apic->vcpu = vcpu;
2300
2301        hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2302                     HRTIMER_MODE_ABS);
2303        apic->lapic_timer.timer.function = apic_timer_fn;
2304        if (timer_advance_ns == -1) {
2305                apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
2306                apic->lapic_timer.timer_advance_adjust_done = false;
2307        } else {
2308                apic->lapic_timer.timer_advance_ns = timer_advance_ns;
2309                apic->lapic_timer.timer_advance_adjust_done = true;
2310        }
2311
2312
2313        /*
2314         * APIC is created enabled. This will prevent kvm_lapic_set_base from
2315         * thinking that APIC state has changed.
2316         */
2317        vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2318        static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2319        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2320
2321        return 0;
2322nomem_free_apic:
2323        kfree(apic);
2324        vcpu->arch.apic = NULL;
2325nomem:
2326        return -ENOMEM;
2327}
2328
2329int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2330{
2331        struct kvm_lapic *apic = vcpu->arch.apic;
2332        u32 ppr;
2333
2334        if (!kvm_apic_hw_enabled(apic))
2335                return -1;
2336
2337        __apic_update_ppr(apic, &ppr);
2338        return apic_has_interrupt_for_ppr(apic, ppr);
2339}
2340
2341int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2342{
2343        u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2344        int r = 0;
2345
2346        if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2347                r = 1;
2348        if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2349            GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2350                r = 1;
2351        return r;
2352}
2353
2354void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2355{
2356        struct kvm_lapic *apic = vcpu->arch.apic;
2357
2358        if (atomic_read(&apic->lapic_timer.pending) > 0) {
2359                kvm_apic_inject_pending_timer_irqs(apic);
2360                atomic_set(&apic->lapic_timer.pending, 0);
2361        }
2362}
2363
2364int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2365{
2366        int vector = kvm_apic_has_interrupt(vcpu);
2367        struct kvm_lapic *apic = vcpu->arch.apic;
2368        u32 ppr;
2369
2370        if (vector == -1)
2371                return -1;
2372
2373        /*
2374         * We get here even with APIC virtualization enabled, if doing
2375         * nested virtualization and L1 runs with the "acknowledge interrupt
2376         * on exit" mode.  Then we cannot inject the interrupt via RVI,
2377         * because the processor would deliver it through the IDT.
2378         */
2379
2380        apic_clear_irr(vector, apic);
2381        if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
2382                /*
2383                 * For auto-EOI interrupts, there might be another pending
2384                 * interrupt above PPR, so check whether to raise another
2385                 * KVM_REQ_EVENT.
2386                 */
2387                apic_update_ppr(apic);
2388        } else {
2389                /*
2390                 * For normal interrupts, PPR has been raised and there cannot
2391                 * be a higher-priority pending interrupt---except if there was
2392                 * a concurrent interrupt injection, but that would have
2393                 * triggered KVM_REQ_EVENT already.
2394                 */
2395                apic_set_isr(vector, apic);
2396                __apic_update_ppr(apic, &ppr);
2397        }
2398
2399        return vector;
2400}
2401
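/*
 * Fix up the APIC ID layout when saving/restoring state for a vCPU that is
 * in x2APIC mode.  Without kvm->arch.x2apic_format the ID is exchanged
 * with userspace in the legacy xAPIC position (bits 31:24), so it is
 * shifted on the way in and out; with the 32-bit format the ID must simply
 * match vcpu_id.  On restore the LDR is rederived from the ID, as it is
 * read-only in x2APIC mode.
 */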
2402static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2403                struct kvm_lapic_state *s, bool set)
2404{
2405        if (apic_x2apic_mode(vcpu->arch.apic)) {
2406                u32 *id = (u32 *)(s->regs + APIC_ID);
2407                u32 *ldr = (u32 *)(s->regs + APIC_LDR);
2408
2409                if (vcpu->kvm->arch.x2apic_format) {
2410                        if (*id != vcpu->vcpu_id)
2411                                return -EINVAL;
2412                } else {
2413                        if (set)
2414                                *id >>= 24;
2415                        else
2416                                *id <<= 24;
2417                }
2418
2419                /* In x2APIC mode, the LDR is fixed and based on the id */
2420                if (set)
2421                        *ldr = kvm_apic_calc_x2apic_ldr(*id);
2422        }
2423
2424        return 0;
2425}
2426
2427int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2428{
2429        memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2430        return kvm_apic_state_fixup(vcpu, s, false);
2431}
2432
2433int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2434{
2435        struct kvm_lapic *apic = vcpu->arch.apic;
2436        int r;
2437
2438
2439        kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2440        /* set SPIV separately to get count of SW disabled APICs right */
2441        apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2442
2443        r = kvm_apic_state_fixup(vcpu, s, true);
2444        if (r)
2445                return r;
2446        memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
2447
2448        recalculate_apic_map(vcpu->kvm);
2449        kvm_apic_set_version(vcpu);
2450
2451        apic_update_ppr(apic);
2452        hrtimer_cancel(&apic->lapic_timer.timer);
2453        apic_update_lvtt(apic);
2454        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2455        update_divide_count(apic);
2456        start_apic_timer(apic);
2457        apic->irr_pending = true;
2458        apic->isr_count = vcpu->arch.apicv_active ?
2459                                1 : count_vectors(apic->regs + APIC_ISR);
2460        apic->highest_isr_cache = -1;
2461        if (vcpu->arch.apicv_active) {
2462                kvm_x86_ops->apicv_post_state_restore(vcpu);
2463                kvm_x86_ops->hwapic_irr_update(vcpu,
2464                                apic_find_highest_irr(apic));
2465                kvm_x86_ops->hwapic_isr_update(vcpu,
2466                                apic_find_highest_isr(apic));
2467        }
2468        kvm_make_request(KVM_REQ_EVENT, vcpu);
2469        if (ioapic_in_kernel(vcpu->kvm))
2470                kvm_rtc_eoi_tracking_restore_one(vcpu);
2471
2472        vcpu->arch.apic_arb_prio = 0;
2473
2474        return 0;
2475}
2476
2477void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2478{
2479        struct hrtimer *timer;
2480
2481        if (!lapic_in_kernel(vcpu) ||
2482                kvm_can_post_timer_interrupt(vcpu))
2483                return;
2484
2485        timer = &vcpu->arch.apic->lapic_timer.timer;
2486        if (hrtimer_cancel(timer))
2487                hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
2488}
2489
2490/*
2491 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2492 *
2493 * Detect whether the guest triggered PV EOI since the
2494 * last entry. If yes, perform the EOI on the guest's behalf.
2495 * Clear PV EOI in guest memory in any case.
2496 */
2497static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2498                                        struct kvm_lapic *apic)
2499{
2500        bool pending;
2501        int vector;
2502        /*
2503         * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2504         * and KVM_PV_EOI_ENABLED in guest memory as follows:
2505         *
2506         * KVM_APIC_PV_EOI_PENDING is unset:
2507         *      -> host disabled PV EOI.
2508         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2509         *      -> host enabled PV EOI, guest did not execute EOI yet.
2510         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2511         *      -> host enabled PV EOI, guest executed EOI.
2512         */
2513        BUG_ON(!pv_eoi_enabled(vcpu));
2514        pending = pv_eoi_get_pending(vcpu);
2515        /*
2516         * Clear pending bit in any case: it will be set again on vmentry.
2517         * While this might not be ideal from performance point of view,
2518         * this makes sure pv eoi is only enabled when we know it's safe.
2519         */
2520        pv_eoi_clr_pending(vcpu);
2521        if (pending)
2522                return;
2523        vector = apic_set_eoi(apic);
2524        trace_kvm_pv_eoi(apic, vector);
2525}
2526
2527void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2528{
2529        u32 data;
2530
2531        if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2532                apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2533
2534        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2535                return;
2536
2537        if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2538                                  sizeof(u32)))
2539                return;
2540
2541        apic_set_tpr(vcpu->arch.apic, data & 0xff);
2542}
2543
2544/*
2545 * apic_sync_pv_eoi_to_guest - called before vmentry
2546 *
2547 * Detect whether it's safe to enable PV EOI and
2548 * if yes do so.
2549 */
2550static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2551                                        struct kvm_lapic *apic)
2552{
2553        if (!pv_eoi_enabled(vcpu) ||
2554            /* IRR set or many bits in ISR: could be nested. */
2555            apic->irr_pending ||
2556            /* Cache not set: could be safe but we don't bother. */
2557            apic->highest_isr_cache == -1 ||
2558            /* Need EOI to update ioapic. */
2559            kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2560                /*
2561                 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2562                 * so we need not do anything here.
2563                 */
2564                return;
2565        }
2566
2567        pv_eoi_set_pending(apic->vcpu);
2568}
2569
2570void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2571{
2572        u32 data, tpr;
2573        int max_irr, max_isr;
2574        struct kvm_lapic *apic = vcpu->arch.apic;
2575
2576        apic_sync_pv_eoi_to_guest(vcpu, apic);
2577
2578        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2579                return;
2580
2581        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2582        max_irr = apic_find_highest_irr(apic);
2583        if (max_irr < 0)
2584                max_irr = 0;
2585        max_isr = apic_find_highest_isr(apic);
2586        if (max_isr < 0)
2587                max_isr = 0;
2588        data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2589
2590        kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2591                                sizeof(u32));
2592}
2593
2594int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2595{
2596        if (vapic_addr) {
2597                if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2598                                        &vcpu->arch.apic->vapic_cache,
2599                                        vapic_addr, sizeof(u32)))
2600                        return -EINVAL;
2601                __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2602        } else {
2603                __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2604        }
2605
2606        vcpu->arch.apic->vapic_addr = vapic_addr;
2607        return 0;
2608}
2609
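/*
 * x2APIC MSR accesses: MSR 0x800 + n corresponds to APIC register offset
 * n << 4.  The 64-bit ICR is split across APIC_ICR/APIC_ICR2 internally,
 * so on a write the high dword is stored first and the low-dword write
 * then sends the IPI; ICR2 itself and (for reads) DFR are not valid
 * registers in x2APIC mode.
 */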
2610int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2611{
2612        struct kvm_lapic *apic = vcpu->arch.apic;
2613        u32 reg = (msr - APIC_BASE_MSR) << 4;
2614
2615        if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2616                return 1;
2617
2618        if (reg == APIC_ICR2)
2619                return 1;
2620
2621        /* For ICR, write the high dword (ICR2) before the IPI-triggering low dword */
2622        if (reg == APIC_ICR)
2623                kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2624        return kvm_lapic_reg_write(apic, reg, (u32)data);
2625}
2626
2627int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2628{
2629        struct kvm_lapic *apic = vcpu->arch.apic;
2630        u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
2631
2632        if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2633                return 1;
2634
2635        if (reg == APIC_DFR || reg == APIC_ICR2)
2636                return 1;
2637
2638        if (kvm_lapic_reg_read(apic, reg, 4, &low))
2639                return 1;
2640        if (reg == APIC_ICR)
2641                kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2642
2643        *data = (((u64)high) << 32) | low;
2644
2645        return 0;
2646}
2647
2648int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2649{
2650        struct kvm_lapic *apic = vcpu->arch.apic;
2651
2652        if (!lapic_in_kernel(vcpu))
2653                return 1;
2654
2655        /* For ICR, write the high dword (ICR2) before the IPI-triggering low dword */
2656        if (reg == APIC_ICR)
2657                kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2658        return kvm_lapic_reg_write(apic, reg, (u32)data);
2659}
2660
2661int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2662{
2663        struct kvm_lapic *apic = vcpu->arch.apic;
2664        u32 low, high = 0;
2665
2666        if (!lapic_in_kernel(vcpu))
2667                return 1;
2668
2669        if (kvm_lapic_reg_read(apic, reg, 4, &low))
2670                return 1;
2671        if (reg == APIC_ICR)
2672                kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2673
2674        *data = (((u64)high) << 32) | low;
2675
2676        return 0;
2677}
2678
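/*
 * MSR_KVM_PV_EOI_EN: the guest hands over a 4-byte aligned guest physical
 * address with the KVM_MSR_ENABLED flag set in the low bits.  The gfn->hva
 * translation for that word is cached here so that
 * pv_eoi_get/set/clr_pending() can reach the flag in guest memory cheaply
 * on every entry/exit.
 */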
2679int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
2680{
2681        u64 addr = data & ~KVM_MSR_ENABLED;
2682        struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
2683        unsigned long new_len;
2684
2685        if (!IS_ALIGNED(addr, 4))
2686                return 1;
2687
2688        vcpu->arch.pv_eoi.msr_val = data;
2689        if (!pv_eoi_enabled(vcpu))
2690                return 0;
2691
2692        if (addr == ghc->gpa && len <= ghc->len)
2693                new_len = ghc->len;
2694        else
2695                new_len = len;
2696
2697        return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
2698}
2699
2700void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2701{
2702        struct kvm_lapic *apic = vcpu->arch.apic;
2703        u8 sipi_vector;
2704        unsigned long pe;
2705
2706        if (!lapic_in_kernel(vcpu) || !apic->pending_events)
2707                return;
2708
2709        /*
2710         * INITs are latched while in SMM.  Because an SMM CPU cannot
2711         * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
2712         * and delay processing of INIT until the next RSM.
2713         */
2714        if (is_smm(vcpu)) {
2715                WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
2716                if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
2717                        clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2718                return;
2719        }
2720
2721        pe = xchg(&apic->pending_events, 0);
2722        if (test_bit(KVM_APIC_INIT, &pe)) {
2723                kvm_vcpu_reset(vcpu, true);
2724                if (kvm_vcpu_is_bsp(apic->vcpu))
2725                        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2726                else
2727                        vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
2728        }
2729        if (test_bit(KVM_APIC_SIPI, &pe) &&
2730            vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
2731                /* evaluate pending_events before reading the vector */
2732                smp_rmb();
2733                sipi_vector = apic->sipi_vector;
2734                kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
2735                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2736        }
2737}
2738
2739void kvm_lapic_init(void)
2740{
2741        /* do not patch jump label more than once per second */
2742        jump_label_rate_limit(&apic_hw_disabled, HZ);
2743        jump_label_rate_limit(&apic_sw_disabled, HZ);
2744}
2745
2746void kvm_lapic_exit(void)
2747{
2748        static_key_deferred_flush(&apic_hw_disabled);
2749        static_key_deferred_flush(&apic_sw_disabled);
2750}
2751