linux/arch/x86/kvm/lapic.c
<<
>>
Prefs
   1
   2/*
   3 * Local APIC virtualization
   4 *
   5 * Copyright (C) 2006 Qumranet, Inc.
   6 * Copyright (C) 2007 Novell
   7 * Copyright (C) 2007 Intel
   8 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
   9 *
  10 * Authors:
  11 *   Dor Laor <dor.laor@qumranet.com>
  12 *   Gregory Haskins <ghaskins@novell.com>
  13 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
  14 *
  15 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
  16 *
  17 * This work is licensed under the terms of the GNU GPL, version 2.  See
  18 * the COPYING file in the top-level directory.
  19 */
  20
  21#include <linux/kvm_host.h>
  22#include <linux/kvm.h>
  23#include <linux/mm.h>
  24#include <linux/highmem.h>
  25#include <linux/smp.h>
  26#include <linux/hrtimer.h>
  27#include <linux/io.h>
  28#include <linux/export.h>
  29#include <linux/math64.h>
  30#include <linux/slab.h>
  31#include <asm/processor.h>
  32#include <asm/msr.h>
  33#include <asm/page.h>
  34#include <asm/current.h>
  35#include <asm/apicdef.h>
  36#include <asm/delay.h>
  37#include <linux/atomic.h>
  38#include <linux/jump_label.h>
  39#include "kvm_cache_regs.h"
  40#include "irq.h"
  41#include "trace.h"
  42#include "x86.h"
  43#include "cpuid.h"
  44#include "hyperv.h"
  45
  46#ifndef CONFIG_X86_64
  47#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
  48#else
  49#define mod_64(x, y) ((x) % (y))
  50#endif
  51
  52#define PRId64 "d"
  53#define PRIx64 "llx"
  54#define PRIu64 "u"
  55#define PRIo64 "o"
  56
  57/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
  58#define apic_debug(fmt, arg...)
  59
  60/* 14 is the version for Xeon and Pentium 8.4.8*/
  61#define APIC_VERSION                    (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
  62#define LAPIC_MMIO_LENGTH               (1 << 12)
  63/* followed define is not in apicdef.h */
  64#define APIC_SHORT_MASK                 0xc0000
  65#define APIC_DEST_NOSHORT               0x0
  66#define APIC_DEST_MASK                  0x800
  67#define MAX_APIC_VECTOR                 256
  68#define APIC_VECTORS_PER_REG            32
  69
  70#define APIC_BROADCAST                  0xFF
  71#define X2APIC_BROADCAST                0xFFFFFFFFul
  72
  73static inline int apic_test_vector(int vec, void *bitmap)
  74{
  75        return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  76}
  77
  78bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
  79{
  80        struct kvm_lapic *apic = vcpu->arch.apic;
  81
  82        return apic_test_vector(vector, apic->regs + APIC_ISR) ||
  83                apic_test_vector(vector, apic->regs + APIC_IRR);
  84}
  85
  86static inline void apic_clear_vector(int vec, void *bitmap)
  87{
  88        clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  89}
  90
  91static inline int __apic_test_and_set_vector(int vec, void *bitmap)
  92{
  93        return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  94}
  95
  96static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
  97{
  98        return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  99}
 100
 101struct static_key_deferred apic_hw_disabled __read_mostly;
 102struct static_key_deferred apic_sw_disabled __read_mostly;
 103
 104static inline int apic_enabled(struct kvm_lapic *apic)
 105{
 106        return kvm_apic_sw_enabled(apic) &&     kvm_apic_hw_enabled(apic);
 107}
 108
 109#define LVT_MASK        \
 110        (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
 111
 112#define LINT_MASK       \
 113        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 114         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 115
 116static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
 117{
 118        return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
 119}
 120
 121static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
 122{
 123        return apic->vcpu->vcpu_id;
 124}
 125
 126static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
 127                u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
 128        switch (map->mode) {
 129        case KVM_APIC_MODE_X2APIC: {
 130                u32 offset = (dest_id >> 16) * 16;
 131                u32 max_apic_id = map->max_apic_id;
 132
 133                if (offset <= max_apic_id) {
 134                        u8 cluster_size = min(max_apic_id - offset + 1, 16U);
 135
 136                        *cluster = &map->phys_map[offset];
 137                        *mask = dest_id & (0xffff >> (16 - cluster_size));
 138                } else {
 139                        *mask = 0;
 140                }
 141
 142                return true;
 143                }
 144        case KVM_APIC_MODE_XAPIC_FLAT:
 145                *cluster = map->xapic_flat_map;
 146                *mask = dest_id & 0xff;
 147                return true;
 148        case KVM_APIC_MODE_XAPIC_CLUSTER:
 149                *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
 150                *mask = dest_id & 0xf;
 151                return true;
 152        default:
 153                /* Not optimized. */
 154                return false;
 155        }
 156}
 157
 158static void kvm_apic_map_free(struct rcu_head *rcu)
 159{
 160        struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
 161
 162        kvfree(map);
 163}
 164
 165static void recalculate_apic_map(struct kvm *kvm)
 166{
 167        struct kvm_apic_map *new, *old = NULL;
 168        struct kvm_vcpu *vcpu;
 169        int i;
 170        u32 max_id = 255; /* enough space for any xAPIC ID */
 171
 172        mutex_lock(&kvm->arch.apic_map_lock);
 173
 174        kvm_for_each_vcpu(i, vcpu, kvm)
 175                if (kvm_apic_present(vcpu))
 176                        max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
 177
 178        new = kvzalloc(sizeof(struct kvm_apic_map) +
 179                           sizeof(struct kvm_lapic *) * ((u64)max_id + 1), GFP_KERNEL);
 180
 181        if (!new)
 182                goto out;
 183
 184        new->max_apic_id = max_id;
 185
 186        kvm_for_each_vcpu(i, vcpu, kvm) {
 187                struct kvm_lapic *apic = vcpu->arch.apic;
 188                struct kvm_lapic **cluster;
 189                u16 mask;
 190                u32 ldr;
 191                u8 xapic_id;
 192                u32 x2apic_id;
 193
 194                if (!kvm_apic_present(vcpu))
 195                        continue;
 196
 197                xapic_id = kvm_xapic_id(apic);
 198                x2apic_id = kvm_x2apic_id(apic);
 199
 200                /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
 201                if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
 202                                x2apic_id <= new->max_apic_id)
 203                        new->phys_map[x2apic_id] = apic;
 204                /*
 205                 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
 206                 * prevent them from masking VCPUs with APIC ID <= 0xff.
 207                 */
 208                if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
 209                        new->phys_map[xapic_id] = apic;
 210
 211                ldr = kvm_lapic_get_reg(apic, APIC_LDR);
 212
 213                if (apic_x2apic_mode(apic)) {
 214                        new->mode |= KVM_APIC_MODE_X2APIC;
 215                } else if (ldr) {
 216                        ldr = GET_APIC_LOGICAL_ID(ldr);
 217                        if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
 218                                new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
 219                        else
 220                                new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
 221                }
 222
 223                if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
 224                        continue;
 225
 226                if (mask)
 227                        cluster[ffs(mask) - 1] = apic;
 228        }
 229out:
 230        old = rcu_dereference_protected(kvm->arch.apic_map,
 231                        lockdep_is_held(&kvm->arch.apic_map_lock));
 232        rcu_assign_pointer(kvm->arch.apic_map, new);
 233        mutex_unlock(&kvm->arch.apic_map_lock);
 234
 235        if (old)
 236                call_rcu(&old->rcu, kvm_apic_map_free);
 237
 238        kvm_make_scan_ioapic_request(kvm);
 239}
 240
 241static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 242{
 243        bool enabled = val & APIC_SPIV_APIC_ENABLED;
 244
 245        kvm_lapic_set_reg(apic, APIC_SPIV, val);
 246
 247        if (enabled != apic->sw_enabled) {
 248                apic->sw_enabled = enabled;
 249                if (enabled) {
 250                        static_key_slow_dec_deferred(&apic_sw_disabled);
 251                        recalculate_apic_map(apic->vcpu->kvm);
 252                } else
 253                        static_key_slow_inc(&apic_sw_disabled.key);
 254        }
 255}
 256
 257static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
 258{
 259        kvm_lapic_set_reg(apic, APIC_ID, id << 24);
 260        recalculate_apic_map(apic->vcpu->kvm);
 261}
 262
 263static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
 264{
 265        kvm_lapic_set_reg(apic, APIC_LDR, id);
 266        recalculate_apic_map(apic->vcpu->kvm);
 267}
 268
 269static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
 270{
 271        return ((id >> 4) << 16) | (1 << (id & 0xf));
 272}
 273
 274static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
 275{
 276        u32 ldr = kvm_apic_calc_x2apic_ldr(id);
 277
 278        WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
 279
 280        kvm_lapic_set_reg(apic, APIC_ID, id);
 281        kvm_lapic_set_reg(apic, APIC_LDR, ldr);
 282        recalculate_apic_map(apic->vcpu->kvm);
 283}
 284
 285static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 286{
 287        return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
 288}
 289
 290static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 291{
 292        return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
 293}
 294
 295static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 296{
 297        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
 298}
 299
 300static inline int apic_lvtt_period(struct kvm_lapic *apic)
 301{
 302        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
 303}
 304
 305static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 306{
 307        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
 308}
 309
 310static inline int apic_lvt_nmi_mode(u32 lvt_val)
 311{
 312        return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
 313}
 314
 315void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 316{
 317        struct kvm_lapic *apic = vcpu->arch.apic;
 318        struct kvm_cpuid_entry2 *feat;
 319        u32 v = APIC_VERSION;
 320
 321        if (!lapic_in_kernel(vcpu))
 322                return;
 323
 324        /*
 325         * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation)
 326         * which doesn't have EOI register; Some buggy OSes (e.g. Windows with
 327         * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC
 328         * version first and level-triggered interrupts never get EOIed in
 329         * IOAPIC.
 330         */
 331        feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
 332        if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
 333            !ioapic_in_kernel(vcpu->kvm))
 334                v |= APIC_LVR_DIRECTED_EOI;
 335        kvm_lapic_set_reg(apic, APIC_LVR, v);
 336}
 337
 338static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
 339        LVT_MASK ,      /* part LVTT mask, timer mode mask added at runtime */
 340        LVT_MASK | APIC_MODE_MASK,      /* LVTTHMR */
 341        LVT_MASK | APIC_MODE_MASK,      /* LVTPC */
 342        LINT_MASK, LINT_MASK,   /* LVT0-1 */
 343        LVT_MASK                /* LVTERR */
 344};
 345
 346static int find_highest_vector(void *bitmap)
 347{
 348        int vec;
 349        u32 *reg;
 350
 351        for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
 352             vec >= 0; vec -= APIC_VECTORS_PER_REG) {
 353                reg = bitmap + REG_POS(vec);
 354                if (*reg)
 355                        return __fls(*reg) + vec;
 356        }
 357
 358        return -1;
 359}
 360
 361static u8 count_vectors(void *bitmap)
 362{
 363        int vec;
 364        u32 *reg;
 365        u8 count = 0;
 366
 367        for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
 368                reg = bitmap + REG_POS(vec);
 369                count += hweight32(*reg);
 370        }
 371
 372        return count;
 373}
 374
 375bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
 376{
 377        u32 i, vec;
 378        u32 pir_val, irr_val, prev_irr_val;
 379        int max_updated_irr;
 380
 381        max_updated_irr = -1;
 382        *max_irr = -1;
 383
 384        for (i = vec = 0; i <= 7; i++, vec += 32) {
 385                pir_val = READ_ONCE(pir[i]);
 386                irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
 387                if (pir_val) {
 388                        prev_irr_val = irr_val;
 389                        irr_val |= xchg(&pir[i], 0);
 390                        *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
 391                        if (prev_irr_val != irr_val) {
 392                                max_updated_irr =
 393                                        __fls(irr_val ^ prev_irr_val) + vec;
 394                        }
 395                }
 396                if (irr_val)
 397                        *max_irr = __fls(irr_val) + vec;
 398        }
 399
 400        return ((max_updated_irr != -1) &&
 401                (max_updated_irr == *max_irr));
 402}
 403EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
 404
 405bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
 406{
 407        struct kvm_lapic *apic = vcpu->arch.apic;
 408
 409        return __kvm_apic_update_irr(pir, apic->regs, max_irr);
 410}
 411EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 412
 413static inline int apic_search_irr(struct kvm_lapic *apic)
 414{
 415        return find_highest_vector(apic->regs + APIC_IRR);
 416}
 417
 418static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 419{
 420        int result;
 421
 422        /*
 423         * Note that irr_pending is just a hint. It will be always
 424         * true with virtual interrupt delivery enabled.
 425         */
 426        if (!apic->irr_pending)
 427                return -1;
 428
 429        result = apic_search_irr(apic);
 430        ASSERT(result == -1 || result >= 16);
 431
 432        return result;
 433}
 434
 435static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 436{
 437        struct kvm_vcpu *vcpu;
 438
 439        vcpu = apic->vcpu;
 440
 441        if (unlikely(vcpu->arch.apicv_active)) {
 442                /* need to update RVI */
 443                apic_clear_vector(vec, apic->regs + APIC_IRR);
 444                kvm_x86_ops->hwapic_irr_update(vcpu,
 445                                apic_find_highest_irr(apic));
 446        } else {
 447                apic->irr_pending = false;
 448                apic_clear_vector(vec, apic->regs + APIC_IRR);
 449                if (apic_search_irr(apic) != -1)
 450                        apic->irr_pending = true;
 451        }
 452}
 453
 454static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 455{
 456        struct kvm_vcpu *vcpu;
 457
 458        if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
 459                return;
 460
 461        vcpu = apic->vcpu;
 462
 463        /*
 464         * With APIC virtualization enabled, all caching is disabled
 465         * because the processor can modify ISR under the hood.  Instead
 466         * just set SVI.
 467         */
 468        if (unlikely(vcpu->arch.apicv_active))
 469                kvm_x86_ops->hwapic_isr_update(vcpu, vec);
 470        else {
 471                ++apic->isr_count;
 472                BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
 473                /*
 474                 * ISR (in service register) bit is set when injecting an interrupt.
 475                 * The highest vector is injected. Thus the latest bit set matches
 476                 * the highest bit in ISR.
 477                 */
 478                apic->highest_isr_cache = vec;
 479        }
 480}
 481
 482static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 483{
 484        int result;
 485
 486        /*
 487         * Note that isr_count is always 1, and highest_isr_cache
 488         * is always -1, with APIC virtualization enabled.
 489         */
 490        if (!apic->isr_count)
 491                return -1;
 492        if (likely(apic->highest_isr_cache != -1))
 493                return apic->highest_isr_cache;
 494
 495        result = find_highest_vector(apic->regs + APIC_ISR);
 496        ASSERT(result == -1 || result >= 16);
 497
 498        return result;
 499}
 500
 501static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 502{
 503        struct kvm_vcpu *vcpu;
 504        if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
 505                return;
 506
 507        vcpu = apic->vcpu;
 508
 509        /*
 510         * We do get here for APIC virtualization enabled if the guest
 511         * uses the Hyper-V APIC enlightenment.  In this case we may need
 512         * to trigger a new interrupt delivery by writing the SVI field;
 513         * on the other hand isr_count and highest_isr_cache are unused
 514         * and must be left alone.
 515         */
 516        if (unlikely(vcpu->arch.apicv_active))
 517                kvm_x86_ops->hwapic_isr_update(vcpu,
 518                                               apic_find_highest_isr(apic));
 519        else {
 520                --apic->isr_count;
 521                BUG_ON(apic->isr_count < 0);
 522                apic->highest_isr_cache = -1;
 523        }
 524}
 525
 526int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 527{
 528        /* This may race with setting of irr in __apic_accept_irq() and
 529         * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
 530         * will cause vmexit immediately and the value will be recalculated
 531         * on the next vmentry.
 532         */
 533        return apic_find_highest_irr(vcpu->arch.apic);
 534}
 535EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 536
 537static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 538                             int vector, int level, int trig_mode,
 539                             struct dest_map *dest_map);
 540
 541int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 542                     struct dest_map *dest_map)
 543{
 544        struct kvm_lapic *apic = vcpu->arch.apic;
 545
 546        return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
 547                        irq->level, irq->trig_mode, dest_map);
 548}
 549
 550static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
 551{
 552
 553        return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
 554                                      sizeof(val));
 555}
 556
 557static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
 558{
 559
 560        return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
 561                                      sizeof(*val));
 562}
 563
 564static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
 565{
 566        return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
 567}
 568
 569static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 570{
 571        u8 val;
 572        if (pv_eoi_get_user(vcpu, &val) < 0)
 573                apic_debug("Can't read EOI MSR value: 0x%llx\n",
 574                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 575        return val & 0x1;
 576}
 577
 578static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 579{
 580        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
 581                apic_debug("Can't set EOI MSR value: 0x%llx\n",
 582                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 583                return;
 584        }
 585        __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 586}
 587
 588static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 589{
 590        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
 591                apic_debug("Can't clear EOI MSR value: 0x%llx\n",
 592                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 593                return;
 594        }
 595        __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 596}
 597
 598static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
 599{
 600        int highest_irr;
 601        if (apic->vcpu->arch.apicv_active)
 602                highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
 603        else
 604                highest_irr = apic_find_highest_irr(apic);
 605        if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
 606                return -1;
 607        return highest_irr;
 608}
 609
 610static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
 611{
 612        u32 tpr, isrv, ppr, old_ppr;
 613        int isr;
 614
 615        old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
 616        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
 617        isr = apic_find_highest_isr(apic);
 618        isrv = (isr != -1) ? isr : 0;
 619
 620        if ((tpr & 0xf0) >= (isrv & 0xf0))
 621                ppr = tpr & 0xff;
 622        else
 623                ppr = isrv & 0xf0;
 624
 625        apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
 626                   apic, ppr, isr, isrv);
 627
 628        *new_ppr = ppr;
 629        if (old_ppr != ppr)
 630                kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
 631
 632        return ppr < old_ppr;
 633}
 634
 635static void apic_update_ppr(struct kvm_lapic *apic)
 636{
 637        u32 ppr;
 638
 639        if (__apic_update_ppr(apic, &ppr) &&
 640            apic_has_interrupt_for_ppr(apic, ppr) != -1)
 641                kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 642}
 643
 644void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
 645{
 646        apic_update_ppr(vcpu->arch.apic);
 647}
 648EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
 649
 650static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 651{
 652        kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
 653        apic_update_ppr(apic);
 654}
 655
 656static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
 657{
 658        return mda == (apic_x2apic_mode(apic) ?
 659                        X2APIC_BROADCAST : APIC_BROADCAST);
 660}
 661
 662static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
 663{
 664        if (kvm_apic_broadcast(apic, mda))
 665                return true;
 666
 667        if (apic_x2apic_mode(apic))
 668                return mda == kvm_x2apic_id(apic);
 669
 670        /*
 671         * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
 672         * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
 673         * this allows unique addressing of VCPUs with APIC ID over 0xff.
 674         * The 0xff condition is needed because writeable xAPIC ID.
 675         */
 676        if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
 677                return true;
 678
 679        return mda == kvm_xapic_id(apic);
 680}
 681
 682static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 683{
 684        u32 logical_id;
 685
 686        if (kvm_apic_broadcast(apic, mda))
 687                return true;
 688
 689        logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
 690
 691        if (apic_x2apic_mode(apic))
 692                return ((logical_id >> 16) == (mda >> 16))
 693                       && (logical_id & mda & 0xffff) != 0;
 694
 695        logical_id = GET_APIC_LOGICAL_ID(logical_id);
 696
 697        switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
 698        case APIC_DFR_FLAT:
 699                return (logical_id & mda) != 0;
 700        case APIC_DFR_CLUSTER:
 701                return ((logical_id >> 4) == (mda >> 4))
 702                       && (logical_id & mda & 0xf) != 0;
 703        default:
 704                apic_debug("Bad DFR vcpu %d: %08x\n",
 705                           apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR));
 706                return false;
 707        }
 708}
 709
 710/* The KVM local APIC implementation has two quirks:
 711 *
 712 *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 713 *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 714 *    KVM doesn't do that aliasing.
 715 *
 716 *  - in-kernel IOAPIC messages have to be delivered directly to
 717 *    x2APIC, because the kernel does not support interrupt remapping.
 718 *    In order to support broadcast without interrupt remapping, x2APIC
 719 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
 720 *    to X2APIC_BROADCAST.
 721 *
 722 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
 723 * important when userspace wants to use x2APIC-format MSIs, because
 724 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
 725 */
 726static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
 727                struct kvm_lapic *source, struct kvm_lapic *target)
 728{
 729        bool ipi = source != NULL;
 730
 731        if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
 732            !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
 733                return X2APIC_BROADCAST;
 734
 735        return dest_id;
 736}
 737
 738bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 739                           int short_hand, unsigned int dest, int dest_mode)
 740{
 741        struct kvm_lapic *target = vcpu->arch.apic;
 742        u32 mda = kvm_apic_mda(vcpu, dest, source, target);
 743
 744        apic_debug("target %p, source %p, dest 0x%x, "
 745                   "dest_mode 0x%x, short_hand 0x%x\n",
 746                   target, source, dest, dest_mode, short_hand);
 747
 748        ASSERT(target);
 749        switch (short_hand) {
 750        case APIC_DEST_NOSHORT:
 751                if (dest_mode == APIC_DEST_PHYSICAL)
 752                        return kvm_apic_match_physical_addr(target, mda);
 753                else
 754                        return kvm_apic_match_logical_addr(target, mda);
 755        case APIC_DEST_SELF:
 756                return target == source;
 757        case APIC_DEST_ALLINC:
 758                return true;
 759        case APIC_DEST_ALLBUT:
 760                return target != source;
 761        default:
 762                apic_debug("kvm: apic: Bad dest shorthand value %x\n",
 763                           short_hand);
 764                return false;
 765        }
 766}
 767EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
 768
 769int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
 770                       const unsigned long *bitmap, u32 bitmap_size)
 771{
 772        u32 mod;
 773        int i, idx = -1;
 774
 775        mod = vector % dest_vcpus;
 776
 777        for (i = 0; i <= mod; i++) {
 778                idx = find_next_bit(bitmap, bitmap_size, idx + 1);
 779                BUG_ON(idx == bitmap_size);
 780        }
 781
 782        return idx;
 783}
 784
 785static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
 786{
 787        if (!kvm->arch.disabled_lapic_found) {
 788                kvm->arch.disabled_lapic_found = true;
 789                printk(KERN_INFO
 790                       "Disabled LAPIC found during irq injection\n");
 791        }
 792}
 793
 794static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
 795                struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
 796{
 797        if (kvm->arch.x2apic_broadcast_quirk_disabled) {
 798                if ((irq->dest_id == APIC_BROADCAST &&
 799                                map->mode != KVM_APIC_MODE_X2APIC))
 800                        return true;
 801                if (irq->dest_id == X2APIC_BROADCAST)
 802                        return true;
 803        } else {
 804                bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
 805                if (irq->dest_id == (x2apic_ipi ?
 806                                     X2APIC_BROADCAST : APIC_BROADCAST))
 807                        return true;
 808        }
 809
 810        return false;
 811}
 812
 813/* Return true if the interrupt can be handled by using *bitmap as index mask
 814 * for valid destinations in *dst array.
 815 * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
 816 * Note: we may have zero kvm_lapic destinations when we return true, which
 817 * means that the interrupt should be dropped.  In this case, *bitmap would be
 818 * zero and *dst undefined.
 819 */
 820static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
 821                struct kvm_lapic **src, struct kvm_lapic_irq *irq,
 822                struct kvm_apic_map *map, struct kvm_lapic ***dst,
 823                unsigned long *bitmap)
 824{
 825        int i, lowest;
 826
 827        if (irq->shorthand == APIC_DEST_SELF && src) {
 828                *dst = src;
 829                *bitmap = 1;
 830                return true;
 831        } else if (irq->shorthand)
 832                return false;
 833
 834        if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
 835                return false;
 836
 837        if (irq->dest_mode == APIC_DEST_PHYSICAL) {
 838                if (irq->dest_id > map->max_apic_id) {
 839                        *bitmap = 0;
 840                } else {
 841                        *dst = &map->phys_map[irq->dest_id];
 842                        *bitmap = 1;
 843                }
 844                return true;
 845        }
 846
 847        *bitmap = 0;
 848        if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
 849                                (u16 *)bitmap))
 850                return false;
 851
 852        if (!kvm_lowest_prio_delivery(irq))
 853                return true;
 854
 855        if (!kvm_vector_hashing_enabled()) {
 856                lowest = -1;
 857                for_each_set_bit(i, bitmap, 16) {
 858                        if (!(*dst)[i])
 859                                continue;
 860                        if (lowest < 0)
 861                                lowest = i;
 862                        else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
 863                                                (*dst)[lowest]->vcpu) < 0)
 864                                lowest = i;
 865                }
 866        } else {
 867                if (!*bitmap)
 868                        return true;
 869
 870                lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
 871                                bitmap, 16);
 872
 873                if (!(*dst)[lowest]) {
 874                        kvm_apic_disabled_lapic_found(kvm);
 875                        *bitmap = 0;
 876                        return true;
 877                }
 878        }
 879
 880        *bitmap = (lowest >= 0) ? 1 << lowest : 0;
 881
 882        return true;
 883}
 884
 885bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 886                struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
 887{
 888        struct kvm_apic_map *map;
 889        unsigned long bitmap;
 890        struct kvm_lapic **dst = NULL;
 891        int i;
 892        bool ret;
 893
 894        *r = -1;
 895
 896        if (irq->shorthand == APIC_DEST_SELF) {
 897                *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
 898                return true;
 899        }
 900
 901        rcu_read_lock();
 902        map = rcu_dereference(kvm->arch.apic_map);
 903
 904        ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
 905        if (ret)
 906                for_each_set_bit(i, &bitmap, 16) {
 907                        if (!dst[i])
 908                                continue;
 909                        if (*r < 0)
 910                                *r = 0;
 911                        *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 912                }
 913
 914        rcu_read_unlock();
 915        return ret;
 916}
 917
 918/*
 919 * This routine tries to handler interrupts in posted mode, here is how
 920 * it deals with different cases:
 921 * - For single-destination interrupts, handle it in posted mode
 922 * - Else if vector hashing is enabled and it is a lowest-priority
 923 *   interrupt, handle it in posted mode and use the following mechanism
 924 *   to find the destinaiton vCPU.
 925 *      1. For lowest-priority interrupts, store all the possible
 926 *         destination vCPUs in an array.
 927 *      2. Use "guest vector % max number of destination vCPUs" to find
 928 *         the right destination vCPU in the array for the lowest-priority
 929 *         interrupt.
 930 * - Otherwise, use remapped mode to inject the interrupt.
 931 */
 932bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 933                        struct kvm_vcpu **dest_vcpu)
 934{
 935        struct kvm_apic_map *map;
 936        unsigned long bitmap;
 937        struct kvm_lapic **dst = NULL;
 938        bool ret = false;
 939
 940        if (irq->shorthand)
 941                return false;
 942
 943        rcu_read_lock();
 944        map = rcu_dereference(kvm->arch.apic_map);
 945
 946        if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
 947                        hweight16(bitmap) == 1) {
 948                unsigned long i = find_first_bit(&bitmap, 16);
 949
 950                if (dst[i]) {
 951                        *dest_vcpu = dst[i]->vcpu;
 952                        ret = true;
 953                }
 954        }
 955
 956        rcu_read_unlock();
 957        return ret;
 958}
 959
 960/*
 961 * Add a pending IRQ into lapic.
 962 * Return 1 if successfully added and 0 if discarded.
 963 */
 964static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 965                             int vector, int level, int trig_mode,
 966                             struct dest_map *dest_map)
 967{
 968        int result = 0;
 969        struct kvm_vcpu *vcpu = apic->vcpu;
 970
 971        trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
 972                                  trig_mode, vector);
 973        switch (delivery_mode) {
 974        case APIC_DM_LOWEST:
 975                vcpu->arch.apic_arb_prio++;
 976        case APIC_DM_FIXED:
 977                if (unlikely(trig_mode && !level))
 978                        break;
 979
 980                /* FIXME add logic for vcpu on reset */
 981                if (unlikely(!apic_enabled(apic)))
 982                        break;
 983
 984                result = 1;
 985
 986                if (dest_map) {
 987                        __set_bit(vcpu->vcpu_id, dest_map->map);
 988                        dest_map->vectors[vcpu->vcpu_id] = vector;
 989                }
 990
 991                if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
 992                        if (trig_mode)
 993                                kvm_lapic_set_vector(vector, apic->regs + APIC_TMR);
 994                        else
 995                                apic_clear_vector(vector, apic->regs + APIC_TMR);
 996                }
 997
 998                if (vcpu->arch.apicv_active)
 999                        kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
1000                else {
1001                        kvm_lapic_set_irr(vector, apic);
1002
1003                        kvm_make_request(KVM_REQ_EVENT, vcpu);
1004                        kvm_vcpu_kick(vcpu);
1005                }
1006                break;
1007
1008        case APIC_DM_REMRD:
1009                result = 1;
1010                vcpu->arch.pv.pv_unhalted = 1;
1011                kvm_make_request(KVM_REQ_EVENT, vcpu);
1012                kvm_vcpu_kick(vcpu);
1013                break;
1014
1015        case APIC_DM_SMI:
1016                result = 1;
1017                kvm_make_request(KVM_REQ_SMI, vcpu);
1018                kvm_vcpu_kick(vcpu);
1019                break;
1020
1021        case APIC_DM_NMI:
1022                result = 1;
1023                kvm_inject_nmi(vcpu);
1024                kvm_vcpu_kick(vcpu);
1025                break;
1026
1027        case APIC_DM_INIT:
1028                if (!trig_mode || level) {
1029                        result = 1;
1030                        /* assumes that there are only KVM_APIC_INIT/SIPI */
1031                        apic->pending_events = (1UL << KVM_APIC_INIT);
1032                        /* make sure pending_events is visible before sending
1033                         * the request */
1034                        smp_wmb();
1035                        kvm_make_request(KVM_REQ_EVENT, vcpu);
1036                        kvm_vcpu_kick(vcpu);
1037                } else {
1038                        apic_debug("Ignoring de-assert INIT to vcpu %d\n",
1039                                   vcpu->vcpu_id);
1040                }
1041                break;
1042
1043        case APIC_DM_STARTUP:
1044                apic_debug("SIPI to vcpu %d vector 0x%02x\n",
1045                           vcpu->vcpu_id, vector);
1046                result = 1;
1047                apic->sipi_vector = vector;
1048                /* make sure sipi_vector is visible for the receiver */
1049                smp_wmb();
1050                set_bit(KVM_APIC_SIPI, &apic->pending_events);
1051                kvm_make_request(KVM_REQ_EVENT, vcpu);
1052                kvm_vcpu_kick(vcpu);
1053                break;
1054
1055        case APIC_DM_EXTINT:
1056                /*
1057                 * Should only be called by kvm_apic_local_deliver() with LVT0,
1058                 * before NMI watchdog was enabled. Already handled by
1059                 * kvm_apic_accept_pic_intr().
1060                 */
1061                break;
1062
1063        default:
1064                printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1065                       delivery_mode);
1066                break;
1067        }
1068        return result;
1069}
1070
1071int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1072{
1073        return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1074}
1075
1076static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1077{
1078        return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1079}
1080
1081static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1082{
1083        int trigger_mode;
1084
1085        /* Eoi the ioapic only if the ioapic doesn't own the vector. */
1086        if (!kvm_ioapic_handles_vector(apic, vector))
1087                return;
1088
1089        /* Request a KVM exit to inform the userspace IOAPIC. */
1090        if (irqchip_split(apic->vcpu->kvm)) {
1091                apic->vcpu->arch.pending_ioapic_eoi = vector;
1092                kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1093                return;
1094        }
1095
1096        if (apic_test_vector(vector, apic->regs + APIC_TMR))
1097                trigger_mode = IOAPIC_LEVEL_TRIG;
1098        else
1099                trigger_mode = IOAPIC_EDGE_TRIG;
1100
1101        kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1102}
1103
1104static int apic_set_eoi(struct kvm_lapic *apic)
1105{
1106        int vector = apic_find_highest_isr(apic);
1107
1108        trace_kvm_eoi(apic, vector);
1109
1110        /*
1111         * Not every write EOI will has corresponding ISR,
1112         * one example is when Kernel check timer on setup_IO_APIC
1113         */
1114        if (vector == -1)
1115                return vector;
1116
1117        apic_clear_isr(vector, apic);
1118        apic_update_ppr(apic);
1119
1120        if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
1121                kvm_hv_synic_send_eoi(apic->vcpu, vector);
1122
1123        kvm_ioapic_send_eoi(apic, vector);
1124        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1125        return vector;
1126}
1127
1128/*
1129 * this interface assumes a trap-like exit, which has already finished
1130 * desired side effect including vISR and vPPR update.
1131 */
1132void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1133{
1134        struct kvm_lapic *apic = vcpu->arch.apic;
1135
1136        trace_kvm_eoi(apic, vector);
1137
1138        kvm_ioapic_send_eoi(apic, vector);
1139        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1140}
1141EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1142
1143static void apic_send_ipi(struct kvm_lapic *apic)
1144{
1145        u32 icr_low = kvm_lapic_get_reg(apic, APIC_ICR);
1146        u32 icr_high = kvm_lapic_get_reg(apic, APIC_ICR2);
1147        struct kvm_lapic_irq irq;
1148
1149        irq.vector = icr_low & APIC_VECTOR_MASK;
1150        irq.delivery_mode = icr_low & APIC_MODE_MASK;
1151        irq.dest_mode = icr_low & APIC_DEST_MASK;
1152        irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1153        irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1154        irq.shorthand = icr_low & APIC_SHORT_MASK;
1155        irq.msi_redir_hint = false;
1156        if (apic_x2apic_mode(apic))
1157                irq.dest_id = icr_high;
1158        else
1159                irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
1160
1161        trace_kvm_apic_ipi(icr_low, irq.dest_id);
1162
1163        apic_debug("icr_high 0x%x, icr_low 0x%x, "
1164                   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
1165                   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
1166                   "msi_redir_hint 0x%x\n",
1167                   icr_high, icr_low, irq.shorthand, irq.dest_id,
1168                   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
1169                   irq.vector, irq.msi_redir_hint);
1170
1171        kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1172}
1173
1174static u32 apic_get_tmcct(struct kvm_lapic *apic)
1175{
1176        ktime_t remaining, now;
1177        s64 ns;
1178        u32 tmcct;
1179
1180        ASSERT(apic != NULL);
1181
1182        /* if initial count is 0, current count should also be 0 */
1183        if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1184                apic->lapic_timer.period == 0)
1185                return 0;
1186
1187        now = ktime_get();
1188        remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1189        if (ktime_to_ns(remaining) < 0)
1190                remaining = 0;
1191
1192        ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1193        tmcct = div64_u64(ns,
1194                         (APIC_BUS_CYCLE_NS * apic->divide_count));
1195
1196        return tmcct;
1197}
1198
1199static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1200{
1201        struct kvm_vcpu *vcpu = apic->vcpu;
1202        struct kvm_run *run = vcpu->run;
1203
1204        kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1205        run->tpr_access.rip = kvm_rip_read(vcpu);
1206        run->tpr_access.is_write = write;
1207}
1208
1209static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1210{
1211        if (apic->vcpu->arch.tpr_access_reporting)
1212                __report_tpr_access(apic, write);
1213}
1214
1215static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1216{
1217        u32 val = 0;
1218
1219        if (offset >= LAPIC_MMIO_LENGTH)
1220                return 0;
1221
1222        switch (offset) {
1223        case APIC_ARBPRI:
1224                apic_debug("Access APIC ARBPRI register which is for P6\n");
1225                break;
1226
1227        case APIC_TMCCT:        /* Timer CCR */
1228                if (apic_lvtt_tscdeadline(apic))
1229                        return 0;
1230
1231                val = apic_get_tmcct(apic);
1232                break;
1233        case APIC_PROCPRI:
1234                apic_update_ppr(apic);
1235                val = kvm_lapic_get_reg(apic, offset);
1236                break;
1237        case APIC_TASKPRI:
1238                report_tpr_access(apic, false);
1239                /* fall thru */
1240        default:
1241                val = kvm_lapic_get_reg(apic, offset);
1242                break;
1243        }
1244
1245        return val;
1246}
1247
1248static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1249{
1250        return container_of(dev, struct kvm_lapic, dev);
1251}
1252
1253int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1254                void *data)
1255{
1256        unsigned char alignment = offset & 0xf;
1257        u32 result;
1258        /* this bitmask has a bit cleared for each reserved register */
1259        static const u64 rmask = 0x43ff01ffffffe70cULL;
1260
1261        if ((alignment + len) > 4) {
1262                apic_debug("KVM_APIC_READ: alignment error %x %d\n",
1263                           offset, len);
1264                return 1;
1265        }
1266
1267        if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
1268                apic_debug("KVM_APIC_READ: read reserved register %x\n",
1269                           offset);
1270                return 1;
1271        }
1272
1273        result = __apic_read(apic, offset & ~0xf);
1274
1275        trace_kvm_apic_read(offset, result);
1276
1277        switch (len) {
1278        case 1:
1279        case 2:
1280        case 4:
1281                memcpy(data, (char *)&result + alignment, len);
1282                break;
1283        default:
1284                printk(KERN_ERR "Local APIC read with len = %x, "
1285                       "should be 1,2, or 4 instead\n", len);
1286                break;
1287        }
1288        return 0;
1289}
1290EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
1291
1292static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1293{
1294        return kvm_apic_hw_enabled(apic) &&
1295            addr >= apic->base_address &&
1296            addr < apic->base_address + LAPIC_MMIO_LENGTH;
1297}
1298
1299static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1300                           gpa_t address, int len, void *data)
1301{
1302        struct kvm_lapic *apic = to_lapic(this);
1303        u32 offset = address - apic->base_address;
1304
1305        if (!apic_mmio_in_range(apic, address))
1306                return -EOPNOTSUPP;
1307
1308        kvm_lapic_reg_read(apic, offset, len, data);
1309
1310        return 0;
1311}
1312
1313static void update_divide_count(struct kvm_lapic *apic)
1314{
1315        u32 tmp1, tmp2, tdcr;
1316
1317        tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1318        tmp1 = tdcr & 0xf;
1319        tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1320        apic->divide_count = 0x1 << (tmp2 & 0x7);
1321
1322        apic_debug("timer divide count is 0x%x\n",
1323                                   apic->divide_count);
1324}
1325
1326static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1327{
1328        /*
1329         * Do not allow the guest to program periodic timers with small
1330         * interval, since the hrtimers are not throttled by the host
1331         * scheduler.
1332         */
1333        if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1334                s64 min_period = min_timer_period_us * 1000LL;
1335
1336                if (apic->lapic_timer.period < min_period) {
1337                        pr_info_ratelimited(
1338                            "kvm: vcpu %i: requested %lld ns "
1339                            "lapic timer period limited to %lld ns\n",
1340                            apic->vcpu->vcpu_id,
1341                            apic->lapic_timer.period, min_period);
1342                        apic->lapic_timer.period = min_period;
1343                }
1344        }
1345}
1346
1347static void apic_update_lvtt(struct kvm_lapic *apic)
1348{
1349        u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1350                        apic->lapic_timer.timer_mode_mask;
1351
1352        if (apic->lapic_timer.timer_mode != timer_mode) {
1353                if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1354                                APIC_LVT_TIMER_TSCDEADLINE)) {
1355                        hrtimer_cancel(&apic->lapic_timer.timer);
1356                        kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1357                        apic->lapic_timer.period = 0;
1358                        apic->lapic_timer.tscdeadline = 0;
1359                }
1360                apic->lapic_timer.timer_mode = timer_mode;
1361                limit_periodic_timer_frequency(apic);
1362        }
1363}
1364
1365static void apic_timer_expired(struct kvm_lapic *apic)
1366{
1367        struct kvm_vcpu *vcpu = apic->vcpu;
1368        struct swait_queue_head *q = &vcpu->wq;
1369        struct kvm_timer *ktimer = &apic->lapic_timer;
1370
1371        if (atomic_read(&apic->lapic_timer.pending))
1372                return;
1373
1374        atomic_inc(&apic->lapic_timer.pending);
1375        kvm_set_pending_timer(vcpu);
1376
1377        /*
1378         * For x86, the atomic_inc() is serialized, thus
1379         * using swait_active() is safe.
1380         */
1381        if (swait_active(q))
1382                swake_up(q);
1383
1384        if (apic_lvtt_tscdeadline(apic))
1385                ktimer->expired_tscdeadline = ktimer->tscdeadline;
1386}
1387
1388/*
1389 * On APICv, this test will cause a busy wait
1390 * during a higher-priority task.
1391 */
1392
1393static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1394{
1395        struct kvm_lapic *apic = vcpu->arch.apic;
1396        u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1397
1398        if (kvm_apic_hw_enabled(apic)) {
1399                int vec = reg & APIC_VECTOR_MASK;
1400                void *bitmap = apic->regs + APIC_ISR;
1401
1402                if (vcpu->arch.apicv_active)
1403                        bitmap = apic->regs + APIC_IRR;
1404
1405                if (apic_test_vector(vec, bitmap))
1406                        return true;
1407        }
1408        return false;
1409}
1410
1411void wait_lapic_expire(struct kvm_vcpu *vcpu)
1412{
1413        struct kvm_lapic *apic = vcpu->arch.apic;
1414        u64 guest_tsc, tsc_deadline;
1415
1416        if (!lapic_in_kernel(vcpu))
1417                return;
1418
1419        if (apic->lapic_timer.expired_tscdeadline == 0)
1420                return;
1421
1422        if (!lapic_timer_int_injected(vcpu))
1423                return;
1424
1425        tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1426        apic->lapic_timer.expired_tscdeadline = 0;
1427        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1428        trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
1429
1430        /* __delay is delay_tsc whenever the hardware has TSC, thus always.  */
1431        if (guest_tsc < tsc_deadline)
1432                __delay(min(tsc_deadline - guest_tsc,
1433                        nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
1434}
1435
1436static void start_sw_tscdeadline(struct kvm_lapic *apic)
1437{
1438        u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
1439        u64 ns = 0;
1440        ktime_t expire;
1441        struct kvm_vcpu *vcpu = apic->vcpu;
1442        unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1443        unsigned long flags;
1444        ktime_t now;
1445
1446        if (unlikely(!tscdeadline || !this_tsc_khz))
1447                return;
1448
1449        local_irq_save(flags);
1450
1451        now = ktime_get();
1452        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1453        if (likely(tscdeadline > guest_tsc)) {
1454                ns = (tscdeadline - guest_tsc) * 1000000ULL;
1455                do_div(ns, this_tsc_khz);
1456                expire = ktime_add_ns(now, ns);
1457                expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
1458                hrtimer_start(&apic->lapic_timer.timer,
1459                                expire, HRTIMER_MODE_ABS_PINNED);
1460        } else
1461                apic_timer_expired(apic);
1462
1463        local_irq_restore(flags);
1464}
1465
1466static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1467{
1468        ktime_t now, remaining;
1469        u64 ns_remaining_old, ns_remaining_new;
1470
1471        apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
1472                * APIC_BUS_CYCLE_NS * apic->divide_count;
1473        limit_periodic_timer_frequency(apic);
1474
1475        now = ktime_get();
1476        remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1477        if (ktime_to_ns(remaining) < 0)
1478                remaining = 0;
1479
1480        ns_remaining_old = ktime_to_ns(remaining);
1481        ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1482                                           apic->divide_count, old_divisor);
1483
1484        apic->lapic_timer.tscdeadline +=
1485                nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1486                nsec_to_cycles(apic->vcpu, ns_remaining_old);
1487        apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
1488}
1489
1490static bool set_target_expiration(struct kvm_lapic *apic)
1491{
1492        ktime_t now;
1493        u64 tscl = rdtsc();
1494
1495        now = ktime_get();
1496        apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
1497                * APIC_BUS_CYCLE_NS * apic->divide_count;
1498
1499        if (!apic->lapic_timer.period) {
1500                apic->lapic_timer.tscdeadline = 0;
1501                return false;
1502        }
1503
1504        limit_periodic_timer_frequency(apic);
1505
1506        apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
1507                   PRIx64 ", "
1508                   "timer initial count 0x%x, period %lldns, "
1509                   "expire @ 0x%016" PRIx64 ".\n", __func__,
1510                   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
1511                   kvm_lapic_get_reg(apic, APIC_TMICT),
1512                   apic->lapic_timer.period,
1513                   ktime_to_ns(ktime_add_ns(now,
1514                                apic->lapic_timer.period)));
1515
1516        apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1517                nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
1518        apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
1519
1520        return true;
1521}
1522
1523static void advance_periodic_target_expiration(struct kvm_lapic *apic)
1524{
1525        ktime_t now = ktime_get();
1526        u64 tscl = rdtsc();
1527        ktime_t delta;
1528
1529        /*
1530         * Synchronize both deadlines to the same time source or
1531         * differences in the periods (caused by differences in the
1532         * underlying clocks or numerical approximation errors) will
1533         * cause the two to drift apart over time as the errors
1534         * accumulate.
1535         */
1536        apic->lapic_timer.target_expiration =
1537                ktime_add_ns(apic->lapic_timer.target_expiration,
1538                                apic->lapic_timer.period);
1539        delta = ktime_sub(apic->lapic_timer.target_expiration, now);
1540        apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1541                nsec_to_cycles(apic->vcpu, delta);
1542}
1543
1544static void start_sw_period(struct kvm_lapic *apic)
1545{
1546        if (!apic->lapic_timer.period)
1547                return;
1548
1549        if (ktime_after(ktime_get(),
1550                        apic->lapic_timer.target_expiration)) {
1551                apic_timer_expired(apic);
1552
1553                if (apic_lvtt_oneshot(apic))
1554                        return;
1555
1556                advance_periodic_target_expiration(apic);
1557        }
1558
1559        hrtimer_start(&apic->lapic_timer.timer,
1560                apic->lapic_timer.target_expiration,
1561                HRTIMER_MODE_ABS_PINNED);
1562}
1563
1564bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1565{
1566        if (!lapic_in_kernel(vcpu))
1567                return false;
1568
1569        return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1570}
1571EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1572
1573static void cancel_hv_timer(struct kvm_lapic *apic)
1574{
1575        WARN_ON(preemptible());
1576        WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1577        kvm_x86_ops->cancel_hv_timer(apic->vcpu);
1578        apic->lapic_timer.hv_timer_in_use = false;
1579}
1580
1581static bool start_hv_timer(struct kvm_lapic *apic)
1582{
1583        struct kvm_timer *ktimer = &apic->lapic_timer;
1584        int r;
1585
1586        WARN_ON(preemptible());
1587        if (!kvm_x86_ops->set_hv_timer)
1588                return false;
1589
1590        if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1591                return false;
1592
1593        if (!ktimer->tscdeadline)
1594                return false;
1595
1596        r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline);
1597        if (r < 0)
1598                return false;
1599
1600        ktimer->hv_timer_in_use = true;
1601        hrtimer_cancel(&ktimer->timer);
1602
1603        /*
1604         * Also recheck ktimer->pending, in case the sw timer triggered in
1605         * the window.  For periodic timer, leave the hv timer running for
1606         * simplicity, and the deadline will be recomputed on the next vmexit.
1607         */
1608        if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) {
1609                if (r)
1610                        apic_timer_expired(apic);
1611                return false;
1612        }
1613
1614        trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true);
1615        return true;
1616}
1617
1618static void start_sw_timer(struct kvm_lapic *apic)
1619{
1620        struct kvm_timer *ktimer = &apic->lapic_timer;
1621
1622        WARN_ON(preemptible());
1623        if (apic->lapic_timer.hv_timer_in_use)
1624                cancel_hv_timer(apic);
1625        if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1626                return;
1627
1628        if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1629                start_sw_period(apic);
1630        else if (apic_lvtt_tscdeadline(apic))
1631                start_sw_tscdeadline(apic);
1632        trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
1633}
1634
1635static void restart_apic_timer(struct kvm_lapic *apic)
1636{
1637        preempt_disable();
1638        if (!start_hv_timer(apic))
1639                start_sw_timer(apic);
1640        preempt_enable();
1641}
1642
1643void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1644{
1645        struct kvm_lapic *apic = vcpu->arch.apic;
1646
1647        preempt_disable();
1648        /* If the preempt notifier has already run, it also called apic_timer_expired */
1649        if (!apic->lapic_timer.hv_timer_in_use)
1650                goto out;
1651        WARN_ON(swait_active(&vcpu->wq));
1652        cancel_hv_timer(apic);
1653        apic_timer_expired(apic);
1654
1655        if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1656                advance_periodic_target_expiration(apic);
1657                restart_apic_timer(apic);
1658        }
1659out:
1660        preempt_enable();
1661}
1662EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
1663
1664void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
1665{
1666        restart_apic_timer(vcpu->arch.apic);
1667}
1668EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
1669
1670void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
1671{
1672        struct kvm_lapic *apic = vcpu->arch.apic;
1673
1674        preempt_disable();
1675        /* Possibly the TSC deadline timer is not enabled yet */
1676        if (apic->lapic_timer.hv_timer_in_use)
1677                start_sw_timer(apic);
1678        preempt_enable();
1679}
1680EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
1681
1682void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
1683{
1684        struct kvm_lapic *apic = vcpu->arch.apic;
1685
1686        WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1687        restart_apic_timer(apic);
1688}
1689
1690static void start_apic_timer(struct kvm_lapic *apic)
1691{
1692        atomic_set(&apic->lapic_timer.pending, 0);
1693
1694        if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1695            && !set_target_expiration(apic))
1696                return;
1697
1698        restart_apic_timer(apic);
1699}
1700
1701static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1702{
1703        bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
1704
1705        if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
1706                apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
1707                if (lvt0_in_nmi_mode) {
1708                        apic_debug("Receive NMI setting on APIC_LVT0 "
1709                                   "for cpu %d\n", apic->vcpu->vcpu_id);
1710                        atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1711                } else
1712                        atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1713        }
1714}
1715
1716int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1717{
1718        int ret = 0;
1719
1720        trace_kvm_apic_write(reg, val);
1721
1722        switch (reg) {
1723        case APIC_ID:           /* Local APIC ID */
1724                if (!apic_x2apic_mode(apic))
1725                        kvm_apic_set_xapic_id(apic, val >> 24);
1726                else
1727                        ret = 1;
1728                break;
1729
1730        case APIC_TASKPRI:
1731                report_tpr_access(apic, true);
1732                apic_set_tpr(apic, val & 0xff);
1733                break;
1734
1735        case APIC_EOI:
1736                apic_set_eoi(apic);
1737                break;
1738
1739        case APIC_LDR:
1740                if (!apic_x2apic_mode(apic))
1741                        kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
1742                else
1743                        ret = 1;
1744                break;
1745
1746        case APIC_DFR:
1747                if (!apic_x2apic_mode(apic)) {
1748                        kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
1749                        recalculate_apic_map(apic->vcpu->kvm);
1750                } else
1751                        ret = 1;
1752                break;
1753
1754        case APIC_SPIV: {
1755                u32 mask = 0x3ff;
1756                if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
1757                        mask |= APIC_SPIV_DIRECTED_EOI;
1758                apic_set_spiv(apic, val & mask);
1759                if (!(val & APIC_SPIV_APIC_ENABLED)) {
1760                        int i;
1761                        u32 lvt_val;
1762
1763                        for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
1764                                lvt_val = kvm_lapic_get_reg(apic,
1765                                                       APIC_LVTT + 0x10 * i);
1766                                kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
1767                                             lvt_val | APIC_LVT_MASKED);
1768                        }
1769                        apic_update_lvtt(apic);
1770                        atomic_set(&apic->lapic_timer.pending, 0);
1771
1772                }
1773                break;
1774        }
1775        case APIC_ICR:
1776                /* No delay here, so we always clear the pending bit */
1777                kvm_lapic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
1778                apic_send_ipi(apic);
1779                break;
1780
1781        case APIC_ICR2:
1782                if (!apic_x2apic_mode(apic))
1783                        val &= 0xff000000;
1784                kvm_lapic_set_reg(apic, APIC_ICR2, val);
1785                break;
1786
1787        case APIC_LVT0:
1788                apic_manage_nmi_watchdog(apic, val);
1789        case APIC_LVTTHMR:
1790        case APIC_LVTPC:
1791        case APIC_LVT1:
1792        case APIC_LVTERR:
1793                /* TODO: Check vector */
1794                if (!kvm_apic_sw_enabled(apic))
1795                        val |= APIC_LVT_MASKED;
1796
1797                val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
1798                kvm_lapic_set_reg(apic, reg, val);
1799
1800                break;
1801
1802        case APIC_LVTT:
1803                if (!kvm_apic_sw_enabled(apic))
1804                        val |= APIC_LVT_MASKED;
1805                val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
1806                kvm_lapic_set_reg(apic, APIC_LVTT, val);
1807                apic_update_lvtt(apic);
1808                break;
1809
1810        case APIC_TMICT:
1811                if (apic_lvtt_tscdeadline(apic))
1812                        break;
1813
1814                hrtimer_cancel(&apic->lapic_timer.timer);
1815                kvm_lapic_set_reg(apic, APIC_TMICT, val);
1816                start_apic_timer(apic);
1817                break;
1818
1819        case APIC_TDCR: {
1820                uint32_t old_divisor = apic->divide_count;
1821
1822                if (val & 4)
1823                        apic_debug("KVM_WRITE:TDCR %x\n", val);
1824                kvm_lapic_set_reg(apic, APIC_TDCR, val);
1825                update_divide_count(apic);
1826                if (apic->divide_count != old_divisor &&
1827                                apic->lapic_timer.period) {
1828                        hrtimer_cancel(&apic->lapic_timer.timer);
1829                        update_target_expiration(apic, old_divisor);
1830                        restart_apic_timer(apic);
1831                }
1832                break;
1833        }
1834        case APIC_ESR:
1835                if (apic_x2apic_mode(apic) && val != 0) {
1836                        apic_debug("KVM_WRITE:ESR not zero %x\n", val);
1837                        ret = 1;
1838                }
1839                break;
1840
1841        case APIC_SELF_IPI:
1842                if (apic_x2apic_mode(apic)) {
1843                        kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
1844                } else
1845                        ret = 1;
1846                break;
1847        default:
1848                ret = 1;
1849                break;
1850        }
1851        if (ret)
1852                apic_debug("Local APIC Write to read-only register %x\n", reg);
1853        return ret;
1854}
1855EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
1856
1857static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1858                            gpa_t address, int len, const void *data)
1859{
1860        struct kvm_lapic *apic = to_lapic(this);
1861        unsigned int offset = address - apic->base_address;
1862        u32 val;
1863
1864        if (!apic_mmio_in_range(apic, address))
1865                return -EOPNOTSUPP;
1866
1867        /*
1868         * APIC register must be aligned on 128-bits boundary.
1869         * 32/64/128 bits registers must be accessed thru 32 bits.
1870         * Refer SDM 8.4.1
1871         */
1872        if (len != 4 || (offset & 0xf)) {
1873                /* Don't shout loud, $infamous_os would cause only noise. */
1874                apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
1875                return 0;
1876        }
1877
1878        val = *(u32*)data;
1879
1880        /* too common printing */
1881        if (offset != APIC_EOI)
1882                apic_debug("%s: offset 0x%x with length 0x%x, and value is "
1883                           "0x%x\n", __func__, offset, len, val);
1884
1885        kvm_lapic_reg_write(apic, offset & 0xff0, val);
1886
1887        return 0;
1888}
1889
1890void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
1891{
1892        kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
1893}
1894EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
1895
1896/* emulate APIC access in a trap manner */
1897void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
1898{
1899        u32 val = 0;
1900
1901        /* hw has done the conditional check and inst decode */
1902        offset &= 0xff0;
1903
1904        kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
1905
1906        /* TODO: optimize to just emulate side effect w/o one more write */
1907        kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
1908}
1909EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
1910
1911void kvm_free_lapic(struct kvm_vcpu *vcpu)
1912{
1913        struct kvm_lapic *apic = vcpu->arch.apic;
1914
1915        if (!vcpu->arch.apic)
1916                return;
1917
1918        hrtimer_cancel(&apic->lapic_timer.timer);
1919
1920        if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
1921                static_key_slow_dec_deferred(&apic_hw_disabled);
1922
1923        if (!apic->sw_enabled)
1924                static_key_slow_dec_deferred(&apic_sw_disabled);
1925
1926        if (apic->regs)
1927                free_page((unsigned long)apic->regs);
1928
1929        kfree(apic);
1930}
1931
1932/*
1933 *----------------------------------------------------------------------
1934 * LAPIC interface
1935 *----------------------------------------------------------------------
1936 */
1937u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
1938{
1939        struct kvm_lapic *apic = vcpu->arch.apic;
1940
1941        if (!lapic_in_kernel(vcpu) ||
1942                !apic_lvtt_tscdeadline(apic))
1943                return 0;
1944
1945        return apic->lapic_timer.tscdeadline;
1946}
1947
1948void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
1949{
1950        struct kvm_lapic *apic = vcpu->arch.apic;
1951
1952        if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
1953                        apic_lvtt_period(apic))
1954                return;
1955
1956        hrtimer_cancel(&apic->lapic_timer.timer);
1957        apic->lapic_timer.tscdeadline = data;
1958        start_apic_timer(apic);
1959}
1960
1961void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
1962{
1963        struct kvm_lapic *apic = vcpu->arch.apic;
1964
1965        apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
1966                     | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
1967}
1968
1969u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
1970{
1971        u64 tpr;
1972
1973        tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
1974
1975        return (tpr & 0xf0) >> 4;
1976}
1977
1978void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
1979{
1980        u64 old_value = vcpu->arch.apic_base;
1981        struct kvm_lapic *apic = vcpu->arch.apic;
1982
1983        if (!apic)
1984                value |= MSR_IA32_APICBASE_BSP;
1985
1986        vcpu->arch.apic_base = value;
1987
1988        if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
1989                kvm_update_cpuid(vcpu);
1990
1991        if (!apic)
1992                return;
1993
1994        /* update jump label if enable bit changes */
1995        if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
1996                if (value & MSR_IA32_APICBASE_ENABLE) {
1997                        kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
1998                        static_key_slow_dec_deferred(&apic_hw_disabled);
1999                } else {
2000                        static_key_slow_inc(&apic_hw_disabled.key);
2001                        recalculate_apic_map(vcpu->kvm);
2002                }
2003        }
2004
2005        if ((old_value ^ value) & X2APIC_ENABLE) {
2006                if (value & X2APIC_ENABLE) {
2007                        kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2008                        kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
2009                } else
2010                        kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
2011        }
2012
2013        apic->base_address = apic->vcpu->arch.apic_base &
2014                             MSR_IA32_APICBASE_BASE;
2015
2016        if ((value & MSR_IA32_APICBASE_ENABLE) &&
2017             apic->base_address != APIC_DEFAULT_PHYS_BASE)
2018                pr_warn_once("APIC base relocation is unsupported by KVM");
2019
2020        /* with FSB delivery interrupt, we can restart APIC functionality */
2021        apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
2022                   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
2023
2024}
2025
2026void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2027{
2028        struct kvm_lapic *apic = vcpu->arch.apic;
2029        int i;
2030
2031        if (!apic)
2032                return;
2033
2034        apic_debug("%s\n", __func__);
2035
2036        /* Stop the timer in case it's a reset to an active apic */
2037        hrtimer_cancel(&apic->lapic_timer.timer);
2038
2039        if (!init_event) {
2040                kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
2041                                         MSR_IA32_APICBASE_ENABLE);
2042                kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2043        }
2044        kvm_apic_set_version(apic->vcpu);
2045
2046        for (i = 0; i < KVM_APIC_LVT_NUM; i++)
2047                kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
2048        apic_update_lvtt(apic);
2049        if (kvm_vcpu_is_reset_bsp(vcpu) &&
2050            kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2051                kvm_lapic_set_reg(apic, APIC_LVT0,
2052                             SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2053        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2054
2055        kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
2056        apic_set_spiv(apic, 0xff);
2057        kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2058        if (!apic_x2apic_mode(apic))
2059                kvm_apic_set_ldr(apic, 0);
2060        kvm_lapic_set_reg(apic, APIC_ESR, 0);
2061        kvm_lapic_set_reg(apic, APIC_ICR, 0);
2062        kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2063        kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2064        kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2065        for (i = 0; i < 8; i++) {
2066                kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2067                kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2068                kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2069        }
2070        apic->irr_pending = vcpu->arch.apicv_active;
2071        apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
2072        apic->highest_isr_cache = -1;
2073        update_divide_count(apic);
2074        atomic_set(&apic->lapic_timer.pending, 0);
2075        if (kvm_vcpu_is_bsp(vcpu))
2076                kvm_lapic_set_base(vcpu,
2077                                vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
2078        vcpu->arch.pv_eoi.msr_val = 0;
2079        apic_update_ppr(apic);
2080        if (vcpu->arch.apicv_active) {
2081                kvm_x86_ops->apicv_post_state_restore(vcpu);
2082                kvm_x86_ops->hwapic_irr_update(vcpu, -1);
2083                kvm_x86_ops->hwapic_isr_update(vcpu, -1);
2084        }
2085
2086        vcpu->arch.apic_arb_prio = 0;
2087        vcpu->arch.apic_attention = 0;
2088
2089        apic_debug("%s: vcpu=%p, id=0x%x, base_msr="
2090                   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
2091                   vcpu, kvm_lapic_get_reg(apic, APIC_ID),
2092                   vcpu->arch.apic_base, apic->base_address);
2093}
2094
2095/*
2096 *----------------------------------------------------------------------
2097 * timer interface
2098 *----------------------------------------------------------------------
2099 */
2100
2101static bool lapic_is_periodic(struct kvm_lapic *apic)
2102{
2103        return apic_lvtt_period(apic);
2104}
2105
2106int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2107{
2108        struct kvm_lapic *apic = vcpu->arch.apic;
2109
2110        if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2111                return atomic_read(&apic->lapic_timer.pending);
2112
2113        return 0;
2114}
2115
2116int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2117{
2118        u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2119        int vector, mode, trig_mode;
2120
2121        if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2122                vector = reg & APIC_VECTOR_MASK;
2123                mode = reg & APIC_MODE_MASK;
2124                trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2125                return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
2126                                        NULL);
2127        }
2128        return 0;
2129}
2130
2131void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2132{
2133        struct kvm_lapic *apic = vcpu->arch.apic;
2134
2135        if (apic)
2136                kvm_apic_local_deliver(apic, APIC_LVT0);
2137}
2138
2139static const struct kvm_io_device_ops apic_mmio_ops = {
2140        .read     = apic_mmio_read,
2141        .write    = apic_mmio_write,
2142};
2143
2144static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2145{
2146        struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2147        struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2148
2149        apic_timer_expired(apic);
2150
2151        if (lapic_is_periodic(apic)) {
2152                advance_periodic_target_expiration(apic);
2153                hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2154                return HRTIMER_RESTART;
2155        } else
2156                return HRTIMER_NORESTART;
2157}
2158
2159int kvm_create_lapic(struct kvm_vcpu *vcpu)
2160{
2161        struct kvm_lapic *apic;
2162
2163        ASSERT(vcpu != NULL);
2164        apic_debug("apic_init %d\n", vcpu->vcpu_id);
2165
2166        apic = kzalloc(sizeof(*apic), GFP_KERNEL);
2167        if (!apic)
2168                goto nomem;
2169
2170        vcpu->arch.apic = apic;
2171
2172        apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
2173        if (!apic->regs) {
2174                printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
2175                       vcpu->vcpu_id);
2176                goto nomem_free_apic;
2177        }
2178        apic->vcpu = vcpu;
2179
2180        hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2181                     HRTIMER_MODE_ABS_PINNED);
2182        apic->lapic_timer.timer.function = apic_timer_fn;
2183
2184        /*
2185         * APIC is created enabled. This will prevent kvm_lapic_set_base from
2186         * thinking that APIC satet has changed.
2187         */
2188        vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2189        static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2190        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2191
2192        return 0;
2193nomem_free_apic:
2194        kfree(apic);
2195nomem:
2196        return -ENOMEM;
2197}
2198
2199int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2200{
2201        struct kvm_lapic *apic = vcpu->arch.apic;
2202        u32 ppr;
2203
2204        if (!apic_enabled(apic))
2205                return -1;
2206
2207        __apic_update_ppr(apic, &ppr);
2208        return apic_has_interrupt_for_ppr(apic, ppr);
2209}
2210
2211int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2212{
2213        u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2214        int r = 0;
2215
2216        if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2217                r = 1;
2218        if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2219            GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2220                r = 1;
2221        return r;
2222}
2223
2224void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2225{
2226        struct kvm_lapic *apic = vcpu->arch.apic;
2227
2228        if (atomic_read(&apic->lapic_timer.pending) > 0) {
2229                kvm_apic_local_deliver(apic, APIC_LVTT);
2230                if (apic_lvtt_tscdeadline(apic))
2231                        apic->lapic_timer.tscdeadline = 0;
2232                if (apic_lvtt_oneshot(apic)) {
2233                        apic->lapic_timer.tscdeadline = 0;
2234                        apic->lapic_timer.target_expiration = 0;
2235                }
2236                atomic_set(&apic->lapic_timer.pending, 0);
2237        }
2238}
2239
2240int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2241{
2242        int vector = kvm_apic_has_interrupt(vcpu);
2243        struct kvm_lapic *apic = vcpu->arch.apic;
2244        u32 ppr;
2245
2246        if (vector == -1)
2247                return -1;
2248
2249        /*
2250         * We get here even with APIC virtualization enabled, if doing
2251         * nested virtualization and L1 runs with the "acknowledge interrupt
2252         * on exit" mode.  Then we cannot inject the interrupt via RVI,
2253         * because the process would deliver it through the IDT.
2254         */
2255
2256        apic_clear_irr(vector, apic);
2257        if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
2258                /*
2259                 * For auto-EOI interrupts, there might be another pending
2260                 * interrupt above PPR, so check whether to raise another
2261                 * KVM_REQ_EVENT.
2262                 */
2263                apic_update_ppr(apic);
2264        } else {
2265                /*
2266                 * For normal interrupts, PPR has been raised and there cannot
2267                 * be a higher-priority pending interrupt---except if there was
2268                 * a concurrent interrupt injection, but that would have
2269                 * triggered KVM_REQ_EVENT already.
2270                 */
2271                apic_set_isr(vector, apic);
2272                __apic_update_ppr(apic, &ppr);
2273        }
2274
2275        return vector;
2276}
2277
2278static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2279                struct kvm_lapic_state *s, bool set)
2280{
2281        if (apic_x2apic_mode(vcpu->arch.apic)) {
2282                u32 *id = (u32 *)(s->regs + APIC_ID);
2283                u32 *ldr = (u32 *)(s->regs + APIC_LDR);
2284
2285                if (vcpu->kvm->arch.x2apic_format) {
2286                        if (*id != vcpu->vcpu_id)
2287                                return -EINVAL;
2288                } else {
2289                        if (set)
2290                                *id >>= 24;
2291                        else
2292                                *id <<= 24;
2293                }
2294
2295                /* In x2APIC mode, the LDR is fixed and based on the id */
2296                if (set)
2297                        *ldr = kvm_apic_calc_x2apic_ldr(*id);
2298        }
2299
2300        return 0;
2301}
2302
2303int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2304{
2305        memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2306        return kvm_apic_state_fixup(vcpu, s, false);
2307}
2308
2309int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2310{
2311        struct kvm_lapic *apic = vcpu->arch.apic;
2312        int r;
2313
2314
2315        kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2316        /* set SPIV separately to get count of SW disabled APICs right */
2317        apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2318
2319        r = kvm_apic_state_fixup(vcpu, s, true);
2320        if (r)
2321                return r;
2322        memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
2323
2324        recalculate_apic_map(vcpu->kvm);
2325        kvm_apic_set_version(vcpu);
2326
2327        apic_update_ppr(apic);
2328        hrtimer_cancel(&apic->lapic_timer.timer);
2329        apic_update_lvtt(apic);
2330        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2331        update_divide_count(apic);
2332        start_apic_timer(apic);
2333        apic->irr_pending = true;
2334        apic->isr_count = vcpu->arch.apicv_active ?
2335                                1 : count_vectors(apic->regs + APIC_ISR);
2336        apic->highest_isr_cache = -1;
2337        if (vcpu->arch.apicv_active) {
2338                kvm_x86_ops->apicv_post_state_restore(vcpu);
2339                kvm_x86_ops->hwapic_irr_update(vcpu,
2340                                apic_find_highest_irr(apic));
2341                kvm_x86_ops->hwapic_isr_update(vcpu,
2342                                apic_find_highest_isr(apic));
2343        }
2344        kvm_make_request(KVM_REQ_EVENT, vcpu);
2345        if (ioapic_in_kernel(vcpu->kvm))
2346                kvm_rtc_eoi_tracking_restore_one(vcpu);
2347
2348        vcpu->arch.apic_arb_prio = 0;
2349
2350        return 0;
2351}
2352
2353void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2354{
2355        struct hrtimer *timer;
2356
2357        if (!lapic_in_kernel(vcpu))
2358                return;
2359
2360        timer = &vcpu->arch.apic->lapic_timer.timer;
2361        if (hrtimer_cancel(timer))
2362                hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
2363}
2364
2365/*
2366 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2367 *
2368 * Detect whether guest triggered PV EOI since the
2369 * last entry. If yes, set EOI on guests's behalf.
2370 * Clear PV EOI in guest memory in any case.
2371 */
2372static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2373                                        struct kvm_lapic *apic)
2374{
2375        bool pending;
2376        int vector;
2377        /*
2378         * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2379         * and KVM_PV_EOI_ENABLED in guest memory as follows:
2380         *
2381         * KVM_APIC_PV_EOI_PENDING is unset:
2382         *      -> host disabled PV EOI.
2383         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2384         *      -> host enabled PV EOI, guest did not execute EOI yet.
2385         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2386         *      -> host enabled PV EOI, guest executed EOI.
2387         */
2388        BUG_ON(!pv_eoi_enabled(vcpu));
2389        pending = pv_eoi_get_pending(vcpu);
2390        /*
2391         * Clear pending bit in any case: it will be set again on vmentry.
2392         * While this might not be ideal from performance point of view,
2393         * this makes sure pv eoi is only enabled when we know it's safe.
2394         */
2395        pv_eoi_clr_pending(vcpu);
2396        if (pending)
2397                return;
2398        vector = apic_set_eoi(apic);
2399        trace_kvm_pv_eoi(apic, vector);
2400}
2401
2402void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2403{
2404        u32 data;
2405
2406        if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2407                apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2408
2409        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2410                return;
2411
2412        if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2413                                  sizeof(u32)))
2414                return;
2415
2416        apic_set_tpr(vcpu->arch.apic, data & 0xff);
2417}
2418
2419/*
2420 * apic_sync_pv_eoi_to_guest - called before vmentry
2421 *
2422 * Detect whether it's safe to enable PV EOI and
2423 * if yes do so.
2424 */
2425static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2426                                        struct kvm_lapic *apic)
2427{
2428        if (!pv_eoi_enabled(vcpu) ||
2429            /* IRR set or many bits in ISR: could be nested. */
2430            apic->irr_pending ||
2431            /* Cache not set: could be safe but we don't bother. */
2432            apic->highest_isr_cache == -1 ||
2433            /* Need EOI to update ioapic. */
2434            kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2435                /*
2436                 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2437                 * so we need not do anything here.
2438                 */
2439                return;
2440        }
2441
2442        pv_eoi_set_pending(apic->vcpu);
2443}
2444
2445void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2446{
2447        u32 data, tpr;
2448        int max_irr, max_isr;
2449        struct kvm_lapic *apic = vcpu->arch.apic;
2450
2451        apic_sync_pv_eoi_to_guest(vcpu, apic);
2452
2453        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2454                return;
2455
2456        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2457        max_irr = apic_find_highest_irr(apic);
2458        if (max_irr < 0)
2459                max_irr = 0;
2460        max_isr = apic_find_highest_isr(apic);
2461        if (max_isr < 0)
2462                max_isr = 0;
2463        data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2464
2465        kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2466                                sizeof(u32));
2467}
2468
2469int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2470{
2471        if (vapic_addr) {
2472                if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2473                                        &vcpu->arch.apic->vapic_cache,
2474                                        vapic_addr, sizeof(u32)))
2475                        return -EINVAL;
2476                __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2477        } else {
2478                __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2479        }
2480
2481        vcpu->arch.apic->vapic_addr = vapic_addr;
2482        return 0;
2483}
2484
2485int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2486{
2487        struct kvm_lapic *apic = vcpu->arch.apic;
2488        u32 reg = (msr - APIC_BASE_MSR) << 4;
2489
2490        if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2491                return 1;
2492
2493        if (reg == APIC_ICR2)
2494                return 1;
2495
2496        /* if this is ICR write vector before command */
2497        if (reg == APIC_ICR)
2498                kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2499        return kvm_lapic_reg_write(apic, reg, (u32)data);
2500}
2501
2502int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2503{
2504        struct kvm_lapic *apic = vcpu->arch.apic;
2505        u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
2506
2507        if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2508                return 1;
2509
2510        if (reg == APIC_DFR || reg == APIC_ICR2) {
2511                apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
2512                           reg);
2513                return 1;
2514        }
2515
2516        if (kvm_lapic_reg_read(apic, reg, 4, &low))
2517                return 1;
2518        if (reg == APIC_ICR)
2519                kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2520
2521        *data = (((u64)high) << 32) | low;
2522
2523        return 0;
2524}
2525
2526int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2527{
2528        struct kvm_lapic *apic = vcpu->arch.apic;
2529
2530        if (!lapic_in_kernel(vcpu))
2531                return 1;
2532
2533        /* if this is ICR write vector before command */
2534        if (reg == APIC_ICR)
2535                kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2536        return kvm_lapic_reg_write(apic, reg, (u32)data);
2537}
2538
2539int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2540{
2541        struct kvm_lapic *apic = vcpu->arch.apic;
2542        u32 low, high = 0;
2543
2544        if (!lapic_in_kernel(vcpu))
2545                return 1;
2546
2547        if (kvm_lapic_reg_read(apic, reg, 4, &low))
2548                return 1;
2549        if (reg == APIC_ICR)
2550                kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2551
2552        *data = (((u64)high) << 32) | low;
2553
2554        return 0;
2555}
2556
2557int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
2558{
2559        u64 addr = data & ~KVM_MSR_ENABLED;
2560        if (!IS_ALIGNED(addr, 4))
2561                return 1;
2562
2563        vcpu->arch.pv_eoi.msr_val = data;
2564        if (!pv_eoi_enabled(vcpu))
2565                return 0;
2566        return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
2567                                         addr, sizeof(u8));
2568}
2569
2570void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2571{
2572        struct kvm_lapic *apic = vcpu->arch.apic;
2573        u8 sipi_vector;
2574        unsigned long pe;
2575
2576        if (!lapic_in_kernel(vcpu) || !apic->pending_events)
2577                return;
2578
2579        /*
2580         * INITs are latched while in SMM.  Because an SMM CPU cannot
2581         * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
2582         * and delay processing of INIT until the next RSM.
2583         */
2584        if (is_smm(vcpu)) {
2585                WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
2586                if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
2587                        clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2588                return;
2589        }
2590
2591        pe = xchg(&apic->pending_events, 0);
2592        if (test_bit(KVM_APIC_INIT, &pe)) {
2593                kvm_vcpu_reset(vcpu, true);
2594                if (kvm_vcpu_is_bsp(apic->vcpu))
2595                        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2596                else
2597                        vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
2598        }
2599        if (test_bit(KVM_APIC_SIPI, &pe) &&
2600            vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
2601                /* evaluate pending_events before reading the vector */
2602                smp_rmb();
2603                sipi_vector = apic->sipi_vector;
2604                apic_debug("vcpu %d received sipi with vector # %x\n",
2605                         vcpu->vcpu_id, sipi_vector);
2606                kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
2607                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2608        }
2609}
2610
2611void kvm_lapic_init(void)
2612{
2613        /* do not patch jump label more than once per second */
2614        jump_label_rate_limit(&apic_hw_disabled, HZ);
2615        jump_label_rate_limit(&apic_sw_disabled, HZ);
2616}
2617
2618void kvm_lapic_exit(void)
2619{
2620        static_key_deferred_flush(&apic_hw_disabled);
2621        static_key_deferred_flush(&apic_sw_disabled);
2622}
2623