linux/arch/x86/kvm/lapic.c
   1
   2/*
   3 * Local APIC virtualization
   4 *
   5 * Copyright (C) 2006 Qumranet, Inc.
   6 * Copyright (C) 2007 Novell
   7 * Copyright (C) 2007 Intel
   8 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
   9 *
  10 * Authors:
  11 *   Dor Laor <dor.laor@qumranet.com>
  12 *   Gregory Haskins <ghaskins@novell.com>
  13 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
  14 *
  15 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
  16 *
  17 * This work is licensed under the terms of the GNU GPL, version 2.  See
  18 * the COPYING file in the top-level directory.
  19 */
  20
  21#include <linux/kvm_host.h>
  22#include <linux/kvm.h>
  23#include <linux/mm.h>
  24#include <linux/highmem.h>
  25#include <linux/smp.h>
  26#include <linux/hrtimer.h>
  27#include <linux/io.h>
  28#include <linux/export.h>
  29#include <linux/math64.h>
  30#include <linux/slab.h>
  31#include <asm/processor.h>
  32#include <asm/msr.h>
  33#include <asm/page.h>
  34#include <asm/current.h>
  35#include <asm/apicdef.h>
  36#include <asm/delay.h>
  37#include <linux/atomic.h>
  38#include <linux/jump_label.h>
  39#include "kvm_cache_regs.h"
  40#include "irq.h"
  41#include "trace.h"
  42#include "x86.h"
  43#include "cpuid.h"
  44#include "hyperv.h"
  45
  46#ifndef CONFIG_X86_64
  47#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
  48#else
  49#define mod_64(x, y) ((x) % (y))
  50#endif
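     /*
      * Illustrative example: on 32-bit builds a plain 64-bit '%' would
      * need the libgcc helper __umoddi3, which the kernel does not
      * provide, so mod_64() derives the remainder via div64_u64(), e.g.
      *
      *   mod_64(10000000000ULL, 3ULL) == 1
      */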
  51
  52#define PRId64 "d"
  53#define PRIx64 "llx"
  54#define PRIu64 "u"
  55#define PRIo64 "o"
  56
  57#define APIC_BUS_CYCLE_NS 1
  58
  59/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
  60#define apic_debug(fmt, arg...)
  61
   62/* 0x14 is the APIC version for Pentium 4 and Xeon (see SDM 8.4.8) */
  63#define APIC_VERSION                    (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
  64#define LAPIC_MMIO_LENGTH               (1 << 12)
   65/* the following defines are not in apicdef.h */
  66#define APIC_SHORT_MASK                 0xc0000
  67#define APIC_DEST_NOSHORT               0x0
  68#define APIC_DEST_MASK                  0x800
  69#define MAX_APIC_VECTOR                 256
  70#define APIC_VECTORS_PER_REG            32
  71
  72#define APIC_BROADCAST                  0xFF
  73#define X2APIC_BROADCAST                0xFFFFFFFFul
  74
  75static inline int apic_test_vector(int vec, void *bitmap)
  76{
  77        return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  78}
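     /*
      * Illustrative sketch of the bit layout (VEC_POS()/REG_POS() are
      * defined in lapic.h): the 256 vectors span eight 32-bit registers
      * spaced 0x10 bytes apart, so REG_POS(vec) == (vec / 32) * 0x10
      * selects the register and VEC_POS(vec) == vec % 32 selects the
      * bit; vector 0x31 (49) is bit 17 of the register at offset 0x10.
      */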
  79
  80bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
  81{
  82        struct kvm_lapic *apic = vcpu->arch.apic;
  83
  84        return apic_test_vector(vector, apic->regs + APIC_ISR) ||
  85                apic_test_vector(vector, apic->regs + APIC_IRR);
  86}
  87
  88static inline void apic_clear_vector(int vec, void *bitmap)
  89{
  90        clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  91}
  92
  93static inline int __apic_test_and_set_vector(int vec, void *bitmap)
  94{
  95        return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  96}
  97
  98static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
  99{
 100        return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 101}
 102
 103struct static_key_deferred apic_hw_disabled __read_mostly;
 104struct static_key_deferred apic_sw_disabled __read_mostly;
 105
 106static inline int apic_enabled(struct kvm_lapic *apic)
 107{
  108        return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
 109}
 110
 111#define LVT_MASK        \
 112        (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
 113
 114#define LINT_MASK       \
 115        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 116         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 117
 118static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
 119                u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
 120        switch (map->mode) {
 121        case KVM_APIC_MODE_X2APIC: {
 122                u32 offset = (dest_id >> 16) * 16;
 123                u32 max_apic_id = map->max_apic_id;
 124
 125                if (offset <= max_apic_id) {
 126                        u8 cluster_size = min(max_apic_id - offset + 1, 16U);
 127
 128                        *cluster = &map->phys_map[offset];
 129                        *mask = dest_id & (0xffff >> (16 - cluster_size));
 130                } else {
 131                        *mask = 0;
 132                }
 133
 134                return true;
 135                }
 136        case KVM_APIC_MODE_XAPIC_FLAT:
 137                *cluster = map->xapic_flat_map;
 138                *mask = dest_id & 0xff;
 139                return true;
 140        case KVM_APIC_MODE_XAPIC_CLUSTER:
 141                *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
 142                *mask = dest_id & 0xf;
 143                return true;
 144        default:
 145                /* Not optimized. */
 146                return false;
 147        }
 148}
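     /*
      * Worked example for the x2APIC case above (assuming max_apic_id
      * covers the cluster): dest_id 0x00020005 names cluster 2 with
      * logical bits 0 and 2 set, so offset = 2 * 16 = 32,
      * *cluster = &map->phys_map[32] and *mask = 0x0005.
      */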
 149
 150static void kvm_apic_map_free(struct rcu_head *rcu)
 151{
 152        struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
 153
 154        kvfree(map);
 155}
 156
 157static void recalculate_apic_map(struct kvm *kvm)
 158{
 159        struct kvm_apic_map *new, *old = NULL;
 160        struct kvm_vcpu *vcpu;
 161        int i;
 162        u32 max_id = 255;
 163
 164        mutex_lock(&kvm->arch.apic_map_lock);
 165
 166        kvm_for_each_vcpu(i, vcpu, kvm)
 167                if (kvm_apic_present(vcpu))
 168                        max_id = max(max_id, kvm_apic_id(vcpu->arch.apic));
 169
 170        new = kvm_kvzalloc(sizeof(struct kvm_apic_map) +
 171                           sizeof(struct kvm_lapic *) * ((u64)max_id + 1));
 172
 173        if (!new)
 174                goto out;
 175
 176        new->max_apic_id = max_id;
 177
 178        kvm_for_each_vcpu(i, vcpu, kvm) {
 179                struct kvm_lapic *apic = vcpu->arch.apic;
 180                struct kvm_lapic **cluster;
 181                u16 mask;
 182                u32 ldr, aid;
 183
 184                if (!kvm_apic_present(vcpu))
 185                        continue;
 186
 187                aid = kvm_apic_id(apic);
 188                ldr = kvm_lapic_get_reg(apic, APIC_LDR);
 189
 190                if (aid <= new->max_apic_id)
 191                        new->phys_map[aid] = apic;
 192
 193                if (apic_x2apic_mode(apic)) {
 194                        new->mode |= KVM_APIC_MODE_X2APIC;
 195                } else if (ldr) {
 196                        ldr = GET_APIC_LOGICAL_ID(ldr);
 197                        if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
 198                                new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
 199                        else
 200                                new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
 201                }
 202
 203                if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
 204                        continue;
 205
 206                if (mask)
 207                        cluster[ffs(mask) - 1] = apic;
 208        }
 209out:
 210        old = rcu_dereference_protected(kvm->arch.apic_map,
 211                        lockdep_is_held(&kvm->arch.apic_map_lock));
 212        rcu_assign_pointer(kvm->arch.apic_map, new);
 213        mutex_unlock(&kvm->arch.apic_map_lock);
 214
 215        if (old)
 216                call_rcu(&old->rcu, kvm_apic_map_free);
 217
 218        kvm_make_scan_ioapic_request(kvm);
 219}
 220
 221static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 222{
 223        bool enabled = val & APIC_SPIV_APIC_ENABLED;
 224
 225        kvm_lapic_set_reg(apic, APIC_SPIV, val);
 226
 227        if (enabled != apic->sw_enabled) {
 228                apic->sw_enabled = enabled;
 229                if (enabled) {
 230                        static_key_slow_dec_deferred(&apic_sw_disabled);
 231                        recalculate_apic_map(apic->vcpu->kvm);
 232                } else
 233                        static_key_slow_inc(&apic_sw_disabled.key);
 234        }
 235}
 236
 237static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
 238{
 239        kvm_lapic_set_reg(apic, APIC_ID, id << 24);
 240        recalculate_apic_map(apic->vcpu->kvm);
 241}
 242
 243static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
 244{
 245        kvm_lapic_set_reg(apic, APIC_LDR, id);
 246        recalculate_apic_map(apic->vcpu->kvm);
 247}
 248
 249static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
 250{
 251        u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
 252
 253        kvm_lapic_set_reg(apic, APIC_ID, id);
 254        kvm_lapic_set_reg(apic, APIC_LDR, ldr);
 255        recalculate_apic_map(apic->vcpu->kvm);
 256}
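     /*
      * Example of the fixed x2APIC ID-to-LDR mapping above: id 0x25 is
      * cluster 2, position 5, so ldr = (2 << 16) | (1 << 5) = 0x00020020.
      */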
 257
 258static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 259{
 260        return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
 261}
 262
 263static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 264{
 265        return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
 266}
 267
 268static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 269{
 270        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
 271}
 272
 273static inline int apic_lvtt_period(struct kvm_lapic *apic)
 274{
 275        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
 276}
 277
 278static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 279{
 280        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
 281}
 282
 283static inline int apic_lvt_nmi_mode(u32 lvt_val)
 284{
 285        return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
 286}
 287
 288void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 289{
 290        struct kvm_lapic *apic = vcpu->arch.apic;
 291        struct kvm_cpuid_entry2 *feat;
 292        u32 v = APIC_VERSION;
 293
 294        if (!lapic_in_kernel(vcpu))
 295                return;
 296
 297        feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
 298        if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
 299                v |= APIC_LVR_DIRECTED_EOI;
 300        kvm_lapic_set_reg(apic, APIC_LVR, v);
 301}
 302
 303static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
  304        LVT_MASK,       /* part LVTT mask, timer mode mask added at runtime */
 305        LVT_MASK | APIC_MODE_MASK,      /* LVTTHMR */
 306        LVT_MASK | APIC_MODE_MASK,      /* LVTPC */
 307        LINT_MASK, LINT_MASK,   /* LVT0-1 */
 308        LVT_MASK                /* LVTERR */
 309};
 310
 311static int find_highest_vector(void *bitmap)
 312{
 313        int vec;
 314        u32 *reg;
 315
 316        for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
 317             vec >= 0; vec -= APIC_VECTORS_PER_REG) {
 318                reg = bitmap + REG_POS(vec);
 319                if (*reg)
 320                        return fls(*reg) - 1 + vec;
 321        }
 322
 323        return -1;
 324}
 325
 326static u8 count_vectors(void *bitmap)
 327{
 328        int vec;
 329        u32 *reg;
 330        u8 count = 0;
 331
 332        for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
 333                reg = bitmap + REG_POS(vec);
 334                count += hweight32(*reg);
 335        }
 336
 337        return count;
 338}
 339
 340void __kvm_apic_update_irr(u32 *pir, void *regs)
 341{
 342        u32 i, pir_val;
 343
 344        for (i = 0; i <= 7; i++) {
 345                pir_val = xchg(&pir[i], 0);
 346                if (pir_val)
 347                        *((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val;
 348        }
 349}
 350EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
 351
 352void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
 353{
 354        struct kvm_lapic *apic = vcpu->arch.apic;
 355
 356        __kvm_apic_update_irr(pir, apic->regs);
 357
 358        kvm_make_request(KVM_REQ_EVENT, vcpu);
 359}
 360EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 361
 362static inline int apic_search_irr(struct kvm_lapic *apic)
 363{
 364        return find_highest_vector(apic->regs + APIC_IRR);
 365}
 366
 367static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 368{
 369        int result;
 370
 371        /*
  372         * Note that irr_pending is just a hint. It will always be
  373         * true with virtual interrupt delivery enabled.
 374         */
 375        if (!apic->irr_pending)
 376                return -1;
 377
 378        if (apic->vcpu->arch.apicv_active)
 379                kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
 380        result = apic_search_irr(apic);
 381        ASSERT(result == -1 || result >= 16);
 382
 383        return result;
 384}
 385
 386static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 387{
 388        struct kvm_vcpu *vcpu;
 389
 390        vcpu = apic->vcpu;
 391
 392        if (unlikely(vcpu->arch.apicv_active)) {
 393                /* try to update RVI */
 394                apic_clear_vector(vec, apic->regs + APIC_IRR);
 395                kvm_make_request(KVM_REQ_EVENT, vcpu);
 396        } else {
 397                apic->irr_pending = false;
 398                apic_clear_vector(vec, apic->regs + APIC_IRR);
 399                if (apic_search_irr(apic) != -1)
 400                        apic->irr_pending = true;
 401        }
 402}
 403
 404static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 405{
 406        struct kvm_vcpu *vcpu;
 407
 408        if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
 409                return;
 410
 411        vcpu = apic->vcpu;
 412
 413        /*
 414         * With APIC virtualization enabled, all caching is disabled
 415         * because the processor can modify ISR under the hood.  Instead
 416         * just set SVI.
 417         */
 418        if (unlikely(vcpu->arch.apicv_active))
 419                kvm_x86_ops->hwapic_isr_update(vcpu, vec);
 420        else {
 421                ++apic->isr_count;
 422                BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
 423                /*
  424                 * An ISR (in-service register) bit is set when an interrupt is
  425                 * injected.  Only the highest vector is injected, so the most
  426                 * recently set bit matches the highest bit in the ISR.
 427                 */
 428                apic->highest_isr_cache = vec;
 429        }
 430}
 431
 432static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 433{
 434        int result;
 435
 436        /*
 437         * Note that isr_count is always 1, and highest_isr_cache
 438         * is always -1, with APIC virtualization enabled.
 439         */
 440        if (!apic->isr_count)
 441                return -1;
 442        if (likely(apic->highest_isr_cache != -1))
 443                return apic->highest_isr_cache;
 444
 445        result = find_highest_vector(apic->regs + APIC_ISR);
 446        ASSERT(result == -1 || result >= 16);
 447
 448        return result;
 449}
 450
 451static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 452{
 453        struct kvm_vcpu *vcpu;
 454        if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
 455                return;
 456
 457        vcpu = apic->vcpu;
 458
 459        /*
  460         * We do get here with APIC virtualization enabled if the guest
 461         * uses the Hyper-V APIC enlightenment.  In this case we may need
 462         * to trigger a new interrupt delivery by writing the SVI field;
 463         * on the other hand isr_count and highest_isr_cache are unused
 464         * and must be left alone.
 465         */
 466        if (unlikely(vcpu->arch.apicv_active))
 467                kvm_x86_ops->hwapic_isr_update(vcpu,
 468                                               apic_find_highest_isr(apic));
 469        else {
 470                --apic->isr_count;
 471                BUG_ON(apic->isr_count < 0);
 472                apic->highest_isr_cache = -1;
 473        }
 474}
 475
 476int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 477{
  478        /* This may race with setting of irr in __apic_accept_irq() and the
  479         * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
  480         * will cause an immediate vmexit and the value will be recalculated
  481         * on the next vmentry.
 482         */
 483        return apic_find_highest_irr(vcpu->arch.apic);
 484}
 485
 486static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 487                             int vector, int level, int trig_mode,
 488                             struct dest_map *dest_map);
 489
 490int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 491                     struct dest_map *dest_map)
 492{
 493        struct kvm_lapic *apic = vcpu->arch.apic;
 494
 495        return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
 496                        irq->level, irq->trig_mode, dest_map);
 497}
 498
 499static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
 500{
 501
 502        return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
 503                                      sizeof(val));
 504}
 505
 506static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
 507{
 508
 509        return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
 510                                      sizeof(*val));
 511}
 512
 513static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
 514{
 515        return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
 516}
 517
 518static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 519{
 520        u8 val;
 521        if (pv_eoi_get_user(vcpu, &val) < 0)
 522                apic_debug("Can't read EOI MSR value: 0x%llx\n",
 523                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 524        return val & 0x1;
 525}
 526
 527static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 528{
 529        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
 530                apic_debug("Can't set EOI MSR value: 0x%llx\n",
 531                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 532                return;
 533        }
 534        __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 535}
 536
 537static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 538{
 539        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
 540                apic_debug("Can't clear EOI MSR value: 0x%llx\n",
 541                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 542                return;
 543        }
 544        __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 545}
 546
 547static void apic_update_ppr(struct kvm_lapic *apic)
 548{
 549        u32 tpr, isrv, ppr, old_ppr;
 550        int isr;
 551
 552        old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
 553        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
 554        isr = apic_find_highest_isr(apic);
 555        isrv = (isr != -1) ? isr : 0;
 556
 557        if ((tpr & 0xf0) >= (isrv & 0xf0))
 558                ppr = tpr & 0xff;
 559        else
 560                ppr = isrv & 0xf0;
 561
 562        apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
 563                   apic, ppr, isr, isrv);
 564
 565        if (old_ppr != ppr) {
 566                kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
 567                if (ppr < old_ppr)
 568                        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 569        }
 570}
 571
 572static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 573{
 574        kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
 575        apic_update_ppr(apic);
 576}
 577
 578static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
 579{
 580        if (apic_x2apic_mode(apic))
 581                return mda == X2APIC_BROADCAST;
 582
 583        return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST;
 584}
 585
 586static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
 587{
 588        if (kvm_apic_broadcast(apic, mda))
 589                return true;
 590
 591        if (apic_x2apic_mode(apic))
 592                return mda == kvm_apic_id(apic);
 593
 594        return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic));
 595}
 596
 597static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 598{
 599        u32 logical_id;
 600
 601        if (kvm_apic_broadcast(apic, mda))
 602                return true;
 603
 604        logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
 605
 606        if (apic_x2apic_mode(apic))
 607                return ((logical_id >> 16) == (mda >> 16))
 608                       && (logical_id & mda & 0xffff) != 0;
 609
 610        logical_id = GET_APIC_LOGICAL_ID(logical_id);
 611        mda = GET_APIC_DEST_FIELD(mda);
 612
 613        switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
 614        case APIC_DFR_FLAT:
 615                return (logical_id & mda) != 0;
 616        case APIC_DFR_CLUSTER:
 617                return ((logical_id >> 4) == (mda >> 4))
 618                       && (logical_id & mda & 0xf) != 0;
 619        default:
 620                apic_debug("Bad DFR vcpu %d: %08x\n",
 621                           apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR));
 622                return false;
 623        }
 624}
 625
 626/* The KVM local APIC implementation has two quirks:
 627 *
 628 *  - the xAPIC MDA stores the destination at bits 24-31, while this
 629 *    is not true of struct kvm_lapic_irq's dest_id field.  This is
 630 *    just a quirk in the API and is not problematic.
 631 *
 632 *  - in-kernel IOAPIC messages have to be delivered directly to
 633 *    x2APIC, because the kernel does not support interrupt remapping.
 634 *    In order to support broadcast without interrupt remapping, x2APIC
 635 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
 636 *    to X2APIC_BROADCAST.
 637 *
 638 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
 639 * important when userspace wants to use x2APIC-format MSIs, because
 640 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
 641 */
 642static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
 643                struct kvm_lapic *source, struct kvm_lapic *target)
 644{
 645        bool ipi = source != NULL;
 646        bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
 647
 648        if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
 649            !ipi && dest_id == APIC_BROADCAST && x2apic_mda)
 650                return X2APIC_BROADCAST;
 651
 652        return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
 653}
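     /*
      * Example: for an xAPIC target, kvm_apic_mda() shifts dest_id into
      * the hardware MDA layout, e.g. dest_id 0xab becomes 0xab000000;
      * for an x2APIC target the 32-bit dest_id is used unchanged.
      */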
 654
 655bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 656                           int short_hand, unsigned int dest, int dest_mode)
 657{
 658        struct kvm_lapic *target = vcpu->arch.apic;
 659        u32 mda = kvm_apic_mda(vcpu, dest, source, target);
 660
 661        apic_debug("target %p, source %p, dest 0x%x, "
 662                   "dest_mode 0x%x, short_hand 0x%x\n",
 663                   target, source, dest, dest_mode, short_hand);
 664
 665        ASSERT(target);
 666        switch (short_hand) {
 667        case APIC_DEST_NOSHORT:
 668                if (dest_mode == APIC_DEST_PHYSICAL)
 669                        return kvm_apic_match_physical_addr(target, mda);
 670                else
 671                        return kvm_apic_match_logical_addr(target, mda);
 672        case APIC_DEST_SELF:
 673                return target == source;
 674        case APIC_DEST_ALLINC:
 675                return true;
 676        case APIC_DEST_ALLBUT:
 677                return target != source;
 678        default:
 679                apic_debug("kvm: apic: Bad dest shorthand value %x\n",
 680                           short_hand);
 681                return false;
 682        }
 683}
 684EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
 685
 686int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
 687                       const unsigned long *bitmap, u32 bitmap_size)
 688{
 689        u32 mod;
 690        int i, idx = -1;
 691
 692        mod = vector % dest_vcpus;
 693
 694        for (i = 0; i <= mod; i++) {
 695                idx = find_next_bit(bitmap, bitmap_size, idx + 1);
 696                BUG_ON(idx == bitmap_size);
 697        }
 698
 699        return idx;
 700}
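     /*
      * Worked example of the vector hashing above: vector 35 spread
      * over 3 destinations whose bitmap has bits 1, 4 and 6 set:
      * mod = 35 % 3 = 2, so the loop advances to the third set bit and
      * returns idx = 6.
      */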
 701
 702static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
 703{
 704        if (!kvm->arch.disabled_lapic_found) {
 705                kvm->arch.disabled_lapic_found = true;
 706                printk(KERN_INFO
 707                       "Disabled LAPIC found during irq injection\n");
 708        }
 709}
 710
 711static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
 712                struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
 713{
 714        if (kvm->arch.x2apic_broadcast_quirk_disabled) {
 715                if ((irq->dest_id == APIC_BROADCAST &&
 716                                map->mode != KVM_APIC_MODE_X2APIC))
 717                        return true;
 718                if (irq->dest_id == X2APIC_BROADCAST)
 719                        return true;
 720        } else {
 721                bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
 722                if (irq->dest_id == (x2apic_ipi ?
 723                                     X2APIC_BROADCAST : APIC_BROADCAST))
 724                        return true;
 725        }
 726
 727        return false;
 728}
 729
  730/* Return true if the interrupt can be handled by using *bitmap as an index
  731 * mask for valid destinations in the *dst array.
 732 * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
 733 * Note: we may have zero kvm_lapic destinations when we return true, which
 734 * means that the interrupt should be dropped.  In this case, *bitmap would be
 735 * zero and *dst undefined.
 736 */
 737static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
 738                struct kvm_lapic **src, struct kvm_lapic_irq *irq,
 739                struct kvm_apic_map *map, struct kvm_lapic ***dst,
 740                unsigned long *bitmap)
 741{
 742        int i, lowest;
 743
 744        if (irq->shorthand == APIC_DEST_SELF && src) {
 745                *dst = src;
 746                *bitmap = 1;
 747                return true;
 748        } else if (irq->shorthand)
 749                return false;
 750
 751        if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
 752                return false;
 753
 754        if (irq->dest_mode == APIC_DEST_PHYSICAL) {
 755                if (irq->dest_id > map->max_apic_id) {
 756                        *bitmap = 0;
 757                } else {
 758                        *dst = &map->phys_map[irq->dest_id];
 759                        *bitmap = 1;
 760                }
 761                return true;
 762        }
 763
 764        *bitmap = 0;
 765        if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
 766                                (u16 *)bitmap))
 767                return false;
 768
 769        if (!kvm_lowest_prio_delivery(irq))
 770                return true;
 771
 772        if (!kvm_vector_hashing_enabled()) {
 773                lowest = -1;
 774                for_each_set_bit(i, bitmap, 16) {
 775                        if (!(*dst)[i])
 776                                continue;
 777                        if (lowest < 0)
 778                                lowest = i;
 779                        else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
 780                                                (*dst)[lowest]->vcpu) < 0)
 781                                lowest = i;
 782                }
 783        } else {
 784                if (!*bitmap)
 785                        return true;
 786
 787                lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
 788                                bitmap, 16);
 789
 790                if (!(*dst)[lowest]) {
 791                        kvm_apic_disabled_lapic_found(kvm);
 792                        *bitmap = 0;
 793                        return true;
 794                }
 795        }
 796
 797        *bitmap = (lowest >= 0) ? 1 << lowest : 0;
 798
 799        return true;
 800}
 801
 802bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 803                struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
 804{
 805        struct kvm_apic_map *map;
 806        unsigned long bitmap;
 807        struct kvm_lapic **dst = NULL;
 808        int i;
 809        bool ret;
 810
 811        *r = -1;
 812
 813        if (irq->shorthand == APIC_DEST_SELF) {
 814                *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
 815                return true;
 816        }
 817
 818        rcu_read_lock();
 819        map = rcu_dereference(kvm->arch.apic_map);
 820
 821        ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
 822        if (ret)
 823                for_each_set_bit(i, &bitmap, 16) {
 824                        if (!dst[i])
 825                                continue;
 826                        if (*r < 0)
 827                                *r = 0;
 828                        *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 829                }
 830
 831        rcu_read_unlock();
 832        return ret;
 833}
 834
 835/*
 836 * This routine tries to handle interrupts in posted mode; here is how
 837 * it deals with different cases:
 838 * - For single-destination interrupts, handle them in posted mode
 839 * - Else if vector hashing is enabled and it is a lowest-priority
 840 *   interrupt, handle it in posted mode and use the following mechanism
 841 *   to find the destination vCPU.
 842 *      1. For lowest-priority interrupts, store all the possible
 843 *         destination vCPUs in an array.
 844 *      2. Use "guest vector % max number of destination vCPUs" to find
 845 *         the right destination vCPU in the array for the lowest-priority
 846 *         interrupt.
 847 * - Otherwise, use remapped mode to inject the interrupt.
 848 */
 849bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 850                        struct kvm_vcpu **dest_vcpu)
 851{
 852        struct kvm_apic_map *map;
 853        unsigned long bitmap;
 854        struct kvm_lapic **dst = NULL;
 855        bool ret = false;
 856
 857        if (irq->shorthand)
 858                return false;
 859
 860        rcu_read_lock();
 861        map = rcu_dereference(kvm->arch.apic_map);
 862
 863        if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
 864                        hweight16(bitmap) == 1) {
 865                unsigned long i = find_first_bit(&bitmap, 16);
 866
 867                if (dst[i]) {
 868                        *dest_vcpu = dst[i]->vcpu;
 869                        ret = true;
 870                }
 871        }
 872
 873        rcu_read_unlock();
 874        return ret;
 875}
 876
 877/*
 878 * Add a pending IRQ into lapic.
 879 * Return 1 if successfully added and 0 if discarded.
 880 */
 881static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 882                             int vector, int level, int trig_mode,
 883                             struct dest_map *dest_map)
 884{
 885        int result = 0;
 886        struct kvm_vcpu *vcpu = apic->vcpu;
 887
 888        trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
 889                                  trig_mode, vector);
 890        switch (delivery_mode) {
 891        case APIC_DM_LOWEST:
 892                vcpu->arch.apic_arb_prio++;
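                     /* fall through: otherwise delivered like a fixed interrupt */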
 893        case APIC_DM_FIXED:
 894                if (unlikely(trig_mode && !level))
 895                        break;
 896
 897                /* FIXME add logic for vcpu on reset */
 898                if (unlikely(!apic_enabled(apic)))
 899                        break;
 900
 901                result = 1;
 902
 903                if (dest_map) {
 904                        __set_bit(vcpu->vcpu_id, dest_map->map);
 905                        dest_map->vectors[vcpu->vcpu_id] = vector;
 906                }
 907
 908                if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
 909                        if (trig_mode)
 910                                kvm_lapic_set_vector(vector, apic->regs + APIC_TMR);
 911                        else
 912                                apic_clear_vector(vector, apic->regs + APIC_TMR);
 913                }
 914
 915                if (vcpu->arch.apicv_active)
 916                        kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
 917                else {
 918                        kvm_lapic_set_irr(vector, apic);
 919
 920                        kvm_make_request(KVM_REQ_EVENT, vcpu);
 921                        kvm_vcpu_kick(vcpu);
 922                }
 923                break;
 924
 925        case APIC_DM_REMRD:
 926                result = 1;
 927                vcpu->arch.pv.pv_unhalted = 1;
 928                kvm_make_request(KVM_REQ_EVENT, vcpu);
 929                kvm_vcpu_kick(vcpu);
 930                break;
 931
 932        case APIC_DM_SMI:
 933                result = 1;
 934                kvm_make_request(KVM_REQ_SMI, vcpu);
 935                kvm_vcpu_kick(vcpu);
 936                break;
 937
 938        case APIC_DM_NMI:
 939                result = 1;
 940                kvm_inject_nmi(vcpu);
 941                kvm_vcpu_kick(vcpu);
 942                break;
 943
 944        case APIC_DM_INIT:
 945                if (!trig_mode || level) {
 946                        result = 1;
 947                        /* assumes that there are only KVM_APIC_INIT/SIPI */
 948                        apic->pending_events = (1UL << KVM_APIC_INIT);
 949                        /* make sure pending_events is visible before sending
 950                         * the request */
 951                        smp_wmb();
 952                        kvm_make_request(KVM_REQ_EVENT, vcpu);
 953                        kvm_vcpu_kick(vcpu);
 954                } else {
 955                        apic_debug("Ignoring de-assert INIT to vcpu %d\n",
 956                                   vcpu->vcpu_id);
 957                }
 958                break;
 959
 960        case APIC_DM_STARTUP:
 961                apic_debug("SIPI to vcpu %d vector 0x%02x\n",
 962                           vcpu->vcpu_id, vector);
 963                result = 1;
 964                apic->sipi_vector = vector;
 965                /* make sure sipi_vector is visible for the receiver */
 966                smp_wmb();
 967                set_bit(KVM_APIC_SIPI, &apic->pending_events);
 968                kvm_make_request(KVM_REQ_EVENT, vcpu);
 969                kvm_vcpu_kick(vcpu);
 970                break;
 971
 972        case APIC_DM_EXTINT:
 973                /*
 974                 * Should only be called by kvm_apic_local_deliver() with LVT0,
 975                 * before NMI watchdog was enabled. Already handled by
 976                 * kvm_apic_accept_pic_intr().
 977                 */
 978                break;
 979
 980        default:
 981                printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
 982                       delivery_mode);
 983                break;
 984        }
 985        return result;
 986}
 987
 988int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 989{
 990        return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
 991}
 992
 993static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
 994{
 995        return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
 996}
 997
 998static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 999{
1000        int trigger_mode;
1001
 1002        /* Nothing to do if the ioapic does not handle this vector. */
1003        if (!kvm_ioapic_handles_vector(apic, vector))
1004                return;
1005
1006        /* Request a KVM exit to inform the userspace IOAPIC. */
1007        if (irqchip_split(apic->vcpu->kvm)) {
1008                apic->vcpu->arch.pending_ioapic_eoi = vector;
1009                kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1010                return;
1011        }
1012
1013        if (apic_test_vector(vector, apic->regs + APIC_TMR))
1014                trigger_mode = IOAPIC_LEVEL_TRIG;
1015        else
1016                trigger_mode = IOAPIC_EDGE_TRIG;
1017
1018        kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1019}
1020
1021static int apic_set_eoi(struct kvm_lapic *apic)
1022{
1023        int vector = apic_find_highest_isr(apic);
1024
1025        trace_kvm_eoi(apic, vector);
1026
1027        /*
 1028         * Not every EOI write has a corresponding bit set in the ISR;
 1029         * one example is when the kernel checks the timer during setup_IO_APIC.
1030         */
1031        if (vector == -1)
1032                return vector;
1033
1034        apic_clear_isr(vector, apic);
1035        apic_update_ppr(apic);
1036
1037        if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
1038                kvm_hv_synic_send_eoi(apic->vcpu, vector);
1039
1040        kvm_ioapic_send_eoi(apic, vector);
1041        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1042        return vector;
1043}
1044
1045/*
1046 * this interface assumes a trap-like exit, which has already finished
1047 * desired side effect including vISR and vPPR update.
1048 */
1049void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1050{
1051        struct kvm_lapic *apic = vcpu->arch.apic;
1052
1053        trace_kvm_eoi(apic, vector);
1054
1055        kvm_ioapic_send_eoi(apic, vector);
1056        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1057}
1058EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1059
1060static void apic_send_ipi(struct kvm_lapic *apic)
1061{
1062        u32 icr_low = kvm_lapic_get_reg(apic, APIC_ICR);
1063        u32 icr_high = kvm_lapic_get_reg(apic, APIC_ICR2);
1064        struct kvm_lapic_irq irq;
1065
1066        irq.vector = icr_low & APIC_VECTOR_MASK;
1067        irq.delivery_mode = icr_low & APIC_MODE_MASK;
1068        irq.dest_mode = icr_low & APIC_DEST_MASK;
1069        irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1070        irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1071        irq.shorthand = icr_low & APIC_SHORT_MASK;
1072        irq.msi_redir_hint = false;
1073        if (apic_x2apic_mode(apic))
1074                irq.dest_id = icr_high;
1075        else
1076                irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
1077
1078        trace_kvm_apic_ipi(icr_low, irq.dest_id);
1079
1080        apic_debug("icr_high 0x%x, icr_low 0x%x, "
1081                   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
1082                   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
1083                   "msi_redir_hint 0x%x\n",
1084                   icr_high, icr_low, irq.shorthand, irq.dest_id,
1085                   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
1086                   irq.vector, irq.msi_redir_hint);
1087
1088        kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1089}
1090
1091static u32 apic_get_tmcct(struct kvm_lapic *apic)
1092{
1093        ktime_t remaining;
1094        s64 ns;
1095        u32 tmcct;
1096
1097        ASSERT(apic != NULL);
1098
1099        /* if initial count is 0, current count should also be 0 */
1100        if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1101                apic->lapic_timer.period == 0)
1102                return 0;
1103
1104        remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
1105        if (ktime_to_ns(remaining) < 0)
1106                remaining = ktime_set(0, 0);
1107
1108        ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1109        tmcct = div64_u64(ns,
1110                         (APIC_BUS_CYCLE_NS * apic->divide_count));
1111
1112        return tmcct;
1113}
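     /*
      * Illustrative: with APIC_BUS_CYCLE_NS == 1 and divide_count == 16,
      * 1,000,000 ns of remaining time reads back as
      * tmcct = 1000000 / (1 * 16) = 62500 bus cycles.
      */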
1114
1115static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1116{
1117        struct kvm_vcpu *vcpu = apic->vcpu;
1118        struct kvm_run *run = vcpu->run;
1119
1120        kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1121        run->tpr_access.rip = kvm_rip_read(vcpu);
1122        run->tpr_access.is_write = write;
1123}
1124
1125static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1126{
1127        if (apic->vcpu->arch.tpr_access_reporting)
1128                __report_tpr_access(apic, write);
1129}
1130
1131static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1132{
1133        u32 val = 0;
1134
1135        if (offset >= LAPIC_MMIO_LENGTH)
1136                return 0;
1137
1138        switch (offset) {
1139        case APIC_ARBPRI:
1140                apic_debug("Access APIC ARBPRI register which is for P6\n");
1141                break;
1142
1143        case APIC_TMCCT:        /* Timer CCR */
1144                if (apic_lvtt_tscdeadline(apic))
1145                        return 0;
1146
1147                val = apic_get_tmcct(apic);
1148                break;
1149        case APIC_PROCPRI:
1150                apic_update_ppr(apic);
1151                val = kvm_lapic_get_reg(apic, offset);
1152                break;
1153        case APIC_TASKPRI:
1154                report_tpr_access(apic, false);
1155                /* fall thru */
1156        default:
1157                val = kvm_lapic_get_reg(apic, offset);
1158                break;
1159        }
1160
1161        return val;
1162}
1163
1164static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1165{
1166        return container_of(dev, struct kvm_lapic, dev);
1167}
1168
1169int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1170                void *data)
1171{
1172        unsigned char alignment = offset & 0xf;
1173        u32 result;
1174        /* this bitmask has a bit cleared for each reserved register */
1175        static const u64 rmask = 0x43ff01ffffffe70cULL;
1176
1177        if ((alignment + len) > 4) {
1178                apic_debug("KVM_APIC_READ: alignment error %x %d\n",
1179                           offset, len);
1180                return 1;
1181        }
1182
1183        if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
1184                apic_debug("KVM_APIC_READ: read reserved register %x\n",
1185                           offset);
1186                return 1;
1187        }
1188
1189        result = __apic_read(apic, offset & ~0xf);
1190
1191        trace_kvm_apic_read(offset, result);
1192
1193        switch (len) {
1194        case 1:
1195        case 2:
1196        case 4:
1197                memcpy(data, (char *)&result + alignment, len);
1198                break;
1199        default:
 1200                printk(KERN_ERR "Local APIC read with len = %x, "
 1201                       "should be 1, 2, or 4 instead\n", len);
1202                break;
1203        }
1204        return 0;
1205}
1206EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
1207
1208static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1209{
1210        return kvm_apic_hw_enabled(apic) &&
1211            addr >= apic->base_address &&
1212            addr < apic->base_address + LAPIC_MMIO_LENGTH;
1213}
1214
1215static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1216                           gpa_t address, int len, void *data)
1217{
1218        struct kvm_lapic *apic = to_lapic(this);
1219        u32 offset = address - apic->base_address;
1220
1221        if (!apic_mmio_in_range(apic, address))
1222                return -EOPNOTSUPP;
1223
1224        kvm_lapic_reg_read(apic, offset, len, data);
1225
1226        return 0;
1227}
1228
1229static void update_divide_count(struct kvm_lapic *apic)
1230{
1231        u32 tmp1, tmp2, tdcr;
1232
1233        tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1234        tmp1 = tdcr & 0xf;
1235        tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1236        apic->divide_count = 0x1 << (tmp2 & 0x7);
1237
1238        apic_debug("timer divide count is 0x%x\n",
1239                                   apic->divide_count);
1240}
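     /*
      * Worked example of the TDCR decode above: tdcr = 0x3 gives
      * tmp2 = ((0x3 & 0x3) | 0) + 1 = 4, i.e. divide_count = 1 << 4 = 16;
      * the divide-by-1 encoding tdcr = 0xb gives tmp2 = 8 and
      * divide_count = 1 << (8 & 7) = 1.
      */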
1241
1242static void apic_update_lvtt(struct kvm_lapic *apic)
1243{
1244        u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1245                        apic->lapic_timer.timer_mode_mask;
1246
1247        if (apic->lapic_timer.timer_mode != timer_mode) {
1248                apic->lapic_timer.timer_mode = timer_mode;
1249                hrtimer_cancel(&apic->lapic_timer.timer);
1250        }
1251}
1252
1253static void apic_timer_expired(struct kvm_lapic *apic)
1254{
1255        struct kvm_vcpu *vcpu = apic->vcpu;
1256        struct swait_queue_head *q = &vcpu->wq;
1257        struct kvm_timer *ktimer = &apic->lapic_timer;
1258
1259        if (atomic_read(&apic->lapic_timer.pending))
1260                return;
1261
1262        atomic_inc(&apic->lapic_timer.pending);
1263        kvm_set_pending_timer(vcpu);
1264
1265        if (swait_active(q))
1266                swake_up(q);
1267
1268        if (apic_lvtt_tscdeadline(apic))
1269                ktimer->expired_tscdeadline = ktimer->tscdeadline;
1270}
1271
1272/*
 1273 * On APICv, this test can cause a busy wait
 1274 * while a higher-priority task is running.
1275 */
1276
1277static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1278{
1279        struct kvm_lapic *apic = vcpu->arch.apic;
1280        u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1281
1282        if (kvm_apic_hw_enabled(apic)) {
1283                int vec = reg & APIC_VECTOR_MASK;
1284                void *bitmap = apic->regs + APIC_ISR;
1285
1286                if (vcpu->arch.apicv_active)
1287                        bitmap = apic->regs + APIC_IRR;
1288
1289                if (apic_test_vector(vec, bitmap))
1290                        return true;
1291        }
1292        return false;
1293}
1294
1295void wait_lapic_expire(struct kvm_vcpu *vcpu)
1296{
1297        struct kvm_lapic *apic = vcpu->arch.apic;
1298        u64 guest_tsc, tsc_deadline;
1299
1300        if (!lapic_in_kernel(vcpu))
1301                return;
1302
1303        if (apic->lapic_timer.expired_tscdeadline == 0)
1304                return;
1305
1306        if (!lapic_timer_int_injected(vcpu))
1307                return;
1308
1309        tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1310        apic->lapic_timer.expired_tscdeadline = 0;
1311        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1312        trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
1313
1314        /* __delay is delay_tsc whenever the hardware has TSC, thus always.  */
1315        if (guest_tsc < tsc_deadline)
1316                __delay(min(tsc_deadline - guest_tsc,
1317                        nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
1318}
1319
1320static void start_sw_tscdeadline(struct kvm_lapic *apic)
1321{
1322        u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
1323        u64 ns = 0;
1324        ktime_t expire;
1325        struct kvm_vcpu *vcpu = apic->vcpu;
1326        unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1327        unsigned long flags;
1328        ktime_t now;
1329
1330        if (unlikely(!tscdeadline || !this_tsc_khz))
1331                return;
1332
1333        local_irq_save(flags);
1334
1335        now = apic->lapic_timer.timer.base->get_time();
1336        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1337        if (likely(tscdeadline > guest_tsc)) {
1338                ns = (tscdeadline - guest_tsc) * 1000000ULL;
1339                do_div(ns, this_tsc_khz);
1340                expire = ktime_add_ns(now, ns);
1341                expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
1342                hrtimer_start(&apic->lapic_timer.timer,
1343                                expire, HRTIMER_MODE_ABS_PINNED);
1344        } else
1345                apic_timer_expired(apic);
1346
1347        local_irq_restore(flags);
1348}
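     /*
      * Illustrative: with virtual_tsc_khz == 2000000 (a 2 GHz guest
      * TSC), a deadline 2,000,000 ticks in the future converts to
      * ns = 2000000 * 1000000 / 2000000 = 1,000,000 ns, i.e. a 1 ms
      * hrtimer (shortened by lapic_timer_advance_ns).
      */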
1349
1350bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1351{
1352        if (!lapic_in_kernel(vcpu))
1353                return false;
1354
1355        return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1356}
1357EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1358
1359static void cancel_hv_tscdeadline(struct kvm_lapic *apic)
1360{
1361        kvm_x86_ops->cancel_hv_timer(apic->vcpu);
1362        apic->lapic_timer.hv_timer_in_use = false;
1363}
1364
1365void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1366{
1367        struct kvm_lapic *apic = vcpu->arch.apic;
1368
1369        WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1370        WARN_ON(swait_active(&vcpu->wq));
1371        cancel_hv_tscdeadline(apic);
1372        apic_timer_expired(apic);
1373}
1374EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
1375
1376static bool start_hv_tscdeadline(struct kvm_lapic *apic)
1377{
1378        u64 tscdeadline = apic->lapic_timer.tscdeadline;
1379
1380        if (atomic_read(&apic->lapic_timer.pending) ||
1381                kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
1382                if (apic->lapic_timer.hv_timer_in_use)
1383                        cancel_hv_tscdeadline(apic);
1384        } else {
1385                apic->lapic_timer.hv_timer_in_use = true;
1386                hrtimer_cancel(&apic->lapic_timer.timer);
1387
1388                /* In case the sw timer triggered in the window */
1389                if (atomic_read(&apic->lapic_timer.pending))
1390                        cancel_hv_tscdeadline(apic);
1391        }
1392        trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
1393                        apic->lapic_timer.hv_timer_in_use);
1394        return apic->lapic_timer.hv_timer_in_use;
1395}
1396
1397void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
1398{
1399        struct kvm_lapic *apic = vcpu->arch.apic;
1400
1401        WARN_ON(apic->lapic_timer.hv_timer_in_use);
1402
1403        if (apic_lvtt_tscdeadline(apic))
1404                start_hv_tscdeadline(apic);
1405}
1406EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
1407
1408void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
1409{
1410        struct kvm_lapic *apic = vcpu->arch.apic;
1411
1412        /* Possibly the TSC deadline timer is not enabled yet */
1413        if (!apic->lapic_timer.hv_timer_in_use)
1414                return;
1415
1416        cancel_hv_tscdeadline(apic);
1417
1418        if (atomic_read(&apic->lapic_timer.pending))
1419                return;
1420
1421        start_sw_tscdeadline(apic);
1422}
1423EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
1424
1425static void start_apic_timer(struct kvm_lapic *apic)
1426{
1427        ktime_t now;
1428
1429        atomic_set(&apic->lapic_timer.pending, 0);
1430
1431        if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
1432                /* lapic timer in oneshot or periodic mode */
1433                now = apic->lapic_timer.timer.base->get_time();
1434                apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
1435                            * APIC_BUS_CYCLE_NS * apic->divide_count;
1436
1437                if (!apic->lapic_timer.period)
1438                        return;
1439                /*
 1440                 * Do not allow the guest to program periodic timers with a
 1441                 * small interval, since the hrtimers are not throttled by
 1442                 * the host scheduler.
1443                 */
1444                if (apic_lvtt_period(apic)) {
1445                        s64 min_period = min_timer_period_us * 1000LL;
1446
1447                        if (apic->lapic_timer.period < min_period) {
1448                                pr_info_ratelimited(
1449                                    "kvm: vcpu %i: requested %lld ns "
1450                                    "lapic timer period limited to %lld ns\n",
1451                                    apic->vcpu->vcpu_id,
1452                                    apic->lapic_timer.period, min_period);
1453                                apic->lapic_timer.period = min_period;
1454                        }
1455                }
1456
1457                hrtimer_start(&apic->lapic_timer.timer,
1458                              ktime_add_ns(now, apic->lapic_timer.period),
1459                              HRTIMER_MODE_ABS_PINNED);
1460
1461                apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
1462                           PRIx64 ", "
1463                           "timer initial count 0x%x, period %lldns, "
1464                           "expire @ 0x%016" PRIx64 ".\n", __func__,
1465                           APIC_BUS_CYCLE_NS, ktime_to_ns(now),
1466                           kvm_lapic_get_reg(apic, APIC_TMICT),
1467                           apic->lapic_timer.period,
1468                           ktime_to_ns(ktime_add_ns(now,
1469                                        apic->lapic_timer.period)));
1470        } else if (apic_lvtt_tscdeadline(apic)) {
1471                if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic)))
1472                        start_sw_tscdeadline(apic);
1473        }
1474}
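     /*
      * Example of the period computation above: TMICT = 100000 with
      * divide_count = 16 programs a period of
      * 100000 * APIC_BUS_CYCLE_NS * 16 = 1,600,000 ns (1.6 ms).
      */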
1475
1476static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1477{
1478        bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
1479
1480        if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
1481                apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
1482                if (lvt0_in_nmi_mode) {
1483                        apic_debug("Receive NMI setting on APIC_LVT0 "
1484                                   "for cpu %d\n", apic->vcpu->vcpu_id);
1485                        atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1486                } else
1487                        atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1488        }
1489}
1490
1491int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1492{
1493        int ret = 0;
1494
1495        trace_kvm_apic_write(reg, val);
1496
1497        switch (reg) {
1498        case APIC_ID:           /* Local APIC ID */
1499                if (!apic_x2apic_mode(apic))
1500                        kvm_apic_set_xapic_id(apic, val >> 24);
1501                else
1502                        ret = 1;
1503                break;
1504
1505        case APIC_TASKPRI:
1506                report_tpr_access(apic, true);
1507                apic_set_tpr(apic, val & 0xff);
1508                break;
1509
1510        case APIC_EOI:
1511                apic_set_eoi(apic);
1512                break;
1513
1514        case APIC_LDR:
1515                if (!apic_x2apic_mode(apic))
1516                        kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
1517                else
1518                        ret = 1;
1519                break;
1520
1521        case APIC_DFR:
1522                if (!apic_x2apic_mode(apic)) {
1523                        kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
1524                        recalculate_apic_map(apic->vcpu->kvm);
1525                } else
1526                        ret = 1;
1527                break;
1528
1529        case APIC_SPIV: {
1530                u32 mask = 0x3ff;
1531                if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
1532                        mask |= APIC_SPIV_DIRECTED_EOI;
1533                apic_set_spiv(apic, val & mask);
1534                if (!(val & APIC_SPIV_APIC_ENABLED)) {
1535                        int i;
1536                        u32 lvt_val;
1537
1538                        for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
1539                                lvt_val = kvm_lapic_get_reg(apic,
1540                                                       APIC_LVTT + 0x10 * i);
1541                                kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
1542                                             lvt_val | APIC_LVT_MASKED);
1543                        }
1544                        apic_update_lvtt(apic);
1545                        atomic_set(&apic->lapic_timer.pending, 0);
1546
1547                }
1548                break;
1549        }
1550        case APIC_ICR:
1551                /* No delay here, so we always clear the pending bit */
1552                kvm_lapic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
1553                apic_send_ipi(apic);
1554                break;
1555
1556        case APIC_ICR2:
1557                if (!apic_x2apic_mode(apic))
1558                        val &= 0xff000000;
1559                kvm_lapic_set_reg(apic, APIC_ICR2, val);
1560                break;
1561
1562        case APIC_LVT0:
1563                apic_manage_nmi_watchdog(apic, val);
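                /* fall through - LVT0 is masked and stored like the other LVTs */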
1564        case APIC_LVTTHMR:
1565        case APIC_LVTPC:
1566        case APIC_LVT1:
1567        case APIC_LVTERR:
1568                /* TODO: Check vector */
1569                if (!kvm_apic_sw_enabled(apic))
1570                        val |= APIC_LVT_MASKED;
1571
1572                val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
1573                kvm_lapic_set_reg(apic, reg, val);
1574
1575                break;
1576
1577        case APIC_LVTT:
1578                if (!kvm_apic_sw_enabled(apic))
1579                        val |= APIC_LVT_MASKED;
1580                val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
1581                kvm_lapic_set_reg(apic, APIC_LVTT, val);
1582                apic_update_lvtt(apic);
1583                break;
1584
1585        case APIC_TMICT:
1586                if (apic_lvtt_tscdeadline(apic))
1587                        break;
1588
1589                hrtimer_cancel(&apic->lapic_timer.timer);
1590                kvm_lapic_set_reg(apic, APIC_TMICT, val);
1591                start_apic_timer(apic);
1592                break;
1593
1594        case APIC_TDCR:
1595                if (val & 4)
1596                        apic_debug("KVM_WRITE:TDCR %x\n", val);
1597                kvm_lapic_set_reg(apic, APIC_TDCR, val);
1598                update_divide_count(apic);
1599                break;
1600
1601        case APIC_ESR:
1602                if (apic_x2apic_mode(apic) && val != 0) {
1603                        apic_debug("KVM_WRITE:ESR not zero %x\n", val);
1604                        ret = 1;
1605                }
1606                break;
1607
1608        case APIC_SELF_IPI:
1609                if (apic_x2apic_mode(apic)) {
1610                        kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
1611                } else
1612                        ret = 1;
1613                break;
1614        default:
1615                ret = 1;
1616                break;
1617        }
1618        if (ret)
1619                apic_debug("Local APIC write to reserved or read-only register 0x%x\n", reg);
1620        return ret;
1621}
1622EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
1623
1624static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1625                            gpa_t address, int len, const void *data)
1626{
1627        struct kvm_lapic *apic = to_lapic(this);
1628        unsigned int offset = address - apic->base_address;
1629        u32 val;
1630
1631        if (!apic_mmio_in_range(apic, address))
1632                return -EOPNOTSUPP;
1633
1634        /*
1635         * APIC registers must be aligned on a 128-bit boundary.
1636         * 32/64/128-bit registers must be accessed with 32-bit
1637         * reads and writes; see SDM 8.4.1.
1638         */
1639        if (len != 4 || (offset & 0xf)) {
1640                /* Don't shout loud, $infamous_os would cause only noise. */
1641                apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
1642                return 0;
1643        }
1644
1645        val = *(u32 *)data;
1646
1647        /* EOI writes are too common to be worth logging */
1648        if (offset != APIC_EOI)
1649                apic_debug("%s: offset 0x%x with length 0x%x, and value is "
1650                           "0x%x\n", __func__, offset, len, val);
1651
1652        kvm_lapic_reg_write(apic, offset & 0xff0, val);
1653
1654        return 0;
1655}
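
/*
 * Worked example of the dispatch above (illustrative values): an aligned
 * 4-byte guest store at base + 0x80 reaches
 * kvm_lapic_reg_write(apic, 0x80, val), i.e. APIC_TASKPRI.  The "& 0xff0"
 * keeps only the 16-byte register index, matching the xAPIC page's
 * 16-byte register stride.
 */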
1656
1657void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
1658{
1659        kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
1660}
1661EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
1662
1663/* emulate APIC access in a trap manner */
1664void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
1665{
1666        u32 val = 0;
1667
1668        /* hardware has already done the conditional check and instruction decode */
1669        offset &= 0xff0;
1670
1671        kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
1672
1673        /* TODO: optimize to just emulate side effect w/o one more write */
1674        kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
1675}
1676EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
1677
1678void kvm_free_lapic(struct kvm_vcpu *vcpu)
1679{
1680        struct kvm_lapic *apic = vcpu->arch.apic;
1681
1682        if (!vcpu->arch.apic)
1683                return;
1684
1685        hrtimer_cancel(&apic->lapic_timer.timer);
1686
1687        if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
1688                static_key_slow_dec_deferred(&apic_hw_disabled);
1689
1690        if (!apic->sw_enabled)
1691                static_key_slow_dec_deferred(&apic_sw_disabled);
1692
1693        if (apic->regs)
1694                free_page((unsigned long)apic->regs);
1695
1696        kfree(apic);
1697}
1698
1699/*
1700 *----------------------------------------------------------------------
1701 * LAPIC interface
1702 *----------------------------------------------------------------------
1703 */
1704
1705u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
1706{
1707        struct kvm_lapic *apic = vcpu->arch.apic;
1708
1709        if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
1710                        apic_lvtt_period(apic))
1711                return 0;
1712
1713        return apic->lapic_timer.tscdeadline;
1714}
1715
1716void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
1717{
1718        struct kvm_lapic *apic = vcpu->arch.apic;
1719
1720        if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
1721                        apic_lvtt_period(apic))
1722                return;
1723
1724        hrtimer_cancel(&apic->lapic_timer.timer);
1725        apic->lapic_timer.tscdeadline = data;
1726        start_apic_timer(apic);
1727}
1728
1729void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
1730{
1731        struct kvm_lapic *apic = vcpu->arch.apic;
1732
1733        apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
1734                     | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
1735}
1736
1737u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
1738{
1739        u64 tpr;
1740
1741        tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
1742
1743        return (tpr & 0xf0) >> 4;
1744}
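
/*
 * Worked example (architectural behavior, not specific to this file):
 * CR8 aliases the top nibble of the 8-bit TPR, so "mov $5, %cr8" in the
 * guest yields APIC_TASKPRI == 0x50, and kvm_lapic_get_cr8() reads back
 * (0x50 & 0xf0) >> 4 == 5.
 */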
1745
1746void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
1747{
1748        u64 old_value = vcpu->arch.apic_base;
1749        struct kvm_lapic *apic = vcpu->arch.apic;
1750
1751        if (!apic) {
1752                value |= MSR_IA32_APICBASE_BSP;
1753                vcpu->arch.apic_base = value;
1754                return;
1755        }
1756
1757        vcpu->arch.apic_base = value;
1758
1759        /* update jump label if enable bit changes */
1760        if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
1761                if (value & MSR_IA32_APICBASE_ENABLE) {
1762                        kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
1763                        static_key_slow_dec_deferred(&apic_hw_disabled);
1764                } else {
1765                        static_key_slow_inc(&apic_hw_disabled.key);
1766                        recalculate_apic_map(vcpu->kvm);
1767                }
1768        }
1769
1770        if ((old_value ^ value) & X2APIC_ENABLE) {
1771                if (value & X2APIC_ENABLE) {
1772                        kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
1773                        kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
1774                } else
1775                        kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
1776        }
1777
1778        apic->base_address = apic->vcpu->arch.apic_base &
1779                             MSR_IA32_APICBASE_BASE;
1780
1781        if ((value & MSR_IA32_APICBASE_ENABLE) &&
1782             apic->base_address != APIC_DEFAULT_PHYS_BASE)
1783                pr_warn_once("APIC base relocation is unsupported by KVM");
1784
1785        /* with FSB delivery interrupt, we can restart APIC functionality */
1786        apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
1787                   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
1788
1789}
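
/*
 * Illustrative guest-side transition (not kernel code): x2APIC is
 * enabled by a WRMSR to IA32_APICBASE that sets both the global enable
 * bit (MSR_IA32_APICBASE_ENABLE, bit 11) and the EXTD bit
 * (X2APIC_ENABLE, bit 10); the (old_value ^ value) & X2APIC_ENABLE test
 * above catches exactly that edge.
 */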
1790
1791void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
1792{
1793        struct kvm_lapic *apic;
1794        int i;
1795
1796        apic_debug("%s\n", __func__);
1797
1798        ASSERT(vcpu);
1799        apic = vcpu->arch.apic;
1800        ASSERT(apic != NULL);
1801
1802        /* Stop the timer in case it's a reset to an active apic */
1803        hrtimer_cancel(&apic->lapic_timer.timer);
1804
1805        if (!init_event) {
1806                kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
1807                                         MSR_IA32_APICBASE_ENABLE);
1808                kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
1809        }
1810        kvm_apic_set_version(apic->vcpu);
1811
1812        for (i = 0; i < KVM_APIC_LVT_NUM; i++)
1813                kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
1814        apic_update_lvtt(apic);
1815        if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
1816                kvm_lapic_set_reg(apic, APIC_LVT0,
1817                             SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
1818        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
1819
1820        kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
1821        apic_set_spiv(apic, 0xff);
1822        kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
1823        if (!apic_x2apic_mode(apic))
1824                kvm_apic_set_ldr(apic, 0);
1825        kvm_lapic_set_reg(apic, APIC_ESR, 0);
1826        kvm_lapic_set_reg(apic, APIC_ICR, 0);
1827        kvm_lapic_set_reg(apic, APIC_ICR2, 0);
1828        kvm_lapic_set_reg(apic, APIC_TDCR, 0);
1829        kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1830        for (i = 0; i < 8; i++) {
1831                kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
1832                kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
1833                kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
1834        }
1835        apic->irr_pending = vcpu->arch.apicv_active;
1836        apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
1837        apic->highest_isr_cache = -1;
1838        update_divide_count(apic);
1839        atomic_set(&apic->lapic_timer.pending, 0);
1840        if (kvm_vcpu_is_bsp(vcpu))
1841                kvm_lapic_set_base(vcpu,
1842                                vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
1843        vcpu->arch.pv_eoi.msr_val = 0;
1844        apic_update_ppr(apic);
1845
1846        vcpu->arch.apic_arb_prio = 0;
1847        vcpu->arch.apic_attention = 0;
1848
1849        apic_debug("%s: vcpu=%p, id=%d, base_msr="
1850                   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
1851                   vcpu, kvm_apic_id(apic),
1852                   vcpu->arch.apic_base, apic->base_address);
1853}
1854
1855/*
1856 *----------------------------------------------------------------------
1857 * timer interface
1858 *----------------------------------------------------------------------
1859 */
1860
1861static bool lapic_is_periodic(struct kvm_lapic *apic)
1862{
1863        return apic_lvtt_period(apic);
1864}
1865
1866int apic_has_pending_timer(struct kvm_vcpu *vcpu)
1867{
1868        struct kvm_lapic *apic = vcpu->arch.apic;
1869
1870        if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
1871                return atomic_read(&apic->lapic_timer.pending);
1872
1873        return 0;
1874}
1875
1876int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
1877{
1878        u32 reg = kvm_lapic_get_reg(apic, lvt_type);
1879        int vector, mode, trig_mode;
1880
1881        if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
1882                vector = reg & APIC_VECTOR_MASK;
1883                mode = reg & APIC_MODE_MASK;
1884                trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
1885                return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
1886                                        NULL);
1887        }
1888        return 0;
1889}
1890
1891void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
1892{
1893        struct kvm_lapic *apic = vcpu->arch.apic;
1894
1895        if (apic)
1896                kvm_apic_local_deliver(apic, APIC_LVT0);
1897}
1898
1899static const struct kvm_io_device_ops apic_mmio_ops = {
1900        .read     = apic_mmio_read,
1901        .write    = apic_mmio_write,
1902};
1903
1904static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
1905{
1906        struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
1907        struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
1908
1909        apic_timer_expired(apic);
1910
1911        if (lapic_is_periodic(apic)) {
1912                hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
1913                return HRTIMER_RESTART;
1914        } else
1915                return HRTIMER_NORESTART;
1916}
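
/*
 * Note: for periodic mode the timer is re-armed relative to its previous
 * expiry (hrtimer_add_expires_ns) rather than to "now", so a callback
 * that runs late does not accumulate drift across periods.
 */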
1917
1918int kvm_create_lapic(struct kvm_vcpu *vcpu)
1919{
1920        struct kvm_lapic *apic;
1921
1922        ASSERT(vcpu != NULL);
1923        apic_debug("apic_init %d\n", vcpu->vcpu_id);
1924
1925        apic = kzalloc(sizeof(*apic), GFP_KERNEL);
1926        if (!apic)
1927                goto nomem;
1928
1929        vcpu->arch.apic = apic;
1930
1931        apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
1932        if (!apic->regs) {
1933                printk(KERN_ERR "could not allocate APIC register page for vcpu %x\n",
1934                       vcpu->vcpu_id);
1935                goto nomem_free_apic;
1936        }
1937        apic->vcpu = vcpu;
1938
1939        hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
1940                     HRTIMER_MODE_ABS_PINNED);
1941        apic->lapic_timer.timer.function = apic_timer_fn;
1942
1943        /*
1944         * The APIC is created enabled; this prevents kvm_lapic_set_base
1945         * from thinking that the APIC state has changed.
1946         */
1947        vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
1948        static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
1949        kvm_lapic_reset(vcpu, false);
1950        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
1951
1952        return 0;
1953nomem_free_apic:
1954        kfree(apic);
1955nomem:
1956        return -ENOMEM;
1957}
1958
1959int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
1960{
1961        struct kvm_lapic *apic = vcpu->arch.apic;
1962        int highest_irr;
1963
1964        if (!apic_enabled(apic))
1965                return -1;
1966
1967        apic_update_ppr(apic);
1968        highest_irr = apic_find_highest_irr(apic);
1969        if ((highest_irr == -1) ||
1970            ((highest_irr & 0xF0) <= kvm_lapic_get_reg(apic, APIC_PROCPRI)))
1971                return -1;
1972        return highest_irr;
1973}
1974
1975int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
1976{
1977        u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
1978        int r = 0;
1979
1980        if (!kvm_apic_hw_enabled(vcpu->arch.apic))
1981                r = 1;
1982        if ((lvt0 & APIC_LVT_MASKED) == 0 &&
1983            GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
1984                r = 1;
1985        return r;
1986}
1987
1988void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1989{
1990        struct kvm_lapic *apic = vcpu->arch.apic;
1991
1992        if (atomic_read(&apic->lapic_timer.pending) > 0) {
1993                kvm_apic_local_deliver(apic, APIC_LVTT);
1994                if (apic_lvtt_tscdeadline(apic))
1995                        apic->lapic_timer.tscdeadline = 0;
1996                atomic_set(&apic->lapic_timer.pending, 0);
1997        }
1998}
1999
2000int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2001{
2002        int vector = kvm_apic_has_interrupt(vcpu);
2003        struct kvm_lapic *apic = vcpu->arch.apic;
2004
2005        if (vector == -1)
2006                return -1;
2007
2008        /*
2009         * We get here even with APIC virtualization enabled, if doing
2010         * nested virtualization and L1 runs with the "acknowledge interrupt
2011         * on exit" mode.  Then we cannot inject the interrupt via RVI,
2012         * because the process would deliver it through the IDT.
2013         */
2014
2015        apic_set_isr(vector, apic);
2016        apic_update_ppr(apic);
2017        apic_clear_irr(vector, apic);
2018
2019        if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
2020                apic_clear_isr(vector, apic);
2021                apic_update_ppr(apic);
2022        }
2023
2024        return vector;
2025}
2026
2027static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2028                struct kvm_lapic_state *s, bool set)
2029{
2030        if (apic_x2apic_mode(vcpu->arch.apic)) {
2031                u32 *id = (u32 *)(s->regs + APIC_ID);
2032
2033                if (vcpu->kvm->arch.x2apic_format) {
2034                        if (*id != vcpu->vcpu_id)
2035                                return -EINVAL;
2036                } else {
2037                        if (set)
2038                                *id >>= 24;
2039                        else
2040                                *id <<= 24;
2041                }
2042        }
2043
2044        return 0;
2045}
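
/*
 * Worked example of the fixup above (illustrative values): with the
 * legacy format, vcpu 3's APIC_ID is presented to userspace as
 * 0x03000000 (shifted into the xAPIC position, bits 31-24) and shifted
 * back down to 3 on KVM_SET_LAPIC.  With x2apic_format the register
 * carries the full 32-bit x2APIC id and must equal vcpu_id.
 */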
2046
2047int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2048{
2049        memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2050        return kvm_apic_state_fixup(vcpu, s, false);
2051}
2052
2053int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2054{
2055        struct kvm_lapic *apic = vcpu->arch.apic;
2056        int r;
2057
2059        kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2060        /* set SPIV separately to get count of SW disabled APICs right */
2061        apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2062
2063        r = kvm_apic_state_fixup(vcpu, s, true);
2064        if (r)
2065                return r;
2066        memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
2067
2068        recalculate_apic_map(vcpu->kvm);
2069        kvm_apic_set_version(vcpu);
2070
2071        apic_update_ppr(apic);
2072        hrtimer_cancel(&apic->lapic_timer.timer);
2073        apic_update_lvtt(apic);
2074        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2075        update_divide_count(apic);
2076        start_apic_timer(apic);
2077        apic->irr_pending = true;
2078        apic->isr_count = vcpu->arch.apicv_active ?
2079                                1 : count_vectors(apic->regs + APIC_ISR);
2080        apic->highest_isr_cache = -1;
2081        if (vcpu->arch.apicv_active) {
2082                if (kvm_x86_ops->apicv_post_state_restore)
2083                        kvm_x86_ops->apicv_post_state_restore(vcpu);
2084                kvm_x86_ops->hwapic_irr_update(vcpu,
2085                                apic_find_highest_irr(apic));
2086                kvm_x86_ops->hwapic_isr_update(vcpu,
2087                                apic_find_highest_isr(apic));
2088        }
2089        kvm_make_request(KVM_REQ_EVENT, vcpu);
2090        if (ioapic_in_kernel(vcpu->kvm))
2091                kvm_rtc_eoi_tracking_restore_one(vcpu);
2092
2093        vcpu->arch.apic_arb_prio = 0;
2094
2095        return 0;
2096}
2097
2098void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2099{
2100        struct hrtimer *timer;
2101
2102        if (!lapic_in_kernel(vcpu))
2103                return;
2104
2105        timer = &vcpu->arch.apic->lapic_timer.timer;
2106        if (hrtimer_cancel(timer))
2107                hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
2108}
2109
2110/*
2111 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2112 *
2113 * Detect whether guest triggered PV EOI since the
2114 * last entry. If yes, set EOI on the guest's behalf.
2115 * Clear PV EOI in guest memory in any case.
2116 */
2117static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2118                                        struct kvm_lapic *apic)
2119{
2120        bool pending;
2121        int vector;
2122        /*
2123         * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2124         * and KVM_PV_EOI_ENABLED in guest memory as follows:
2125         *
2126         * KVM_APIC_PV_EOI_PENDING is unset:
2127         *      -> host disabled PV EOI.
2128         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2129         *      -> host enabled PV EOI, guest did not execute EOI yet.
2130         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2131         *      -> host enabled PV EOI, guest executed EOI.
2132         */
2133        BUG_ON(!pv_eoi_enabled(vcpu));
2134        pending = pv_eoi_get_pending(vcpu);
2135        /*
2136         * Clear pending bit in any case: it will be set again on vmentry.
2137         * While this might not be ideal from performance point of view,
2138         * this makes sure pv eoi is only enabled when we know it's safe.
2139         */
2140        pv_eoi_clr_pending(vcpu);
2141        if (pending)
2142                return;
2143        vector = apic_set_eoi(apic);
2144        trace_kvm_pv_eoi(apic, vector);
2145}
2146
2147void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2148{
2149        u32 data;
2150
2151        if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2152                apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2153
2154        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2155                return;
2156
2157        if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2158                                  sizeof(u32)))
2159                return;
2160
2161        apic_set_tpr(vcpu->arch.apic, data & 0xff);
2162}
2163
2164/*
2165 * apic_sync_pv_eoi_to_guest - called before vmentry
2166 *
2167 * Detect whether it's safe to enable PV EOI and
2168 * if yes do so.
2169 */
2170static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2171                                        struct kvm_lapic *apic)
2172{
2173        if (!pv_eoi_enabled(vcpu) ||
2174            /* IRR set or many bits in ISR: could be nested. */
2175            apic->irr_pending ||
2176            /* Cache not set: could be safe but we don't bother. */
2177            apic->highest_isr_cache == -1 ||
2178            /* Need EOI to update ioapic. */
2179            kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2180                /*
2181                 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2182                 * so we need not do anything here.
2183                 */
2184                return;
2185        }
2186
2187        pv_eoi_set_pending(apic->vcpu);
2188}
2189
2190void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2191{
2192        u32 data, tpr;
2193        int max_irr, max_isr;
2194        struct kvm_lapic *apic = vcpu->arch.apic;
2195
2196        apic_sync_pv_eoi_to_guest(vcpu, apic);
2197
2198        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2199                return;
2200
2201        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2202        max_irr = apic_find_highest_irr(apic);
2203        if (max_irr < 0)
2204                max_irr = 0;
2205        max_isr = apic_find_highest_isr(apic);
2206        if (max_isr < 0)
2207                max_isr = 0;
2208        data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2209
2210        kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2211                                sizeof(u32));
2212}
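
/*
 * Layout of the word assembled above (as derived from the code): byte 0
 * holds the TPR, byte 1 the priority class (high nibble) of the highest
 * in-service vector, and byte 3 the highest pending IRR vector.  The
 * vAPIC page consumer is assumed to decode the same layout.
 */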
2213
2214int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2215{
2216        if (vapic_addr) {
2217                if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2218                                        &vcpu->arch.apic->vapic_cache,
2219                                        vapic_addr, sizeof(u32)))
2220                        return -EINVAL;
2221                __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2222        } else {
2223                __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2224        }
2225
2226        vcpu->arch.apic->vapic_addr = vapic_addr;
2227        return 0;
2228}
2229
2230int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2231{
2232        struct kvm_lapic *apic = vcpu->arch.apic;
2233        u32 reg = (msr - APIC_BASE_MSR) << 4;
2234
2235        if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2236                return 1;
2237
2238        if (reg == APIC_ICR2)
2239                return 1;
2240
2241        /* for an ICR write, store the destination (ICR2) before the command */
2242        if (reg == APIC_ICR)
2243                kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2244        return kvm_lapic_reg_write(apic, reg, (u32)data);
2245}
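
/*
 * The MSR-to-register mapping above is a shift by 4.  For example
 * (illustrative values): MSR 0x808 maps to reg
 * (0x808 - APIC_BASE_MSR) << 4 == 0x80 == APIC_TASKPRI, and MSR 0x830 to
 * 0x300 == APIC_ICR.  ICR is the only 64-bit register; its high dword
 * travels via ICR2.
 */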
2246
2247int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2248{
2249        struct kvm_lapic *apic = vcpu->arch.apic;
2250        u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
2251
2252        if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2253                return 1;
2254
2255        if (reg == APIC_DFR || reg == APIC_ICR2) {
2256                apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
2257                           reg);
2258                return 1;
2259        }
2260
2261        if (kvm_lapic_reg_read(apic, reg, 4, &low))
2262                return 1;
2263        if (reg == APIC_ICR)
2264                kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2265
2266        *data = (((u64)high) << 32) | low;
2267
2268        return 0;
2269}
2270
2271int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2272{
2273        struct kvm_lapic *apic = vcpu->arch.apic;
2274
2275        if (!lapic_in_kernel(vcpu))
2276                return 1;
2277
2278        /* for an ICR write, store the destination (ICR2) before the command */
2279        if (reg == APIC_ICR)
2280                kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2281        return kvm_lapic_reg_write(apic, reg, (u32)data);
2282}
2283
2284int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2285{
2286        struct kvm_lapic *apic = vcpu->arch.apic;
2287        u32 low, high = 0;
2288
2289        if (!lapic_in_kernel(vcpu))
2290                return 1;
2291
2292        if (kvm_lapic_reg_read(apic, reg, 4, &low))
2293                return 1;
2294        if (reg == APIC_ICR)
2295                kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2296
2297        *data = (((u64)high) << 32) | low;
2298
2299        return 0;
2300}
2301
2302int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
2303{
2304        u64 addr = data & ~KVM_MSR_ENABLED;
2305        if (!IS_ALIGNED(addr, 4))
2306                return 1;
2307
2308        vcpu->arch.pv_eoi.msr_val = data;
2309        if (!pv_eoi_enabled(vcpu))
2310                return 0;
2311        return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
2312                                         addr, sizeof(u8));
2313}
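
/*
 * MSR layout handled above: bit 0 (KVM_MSR_ENABLED) turns PV EOI on and
 * the remaining bits are the guest-physical address of the 1-byte EOI
 * flag, which must therefore be at least 4-byte aligned.  Illustrative
 * guest-side setup: wrmsr(MSR_KVM_PV_EOI_EN, gpa | KVM_MSR_ENABLED).
 */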
2314
2315void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2316{
2317        struct kvm_lapic *apic = vcpu->arch.apic;
2318        u8 sipi_vector;
2319        unsigned long pe;
2320
2321        if (!lapic_in_kernel(vcpu) || !apic->pending_events)
2322                return;
2323
2324        /*
2325         * INITs are latched while in SMM.  Because an SMM CPU cannot
2326         * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
2327         * and delay processing of INIT until the next RSM.
2328         */
2329        if (is_smm(vcpu)) {
2330                WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
2331                if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
2332                        clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2333                return;
2334        }
2335
2336        pe = xchg(&apic->pending_events, 0);
2337        if (test_bit(KVM_APIC_INIT, &pe)) {
2338                kvm_lapic_reset(vcpu, true);
2339                kvm_vcpu_reset(vcpu, true);
2340                if (kvm_vcpu_is_bsp(apic->vcpu))
2341                        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2342                else
2343                        vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
2344        }
2345        if (test_bit(KVM_APIC_SIPI, &pe) &&
2346            vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
2347                /* evaluate pending_events before reading the vector */
2348                smp_rmb();
2349                sipi_vector = apic->sipi_vector;
2350                apic_debug("vcpu %d received sipi with vector # %x\n",
2351                         vcpu->vcpu_id, sipi_vector);
2352                kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
2353                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2354        }
2355}
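
/*
 * Illustrative flow (not part of this file): a BSP starts an AP with
 * INIT followed by SIPI(vector).  The INIT branch above parks the AP in
 * KVM_MP_STATE_INIT_RECEIVED; the SIPI branch then starts it in real
 * mode at physical address vector << 12 via
 * kvm_vcpu_deliver_sipi_vector().
 */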
2356
2357void kvm_lapic_init(void)
2358{
2359        /* do not patch jump label more than once per second */
2360        jump_label_rate_limit(&apic_hw_disabled, HZ);
2361        jump_label_rate_limit(&apic_sw_disabled, HZ);
2362}
2363