linux/arch/x86/kvm/lapic.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2
   3/*
   4 * Local APIC virtualization
   5 *
   6 * Copyright (C) 2006 Qumranet, Inc.
   7 * Copyright (C) 2007 Novell
   8 * Copyright (C) 2007 Intel
   9 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
  10 *
  11 * Authors:
  12 *   Dor Laor <dor.laor@qumranet.com>
  13 *   Gregory Haskins <ghaskins@novell.com>
  14 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
  15 *
  16 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
  17 */
  18
  19#include <linux/kvm_host.h>
  20#include <linux/kvm.h>
  21#include <linux/mm.h>
  22#include <linux/highmem.h>
  23#include <linux/smp.h>
  24#include <linux/hrtimer.h>
  25#include <linux/io.h>
  26#include <linux/export.h>
  27#include <linux/math64.h>
  28#include <linux/slab.h>
  29#include <asm/processor.h>
  30#include <asm/msr.h>
  31#include <asm/page.h>
  32#include <asm/current.h>
  33#include <asm/apicdef.h>
  34#include <asm/delay.h>
  35#include <linux/atomic.h>
  36#include <linux/jump_label.h>
  37#include "kvm_cache_regs.h"
  38#include "irq.h"
  39#include "trace.h"
  40#include "x86.h"
  41#include "cpuid.h"
  42#include "hyperv.h"
  43
  44#ifndef CONFIG_X86_64
  45#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
  46#else
  47#define mod_64(x, y) ((x) % (y))
  48#endif
  49
  50#define PRId64 "d"
  51#define PRIx64 "llx"
  52#define PRIu64 "u"
  53#define PRIo64 "o"
  54
  55/* 14 is the version for Xeon and Pentium 8.4.8 */
  56#define APIC_VERSION                    (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
  57#define LAPIC_MMIO_LENGTH               (1 << 12)
  58/* the following defines are not in apicdef.h */
  59#define APIC_SHORT_MASK                 0xc0000
  60#define APIC_DEST_NOSHORT               0x0
  61#define APIC_DEST_MASK                  0x800
  62#define MAX_APIC_VECTOR                 256
  63#define APIC_VECTORS_PER_REG            32
  64
  65#define APIC_BROADCAST                  0xFF
  66#define X2APIC_BROADCAST                0xFFFFFFFFul
  67
  68static bool lapic_timer_advance_dynamic __read_mostly;
  69#define LAPIC_TIMER_ADVANCE_ADJUST_MIN  100     /* clock cycles */
  70#define LAPIC_TIMER_ADVANCE_ADJUST_MAX  10000   /* clock cycles */
  71#define LAPIC_TIMER_ADVANCE_NS_INIT     1000
  72#define LAPIC_TIMER_ADVANCE_NS_MAX     5000
  73/* step-by-step approximation to mitigate fluctuation */
  74#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
  75
  76static inline int apic_test_vector(int vec, void *bitmap)
  77{
  78        return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  79}
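
/*
 * Illustration: the 256 vectors of IRR/ISR/TMR live in eight 32-bit
 * registers spaced 0x10 bytes apart, so REG_POS() (defined in lapic.h as
 * ((vec >> 5) << 4)) selects the register and VEC_POS() (vec & 31) the bit
 * within it.  For example, vector 0x31 (49) maps to byte offset 0x10 in the
 * register block, bit 17.
 */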
  80
  81bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
  82{
  83        struct kvm_lapic *apic = vcpu->arch.apic;
  84
  85        return apic_test_vector(vector, apic->regs + APIC_ISR) ||
  86                apic_test_vector(vector, apic->regs + APIC_IRR);
  87}
  88
  89static inline int __apic_test_and_set_vector(int vec, void *bitmap)
  90{
  91        return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  92}
  93
  94static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
  95{
  96        return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  97}
  98
  99struct static_key_deferred apic_hw_disabled __read_mostly;
 100struct static_key_deferred apic_sw_disabled __read_mostly;
 101
 102static inline int apic_enabled(struct kvm_lapic *apic)
 103{
  104        return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
 105}
 106
 107#define LVT_MASK        \
 108        (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
 109
 110#define LINT_MASK       \
 111        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 112         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 113
 114static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
 115{
 116        return apic->vcpu->vcpu_id;
 117}
 118
 119bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
 120{
 121        return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
 122}
 123EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);
 124
 125static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
 126{
 127        return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
 128}
 129
 130static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
 131                u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
 132        switch (map->mode) {
 133        case KVM_APIC_MODE_X2APIC: {
 134                u32 offset = (dest_id >> 16) * 16;
 135                u32 max_apic_id = map->max_apic_id;
 136
 137                if (offset <= max_apic_id) {
 138                        u8 cluster_size = min(max_apic_id - offset + 1, 16U);
 139
 140                        offset = array_index_nospec(offset, map->max_apic_id + 1);
 141                        *cluster = &map->phys_map[offset];
 142                        *mask = dest_id & (0xffff >> (16 - cluster_size));
 143                } else {
 144                        *mask = 0;
 145                }
 146
 147                return true;
 148                }
 149        case KVM_APIC_MODE_XAPIC_FLAT:
 150                *cluster = map->xapic_flat_map;
 151                *mask = dest_id & 0xff;
 152                return true;
 153        case KVM_APIC_MODE_XAPIC_CLUSTER:
 154                *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
 155                *mask = dest_id & 0xf;
 156                return true;
 157        default:
 158                /* Not optimized. */
 159                return false;
 160        }
 161}
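
/*
 * Worked example of the x2APIC case above: a logical dest_id of 0x00020008
 * (cluster 2, logical bit 3) yields offset = 2 * 16 = 32, so *cluster points
 * at phys_map[32] and, with a full 16-entry cluster, *mask = 0x0008, i.e.
 * the vCPU with x2APIC ID 0x23.
 */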
 162
 163static void kvm_apic_map_free(struct rcu_head *rcu)
 164{
 165        struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
 166
 167        kvfree(map);
 168}
 169
 170static void recalculate_apic_map(struct kvm *kvm)
 171{
 172        struct kvm_apic_map *new, *old = NULL;
 173        struct kvm_vcpu *vcpu;
 174        int i;
 175        u32 max_id = 255; /* enough space for any xAPIC ID */
 176
 177        mutex_lock(&kvm->arch.apic_map_lock);
 178
 179        kvm_for_each_vcpu(i, vcpu, kvm)
 180                if (kvm_apic_present(vcpu))
 181                        max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
 182
 183        new = kvzalloc(sizeof(struct kvm_apic_map) +
 184                           sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
 185                           GFP_KERNEL_ACCOUNT);
 186
 187        if (!new)
 188                goto out;
 189
 190        new->max_apic_id = max_id;
 191
 192        kvm_for_each_vcpu(i, vcpu, kvm) {
 193                struct kvm_lapic *apic = vcpu->arch.apic;
 194                struct kvm_lapic **cluster;
 195                u16 mask;
 196                u32 ldr;
 197                u8 xapic_id;
 198                u32 x2apic_id;
 199
 200                if (!kvm_apic_present(vcpu))
 201                        continue;
 202
 203                xapic_id = kvm_xapic_id(apic);
 204                x2apic_id = kvm_x2apic_id(apic);
 205
 206                /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
 207                if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
 208                                x2apic_id <= new->max_apic_id)
 209                        new->phys_map[x2apic_id] = apic;
 210                /*
 211                 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
 212                 * prevent them from masking VCPUs with APIC ID <= 0xff.
 213                 */
 214                if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
 215                        new->phys_map[xapic_id] = apic;
 216
 217                if (!kvm_apic_sw_enabled(apic))
 218                        continue;
 219
 220                ldr = kvm_lapic_get_reg(apic, APIC_LDR);
 221
 222                if (apic_x2apic_mode(apic)) {
 223                        new->mode |= KVM_APIC_MODE_X2APIC;
 224                } else if (ldr) {
 225                        ldr = GET_APIC_LOGICAL_ID(ldr);
 226                        if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
 227                                new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
 228                        else
 229                                new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
 230                }
 231
 232                if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
 233                        continue;
 234
 235                if (mask)
 236                        cluster[ffs(mask) - 1] = apic;
 237        }
 238out:
 239        old = rcu_dereference_protected(kvm->arch.apic_map,
 240                        lockdep_is_held(&kvm->arch.apic_map_lock));
 241        rcu_assign_pointer(kvm->arch.apic_map, new);
 242        mutex_unlock(&kvm->arch.apic_map_lock);
 243
 244        if (old)
 245                call_rcu(&old->rcu, kvm_apic_map_free);
 246
 247        kvm_make_scan_ioapic_request(kvm);
 248}
 249
 250static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 251{
 252        bool enabled = val & APIC_SPIV_APIC_ENABLED;
 253
 254        kvm_lapic_set_reg(apic, APIC_SPIV, val);
 255
 256        if (enabled != apic->sw_enabled) {
 257                apic->sw_enabled = enabled;
 258                if (enabled)
 259                        static_key_slow_dec_deferred(&apic_sw_disabled);
 260                else
 261                        static_key_slow_inc(&apic_sw_disabled.key);
 262
 263                recalculate_apic_map(apic->vcpu->kvm);
 264        }
 265}
 266
 267static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
 268{
 269        kvm_lapic_set_reg(apic, APIC_ID, id << 24);
 270        recalculate_apic_map(apic->vcpu->kvm);
 271}
 272
 273static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
 274{
 275        kvm_lapic_set_reg(apic, APIC_LDR, id);
 276        recalculate_apic_map(apic->vcpu->kvm);
 277}
 278
 279static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
 280{
 281        return ((id >> 4) << 16) | (1 << (id & 0xf));
 282}
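
/*
 * For illustration: the x2APIC LDR is derived from the APIC ID with the
 * cluster ID in bits 31:16 and a one-hot logical ID in bits 15:0.  An id of
 * 0x23 gives ((0x23 >> 4) << 16) | (1 << (0x23 & 0xf)) = 0x00020008,
 * i.e. cluster 2, logical bit 3.
 */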
 283
 284static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
 285{
 286        u32 ldr = kvm_apic_calc_x2apic_ldr(id);
 287
 288        WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
 289
 290        kvm_lapic_set_reg(apic, APIC_ID, id);
 291        kvm_lapic_set_reg(apic, APIC_LDR, ldr);
 292        recalculate_apic_map(apic->vcpu->kvm);
 293}
 294
 295static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 296{
 297        return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
 298}
 299
 300static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 301{
 302        return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
 303}
 304
 305static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 306{
 307        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
 308}
 309
 310static inline int apic_lvtt_period(struct kvm_lapic *apic)
 311{
 312        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
 313}
 314
 315static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 316{
 317        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
 318}
 319
 320static inline int apic_lvt_nmi_mode(u32 lvt_val)
 321{
 322        return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
 323}
 324
 325void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 326{
 327        struct kvm_lapic *apic = vcpu->arch.apic;
 328        struct kvm_cpuid_entry2 *feat;
 329        u32 v = APIC_VERSION;
 330
 331        if (!lapic_in_kernel(vcpu))
 332                return;
 333
 334        /*
  335         * KVM's in-kernel IOAPIC emulates the 82093AA datasheet, which has no
  336         * EOI register.  Some buggy OSes (e.g. Windows with the Hyper-V role)
  337         * disable EOI broadcast in the LAPIC without checking the IOAPIC
  338         * version first, so level-triggered interrupts never get EOIed in the
  339         * IOAPIC.
 340         */
 341        feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
 342        if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
 343            !ioapic_in_kernel(vcpu->kvm))
 344                v |= APIC_LVR_DIRECTED_EOI;
 345        kvm_lapic_set_reg(apic, APIC_LVR, v);
 346}
 347
 348static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
 349        LVT_MASK,       /* partial LVTT mask; timer mode mask added at runtime */
 350        LVT_MASK | APIC_MODE_MASK,      /* LVTTHMR */
 351        LVT_MASK | APIC_MODE_MASK,      /* LVTPC */
 352        LINT_MASK, LINT_MASK,   /* LVT0-1 */
 353        LVT_MASK                /* LVTERR */
 354};
 355
 356static int find_highest_vector(void *bitmap)
 357{
 358        int vec;
 359        u32 *reg;
 360
 361        for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
 362             vec >= 0; vec -= APIC_VECTORS_PER_REG) {
 363                reg = bitmap + REG_POS(vec);
 364                if (*reg)
 365                        return __fls(*reg) + vec;
 366        }
 367
 368        return -1;
 369}
 370
 371static u8 count_vectors(void *bitmap)
 372{
 373        int vec;
 374        u32 *reg;
 375        u8 count = 0;
 376
 377        for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
 378                reg = bitmap + REG_POS(vec);
 379                count += hweight32(*reg);
 380        }
 381
 382        return count;
 383}
 384
 385bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
 386{
 387        u32 i, vec;
 388        u32 pir_val, irr_val, prev_irr_val;
 389        int max_updated_irr;
 390
 391        max_updated_irr = -1;
 392        *max_irr = -1;
 393
 394        for (i = vec = 0; i <= 7; i++, vec += 32) {
 395                pir_val = READ_ONCE(pir[i]);
 396                irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
 397                if (pir_val) {
 398                        prev_irr_val = irr_val;
 399                        irr_val |= xchg(&pir[i], 0);
 400                        *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
 401                        if (prev_irr_val != irr_val) {
 402                                max_updated_irr =
 403                                        __fls(irr_val ^ prev_irr_val) + vec;
 404                        }
 405                }
 406                if (irr_val)
 407                        *max_irr = __fls(irr_val) + vec;
 408        }
 409
 410        return ((max_updated_irr != -1) &&
 411                (max_updated_irr == *max_irr));
 412}
 413EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
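
/*
 * Example of the PIR->IRR merge above: if pir[1] == 0x100 (vector 40) and
 * the corresponding IRR word was previously empty, the xchg() clears the
 * PIR word, the IRR gains bit 8 of word 1, and both *max_irr and the
 * highest updated vector become 40, so the function returns true.
 */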
 414
 415bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
 416{
 417        struct kvm_lapic *apic = vcpu->arch.apic;
 418
 419        return __kvm_apic_update_irr(pir, apic->regs, max_irr);
 420}
 421EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 422
 423static inline int apic_search_irr(struct kvm_lapic *apic)
 424{
 425        return find_highest_vector(apic->regs + APIC_IRR);
 426}
 427
 428static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 429{
 430        int result;
 431
 432        /*
  433         * Note that irr_pending is just a hint. It will always be
  434         * true with virtual interrupt delivery enabled.
 435         */
 436        if (!apic->irr_pending)
 437                return -1;
 438
 439        result = apic_search_irr(apic);
 440        ASSERT(result == -1 || result >= 16);
 441
 442        return result;
 443}
 444
 445static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 446{
 447        struct kvm_vcpu *vcpu;
 448
 449        vcpu = apic->vcpu;
 450
 451        if (unlikely(vcpu->arch.apicv_active)) {
 452                /* need to update RVI */
 453                kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
 454                kvm_x86_ops->hwapic_irr_update(vcpu,
 455                                apic_find_highest_irr(apic));
 456        } else {
 457                apic->irr_pending = false;
 458                kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
 459                if (apic_search_irr(apic) != -1)
 460                        apic->irr_pending = true;
 461        }
 462}
 463
 464static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
 465{
 466        struct kvm_vcpu *vcpu;
 467
 468        if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
 469                return;
 470
 471        vcpu = apic->vcpu;
 472
 473        /*
 474         * With APIC virtualization enabled, all caching is disabled
 475         * because the processor can modify ISR under the hood.  Instead
 476         * just set SVI.
 477         */
 478        if (unlikely(vcpu->arch.apicv_active))
 479                kvm_x86_ops->hwapic_isr_update(vcpu, vec);
 480        else {
 481                ++apic->isr_count;
 482                BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
 483                /*
  484                 * The ISR (in-service register) bit is set when an interrupt
  485                 * is injected.  Only the highest vector is injected, so the
  486                 * latest bit set matches the highest bit in ISR.
 487                 */
 488                apic->highest_isr_cache = vec;
 489        }
 490}
 491
 492static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 493{
 494        int result;
 495
 496        /*
 497         * Note that isr_count is always 1, and highest_isr_cache
 498         * is always -1, with APIC virtualization enabled.
 499         */
 500        if (!apic->isr_count)
 501                return -1;
 502        if (likely(apic->highest_isr_cache != -1))
 503                return apic->highest_isr_cache;
 504
 505        result = find_highest_vector(apic->regs + APIC_ISR);
 506        ASSERT(result == -1 || result >= 16);
 507
 508        return result;
 509}
 510
 511static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 512{
 513        struct kvm_vcpu *vcpu;
 514        if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
 515                return;
 516
 517        vcpu = apic->vcpu;
 518
 519        /*
 520         * We do get here for APIC virtualization enabled if the guest
 521         * uses the Hyper-V APIC enlightenment.  In this case we may need
 522         * to trigger a new interrupt delivery by writing the SVI field;
 523         * on the other hand isr_count and highest_isr_cache are unused
 524         * and must be left alone.
 525         */
 526        if (unlikely(vcpu->arch.apicv_active))
 527                kvm_x86_ops->hwapic_isr_update(vcpu,
 528                                               apic_find_highest_isr(apic));
 529        else {
 530                --apic->isr_count;
 531                BUG_ON(apic->isr_count < 0);
 532                apic->highest_isr_cache = -1;
 533        }
 534}
 535
 536int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 537{
 538        /* This may race with setting of irr in __apic_accept_irq() and
  539         * the value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
  540         * will cause a vmexit immediately and the value will be recalculated
 541         * on the next vmentry.
 542         */
 543        return apic_find_highest_irr(vcpu->arch.apic);
 544}
 545EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 546
 547static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 548                             int vector, int level, int trig_mode,
 549                             struct dest_map *dest_map);
 550
 551int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 552                     struct dest_map *dest_map)
 553{
 554        struct kvm_lapic *apic = vcpu->arch.apic;
 555
 556        return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
 557                        irq->level, irq->trig_mode, dest_map);
 558}
 559
 560static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
 561                         struct kvm_lapic_irq *irq, u32 min)
 562{
 563        int i, count = 0;
 564        struct kvm_vcpu *vcpu;
 565
 566        if (min > map->max_apic_id)
 567                return 0;
 568
 569        for_each_set_bit(i, ipi_bitmap,
 570                min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
 571                if (map->phys_map[min + i]) {
 572                        vcpu = map->phys_map[min + i]->vcpu;
 573                        count += kvm_apic_set_irq(vcpu, irq, NULL);
 574                }
 575        }
 576
 577        return count;
 578}
 579
 580int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 581                    unsigned long ipi_bitmap_high, u32 min,
 582                    unsigned long icr, int op_64_bit)
 583{
 584        struct kvm_apic_map *map;
 585        struct kvm_lapic_irq irq = {0};
 586        int cluster_size = op_64_bit ? 64 : 32;
 587        int count;
 588
 589        if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
 590                return -KVM_EINVAL;
 591
 592        irq.vector = icr & APIC_VECTOR_MASK;
 593        irq.delivery_mode = icr & APIC_MODE_MASK;
 594        irq.level = (icr & APIC_INT_ASSERT) != 0;
 595        irq.trig_mode = icr & APIC_INT_LEVELTRIG;
 596
 597        rcu_read_lock();
 598        map = rcu_dereference(kvm->arch.apic_map);
 599
 600        count = -EOPNOTSUPP;
 601        if (likely(map)) {
 602                count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
 603                min += cluster_size;
 604                count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
 605        }
 606
 607        rcu_read_unlock();
 608        return count;
 609}
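
/*
 * Sketch of the addressing used above: for a 64-bit hypercall with min = 0,
 * bit i of ipi_bitmap_low targets the vCPU whose APIC ID is i, and bit j of
 * ipi_bitmap_high targets APIC ID 64 + j, since min is advanced by
 * cluster_size between the two __pv_send_ipi() calls.
 */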
 610
 611static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
 612{
 613
 614        return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
 615                                      sizeof(val));
 616}
 617
 618static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
 619{
 620
 621        return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
 622                                      sizeof(*val));
 623}
 624
 625static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
 626{
 627        return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
 628}
 629
 630static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 631{
  632        u8 val = 0;
 633        if (pv_eoi_get_user(vcpu, &val) < 0)
 634                printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
 635                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 636        return val & 0x1;
 637}
 638
 639static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 640{
 641        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
 642                printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
 643                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 644                return;
 645        }
 646        __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 647}
 648
 649static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 650{
 651        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
 652                printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
 653                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 654                return;
 655        }
 656        __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 657}
 658
 659static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
 660{
 661        int highest_irr;
 662        if (apic->vcpu->arch.apicv_active)
 663                highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
 664        else
 665                highest_irr = apic_find_highest_irr(apic);
 666        if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
 667                return -1;
 668        return highest_irr;
 669}
 670
 671static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
 672{
 673        u32 tpr, isrv, ppr, old_ppr;
 674        int isr;
 675
 676        old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
 677        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
 678        isr = apic_find_highest_isr(apic);
 679        isrv = (isr != -1) ? isr : 0;
 680
 681        if ((tpr & 0xf0) >= (isrv & 0xf0))
 682                ppr = tpr & 0xff;
 683        else
 684                ppr = isrv & 0xf0;
 685
 686        *new_ppr = ppr;
 687        if (old_ppr != ppr)
 688                kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
 689
 690        return ppr < old_ppr;
 691}
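
/*
 * Worked example: with TPR = 0x20 and highest in-service vector 0x31, the
 * ISR class 0x30 exceeds the TPR class 0x20, so PPR becomes 0x30 and only
 * vectors of class 0x40 and above can preempt (see apic_has_interrupt_for_ppr).
 */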
 692
 693static void apic_update_ppr(struct kvm_lapic *apic)
 694{
 695        u32 ppr;
 696
 697        if (__apic_update_ppr(apic, &ppr) &&
 698            apic_has_interrupt_for_ppr(apic, ppr) != -1)
 699                kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 700}
 701
 702void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
 703{
 704        apic_update_ppr(vcpu->arch.apic);
 705}
 706EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
 707
 708static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 709{
 710        kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
 711        apic_update_ppr(apic);
 712}
 713
 714static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
 715{
 716        return mda == (apic_x2apic_mode(apic) ?
 717                        X2APIC_BROADCAST : APIC_BROADCAST);
 718}
 719
 720static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
 721{
 722        if (kvm_apic_broadcast(apic, mda))
 723                return true;
 724
 725        if (apic_x2apic_mode(apic))
 726                return mda == kvm_x2apic_id(apic);
 727
 728        /*
 729         * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
 730         * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
 731         * this allows unique addressing of VCPUs with APIC ID over 0xff.
  732         * The 0xff condition is needed because the xAPIC ID is writeable.
 733         */
 734        if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
 735                return true;
 736
 737        return mda == kvm_xapic_id(apic);
 738}
 739
 740static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 741{
 742        u32 logical_id;
 743
 744        if (kvm_apic_broadcast(apic, mda))
 745                return true;
 746
 747        logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
 748
 749        if (apic_x2apic_mode(apic))
 750                return ((logical_id >> 16) == (mda >> 16))
 751                       && (logical_id & mda & 0xffff) != 0;
 752
 753        logical_id = GET_APIC_LOGICAL_ID(logical_id);
 754
 755        switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
 756        case APIC_DFR_FLAT:
 757                return (logical_id & mda) != 0;
 758        case APIC_DFR_CLUSTER:
 759                return ((logical_id >> 4) == (mda >> 4))
 760                       && (logical_id & mda & 0xf) != 0;
 761        default:
 762                return false;
 763        }
 764}
 765
 766/* The KVM local APIC implementation has two quirks:
 767 *
 768 *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 769 *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 770 *    KVM doesn't do that aliasing.
 771 *
 772 *  - in-kernel IOAPIC messages have to be delivered directly to
 773 *    x2APIC, because the kernel does not support interrupt remapping.
 774 *    In order to support broadcast without interrupt remapping, x2APIC
 775 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
 776 *    to X2APIC_BROADCAST.
 777 *
 778 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
 779 * important when userspace wants to use x2APIC-format MSIs, because
 780 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
 781 */
 782static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
 783                struct kvm_lapic *source, struct kvm_lapic *target)
 784{
 785        bool ipi = source != NULL;
 786
 787        if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
 788            !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
 789                return X2APIC_BROADCAST;
 790
 791        return dest_id;
 792}
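
/*
 * Example of the quirk described above: an IOAPIC/MSI message (source ==
 * NULL) with destination 0xff aimed at a vCPU in x2APIC mode has its MDA
 * rewritten to X2APIC_BROADCAST, so it is still treated as a broadcast.
 * With KVM_CAP_X2APIC_API's quirk disable, 0xff is left alone and can
 * address logical cluster 0, CPUs 0-7.
 */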
 793
 794bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 795                           int short_hand, unsigned int dest, int dest_mode)
 796{
 797        struct kvm_lapic *target = vcpu->arch.apic;
 798        u32 mda = kvm_apic_mda(vcpu, dest, source, target);
 799
 800        ASSERT(target);
 801        switch (short_hand) {
 802        case APIC_DEST_NOSHORT:
 803                if (dest_mode == APIC_DEST_PHYSICAL)
 804                        return kvm_apic_match_physical_addr(target, mda);
 805                else
 806                        return kvm_apic_match_logical_addr(target, mda);
 807        case APIC_DEST_SELF:
 808                return target == source;
 809        case APIC_DEST_ALLINC:
 810                return true;
 811        case APIC_DEST_ALLBUT:
 812                return target != source;
 813        default:
 814                return false;
 815        }
 816}
 817EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
 818
 819int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
 820                       const unsigned long *bitmap, u32 bitmap_size)
 821{
 822        u32 mod;
 823        int i, idx = -1;
 824
 825        mod = vector % dest_vcpus;
 826
 827        for (i = 0; i <= mod; i++) {
 828                idx = find_next_bit(bitmap, bitmap_size, idx + 1);
 829                BUG_ON(idx == bitmap_size);
 830        }
 831
 832        return idx;
 833}
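
/*
 * Vector hashing example: with vector 0x31 (49) and three destination
 * vCPUs, mod = 49 % 3 = 1, so the loop returns the index of the second set
 * bit in *bitmap, and that vCPU receives the lowest-priority interrupt.
 */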
 834
 835static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
 836{
 837        if (!kvm->arch.disabled_lapic_found) {
 838                kvm->arch.disabled_lapic_found = true;
 839                printk(KERN_INFO
 840                       "Disabled LAPIC found during irq injection\n");
 841        }
 842}
 843
 844static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
 845                struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
 846{
 847        if (kvm->arch.x2apic_broadcast_quirk_disabled) {
 848                if ((irq->dest_id == APIC_BROADCAST &&
 849                                map->mode != KVM_APIC_MODE_X2APIC))
 850                        return true;
 851                if (irq->dest_id == X2APIC_BROADCAST)
 852                        return true;
 853        } else {
 854                bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
 855                if (irq->dest_id == (x2apic_ipi ?
 856                                     X2APIC_BROADCAST : APIC_BROADCAST))
 857                        return true;
 858        }
 859
 860        return false;
 861}
 862
 863/* Return true if the interrupt can be handled by using *bitmap as index mask
 864 * for valid destinations in *dst array.
 865 * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
 866 * Note: we may have zero kvm_lapic destinations when we return true, which
 867 * means that the interrupt should be dropped.  In this case, *bitmap would be
 868 * zero and *dst undefined.
 869 */
 870static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
 871                struct kvm_lapic **src, struct kvm_lapic_irq *irq,
 872                struct kvm_apic_map *map, struct kvm_lapic ***dst,
 873                unsigned long *bitmap)
 874{
 875        int i, lowest;
 876
 877        if (irq->shorthand == APIC_DEST_SELF && src) {
 878                *dst = src;
 879                *bitmap = 1;
 880                return true;
 881        } else if (irq->shorthand)
 882                return false;
 883
 884        if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
 885                return false;
 886
 887        if (irq->dest_mode == APIC_DEST_PHYSICAL) {
 888                if (irq->dest_id > map->max_apic_id) {
 889                        *bitmap = 0;
 890                } else {
 891                        u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
 892                        *dst = &map->phys_map[dest_id];
 893                        *bitmap = 1;
 894                }
 895                return true;
 896        }
 897
 898        *bitmap = 0;
 899        if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
 900                                (u16 *)bitmap))
 901                return false;
 902
 903        if (!kvm_lowest_prio_delivery(irq))
 904                return true;
 905
 906        if (!kvm_vector_hashing_enabled()) {
 907                lowest = -1;
 908                for_each_set_bit(i, bitmap, 16) {
 909                        if (!(*dst)[i])
 910                                continue;
 911                        if (lowest < 0)
 912                                lowest = i;
 913                        else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
 914                                                (*dst)[lowest]->vcpu) < 0)
 915                                lowest = i;
 916                }
 917        } else {
 918                if (!*bitmap)
 919                        return true;
 920
 921                lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
 922                                bitmap, 16);
 923
 924                if (!(*dst)[lowest]) {
 925                        kvm_apic_disabled_lapic_found(kvm);
 926                        *bitmap = 0;
 927                        return true;
 928                }
 929        }
 930
 931        *bitmap = (lowest >= 0) ? 1 << lowest : 0;
 932
 933        return true;
 934}
 935
 936bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 937                struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
 938{
 939        struct kvm_apic_map *map;
 940        unsigned long bitmap;
 941        struct kvm_lapic **dst = NULL;
 942        int i;
 943        bool ret;
 944
 945        *r = -1;
 946
 947        if (irq->shorthand == APIC_DEST_SELF) {
 948                *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
 949                return true;
 950        }
 951
 952        rcu_read_lock();
 953        map = rcu_dereference(kvm->arch.apic_map);
 954
 955        ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
 956        if (ret) {
 957                *r = 0;
 958                for_each_set_bit(i, &bitmap, 16) {
 959                        if (!dst[i])
 960                                continue;
 961                        *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 962                }
 963        }
 964
 965        rcu_read_unlock();
 966        return ret;
 967}
 968
 969/*
 970 * This routine tries to handle interrupts in posted mode; here is how
 971 * it deals with different cases:
 972 * - For single-destination interrupts, handle them in posted mode.
 973 * - Else if vector hashing is enabled and it is a lowest-priority
 974 *   interrupt, handle it in posted mode and use the following mechanism
 975 *   to find the destination vCPU.
 976 *      1. For lowest-priority interrupts, store all the possible
 977 *         destination vCPUs in an array.
 978 *      2. Use "guest vector % max number of destination vCPUs" to find
 979 *         the right destination vCPU in the array for the lowest-priority
 980 *         interrupt.
 981 * - Otherwise, use remapped mode to inject the interrupt.
 982 */
 983bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 984                        struct kvm_vcpu **dest_vcpu)
 985{
 986        struct kvm_apic_map *map;
 987        unsigned long bitmap;
 988        struct kvm_lapic **dst = NULL;
 989        bool ret = false;
 990
 991        if (irq->shorthand)
 992                return false;
 993
 994        rcu_read_lock();
 995        map = rcu_dereference(kvm->arch.apic_map);
 996
 997        if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
 998                        hweight16(bitmap) == 1) {
 999                unsigned long i = find_first_bit(&bitmap, 16);
1000
1001                if (dst[i]) {
1002                        *dest_vcpu = dst[i]->vcpu;
1003                        ret = true;
1004                }
1005        }
1006
1007        rcu_read_unlock();
1008        return ret;
1009}
1010
1011/*
1012 * Add a pending IRQ into lapic.
1013 * Return 1 if successfully added and 0 if discarded.
1014 */
1015static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1016                             int vector, int level, int trig_mode,
1017                             struct dest_map *dest_map)
1018{
1019        int result = 0;
1020        struct kvm_vcpu *vcpu = apic->vcpu;
1021
1022        trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
1023                                  trig_mode, vector);
1024        switch (delivery_mode) {
1025        case APIC_DM_LOWEST:
1026                vcpu->arch.apic_arb_prio++;
1027                /* fall through */
1028        case APIC_DM_FIXED:
1029                if (unlikely(trig_mode && !level))
1030                        break;
1031
1032                /* FIXME add logic for vcpu on reset */
1033                if (unlikely(!apic_enabled(apic)))
1034                        break;
1035
1036                result = 1;
1037
1038                if (dest_map) {
1039                        __set_bit(vcpu->vcpu_id, dest_map->map);
1040                        dest_map->vectors[vcpu->vcpu_id] = vector;
1041                }
1042
1043                if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
1044                        if (trig_mode)
1045                                kvm_lapic_set_vector(vector,
1046                                                     apic->regs + APIC_TMR);
1047                        else
1048                                kvm_lapic_clear_vector(vector,
1049                                                       apic->regs + APIC_TMR);
1050                }
1051
1052                if (vcpu->arch.apicv_active)
1053                        kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
1054                else {
1055                        kvm_lapic_set_irr(vector, apic);
1056
1057                        kvm_make_request(KVM_REQ_EVENT, vcpu);
1058                        kvm_vcpu_kick(vcpu);
1059                }
1060                break;
1061
1062        case APIC_DM_REMRD:
1063                result = 1;
1064                vcpu->arch.pv.pv_unhalted = 1;
1065                kvm_make_request(KVM_REQ_EVENT, vcpu);
1066                kvm_vcpu_kick(vcpu);
1067                break;
1068
1069        case APIC_DM_SMI:
1070                result = 1;
1071                kvm_make_request(KVM_REQ_SMI, vcpu);
1072                kvm_vcpu_kick(vcpu);
1073                break;
1074
1075        case APIC_DM_NMI:
1076                result = 1;
1077                kvm_inject_nmi(vcpu);
1078                kvm_vcpu_kick(vcpu);
1079                break;
1080
1081        case APIC_DM_INIT:
1082                if (!trig_mode || level) {
1083                        result = 1;
1084                        /* assumes that there are only KVM_APIC_INIT/SIPI */
1085                        apic->pending_events = (1UL << KVM_APIC_INIT);
1086                        /* make sure pending_events is visible before sending
1087                         * the request */
1088                        smp_wmb();
1089                        kvm_make_request(KVM_REQ_EVENT, vcpu);
1090                        kvm_vcpu_kick(vcpu);
1091                }
1092                break;
1093
1094        case APIC_DM_STARTUP:
1095                result = 1;
1096                apic->sipi_vector = vector;
1097                /* make sure sipi_vector is visible for the receiver */
1098                smp_wmb();
1099                set_bit(KVM_APIC_SIPI, &apic->pending_events);
1100                kvm_make_request(KVM_REQ_EVENT, vcpu);
1101                kvm_vcpu_kick(vcpu);
1102                break;
1103
1104        case APIC_DM_EXTINT:
1105                /*
1106                 * Should only be called by kvm_apic_local_deliver() with LVT0,
1107                 * before NMI watchdog was enabled. Already handled by
1108                 * kvm_apic_accept_pic_intr().
1109                 */
1110                break;
1111
1112        default:
1113                printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1114                       delivery_mode);
1115                break;
1116        }
1117        return result;
1118}
1119
1120/*
1121 * This routine identifies the destination vcpus mask meant to receive the
1122 * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find
1123 * the destination vcpus array and set the bitmap, or it traverses each
1124 * available vcpu to identify them.
1125 */
1126void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
1127                              unsigned long *vcpu_bitmap)
1128{
1129        struct kvm_lapic **dest_vcpu = NULL;
1130        struct kvm_lapic *src = NULL;
1131        struct kvm_apic_map *map;
1132        struct kvm_vcpu *vcpu;
1133        unsigned long bitmap;
1134        int i, vcpu_idx;
1135        bool ret;
1136
1137        rcu_read_lock();
1138        map = rcu_dereference(kvm->arch.apic_map);
1139
1140        ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
1141                                          &bitmap);
1142        if (ret) {
1143                for_each_set_bit(i, &bitmap, 16) {
1144                        if (!dest_vcpu[i])
1145                                continue;
1146                        vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
1147                        __set_bit(vcpu_idx, vcpu_bitmap);
1148                }
1149        } else {
1150                kvm_for_each_vcpu(i, vcpu, kvm) {
1151                        if (!kvm_apic_present(vcpu))
1152                                continue;
1153                        if (!kvm_apic_match_dest(vcpu, NULL,
1154                                                 irq->delivery_mode,
1155                                                 irq->dest_id,
1156                                                 irq->dest_mode))
1157                                continue;
1158                        __set_bit(i, vcpu_bitmap);
1159                }
1160        }
1161        rcu_read_unlock();
1162}
1163
1164int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1165{
1166        return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1167}
1168
1169static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1170{
1171        return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1172}
1173
1174static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1175{
1176        int trigger_mode;
1177
1178        /* Forward the EOI to the ioapic only if the ioapic handles the vector. */
1179        if (!kvm_ioapic_handles_vector(apic, vector))
1180                return;
1181
1182        /* Request a KVM exit to inform the userspace IOAPIC. */
1183        if (irqchip_split(apic->vcpu->kvm)) {
1184                apic->vcpu->arch.pending_ioapic_eoi = vector;
1185                kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1186                return;
1187        }
1188
1189        if (apic_test_vector(vector, apic->regs + APIC_TMR))
1190                trigger_mode = IOAPIC_LEVEL_TRIG;
1191        else
1192                trigger_mode = IOAPIC_EDGE_TRIG;
1193
1194        kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1195}
1196
1197static int apic_set_eoi(struct kvm_lapic *apic)
1198{
1199        int vector = apic_find_highest_isr(apic);
1200
1201        trace_kvm_eoi(apic, vector);
1202
1203        /*
1204         * Not every EOI write has a corresponding ISR bit;
1205         * one example is when the kernel checks the timer in setup_IO_APIC.
1206         */
1207        if (vector == -1)
1208                return vector;
1209
1210        apic_clear_isr(vector, apic);
1211        apic_update_ppr(apic);
1212
1213        if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
1214                kvm_hv_synic_send_eoi(apic->vcpu, vector);
1215
1216        kvm_ioapic_send_eoi(apic, vector);
1217        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1218        return vector;
1219}
1220
1221/*
1222 * this interface assumes a trap-like exit, which has already finished
1223 * desired side effect including vISR and vPPR update.
1224 */
1225void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1226{
1227        struct kvm_lapic *apic = vcpu->arch.apic;
1228
1229        trace_kvm_eoi(apic, vector);
1230
1231        kvm_ioapic_send_eoi(apic, vector);
1232        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1233}
1234EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1235
1236static void apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
1237{
1238        struct kvm_lapic_irq irq;
1239
1240        irq.vector = icr_low & APIC_VECTOR_MASK;
1241        irq.delivery_mode = icr_low & APIC_MODE_MASK;
1242        irq.dest_mode = icr_low & APIC_DEST_MASK;
1243        irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1244        irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1245        irq.shorthand = icr_low & APIC_SHORT_MASK;
1246        irq.msi_redir_hint = false;
1247        if (apic_x2apic_mode(apic))
1248                irq.dest_id = icr_high;
1249        else
1250                irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
1251
1252        trace_kvm_apic_ipi(icr_low, irq.dest_id);
1253
1254        kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1255}
1256
1257static u32 apic_get_tmcct(struct kvm_lapic *apic)
1258{
1259        ktime_t remaining, now;
1260        s64 ns;
1261        u32 tmcct;
1262
1263        ASSERT(apic != NULL);
1264
1265        /* if initial count is 0, current count should also be 0 */
1266        if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1267                apic->lapic_timer.period == 0)
1268                return 0;
1269
1270        now = ktime_get();
1271        remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1272        if (ktime_to_ns(remaining) < 0)
1273                remaining = 0;
1274
1275        ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1276        tmcct = div64_u64(ns,
1277                         (APIC_BUS_CYCLE_NS * apic->divide_count));
1278
1279        return tmcct;
1280}
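
/*
 * For example, with a divide_count of 16 and APIC_BUS_CYCLE_NS of 1 (see
 * lapic.h), 1600 ns of remaining time reads back as a current count of 100.
 */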
1281
1282static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1283{
1284        struct kvm_vcpu *vcpu = apic->vcpu;
1285        struct kvm_run *run = vcpu->run;
1286
1287        kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1288        run->tpr_access.rip = kvm_rip_read(vcpu);
1289        run->tpr_access.is_write = write;
1290}
1291
1292static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1293{
1294        if (apic->vcpu->arch.tpr_access_reporting)
1295                __report_tpr_access(apic, write);
1296}
1297
1298static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1299{
1300        u32 val = 0;
1301
1302        if (offset >= LAPIC_MMIO_LENGTH)
1303                return 0;
1304
1305        switch (offset) {
1306        case APIC_ARBPRI:
1307                break;
1308
1309        case APIC_TMCCT:        /* Timer CCR */
1310                if (apic_lvtt_tscdeadline(apic))
1311                        return 0;
1312
1313                val = apic_get_tmcct(apic);
1314                break;
1315        case APIC_PROCPRI:
1316                apic_update_ppr(apic);
1317                val = kvm_lapic_get_reg(apic, offset);
1318                break;
1319        case APIC_TASKPRI:
1320                report_tpr_access(apic, false);
1321                /* fall thru */
1322        default:
1323                val = kvm_lapic_get_reg(apic, offset);
1324                break;
1325        }
1326
1327        return val;
1328}
1329
1330static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1331{
1332        return container_of(dev, struct kvm_lapic, dev);
1333}
1334
1335#define APIC_REG_MASK(reg)      (1ull << ((reg) >> 4))
1336#define APIC_REGS_MASK(first, count) \
1337        (APIC_REG_MASK(first) * ((1ull << (count)) - 1))
1338
1339int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1340                void *data)
1341{
1342        unsigned char alignment = offset & 0xf;
1343        u32 result;
1344        /* this bitmask has a bit cleared for each reserved register */
1345        u64 valid_reg_mask =
1346                APIC_REG_MASK(APIC_ID) |
1347                APIC_REG_MASK(APIC_LVR) |
1348                APIC_REG_MASK(APIC_TASKPRI) |
1349                APIC_REG_MASK(APIC_PROCPRI) |
1350                APIC_REG_MASK(APIC_LDR) |
1351                APIC_REG_MASK(APIC_DFR) |
1352                APIC_REG_MASK(APIC_SPIV) |
1353                APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1354                APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1355                APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1356                APIC_REG_MASK(APIC_ESR) |
1357                APIC_REG_MASK(APIC_ICR) |
1358                APIC_REG_MASK(APIC_ICR2) |
1359                APIC_REG_MASK(APIC_LVTT) |
1360                APIC_REG_MASK(APIC_LVTTHMR) |
1361                APIC_REG_MASK(APIC_LVTPC) |
1362                APIC_REG_MASK(APIC_LVT0) |
1363                APIC_REG_MASK(APIC_LVT1) |
1364                APIC_REG_MASK(APIC_LVTERR) |
1365                APIC_REG_MASK(APIC_TMICT) |
1366                APIC_REG_MASK(APIC_TMCCT) |
1367                APIC_REG_MASK(APIC_TDCR);
1368
1369        /* ARBPRI is not valid on x2APIC */
1370        if (!apic_x2apic_mode(apic))
1371                valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
1372
1373        if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
1374                return 1;
1375
1376        result = __apic_read(apic, offset & ~0xf);
1377
1378        trace_kvm_apic_read(offset, result);
1379
1380        switch (len) {
1381        case 1:
1382        case 2:
1383        case 4:
1384                memcpy(data, (char *)&result + alignment, len);
1385                break;
1386        default:
1387                printk(KERN_ERR "Local APIC read with len = %x, "
1388                       "should be 1,2, or 4 instead\n", len);
1389                break;
1390        }
1391        return 0;
1392}
1393EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
1394
1395static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1396{
1397        return addr >= apic->base_address &&
1398                addr < apic->base_address + LAPIC_MMIO_LENGTH;
1399}
1400
1401static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1402                           gpa_t address, int len, void *data)
1403{
1404        struct kvm_lapic *apic = to_lapic(this);
1405        u32 offset = address - apic->base_address;
1406
1407        if (!apic_mmio_in_range(apic, address))
1408                return -EOPNOTSUPP;
1409
1410        if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1411                if (!kvm_check_has_quirk(vcpu->kvm,
1412                                         KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1413                        return -EOPNOTSUPP;
1414
1415                memset(data, 0xff, len);
1416                return 0;
1417        }
1418
1419        kvm_lapic_reg_read(apic, offset, len, data);
1420
1421        return 0;
1422}
1423
1424static void update_divide_count(struct kvm_lapic *apic)
1425{
1426        u32 tmp1, tmp2, tdcr;
1427
1428        tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1429        tmp1 = tdcr & 0xf;
1430        tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1431        apic->divide_count = 0x1 << (tmp2 & 0x7);
1432}
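
/*
 * The bit shuffle above recovers the divide value from TDCR bits 0, 1 and 3;
 * e.g. TDCR = 0x0 selects divide-by-2 and TDCR = 0xb selects divide-by-1.
 */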
1433
1434static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1435{
1436        /*
1437         * Do not allow the guest to program periodic timers with a small
1438         * interval, since the hrtimers are not throttled by the host
1439         * scheduler.
1440         */
1441        if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1442                s64 min_period = min_timer_period_us * 1000LL;
1443
1444                if (apic->lapic_timer.period < min_period) {
1445                        pr_info_ratelimited(
1446                            "kvm: vcpu %i: requested %lld ns "
1447                            "lapic timer period limited to %lld ns\n",
1448                            apic->vcpu->vcpu_id,
1449                            apic->lapic_timer.period, min_period);
1450                        apic->lapic_timer.period = min_period;
1451                }
1452        }
1453}
1454
1455static void apic_update_lvtt(struct kvm_lapic *apic)
1456{
1457        u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1458                        apic->lapic_timer.timer_mode_mask;
1459
1460        if (apic->lapic_timer.timer_mode != timer_mode) {
1461                if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1462                                APIC_LVT_TIMER_TSCDEADLINE)) {
1463                        hrtimer_cancel(&apic->lapic_timer.timer);
1464                        kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1465                        apic->lapic_timer.period = 0;
1466                        apic->lapic_timer.tscdeadline = 0;
1467                }
1468                apic->lapic_timer.timer_mode = timer_mode;
1469                limit_periodic_timer_frequency(apic);
1470        }
1471}
1472
1473/*
1474 * On APICv, this test will cause a busy wait
1475 * during a higher-priority task.
1476 */
1477
1478static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1479{
1480        struct kvm_lapic *apic = vcpu->arch.apic;
1481        u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1482
1483        if (kvm_apic_hw_enabled(apic)) {
1484                int vec = reg & APIC_VECTOR_MASK;
1485                void *bitmap = apic->regs + APIC_ISR;
1486
1487                if (vcpu->arch.apicv_active)
1488                        bitmap = apic->regs + APIC_IRR;
1489
1490                if (apic_test_vector(vec, bitmap))
1491                        return true;
1492        }
1493        return false;
1494}
1495
1496static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1497{
1498        u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1499
1500        /*
1501         * If the guest TSC is running at a different ratio than the host, then
1502         * convert the delay to nanoseconds to achieve an accurate delay.  Note
1503         * that __delay() uses delay_tsc whenever the hardware has TSC, thus
1504         * always for VMX enabled hardware.
1505         */
1506        if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
1507                __delay(min(guest_cycles,
1508                        nsec_to_cycles(vcpu, timer_advance_ns)));
1509        } else {
1510                u64 delay_ns = guest_cycles * 1000000ULL;
1511                do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1512                ndelay(min_t(u32, delay_ns, timer_advance_ns));
1513        }
1514}
1515
1516static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1517                                              s64 advance_expire_delta)
1518{
1519        struct kvm_lapic *apic = vcpu->arch.apic;
1520        u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1521        u64 ns;
1522
1523        /* Do not adjust for tiny fluctuations or large random spikes. */
1524        if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
1525            abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
1526                return;
1527
1528        /* too early */
1529        if (advance_expire_delta < 0) {
1530                ns = -advance_expire_delta * 1000000ULL;
1531                do_div(ns, vcpu->arch.virtual_tsc_khz);
1532                timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1533        } else {
1534        /* too late */
1535                ns = advance_expire_delta * 1000000ULL;
1536                do_div(ns, vcpu->arch.virtual_tsc_khz);
1537                timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1538        }
1539
1540        if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1541                timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
1542        apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1543}
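
/*
 * Numeric example of the adjustment above: if the timer interrupt arrived
 * 800 guest TSC cycles late on a 2 GHz guest TSC, that is 400 ns, and
 * timer_advance_ns grows by 400 / LAPIC_TIMER_ADVANCE_ADJUST_STEP = 50 ns.
 */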
1544
1545static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1546{
1547        struct kvm_lapic *apic = vcpu->arch.apic;
1548        u64 guest_tsc, tsc_deadline;
1549
1550        if (apic->lapic_timer.expired_tscdeadline == 0)
1551                return;
1552
1553        tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1554        apic->lapic_timer.expired_tscdeadline = 0;
1555        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1556        apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
1557
1558        if (guest_tsc < tsc_deadline)
1559                __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1560
1561        if (lapic_timer_advance_dynamic)
1562                adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
1563}
1564
1565void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1566{
1567        if (lapic_timer_int_injected(vcpu))
1568                __kvm_wait_lapic_expire(vcpu);
1569}
1570EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
1571
1572static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1573{
1574        struct kvm_timer *ktimer = &apic->lapic_timer;
1575
1576        kvm_apic_local_deliver(apic, APIC_LVTT);
1577        if (apic_lvtt_tscdeadline(apic))
1578                ktimer->tscdeadline = 0;
1579        if (apic_lvtt_oneshot(apic)) {
1580                ktimer->tscdeadline = 0;
1581                ktimer->target_expiration = 0;
1582        }
1583}
1584
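/*
 * When posted timer interrupts are in use the expiry is handled here in
 * full: the optional busy-wait and the LVTT delivery happen immediately.
 * Otherwise only the pending count is bumped and the vCPU is notified so
 * that the interrupt is injected on the next entry.
 */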
1585static void apic_timer_expired(struct kvm_lapic *apic)
1586{
1587        struct kvm_vcpu *vcpu = apic->vcpu;
1588        struct kvm_timer *ktimer = &apic->lapic_timer;
1589
1590        if (atomic_read(&apic->lapic_timer.pending))
1591                return;
1592
1593        if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1594                ktimer->expired_tscdeadline = ktimer->tscdeadline;
1595
1596        if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1597                if (apic->lapic_timer.timer_advance_ns)
1598                        __kvm_wait_lapic_expire(vcpu);
1599                kvm_apic_inject_pending_timer_irqs(apic);
1600                return;
1601        }
1602
1603        atomic_inc(&apic->lapic_timer.pending);
1604        kvm_set_pending_timer(vcpu);
1605}
1606
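/*
 * Arm the hrtimer for a TSC-deadline timer.  The time left is
 * (tscdeadline - guest_tsc) * 1000000 / virtual_tsc_khz nanoseconds, and the
 * hrtimer is programmed timer_advance_ns early so the interrupt can be
 * delivered right at the deadline.  For example, a deadline 3,000,000 cycles
 * out on a 2 GHz guest TSC is 1,500,000 ns away; with a 1000 ns advance the
 * hrtimer fires 1,499,000 ns from now.  Deadlines in the past, or within the
 * advance window, expire immediately.
 */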
1607static void start_sw_tscdeadline(struct kvm_lapic *apic)
1608{
1609        struct kvm_timer *ktimer = &apic->lapic_timer;
1610        u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
1611        u64 ns = 0;
1612        ktime_t expire;
1613        struct kvm_vcpu *vcpu = apic->vcpu;
1614        unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1615        unsigned long flags;
1616        ktime_t now;
1617
1618        if (unlikely(!tscdeadline || !this_tsc_khz))
1619                return;
1620
1621        local_irq_save(flags);
1622
1623        now = ktime_get();
1624        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1625
1626        ns = (tscdeadline - guest_tsc) * 1000000ULL;
1627        do_div(ns, this_tsc_khz);
1628
1629        if (likely(tscdeadline > guest_tsc) &&
1630            likely(ns > apic->lapic_timer.timer_advance_ns)) {
1631                expire = ktime_add_ns(now, ns);
1632                expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
1633                hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
1634        } else
1635                apic_timer_expired(apic);
1636
1637        local_irq_restore(flags);
1638}
1639
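/*
 * The divide configuration changed while the timer was running: rescale the
 * time left by the ratio of the new and old divisors, since a larger divisor
 * makes each remaining tick proportionally longer.  For example, 1 ms left
 * with the divisor going from 2 to 4 becomes 2 ms left.
 */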
1640static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1641{
1642        ktime_t now, remaining;
1643        u64 ns_remaining_old, ns_remaining_new;
1644
1645        apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
1646                * APIC_BUS_CYCLE_NS * apic->divide_count;
1647        limit_periodic_timer_frequency(apic);
1648
1649        now = ktime_get();
1650        remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1651        if (ktime_to_ns(remaining) < 0)
1652                remaining = 0;
1653
1654        ns_remaining_old = ktime_to_ns(remaining);
1655        ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1656                                           apic->divide_count, old_divisor);
1657
1658        apic->lapic_timer.tscdeadline +=
1659                nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1660                nsec_to_cycles(apic->vcpu, ns_remaining_old);
1661        apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
1662}
1663
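/*
 * Derive the one-shot/periodic period from the initial count register:
 * period = TMICT * APIC_BUS_CYCLE_NS * divide_count.  For example, an
 * initial count of 100000 with a divide_count of 4 gives a period of
 * 100000 * APIC_BUS_CYCLE_NS * 4 nanoseconds, from which both the ktime
 * target and the guest TSC deadline are computed.
 */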
1664static bool set_target_expiration(struct kvm_lapic *apic)
1665{
1666        ktime_t now;
1667        u64 tscl = rdtsc();
1668
1669        now = ktime_get();
1670        apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
1671                * APIC_BUS_CYCLE_NS * apic->divide_count;
1672
1673        if (!apic->lapic_timer.period) {
1674                apic->lapic_timer.tscdeadline = 0;
1675                return false;
1676        }
1677
1678        limit_periodic_timer_frequency(apic);
1679
1680        apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1681                nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
1682        apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
1683
1684        return true;
1685}
1686
1687static void advance_periodic_target_expiration(struct kvm_lapic *apic)
1688{
1689        ktime_t now = ktime_get();
1690        u64 tscl = rdtsc();
1691        ktime_t delta;
1692
1693        /*
1694         * Synchronize both deadlines to the same time source; otherwise,
1695         * differences in the periods (caused by differences in the
1696         * underlying clocks or numerical approximation errors) will
1697         * cause the two to drift apart over time as the errors
1698         * accumulate.
1699         */
1700        apic->lapic_timer.target_expiration =
1701                ktime_add_ns(apic->lapic_timer.target_expiration,
1702                                apic->lapic_timer.period);
1703        delta = ktime_sub(apic->lapic_timer.target_expiration, now);
1704        apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1705                nsec_to_cycles(apic->vcpu, delta);
1706}
1707
1708static void start_sw_period(struct kvm_lapic *apic)
1709{
1710        if (!apic->lapic_timer.period)
1711                return;
1712
1713        if (ktime_after(ktime_get(),
1714                        apic->lapic_timer.target_expiration)) {
1715                apic_timer_expired(apic);
1716
1717                if (apic_lvtt_oneshot(apic))
1718                        return;
1719
1720                advance_periodic_target_expiration(apic);
1721        }
1722
1723        hrtimer_start(&apic->lapic_timer.timer,
1724                apic->lapic_timer.target_expiration,
1725                HRTIMER_MODE_ABS);
1726}
1727
1728bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1729{
1730        if (!lapic_in_kernel(vcpu))
1731                return false;
1732
1733        return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1734}
1735EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1736
1737static void cancel_hv_timer(struct kvm_lapic *apic)
1738{
1739        WARN_ON(preemptible());
1740        WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1741        kvm_x86_ops->cancel_hv_timer(apic->vcpu);
1742        apic->lapic_timer.hv_timer_in_use = false;
1743}
1744
1745static bool start_hv_timer(struct kvm_lapic *apic)
1746{
1747        struct kvm_timer *ktimer = &apic->lapic_timer;
1748        struct kvm_vcpu *vcpu = apic->vcpu;
1749        bool expired;
1750
1751        WARN_ON(preemptible());
1752        if (!kvm_x86_ops->set_hv_timer)
1753                return false;
1754
1755        if (!ktimer->tscdeadline)
1756                return false;
1757
1758        if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
1759                return false;
1760
1761        ktimer->hv_timer_in_use = true;
1762        hrtimer_cancel(&ktimer->timer);
1763
1764        /*
1765         * To simplify handling the periodic timer, leave the hv timer running
1766         * even if the deadline timer has expired, i.e. rely on the resulting
1767         * VM-Exit to recompute the periodic timer's target expiration.
1768         */
1769        if (!apic_lvtt_period(apic)) {
1770                /*
1771                 * Cancel the hv timer if the sw timer fired while the hv timer
1772                 * was being programmed, or if the hv timer itself expired.
1773                 */
1774                if (atomic_read(&ktimer->pending)) {
1775                        cancel_hv_timer(apic);
1776                } else if (expired) {
1777                        apic_timer_expired(apic);
1778                        cancel_hv_timer(apic);
1779                }
1780        }
1781
1782        trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
1783
1784        return true;
1785}
1786
1787static void start_sw_timer(struct kvm_lapic *apic)
1788{
1789        struct kvm_timer *ktimer = &apic->lapic_timer;
1790
1791        WARN_ON(preemptible());
1792        if (apic->lapic_timer.hv_timer_in_use)
1793                cancel_hv_timer(apic);
1794        if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1795                return;
1796
1797        if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1798                start_sw_period(apic);
1799        else if (apic_lvtt_tscdeadline(apic))
1800                start_sw_tscdeadline(apic);
1801        trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
1802}
1803
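/*
 * (Re)arm whichever timer backend is available: the hypervisor-assisted
 * timer (e.g. the VMX preemption timer) is preferred, and the hrtimer-based
 * software timer is used only when the former is unsupported or cannot be
 * programmed for the current deadline.
 */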
1804static void restart_apic_timer(struct kvm_lapic *apic)
1805{
1806        preempt_disable();
1807
1808        if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
1809                goto out;
1810
1811        if (!start_hv_timer(apic))
1812                start_sw_timer(apic);
1813out:
1814        preempt_enable();
1815}
1816
1817void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1818{
1819        struct kvm_lapic *apic = vcpu->arch.apic;
1820
1821        preempt_disable();
1822        /* If the preempt notifier has already run, it also called apic_timer_expired */
1823        if (!apic->lapic_timer.hv_timer_in_use)
1824                goto out;
1825        WARN_ON(swait_active(&vcpu->wq));
1826        cancel_hv_timer(apic);
1827        apic_timer_expired(apic);
1828
1829        if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1830                advance_periodic_target_expiration(apic);
1831                restart_apic_timer(apic);
1832        }
1833out:
1834        preempt_enable();
1835}
1836EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
1837
1838void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
1839{
1840        restart_apic_timer(vcpu->arch.apic);
1841}
1842EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
1843
1844void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
1845{
1846        struct kvm_lapic *apic = vcpu->arch.apic;
1847
1848        preempt_disable();
1849        /* Possibly the TSC deadline timer is not enabled yet */
1850        if (apic->lapic_timer.hv_timer_in_use)
1851                start_sw_timer(apic);
1852        preempt_enable();
1853}
1854EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
1855
1856void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
1857{
1858        struct kvm_lapic *apic = vcpu->arch.apic;
1859
1860        WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1861        restart_apic_timer(apic);
1862}
1863
1864static void start_apic_timer(struct kvm_lapic *apic)
1865{
1866        atomic_set(&apic->lapic_timer.pending, 0);
1867
1868        if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1869            && !set_target_expiration(apic))
1870                return;
1871
1872        restart_apic_timer(apic);
1873}
1874
1875static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1876{
1877        bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
1878
1879        if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
1880                apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
1881                if (lvt0_in_nmi_mode) {
1882                        atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1883                } else
1884                        atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1885        }
1886}
1887
1888int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1889{
1890        int ret = 0;
1891
1892        trace_kvm_apic_write(reg, val);
1893
1894        switch (reg) {
1895        case APIC_ID:           /* Local APIC ID */
1896                if (!apic_x2apic_mode(apic))
1897                        kvm_apic_set_xapic_id(apic, val >> 24);
1898                else
1899                        ret = 1;
1900                break;
1901
1902        case APIC_TASKPRI:
1903                report_tpr_access(apic, true);
1904                apic_set_tpr(apic, val & 0xff);
1905                break;
1906
1907        case APIC_EOI:
1908                apic_set_eoi(apic);
1909                break;
1910
1911        case APIC_LDR:
1912                if (!apic_x2apic_mode(apic))
1913                        kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
1914                else
1915                        ret = 1;
1916                break;
1917
1918        case APIC_DFR:
1919                if (!apic_x2apic_mode(apic)) {
1920                        kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
1921                        recalculate_apic_map(apic->vcpu->kvm);
1922                } else
1923                        ret = 1;
1924                break;
1925
1926        case APIC_SPIV: {
1927                u32 mask = 0x3ff;
1928                if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
1929                        mask |= APIC_SPIV_DIRECTED_EOI;
1930                apic_set_spiv(apic, val & mask);
1931                if (!(val & APIC_SPIV_APIC_ENABLED)) {
1932                        int i;
1933                        u32 lvt_val;
1934
1935                        for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
1936                                lvt_val = kvm_lapic_get_reg(apic,
1937                                                       APIC_LVTT + 0x10 * i);
1938                                kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
1939                                             lvt_val | APIC_LVT_MASKED);
1940                        }
1941                        apic_update_lvtt(apic);
1942                        atomic_set(&apic->lapic_timer.pending, 0);
1943
1944                }
1945                break;
1946        }
1947        case APIC_ICR:
1948                /* No delay here, so we always clear the pending bit */
1949                val &= ~(1 << 12);
1950                apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
1951                kvm_lapic_set_reg(apic, APIC_ICR, val);
1952                break;
1953
1954        case APIC_ICR2:
1955                if (!apic_x2apic_mode(apic))
1956                        val &= 0xff000000;
1957                kvm_lapic_set_reg(apic, APIC_ICR2, val);
1958                break;
1959
1960        case APIC_LVT0:
1961                apic_manage_nmi_watchdog(apic, val);
1962                /* fall through */
1963        case APIC_LVTTHMR:
1964        case APIC_LVTPC:
1965        case APIC_LVT1:
1966        case APIC_LVTERR:
1967                /* TODO: Check vector */
1968                if (!kvm_apic_sw_enabled(apic))
1969                        val |= APIC_LVT_MASKED;
1970
1971                val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
1972                kvm_lapic_set_reg(apic, reg, val);
1973
1974                break;
1975
1976        case APIC_LVTT:
1977                if (!kvm_apic_sw_enabled(apic))
1978                        val |= APIC_LVT_MASKED;
1979                val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
1980                kvm_lapic_set_reg(apic, APIC_LVTT, val);
1981                apic_update_lvtt(apic);
1982                break;
1983
1984        case APIC_TMICT:
1985                if (apic_lvtt_tscdeadline(apic))
1986                        break;
1987
1988                hrtimer_cancel(&apic->lapic_timer.timer);
1989                kvm_lapic_set_reg(apic, APIC_TMICT, val);
1990                start_apic_timer(apic);
1991                break;
1992
1993        case APIC_TDCR: {
1994                uint32_t old_divisor = apic->divide_count;
1995
1996                kvm_lapic_set_reg(apic, APIC_TDCR, val);
1997                update_divide_count(apic);
1998                if (apic->divide_count != old_divisor &&
1999                                apic->lapic_timer.period) {
2000                        hrtimer_cancel(&apic->lapic_timer.timer);
2001                        update_target_expiration(apic, old_divisor);
2002                        restart_apic_timer(apic);
2003                }
2004                break;
2005        }
2006        case APIC_ESR:
2007                if (apic_x2apic_mode(apic) && val != 0)
2008                        ret = 1;
2009                break;
2010
2011        case APIC_SELF_IPI:
2012                if (apic_x2apic_mode(apic)) {
2013                        kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
2014                } else
2015                        ret = 1;
2016                break;
2017        default:
2018                ret = 1;
2019                break;
2020        }
2021
2022        return ret;
2023}
2024EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
2025
2026static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
2027                            gpa_t address, int len, const void *data)
2028{
2029        struct kvm_lapic *apic = to_lapic(this);
2030        unsigned int offset = address - apic->base_address;
2031        u32 val;
2032
2033        if (!apic_mmio_in_range(apic, address))
2034                return -EOPNOTSUPP;
2035
2036        if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2037                if (!kvm_check_has_quirk(vcpu->kvm,
2038                                         KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2039                        return -EOPNOTSUPP;
2040
2041                return 0;
2042        }
2043
2044        /*
2045         * APIC registers must be aligned on a 128-bit boundary.
2046         * 32/64/128-bit registers must be accessed through 32-bit accesses.
2047         * Refer to SDM 8.4.1.
2048         */
2049        if (len != 4 || (offset & 0xf))
2050                return 0;
2051
2052        val = *(u32*)data;
2053
2054        kvm_lapic_reg_write(apic, offset & 0xff0, val);
2055
2056        return 0;
2057}
2058
2059void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2060{
2061        kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
2062}
2063EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2064
2065/* emulate APIC access in a trap manner */
2066void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2067{
2068        u32 val = 0;
2069
2070        /* hw has already done the conditional check and instruction decode */
2071        offset &= 0xff0;
2072
2073        kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
2074
2075        /* TODO: optimize to just emulate side effect w/o one more write */
2076        kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
2077}
2078EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
2079
2080void kvm_free_lapic(struct kvm_vcpu *vcpu)
2081{
2082        struct kvm_lapic *apic = vcpu->arch.apic;
2083
2084        if (!vcpu->arch.apic)
2085                return;
2086
2087        hrtimer_cancel(&apic->lapic_timer.timer);
2088
2089        if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2090                static_key_slow_dec_deferred(&apic_hw_disabled);
2091
2092        if (!apic->sw_enabled)
2093                static_key_slow_dec_deferred(&apic_sw_disabled);
2094
2095        if (apic->regs)
2096                free_page((unsigned long)apic->regs);
2097
2098        kfree(apic);
2099}
2100
2101/*
2102 *----------------------------------------------------------------------
2103 * LAPIC interface
2104 *----------------------------------------------------------------------
2105 */
2106u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2107{
2108        struct kvm_lapic *apic = vcpu->arch.apic;
2109
2110        if (!lapic_in_kernel(vcpu) ||
2111                !apic_lvtt_tscdeadline(apic))
2112                return 0;
2113
2114        return apic->lapic_timer.tscdeadline;
2115}
2116
2117void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2118{
2119        struct kvm_lapic *apic = vcpu->arch.apic;
2120
2121        if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
2122                        apic_lvtt_period(apic))
2123                return;
2124
2125        hrtimer_cancel(&apic->lapic_timer.timer);
2126        apic->lapic_timer.tscdeadline = data;
2127        start_apic_timer(apic);
2128}
2129
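/*
 * CR8 mirrors the task-priority class, i.e. bits 7:4 of the TPR.  For
 * example, a guest TPR of 0x30 reads back as CR8 == 3, and writing CR8 == 3
 * sets the priority class back to 0x30.
 */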
2130void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2131{
2132        struct kvm_lapic *apic = vcpu->arch.apic;
2133
2134        apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
2135                     | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
2136}
2137
2138u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2139{
2140        u64 tpr;
2141
2142        tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2143
2144        return (tpr & 0xf0) >> 4;
2145}
2146
2147void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2148{
2149        u64 old_value = vcpu->arch.apic_base;
2150        struct kvm_lapic *apic = vcpu->arch.apic;
2151
2152        if (!apic)
2153                value |= MSR_IA32_APICBASE_BSP;
2154
2155        vcpu->arch.apic_base = value;
2156
2157        if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2158                kvm_update_cpuid(vcpu);
2159
2160        if (!apic)
2161                return;
2162
2163        /* update jump label if enable bit changes */
2164        if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2165                if (value & MSR_IA32_APICBASE_ENABLE) {
2166                        kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2167                        static_key_slow_dec_deferred(&apic_hw_disabled);
2168                } else {
2169                        static_key_slow_inc(&apic_hw_disabled.key);
2170                        recalculate_apic_map(vcpu->kvm);
2171                }
2172        }
2173
2174        if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
2175                kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2176
2177        if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
2178                kvm_x86_ops->set_virtual_apic_mode(vcpu);
2179
2180        apic->base_address = apic->vcpu->arch.apic_base &
2181                             MSR_IA32_APICBASE_BASE;
2182
2183        if ((value & MSR_IA32_APICBASE_ENABLE) &&
2184             apic->base_address != APIC_DEFAULT_PHYS_BASE)
2185                pr_warn_once("APIC base relocation is unsupported by KVM");
2186}
2187
2188void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2189{
2190        struct kvm_lapic *apic = vcpu->arch.apic;
2191        int i;
2192
2193        if (!apic)
2194                return;
2195
2196        /* Stop the timer in case it's a reset to an active apic */
2197        hrtimer_cancel(&apic->lapic_timer.timer);
2198
2199        if (!init_event) {
2200                kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
2201                                         MSR_IA32_APICBASE_ENABLE);
2202                kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2203        }
2204        kvm_apic_set_version(apic->vcpu);
2205
2206        for (i = 0; i < KVM_APIC_LVT_NUM; i++)
2207                kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
2208        apic_update_lvtt(apic);
2209        if (kvm_vcpu_is_reset_bsp(vcpu) &&
2210            kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2211                kvm_lapic_set_reg(apic, APIC_LVT0,
2212                             SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2213        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2214
2215        kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
2216        apic_set_spiv(apic, 0xff);
2217        kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2218        if (!apic_x2apic_mode(apic))
2219                kvm_apic_set_ldr(apic, 0);
2220        kvm_lapic_set_reg(apic, APIC_ESR, 0);
2221        kvm_lapic_set_reg(apic, APIC_ICR, 0);
2222        kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2223        kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2224        kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2225        for (i = 0; i < 8; i++) {
2226                kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2227                kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2228                kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2229        }
2230        apic->irr_pending = vcpu->arch.apicv_active;
2231        apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
2232        apic->highest_isr_cache = -1;
2233        update_divide_count(apic);
2234        atomic_set(&apic->lapic_timer.pending, 0);
2235        if (kvm_vcpu_is_bsp(vcpu))
2236                kvm_lapic_set_base(vcpu,
2237                                vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
2238        vcpu->arch.pv_eoi.msr_val = 0;
2239        apic_update_ppr(apic);
2240        if (vcpu->arch.apicv_active) {
2241                kvm_x86_ops->apicv_post_state_restore(vcpu);
2242                kvm_x86_ops->hwapic_irr_update(vcpu, -1);
2243                kvm_x86_ops->hwapic_isr_update(vcpu, -1);
2244        }
2245
2246        vcpu->arch.apic_arb_prio = 0;
2247        vcpu->arch.apic_attention = 0;
2248}
2249
2250/*
2251 *----------------------------------------------------------------------
2252 * timer interface
2253 *----------------------------------------------------------------------
2254 */
2255
2256static bool lapic_is_periodic(struct kvm_lapic *apic)
2257{
2258        return apic_lvtt_period(apic);
2259}
2260
2261int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2262{
2263        struct kvm_lapic *apic = vcpu->arch.apic;
2264
2265        if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2266                return atomic_read(&apic->lapic_timer.pending);
2267
2268        return 0;
2269}
2270
2271int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2272{
2273        u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2274        int vector, mode, trig_mode;
2275
2276        if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2277                vector = reg & APIC_VECTOR_MASK;
2278                mode = reg & APIC_MODE_MASK;
2279                trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2280                return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
2281                                        NULL);
2282        }
2283        return 0;
2284}
2285
2286void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2287{
2288        struct kvm_lapic *apic = vcpu->arch.apic;
2289
2290        if (apic)
2291                kvm_apic_local_deliver(apic, APIC_LVT0);
2292}
2293
2294static const struct kvm_io_device_ops apic_mmio_ops = {
2295        .read     = apic_mmio_read,
2296        .write    = apic_mmio_write,
2297};
2298
2299static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2300{
2301        struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2302        struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2303
2304        apic_timer_expired(apic);
2305
2306        if (lapic_is_periodic(apic)) {
2307                advance_periodic_target_expiration(apic);
2308                hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2309                return HRTIMER_RESTART;
2310        } else
2311                return HRTIMER_NORESTART;
2312}
2313
2314int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
2315{
2316        struct kvm_lapic *apic;
2317
2318        ASSERT(vcpu != NULL);
2319
2320        apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2321        if (!apic)
2322                goto nomem;
2323
2324        vcpu->arch.apic = apic;
2325
2326        apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
2327        if (!apic->regs) {
2328                printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
2329                       vcpu->vcpu_id);
2330                goto nomem_free_apic;
2331        }
2332        apic->vcpu = vcpu;
2333
2334        hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2335                     HRTIMER_MODE_ABS_HARD);
2336        apic->lapic_timer.timer.function = apic_timer_fn;
2337        if (timer_advance_ns == -1) {
2338                apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
2339                lapic_timer_advance_dynamic = true;
2340        } else {
2341                apic->lapic_timer.timer_advance_ns = timer_advance_ns;
2342                lapic_timer_advance_dynamic = false;
2343        }
2344
2345        /*
2346         * APIC is created enabled. This will prevent kvm_lapic_set_base from
2347         * thinking that APIC state has changed.
2348         */
2349        vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2350        static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2351        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2352
2353        return 0;
2354nomem_free_apic:
2355        kfree(apic);
2356        vcpu->arch.apic = NULL;
2357nomem:
2358        return -ENOMEM;
2359}
2360
2361int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2362{
2363        struct kvm_lapic *apic = vcpu->arch.apic;
2364        u32 ppr;
2365
2366        if (!kvm_apic_hw_enabled(apic))
2367                return -1;
2368
2369        __apic_update_ppr(apic, &ppr);
2370        return apic_has_interrupt_for_ppr(apic, ppr);
2371}
2372
2373int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2374{
2375        u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2376        int r = 0;
2377
2378        if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2379                r = 1;
2380        if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2381            GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2382                r = 1;
2383        return r;
2384}
2385
2386void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2387{
2388        struct kvm_lapic *apic = vcpu->arch.apic;
2389
2390        if (atomic_read(&apic->lapic_timer.pending) > 0) {
2391                kvm_apic_inject_pending_timer_irqs(apic);
2392                atomic_set(&apic->lapic_timer.pending, 0);
2393        }
2394}
2395
2396int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2397{
2398        int vector = kvm_apic_has_interrupt(vcpu);
2399        struct kvm_lapic *apic = vcpu->arch.apic;
2400        u32 ppr;
2401
2402        if (vector == -1)
2403                return -1;
2404
2405        /*
2406         * We get here even with APIC virtualization enabled, if doing
2407         * nested virtualization and L1 runs with the "acknowledge interrupt
2408         * on exit" mode.  Then we cannot inject the interrupt via RVI,
2409         * because the process would deliver it through the IDT.
2410         */
2411
2412        apic_clear_irr(vector, apic);
2413        if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
2414                /*
2415                 * For auto-EOI interrupts, there might be another pending
2416                 * interrupt above PPR, so check whether to raise another
2417                 * KVM_REQ_EVENT.
2418                 */
2419                apic_update_ppr(apic);
2420        } else {
2421                /*
2422                 * For normal interrupts, PPR has been raised and there cannot
2423                 * be a higher-priority pending interrupt---except if there was
2424                 * a concurrent interrupt injection, but that would have
2425                 * triggered KVM_REQ_EVENT already.
2426                 */
2427                apic_set_isr(vector, apic);
2428                __apic_update_ppr(apic, &ppr);
2429        }
2430
2431        return vector;
2432}
2433
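/*
 * In x2APIC mode KVM keeps the full 32-bit APIC ID in the ID register, while
 * userspace, unless kvm->arch.x2apic_format is set, exchanges the state in
 * the legacy xAPIC layout with the 8-bit ID in bits 31:24; hence the
 * <<24 / >>24 conversion (e.g. vcpu_id 5 is stored as 0x05000000 in the
 * xAPIC layout).  On restore the LDR is recomputed from the ID.
 */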
2434static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2435                struct kvm_lapic_state *s, bool set)
2436{
2437        if (apic_x2apic_mode(vcpu->arch.apic)) {
2438                u32 *id = (u32 *)(s->regs + APIC_ID);
2439                u32 *ldr = (u32 *)(s->regs + APIC_LDR);
2440
2441                if (vcpu->kvm->arch.x2apic_format) {
2442                        if (*id != vcpu->vcpu_id)
2443                                return -EINVAL;
2444                } else {
2445                        if (set)
2446                                *id >>= 24;
2447                        else
2448                                *id <<= 24;
2449                }
2450
2451                /* In x2APIC mode, the LDR is fixed and based on the id */
2452                if (set)
2453                        *ldr = kvm_apic_calc_x2apic_ldr(*id);
2454        }
2455
2456        return 0;
2457}
2458
2459int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2460{
2461        memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2462        return kvm_apic_state_fixup(vcpu, s, false);
2463}
2464
2465int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2466{
2467        struct kvm_lapic *apic = vcpu->arch.apic;
2468        int r;
2469
2470
2471        kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2472        /* set SPIV separately to get count of SW disabled APICs right */
2473        apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2474
2475        r = kvm_apic_state_fixup(vcpu, s, true);
2476        if (r)
2477                return r;
2478        memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
2479
2480        recalculate_apic_map(vcpu->kvm);
2481        kvm_apic_set_version(vcpu);
2482
2483        apic_update_ppr(apic);
2484        hrtimer_cancel(&apic->lapic_timer.timer);
2485        apic_update_lvtt(apic);
2486        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2487        update_divide_count(apic);
2488        start_apic_timer(apic);
2489        apic->irr_pending = true;
2490        apic->isr_count = vcpu->arch.apicv_active ?
2491                                1 : count_vectors(apic->regs + APIC_ISR);
2492        apic->highest_isr_cache = -1;
2493        if (vcpu->arch.apicv_active) {
2494                kvm_x86_ops->apicv_post_state_restore(vcpu);
2495                kvm_x86_ops->hwapic_irr_update(vcpu,
2496                                apic_find_highest_irr(apic));
2497                kvm_x86_ops->hwapic_isr_update(vcpu,
2498                                apic_find_highest_isr(apic));
2499        }
2500        kvm_make_request(KVM_REQ_EVENT, vcpu);
2501        if (ioapic_in_kernel(vcpu->kvm))
2502                kvm_rtc_eoi_tracking_restore_one(vcpu);
2503
2504        vcpu->arch.apic_arb_prio = 0;
2505
2506        return 0;
2507}
2508
2509void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2510{
2511        struct hrtimer *timer;
2512
2513        if (!lapic_in_kernel(vcpu) ||
2514                kvm_can_post_timer_interrupt(vcpu))
2515                return;
2516
2517        timer = &vcpu->arch.apic->lapic_timer.timer;
2518        if (hrtimer_cancel(timer))
2519                hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
2520}
2521
2522/*
2523 * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2524 *
2525 * Detect whether guest triggered PV EOI since the
2526 * last entry. If yes, set EOI on guest's behalf.
2527 * Clear PV EOI in guest memory in any case.
2528 */
2529static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2530                                        struct kvm_lapic *apic)
2531{
2532        bool pending;
2533        int vector;
2534        /*
2535         * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2536         * and KVM_PV_EOI_ENABLED in guest memory as follows:
2537         *
2538         * KVM_APIC_PV_EOI_PENDING is unset:
2539         *      -> host disabled PV EOI.
2540         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2541         *      -> host enabled PV EOI, guest did not execute EOI yet.
2542         * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2543         *      -> host enabled PV EOI, guest executed EOI.
2544         */
2545        BUG_ON(!pv_eoi_enabled(vcpu));
2546        pending = pv_eoi_get_pending(vcpu);
2547        /*
2548         * Clear pending bit in any case: it will be set again on vmentry.
2549         * While this might not be ideal from a performance point of view,
2550         * this makes sure pv eoi is only enabled when we know it's safe.
2551         */
2552        pv_eoi_clr_pending(vcpu);
2553        if (pending)
2554                return;
2555        vector = apic_set_eoi(apic);
2556        trace_kvm_pv_eoi(apic, vector);
2557}
2558
2559void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2560{
2561        u32 data;
2562
2563        if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2564                apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2565
2566        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2567                return;
2568
2569        if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2570                                  sizeof(u32)))
2571                return;
2572
2573        apic_set_tpr(vcpu->arch.apic, data & 0xff);
2574}
2575
2576/*
2577 * apic_sync_pv_eoi_to_guest - called before vmentry
2578 *
2579 * Detect whether it's safe to enable PV EOI and
2580 * if yes do so.
2581 */
2582static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2583                                        struct kvm_lapic *apic)
2584{
2585        if (!pv_eoi_enabled(vcpu) ||
2586            /* IRR set or many bits in ISR: could be nested. */
2587            apic->irr_pending ||
2588            /* Cache not set: could be safe but we don't bother. */
2589            apic->highest_isr_cache == -1 ||
2590            /* Need EOI to update ioapic. */
2591            kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2592                /*
2593                 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2594                 * so we need not do anything here.
2595                 */
2596                return;
2597        }
2598
2599        pv_eoi_set_pending(apic->vcpu);
2600}
2601
2602void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2603{
2604        u32 data, tpr;
2605        int max_irr, max_isr;
2606        struct kvm_lapic *apic = vcpu->arch.apic;
2607
2608        apic_sync_pv_eoi_to_guest(vcpu, apic);
2609
2610        if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2611                return;
2612
2613        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2614        max_irr = apic_find_highest_irr(apic);
2615        if (max_irr < 0)
2616                max_irr = 0;
2617        max_isr = apic_find_highest_isr(apic);
2618        if (max_isr < 0)
2619                max_isr = 0;
2620        data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2621
2622        kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2623                                sizeof(u32));
2624}
2625
2626int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2627{
2628        if (vapic_addr) {
2629                if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2630                                        &vcpu->arch.apic->vapic_cache,
2631                                        vapic_addr, sizeof(u32)))
2632                        return -EINVAL;
2633                __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2634        } else {
2635                __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2636        }
2637
2638        vcpu->arch.apic->vapic_addr = vapic_addr;
2639        return 0;
2640}
2641
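/*
 * x2APIC MSRs map 1:1 onto the xAPIC MMIO registers: each MSR above
 * APIC_BASE_MSR corresponds to one 16-byte register, hence
 * reg = (msr - APIC_BASE_MSR) << 4.  For example, the MSR at
 * APIC_BASE_MSR + 0x08 maps to register offset 0x80, i.e. APIC_TASKPRI.
 * The 64-bit ICR is split across the ICR/ICR2 register pair.
 */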
2642int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2643{
2644        struct kvm_lapic *apic = vcpu->arch.apic;
2645        u32 reg = (msr - APIC_BASE_MSR) << 4;
2646
2647        if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2648                return 1;
2649
2650        if (reg == APIC_ICR2)
2651                return 1;
2652
2653        /* if this is an ICR write, store the upper half in ICR2 before issuing the command */
2654        if (reg == APIC_ICR)
2655                kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2656        return kvm_lapic_reg_write(apic, reg, (u32)data);
2657}
2658
2659int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2660{
2661        struct kvm_lapic *apic = vcpu->arch.apic;
2662        u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
2663
2664        if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2665                return 1;
2666
2667        if (reg == APIC_DFR || reg == APIC_ICR2)
2668                return 1;
2669
2670        if (kvm_lapic_reg_read(apic, reg, 4, &low))
2671                return 1;
2672        if (reg == APIC_ICR)
2673                kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2674
2675        *data = (((u64)high) << 32) | low;
2676
2677        return 0;
2678}
2679
2680int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2681{
2682        struct kvm_lapic *apic = vcpu->arch.apic;
2683
2684        if (!lapic_in_kernel(vcpu))
2685                return 1;
2686
2687        /* if this is an ICR write, store the upper half in ICR2 before issuing the command */
2688        if (reg == APIC_ICR)
2689                kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2690        return kvm_lapic_reg_write(apic, reg, (u32)data);
2691}
2692
2693int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2694{
2695        struct kvm_lapic *apic = vcpu->arch.apic;
2696        u32 low, high = 0;
2697
2698        if (!lapic_in_kernel(vcpu))
2699                return 1;
2700
2701        if (kvm_lapic_reg_read(apic, reg, 4, &low))
2702                return 1;
2703        if (reg == APIC_ICR)
2704                kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2705
2706        *data = (((u64)high) << 32) | low;
2707
2708        return 0;
2709}
2710
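/*
 * The MSR value carries both the enable flag (KVM_MSR_ENABLED) and the guest
 * physical address of the PV EOI flag, which must be 4-byte aligned; e.g.
 * data == (gpa | KVM_MSR_ENABLED) enables PV EOI with the flag at gpa.
 */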
2711int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
2712{
2713        u64 addr = data & ~KVM_MSR_ENABLED;
2714        struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
2715        unsigned long new_len;
2716
2717        if (!IS_ALIGNED(addr, 4))
2718                return 1;
2719
2720        vcpu->arch.pv_eoi.msr_val = data;
2721        if (!pv_eoi_enabled(vcpu))
2722                return 0;
2723
2724        if (addr == ghc->gpa && len <= ghc->len)
2725                new_len = ghc->len;
2726        else
2727                new_len = len;
2728
2729        return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
2730}
2731
2732void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2733{
2734        struct kvm_lapic *apic = vcpu->arch.apic;
2735        u8 sipi_vector;
2736        unsigned long pe;
2737
2738        if (!lapic_in_kernel(vcpu) || !apic->pending_events)
2739                return;
2740
2741        /*
2742         * INITs are latched while CPU is in specific states
2743         * (SMM, VMX non-root mode, SVM with GIF=0).
2744         * Because a CPU cannot be in these states immediately
2745         * after it has processed an INIT signal (and thus in
2746         * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
2747         * and leave the INIT pending.
2748         */
2749        if (kvm_vcpu_latch_init(vcpu)) {
2750                WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
2751                if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
2752                        clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2753                return;
2754        }
2755
2756        pe = xchg(&apic->pending_events, 0);
2757        if (test_bit(KVM_APIC_INIT, &pe)) {
2758                kvm_vcpu_reset(vcpu, true);
2759                if (kvm_vcpu_is_bsp(apic->vcpu))
2760                        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2761                else
2762                        vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
2763        }
2764        if (test_bit(KVM_APIC_SIPI, &pe) &&
2765            vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
2766                /* evaluate pending_events before reading the vector */
2767                smp_rmb();
2768                sipi_vector = apic->sipi_vector;
2769                kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
2770                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2771        }
2772}
2773
2774void kvm_lapic_init(void)
2775{
2776        /* do not patch jump label more than once per second */
2777        jump_label_rate_limit(&apic_hw_disabled, HZ);
2778        jump_label_rate_limit(&apic_sw_disabled, HZ);
2779}
2780
2781void kvm_lapic_exit(void)
2782{
2783        static_key_deferred_flush(&apic_hw_disabled);
2784        static_key_deferred_flush(&apic_sw_disabled);
2785}
2786