linux/arch/x86/kvm/lapic.c
   1
   2/*
   3 * Local APIC virtualization
   4 *
   5 * Copyright (C) 2006 Qumranet, Inc.
   6 * Copyright (C) 2007 Novell
   7 * Copyright (C) 2007 Intel
   8 *
   9 * Authors:
  10 *   Dor Laor <dor.laor@qumranet.com>
  11 *   Gregory Haskins <ghaskins@novell.com>
  12 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
  13 *
  14 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
  15 *
  16 * This work is licensed under the terms of the GNU GPL, version 2.  See
  17 * the COPYING file in the top-level directory.
  18 */
  19
  20#include <linux/kvm_host.h>
  21#include <linux/kvm.h>
  22#include <linux/mm.h>
  23#include <linux/highmem.h>
  24#include <linux/smp.h>
  25#include <linux/hrtimer.h>
  26#include <linux/io.h>
  27#include <linux/module.h>
  28#include <linux/math64.h>
  29#include <asm/processor.h>
  30#include <asm/msr.h>
  31#include <asm/page.h>
  32#include <asm/current.h>
  33#include <asm/apicdef.h>
  34#include <asm/atomic.h>
  36#include "kvm_cache_regs.h"
  37#include "irq.h"
  38#include "trace.h"
  39#include "x86.h"
  40
  41#ifndef CONFIG_X86_64
  42#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
  43#else
  44#define mod_64(x, y) ((x) % (y))
  45#endif
  46
  47#define PRId64 "d"
  48#define PRIx64 "llx"
  49#define PRIu64 "u"
  50#define PRIo64 "o"
  51
  52#define APIC_BUS_CYCLE_NS 1
  53
  54/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
  55#define apic_debug(fmt, arg...)
  56
  57#define APIC_LVT_NUM                    6
   58/* 14 is the version for Xeon and Pentium 4 (see SDM 8.4.8) */
  59#define APIC_VERSION                    (0x14UL | ((APIC_LVT_NUM - 1) << 16))
  60#define LAPIC_MMIO_LENGTH               (1 << 12)
   62/* the following defines are not in apicdef.h */
  62#define APIC_SHORT_MASK                 0xc0000
  63#define APIC_DEST_NOSHORT               0x0
  64#define APIC_DEST_MASK                  0x800
  65#define MAX_APIC_VECTOR                 256
  66
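     /*
      * IRR/ISR/TMR each hold 256 vector bits laid out as eight 32-bit
      * banks spaced 16 bytes apart in the register page: REG_POS() gives
      * the byte offset of the bank containing a vector, VEC_POS() the bit
      * position within that bank.
      */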
  67#define VEC_POS(v) ((v) & (32 - 1))
  68#define REG_POS(v) (((v) >> 5) << 4)
  69
  70static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
  71{
  72        return *((u32 *) (apic->regs + reg_off));
  73}
  74
  75static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
  76{
  77        *((u32 *) (apic->regs + reg_off)) = val;
  78}
  79
  80static inline int apic_test_and_set_vector(int vec, void *bitmap)
  81{
  82        return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  83}
  84
  85static inline int apic_test_and_clear_vector(int vec, void *bitmap)
  86{
  87        return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  88}
  89
  90static inline void apic_set_vector(int vec, void *bitmap)
  91{
  92        set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  93}
  94
  95static inline void apic_clear_vector(int vec, void *bitmap)
  96{
  97        clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
  98}
  99
 100static inline int apic_hw_enabled(struct kvm_lapic *apic)
 101{
 102        return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
 103}
 104
 105static inline int  apic_sw_enabled(struct kvm_lapic *apic)
 106{
 107        return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
 108}
 109
 110static inline int apic_enabled(struct kvm_lapic *apic)
 111{
 112        return apic_sw_enabled(apic) && apic_hw_enabled(apic);
 113}
 114
 115#define LVT_MASK        \
 116        (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
 117
 118#define LINT_MASK       \
 119        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 120         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 121
 122static inline int kvm_apic_id(struct kvm_lapic *apic)
 123{
 124        return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 125}
 126
 127static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 128{
 129        return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
 130}
 131
 132static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 133{
 134        return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
 135}
 136
 137static inline int apic_lvtt_period(struct kvm_lapic *apic)
 138{
 139        return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
 140}
 141
 142static inline int apic_lvt_nmi_mode(u32 lvt_val)
 143{
 144        return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
 145}
 146
 147void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 148{
 149        struct kvm_lapic *apic = vcpu->arch.apic;
 150        struct kvm_cpuid_entry2 *feat;
 151        u32 v = APIC_VERSION;
 152
 153        if (!irqchip_in_kernel(vcpu->kvm))
 154                return;
 155
 156        feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
 157        if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
 158                v |= APIC_LVR_DIRECTED_EOI;
 159        apic_set_reg(apic, APIC_LVR, v);
 160}
 161
 162static inline int apic_x2apic_mode(struct kvm_lapic *apic)
 163{
 164        return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
 165}
 166
 167static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 168        LVT_MASK | APIC_LVT_TIMER_PERIODIC,     /* LVTT */
 169        LVT_MASK | APIC_MODE_MASK,      /* LVTTHMR */
 170        LVT_MASK | APIC_MODE_MASK,      /* LVTPC */
 171        LINT_MASK, LINT_MASK,   /* LVT0-1 */
 172        LVT_MASK                /* LVTERR */
 173};
 174
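     /*
      * Scan the eight 32-bit banks of a 256-bit vector bitmap (IRR/ISR)
      * from the top down and return the highest vector that is set, or -1
      * if none is.
      */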
 175static int find_highest_vector(void *bitmap)
 176{
 177        u32 *word = bitmap;
 178        int word_offset = MAX_APIC_VECTOR >> 5;
 179
 180        while ((word_offset != 0) && (word[(--word_offset) << 2] == 0))
 181                continue;
 182
 183        if (likely(!word_offset && !word[0]))
 184                return -1;
 185        else
 186                return fls(word[word_offset << 2]) - 1 + (word_offset << 5);
 187}
 188
 189static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
 190{
 191        apic->irr_pending = true;
 192        return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
 193}
 194
 195static inline int apic_search_irr(struct kvm_lapic *apic)
 196{
 197        return find_highest_vector(apic->regs + APIC_IRR);
 198}
 199
 200static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 201{
 202        int result;
 203
 204        if (!apic->irr_pending)
 205                return -1;
 206
 207        result = apic_search_irr(apic);
 208        ASSERT(result == -1 || result >= 16);
 209
 210        return result;
 211}
 212
 213static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 214{
 215        apic->irr_pending = false;
 216        apic_clear_vector(vec, apic->regs + APIC_IRR);
 217        if (apic_search_irr(apic) != -1)
 218                apic->irr_pending = true;
 219}
 220
 221int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 222{
 223        struct kvm_lapic *apic = vcpu->arch.apic;
 224        int highest_irr;
 225
  226        /* This may race with setting of irr in __apic_accept_irq() and
  227         * the value returned may be wrong, but kvm_vcpu_kick() in
  228         * __apic_accept_irq() will cause an immediate vmexit and the value
  229         * will be recalculated on the next vmentry.
  230         */
 231        if (!apic)
 232                return 0;
 233        highest_irr = apic_find_highest_irr(apic);
 234
 235        return highest_irr;
 236}
 237
 238static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 239                             int vector, int level, int trig_mode);
 240
 241int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 242{
 243        struct kvm_lapic *apic = vcpu->arch.apic;
 244
 245        return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
 246                        irq->level, irq->trig_mode);
 247}
 248
 249static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 250{
 251        int result;
 252
 253        result = find_highest_vector(apic->regs + APIC_ISR);
 254        ASSERT(result == -1 || result >= 16);
 255
 256        return result;
 257}
 258
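     /*
      * Recompute the Processor Priority Register: the PPR is the TPR when
      * the TPR's priority class is at least that of the highest in-service
      * vector, otherwise it is the priority class of that vector.
      */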
 259static void apic_update_ppr(struct kvm_lapic *apic)
 260{
 261        u32 tpr, isrv, ppr;
 262        int isr;
 263
 264        tpr = apic_get_reg(apic, APIC_TASKPRI);
 265        isr = apic_find_highest_isr(apic);
 266        isrv = (isr != -1) ? isr : 0;
 267
 268        if ((tpr & 0xf0) >= (isrv & 0xf0))
 269                ppr = tpr & 0xff;
 270        else
 271                ppr = isrv & 0xf0;
 272
 273        apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
 274                   apic, ppr, isr, isrv);
 275
 276        apic_set_reg(apic, APIC_PROCPRI, ppr);
 277}
 278
 279static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
 280{
 281        apic_set_reg(apic, APIC_TASKPRI, tpr);
 282        apic_update_ppr(apic);
 283}
 284
 285int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
 286{
 287        return dest == 0xff || kvm_apic_id(apic) == dest;
 288}
 289
 290int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 291{
 292        int result = 0;
 293        u32 logical_id;
 294
 295        if (apic_x2apic_mode(apic)) {
 296                logical_id = apic_get_reg(apic, APIC_LDR);
 297                return logical_id & mda;
 298        }
 299
 300        logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));
 301
 302        switch (apic_get_reg(apic, APIC_DFR)) {
 303        case APIC_DFR_FLAT:
 304                if (logical_id & mda)
 305                        result = 1;
 306                break;
 307        case APIC_DFR_CLUSTER:
 308                if (((logical_id >> 4) == (mda >> 0x4))
 309                    && (logical_id & mda & 0xf))
 310                        result = 1;
 311                break;
 312        default:
 313                printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n",
 314                       apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
 315                break;
 316        }
 317
 318        return result;
 319}
 320
 321int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 322                           int short_hand, int dest, int dest_mode)
 323{
 324        int result = 0;
 325        struct kvm_lapic *target = vcpu->arch.apic;
 326
 327        apic_debug("target %p, source %p, dest 0x%x, "
 328                   "dest_mode 0x%x, short_hand 0x%x\n",
 329                   target, source, dest, dest_mode, short_hand);
 330
  331        ASSERT(target);
 332        switch (short_hand) {
 333        case APIC_DEST_NOSHORT:
 334                if (dest_mode == 0)
 335                        /* Physical mode. */
 336                        result = kvm_apic_match_physical_addr(target, dest);
 337                else
 338                        /* Logical mode. */
 339                        result = kvm_apic_match_logical_addr(target, dest);
 340                break;
 341        case APIC_DEST_SELF:
 342                result = (target == source);
 343                break;
 344        case APIC_DEST_ALLINC:
 345                result = 1;
 346                break;
 347        case APIC_DEST_ALLBUT:
 348                result = (target != source);
 349                break;
 350        default:
 351                printk(KERN_WARNING "Bad dest shorthand value %x\n",
 352                       short_hand);
 353                break;
 354        }
 355
 356        return result;
 357}
 358
 359/*
 360 * Add a pending IRQ into lapic.
 361 * Return 1 if successfully added and 0 if discarded.
 362 */
 363static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 364                             int vector, int level, int trig_mode)
 365{
 366        int result = 0;
 367        struct kvm_vcpu *vcpu = apic->vcpu;
 368
 369        switch (delivery_mode) {
 370        case APIC_DM_LOWEST:
  371                vcpu->arch.apic_arb_prio++;     /* fall through */
 372        case APIC_DM_FIXED:
 373                /* FIXME add logic for vcpu on reset */
 374                if (unlikely(!apic_enabled(apic)))
 375                        break;
 376
 377                result = !apic_test_and_set_irr(vector, apic);
 378                trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
 379                                          trig_mode, vector, !result);
 380                if (!result) {
 381                        if (trig_mode)
 382                                apic_debug("level trig mode repeatedly for "
 383                                                "vector %d", vector);
 384                        break;
 385                }
 386
 387                if (trig_mode) {
 388                        apic_debug("level trig mode for vector %d", vector);
 389                        apic_set_vector(vector, apic->regs + APIC_TMR);
 390                } else
 391                        apic_clear_vector(vector, apic->regs + APIC_TMR);
 392                kvm_vcpu_kick(vcpu);
 393                break;
 394
 395        case APIC_DM_REMRD:
 396                printk(KERN_DEBUG "Ignoring delivery mode 3\n");
 397                break;
 398
 399        case APIC_DM_SMI:
 400                printk(KERN_DEBUG "Ignoring guest SMI\n");
 401                break;
 402
 403        case APIC_DM_NMI:
 404                result = 1;
 405                kvm_inject_nmi(vcpu);
 406                kvm_vcpu_kick(vcpu);
 407                break;
 408
 409        case APIC_DM_INIT:
 410                if (level) {
 411                        result = 1;
 412                        if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
 413                                printk(KERN_DEBUG
 414                                       "INIT on a runnable vcpu %d\n",
 415                                       vcpu->vcpu_id);
 416                        vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 417                        kvm_vcpu_kick(vcpu);
 418                } else {
 419                        apic_debug("Ignoring de-assert INIT to vcpu %d\n",
 420                                   vcpu->vcpu_id);
 421                }
 422                break;
 423
 424        case APIC_DM_STARTUP:
 425                apic_debug("SIPI to vcpu %d vector 0x%02x\n",
 426                           vcpu->vcpu_id, vector);
 427                if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 428                        result = 1;
 429                        vcpu->arch.sipi_vector = vector;
 430                        vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 431                        kvm_vcpu_kick(vcpu);
 432                }
 433                break;
 434
 435        case APIC_DM_EXTINT:
 436                /*
 437                 * Should only be called by kvm_apic_local_deliver() with LVT0,
 438                 * before NMI watchdog was enabled. Already handled by
 439                 * kvm_apic_accept_pic_intr().
 440                 */
 441                break;
 442
 443        default:
 444                printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
 445                       delivery_mode);
 446                break;
 447        }
 448        return result;
 449}
 450
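     /*
      * apic_arb_prio is incremented each time a vcpu accepts a
      * lowest-priority interrupt (see APIC_DM_LOWEST above); comparing the
      * counters lets the caller steer the next lowest-priority interrupt
      * to the vcpu that has accepted the fewest of them so far.
      */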
 451int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 452{
 453        return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
 454}
 455
 456static void apic_set_eoi(struct kvm_lapic *apic)
 457{
 458        int vector = apic_find_highest_isr(apic);
 459        int trigger_mode;
 460        /*
  461         * Not every EOI write has a corresponding bit set in the ISR;
  462         * one example is when the kernel checks the timer in setup_IO_APIC().
 463         */
 464        if (vector == -1)
 465                return;
 466
 467        apic_clear_vector(vector, apic->regs + APIC_ISR);
 468        apic_update_ppr(apic);
 469
 470        if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
 471                trigger_mode = IOAPIC_LEVEL_TRIG;
 472        else
 473                trigger_mode = IOAPIC_EDGE_TRIG;
 474        if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
 475                mutex_lock(&apic->vcpu->kvm->irq_lock);
 476                kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
 477                mutex_unlock(&apic->vcpu->kvm->irq_lock);
 478        }
 479}
 480
 481static void apic_send_ipi(struct kvm_lapic *apic)
 482{
 483        u32 icr_low = apic_get_reg(apic, APIC_ICR);
 484        u32 icr_high = apic_get_reg(apic, APIC_ICR2);
 485        struct kvm_lapic_irq irq;
 486
 487        irq.vector = icr_low & APIC_VECTOR_MASK;
 488        irq.delivery_mode = icr_low & APIC_MODE_MASK;
 489        irq.dest_mode = icr_low & APIC_DEST_MASK;
 490        irq.level = icr_low & APIC_INT_ASSERT;
 491        irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
 492        irq.shorthand = icr_low & APIC_SHORT_MASK;
 493        if (apic_x2apic_mode(apic))
 494                irq.dest_id = icr_high;
 495        else
 496                irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
 497
 498        trace_kvm_apic_ipi(icr_low, irq.dest_id);
 499
 500        apic_debug("icr_high 0x%x, icr_low 0x%x, "
 501                   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
 502                   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
 503                   icr_high, icr_low, irq.shorthand, irq.dest_id,
 504                   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 505                   irq.vector);
 506
 507        mutex_lock(&apic->vcpu->kvm->irq_lock);
 508        kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
 509        mutex_unlock(&apic->vcpu->kvm->irq_lock);
 510}
 511
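     /*
      * Compute the timer's current count from the time remaining on the
      * hrtimer: the remaining nanoseconds (modulo the programmed period)
      * divided by the bus cycle time times the divide count.
      */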
 512static u32 apic_get_tmcct(struct kvm_lapic *apic)
 513{
 514        ktime_t remaining;
 515        s64 ns;
 516        u32 tmcct;
 517
 518        ASSERT(apic != NULL);
 519
 520        /* if initial count is 0, current count should also be 0 */
 521        if (apic_get_reg(apic, APIC_TMICT) == 0)
 522                return 0;
 523
 524        remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
 525        if (ktime_to_ns(remaining) < 0)
 526                remaining = ktime_set(0, 0);
 527
 528        ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
 529        tmcct = div64_u64(ns,
 530                         (APIC_BUS_CYCLE_NS * apic->divide_count));
 531
 532        return tmcct;
 533}
 534
 535static void __report_tpr_access(struct kvm_lapic *apic, bool write)
 536{
 537        struct kvm_vcpu *vcpu = apic->vcpu;
 538        struct kvm_run *run = vcpu->run;
 539
 540        set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests);
 541        run->tpr_access.rip = kvm_rip_read(vcpu);
 542        run->tpr_access.is_write = write;
 543}
 544
 545static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
 546{
 547        if (apic->vcpu->arch.tpr_access_reporting)
 548                __report_tpr_access(apic, write);
 549}
 550
 551static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 552{
 553        u32 val = 0;
 554
 555        if (offset >= LAPIC_MMIO_LENGTH)
 556                return 0;
 557
 558        switch (offset) {
 559        case APIC_ID:
 560                if (apic_x2apic_mode(apic))
 561                        val = kvm_apic_id(apic);
 562                else
 563                        val = kvm_apic_id(apic) << 24;
 564                break;
 565        case APIC_ARBPRI:
 566                printk(KERN_WARNING "Access APIC ARBPRI register "
 567                       "which is for P6\n");
 568                break;
 569
 570        case APIC_TMCCT:        /* Timer CCR */
 571                val = apic_get_tmcct(apic);
 572                break;
 573
 574        case APIC_TASKPRI:
 575                report_tpr_access(apic, false);
 576                /* fall thru */
 577        default:
 578                apic_update_ppr(apic);
 579                val = apic_get_reg(apic, offset);
 580                break;
 581        }
 582
 583        return val;
 584}
 585
 586static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
 587{
 588        return container_of(dev, struct kvm_lapic, dev);
 589}
 590
 591static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 592                void *data)
 593{
 594        unsigned char alignment = offset & 0xf;
 595        u32 result;
  596        /* this bitmask has a bit cleared for each reserved register */
 597        static const u64 rmask = 0x43ff01ffffffe70cULL;
 598
 599        if ((alignment + len) > 4) {
 600                apic_debug("KVM_APIC_READ: alignment error %x %d\n",
 601                           offset, len);
 602                return 1;
 603        }
 604
 605        if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
 606                apic_debug("KVM_APIC_READ: read reserved register %x\n",
 607                           offset);
 608                return 1;
 609        }
 610
 611        result = __apic_read(apic, offset & ~0xf);
 612
 613        trace_kvm_apic_read(offset, result);
 614
 615        switch (len) {
 616        case 1:
 617        case 2:
 618        case 4:
 619                memcpy(data, (char *)&result + alignment, len);
 620                break;
 621        default:
 622                printk(KERN_ERR "Local APIC read with len = %x, "
  623                       "should be 1, 2, or 4 instead\n", len);
 624                break;
 625        }
 626        return 0;
 627}
 628
 629static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
 630{
 631        return apic_hw_enabled(apic) &&
 632            addr >= apic->base_address &&
 633            addr < apic->base_address + LAPIC_MMIO_LENGTH;
 634}
 635
 636static int apic_mmio_read(struct kvm_io_device *this,
 637                           gpa_t address, int len, void *data)
 638{
 639        struct kvm_lapic *apic = to_lapic(this);
 640        u32 offset = address - apic->base_address;
 641
 642        if (!apic_mmio_in_range(apic, address))
 643                return -EOPNOTSUPP;
 644
 645        apic_reg_read(apic, offset, len, data);
 646
 647        return 0;
 648}
 649
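     /*
      * Decode the timer Divide Configuration Register: the 3-bit value
      * formed from TDCR bits 0, 1 and 3 selects a divisor of 2^(n+1),
      * with the all-ones encoding wrapping around to divide-by-1.
      */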
 650static void update_divide_count(struct kvm_lapic *apic)
 651{
 652        u32 tmp1, tmp2, tdcr;
 653
 654        tdcr = apic_get_reg(apic, APIC_TDCR);
 655        tmp1 = tdcr & 0xf;
 656        tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
 657        apic->divide_count = 0x1 << (tmp2 & 0x7);
 658
 659        apic_debug("timer divide count is 0x%x\n",
 660                                   apic->divide_count);
 661}
 662
 663static void start_apic_timer(struct kvm_lapic *apic)
 664{
 665        ktime_t now = apic->lapic_timer.timer.base->get_time();
 666
 667        apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
 668                    APIC_BUS_CYCLE_NS * apic->divide_count;
 669        atomic_set(&apic->lapic_timer.pending, 0);
 670
 671        if (!apic->lapic_timer.period)
 672                return;
 673        /*
 674         * Do not allow the guest to program periodic timers with small
 675         * interval, since the hrtimers are not throttled by the host
 676         * scheduler.
 677         */
 678        if (apic_lvtt_period(apic)) {
 679                if (apic->lapic_timer.period < NSEC_PER_MSEC/2)
 680                        apic->lapic_timer.period = NSEC_PER_MSEC/2;
 681        }
 682
 683        hrtimer_start(&apic->lapic_timer.timer,
 684                      ktime_add_ns(now, apic->lapic_timer.period),
 685                      HRTIMER_MODE_ABS);
 686
 687        apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
 688                           PRIx64 ", "
 689                           "timer initial count 0x%x, period %lldns, "
 690                           "expire @ 0x%016" PRIx64 ".\n", __func__,
 691                           APIC_BUS_CYCLE_NS, ktime_to_ns(now),
 692                           apic_get_reg(apic, APIC_TMICT),
 693                           apic->lapic_timer.period,
 694                           ktime_to_ns(ktime_add_ns(now,
 695                                        apic->lapic_timer.period)));
 696}
 697
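     /*
      * Track, per VM, how many local APICs currently have LVT0 programmed
      * for unmasked NMI delivery, i.e. are acting as an NMI watchdog.
      */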
 698static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
 699{
 700        int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
 701
 702        if (apic_lvt_nmi_mode(lvt0_val)) {
 703                if (!nmi_wd_enabled) {
 704                        apic_debug("Receive NMI setting on APIC_LVT0 "
 705                                   "for cpu %d\n", apic->vcpu->vcpu_id);
 706                        apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
 707                }
 708        } else if (nmi_wd_enabled)
 709                apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
 710}
 711
 712static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 713{
 714        int ret = 0;
 715
 716        trace_kvm_apic_write(reg, val);
 717
 718        switch (reg) {
 719        case APIC_ID:           /* Local APIC ID */
 720                if (!apic_x2apic_mode(apic))
 721                        apic_set_reg(apic, APIC_ID, val);
 722                else
 723                        ret = 1;
 724                break;
 725
 726        case APIC_TASKPRI:
 727                report_tpr_access(apic, true);
 728                apic_set_tpr(apic, val & 0xff);
 729                break;
 730
 731        case APIC_EOI:
 732                apic_set_eoi(apic);
 733                break;
 734
 735        case APIC_LDR:
 736                if (!apic_x2apic_mode(apic))
 737                        apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
 738                else
 739                        ret = 1;
 740                break;
 741
 742        case APIC_DFR:
 743                if (!apic_x2apic_mode(apic))
 744                        apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
 745                else
 746                        ret = 1;
 747                break;
 748
 749        case APIC_SPIV: {
 750                u32 mask = 0x3ff;
 751                if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
 752                        mask |= APIC_SPIV_DIRECTED_EOI;
 753                apic_set_reg(apic, APIC_SPIV, val & mask);
 754                if (!(val & APIC_SPIV_APIC_ENABLED)) {
 755                        int i;
 756                        u32 lvt_val;
 757
 758                        for (i = 0; i < APIC_LVT_NUM; i++) {
 759                                lvt_val = apic_get_reg(apic,
 760                                                       APIC_LVTT + 0x10 * i);
 761                                apic_set_reg(apic, APIC_LVTT + 0x10 * i,
 762                                             lvt_val | APIC_LVT_MASKED);
 763                        }
 764                        atomic_set(&apic->lapic_timer.pending, 0);
 765
 766                }
 767                break;
 768        }
 769        case APIC_ICR:
 770                /* No delay here, so we always clear the pending bit */
 771                apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
 772                apic_send_ipi(apic);
 773                break;
 774
 775        case APIC_ICR2:
 776                if (!apic_x2apic_mode(apic))
 777                        val &= 0xff000000;
 778                apic_set_reg(apic, APIC_ICR2, val);
 779                break;
 780
 781        case APIC_LVT0:
  782                apic_manage_nmi_watchdog(apic, val);    /* fall through */
 783        case APIC_LVTT:
 784        case APIC_LVTTHMR:
 785        case APIC_LVTPC:
 786        case APIC_LVT1:
 787        case APIC_LVTERR:
 788                /* TODO: Check vector */
 789                if (!apic_sw_enabled(apic))
 790                        val |= APIC_LVT_MASKED;
 791
 792                val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
 793                apic_set_reg(apic, reg, val);
 794
 795                break;
 796
 797        case APIC_TMICT:
 798                hrtimer_cancel(&apic->lapic_timer.timer);
 799                apic_set_reg(apic, APIC_TMICT, val);
 800                start_apic_timer(apic);
 801                break;
 802
 803        case APIC_TDCR:
 804                if (val & 4)
 805                        printk(KERN_ERR "KVM_WRITE:TDCR %x\n", val);
 806                apic_set_reg(apic, APIC_TDCR, val);
 807                update_divide_count(apic);
 808                break;
 809
 810        case APIC_ESR:
 811                if (apic_x2apic_mode(apic) && val != 0) {
 812                        printk(KERN_ERR "KVM_WRITE:ESR not zero %x\n", val);
 813                        ret = 1;
 814                }
 815                break;
 816
 817        case APIC_SELF_IPI:
 818                if (apic_x2apic_mode(apic)) {
 819                        apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
 820                } else
 821                        ret = 1;
 822                break;
 823        default:
 824                ret = 1;
 825                break;
 826        }
 827        if (ret)
 828                apic_debug("Local APIC Write to read-only register %x\n", reg);
 829        return ret;
 830}
 831
 832static int apic_mmio_write(struct kvm_io_device *this,
 833                            gpa_t address, int len, const void *data)
 834{
 835        struct kvm_lapic *apic = to_lapic(this);
 836        unsigned int offset = address - apic->base_address;
 837        u32 val;
 838
 839        if (!apic_mmio_in_range(apic, address))
 840                return -EOPNOTSUPP;
 841
 842        /*
  843         * APIC registers must be aligned on a 128-bit boundary, and
  844         * 32/64/128-bit registers must be accessed through 32-bit reads
  845         * and writes.  Refer to SDM 8.4.1.
 846         */
 847        if (len != 4 || (offset & 0xf)) {
 848                /* Don't shout loud, $infamous_os would cause only noise. */
 849                apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
 850                return 0;
 851        }
 852
  853        val = *(u32 *)data;
 854
 855        /* too common printing */
 856        if (offset != APIC_EOI)
 857                apic_debug("%s: offset 0x%x with length 0x%x, and value is "
 858                           "0x%x\n", __func__, offset, len, val);
 859
 860        apic_reg_write(apic, offset & 0xff0, val);
 861
 862        return 0;
 863}
 864
 865void kvm_free_lapic(struct kvm_vcpu *vcpu)
 866{
 867        if (!vcpu->arch.apic)
 868                return;
 869
 870        hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer);
 871
 872        if (vcpu->arch.apic->regs_page)
 873                __free_page(vcpu->arch.apic->regs_page);
 874
 875        kfree(vcpu->arch.apic);
 876}
 877
 878/*
 879 *----------------------------------------------------------------------
 880 * LAPIC interface
 881 *----------------------------------------------------------------------
 882 */
 883
 884void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 885{
 886        struct kvm_lapic *apic = vcpu->arch.apic;
 887
 888        if (!apic)
 889                return;
 890        apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
 891                     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
 892}
 893
 894u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 895{
 896        struct kvm_lapic *apic = vcpu->arch.apic;
 897        u64 tpr;
 898
 899        if (!apic)
 900                return 0;
 901        tpr = (u64) apic_get_reg(apic, APIC_TASKPRI);
 902
 903        return (tpr & 0xf0) >> 4;
 904}
 905
 906void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 907{
 908        struct kvm_lapic *apic = vcpu->arch.apic;
 909
 910        if (!apic) {
 911                value |= MSR_IA32_APICBASE_BSP;
 912                vcpu->arch.apic_base = value;
 913                return;
 914        }
 915
 916        if (!kvm_vcpu_is_bsp(apic->vcpu))
 917                value &= ~MSR_IA32_APICBASE_BSP;
 918
 919        vcpu->arch.apic_base = value;
 920        if (apic_x2apic_mode(apic)) {
 921                u32 id = kvm_apic_id(apic);
 922                u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
 923                apic_set_reg(apic, APIC_LDR, ldr);
 924        }
 925        apic->base_address = apic->vcpu->arch.apic_base &
 926                             MSR_IA32_APICBASE_BASE;
 927
 928        /* with FSB delivery interrupt, we can restart APIC functionality */
 929        apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
 930                   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
 931
 932}
 933
 934void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 935{
 936        struct kvm_lapic *apic;
 937        int i;
 938
 939        apic_debug("%s\n", __func__);
 940
 941        ASSERT(vcpu);
 942        apic = vcpu->arch.apic;
 943        ASSERT(apic != NULL);
 944
 945        /* Stop the timer in case it's a reset to an active apic */
 946        hrtimer_cancel(&apic->lapic_timer.timer);
 947
 948        apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
 949        kvm_apic_set_version(apic->vcpu);
 950
 951        for (i = 0; i < APIC_LVT_NUM; i++)
 952                apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
 953        apic_set_reg(apic, APIC_LVT0,
 954                     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 955
 956        apic_set_reg(apic, APIC_DFR, 0xffffffffU);
 957        apic_set_reg(apic, APIC_SPIV, 0xff);
 958        apic_set_reg(apic, APIC_TASKPRI, 0);
 959        apic_set_reg(apic, APIC_LDR, 0);
 960        apic_set_reg(apic, APIC_ESR, 0);
 961        apic_set_reg(apic, APIC_ICR, 0);
 962        apic_set_reg(apic, APIC_ICR2, 0);
 963        apic_set_reg(apic, APIC_TDCR, 0);
 964        apic_set_reg(apic, APIC_TMICT, 0);
 965        for (i = 0; i < 8; i++) {
 966                apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
 967                apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
 968                apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 969        }
 970        apic->irr_pending = false;
 971        update_divide_count(apic);
 972        atomic_set(&apic->lapic_timer.pending, 0);
 973        if (kvm_vcpu_is_bsp(vcpu))
 974                vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
 975        apic_update_ppr(apic);
 976
 977        vcpu->arch.apic_arb_prio = 0;
 978
 979        apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
 980                   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
 981                   vcpu, kvm_apic_id(apic),
 982                   vcpu->arch.apic_base, apic->base_address);
 983}
 984
 985bool kvm_apic_present(struct kvm_vcpu *vcpu)
 986{
 987        return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic);
 988}
 989
 990int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
 991{
 992        return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
 993}
 994
 995/*
 996 *----------------------------------------------------------------------
 997 * timer interface
 998 *----------------------------------------------------------------------
 999 */
1000
1001static bool lapic_is_periodic(struct kvm_timer *ktimer)
1002{
1003        struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
1004                                              lapic_timer);
1005        return apic_lvtt_period(apic);
1006}
1007
1008int apic_has_pending_timer(struct kvm_vcpu *vcpu)
1009{
1010        struct kvm_lapic *lapic = vcpu->arch.apic;
1011
1012        if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT))
1013                return atomic_read(&lapic->lapic_timer.pending);
1014
1015        return 0;
1016}
1017
1018static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
1019{
1020        u32 reg = apic_get_reg(apic, lvt_type);
1021        int vector, mode, trig_mode;
1022
1023        if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
1024                vector = reg & APIC_VECTOR_MASK;
1025                mode = reg & APIC_MODE_MASK;
1026                trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
1027                return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
1028        }
1029        return 0;
1030}
1031
1032void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
1033{
1034        struct kvm_lapic *apic = vcpu->arch.apic;
1035
1036        if (apic)
1037                kvm_apic_local_deliver(apic, APIC_LVT0);
1038}
1039
1040static struct kvm_timer_ops lapic_timer_ops = {
1041        .is_periodic = lapic_is_periodic,
1042};
1043
1044static const struct kvm_io_device_ops apic_mmio_ops = {
1045        .read     = apic_mmio_read,
1046        .write    = apic_mmio_write,
1047};
1048
1049int kvm_create_lapic(struct kvm_vcpu *vcpu)
1050{
1051        struct kvm_lapic *apic;
1052
1053        ASSERT(vcpu != NULL);
1054        apic_debug("apic_init %d\n", vcpu->vcpu_id);
1055
1056        apic = kzalloc(sizeof(*apic), GFP_KERNEL);
1057        if (!apic)
1058                goto nomem;
1059
1060        vcpu->arch.apic = apic;
1061
1062        apic->regs_page = alloc_page(GFP_KERNEL);
1063        if (apic->regs_page == NULL) {
1064                printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
1065                       vcpu->vcpu_id);
1066                goto nomem_free_apic;
1067        }
1068        apic->regs = page_address(apic->regs_page);
1069        memset(apic->regs, 0, PAGE_SIZE);
1070        apic->vcpu = vcpu;
1071
1072        hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
1073                     HRTIMER_MODE_ABS);
1074        apic->lapic_timer.timer.function = kvm_timer_fn;
1075        apic->lapic_timer.t_ops = &lapic_timer_ops;
1076        apic->lapic_timer.kvm = vcpu->kvm;
1077        apic->lapic_timer.vcpu = vcpu;
1078
1079        apic->base_address = APIC_DEFAULT_PHYS_BASE;
1080        vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
1081
1082        kvm_lapic_reset(vcpu);
1083        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
1084
1085        return 0;
1086nomem_free_apic:
1087        kfree(apic);
1088nomem:
1089        return -ENOMEM;
1090}
1091
1092int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
1093{
1094        struct kvm_lapic *apic = vcpu->arch.apic;
1095        int highest_irr;
1096
1097        if (!apic || !apic_enabled(apic))
1098                return -1;
1099
1100        apic_update_ppr(apic);
1101        highest_irr = apic_find_highest_irr(apic);
1102        if ((highest_irr == -1) ||
1103            ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
1104                return -1;
1105        return highest_irr;
1106}
1107
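     /*
      * Only the BSP accepts PIC interrupts, and only while its LAPIC is
      * hardware-disabled or LVT0 is unmasked in ExtINT (virtual wire) mode.
      */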
1108int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
1109{
1110        u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
1111        int r = 0;
1112
1113        if (kvm_vcpu_is_bsp(vcpu)) {
1114                if (!apic_hw_enabled(vcpu->arch.apic))
1115                        r = 1;
1116                if ((lvt0 & APIC_LVT_MASKED) == 0 &&
1117                    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
1118                        r = 1;
1119        }
1120        return r;
1121}
1122
1123void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1124{
1125        struct kvm_lapic *apic = vcpu->arch.apic;
1126
1127        if (apic && atomic_read(&apic->lapic_timer.pending) > 0) {
1128                if (kvm_apic_local_deliver(apic, APIC_LVTT))
1129                        atomic_dec(&apic->lapic_timer.pending);
1130        }
1131}
1132
1133int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
1134{
1135        int vector = kvm_apic_has_interrupt(vcpu);
1136        struct kvm_lapic *apic = vcpu->arch.apic;
1137
1138        if (vector == -1)
1139                return -1;
1140
1141        apic_set_vector(vector, apic->regs + APIC_ISR);
1142        apic_update_ppr(apic);
1143        apic_clear_irr(vector, apic);
1144        return vector;
1145}
1146
1147void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
1148{
1149        struct kvm_lapic *apic = vcpu->arch.apic;
1150
1151        apic->base_address = vcpu->arch.apic_base &
1152                             MSR_IA32_APICBASE_BASE;
1153        kvm_apic_set_version(vcpu);
1154
1155        apic_update_ppr(apic);
1156        hrtimer_cancel(&apic->lapic_timer.timer);
1157        update_divide_count(apic);
1158        start_apic_timer(apic);
1159}
1160
1161void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
1162{
1163        struct kvm_lapic *apic = vcpu->arch.apic;
1164        struct hrtimer *timer;
1165
1166        if (!apic)
1167                return;
1168
1169        timer = &apic->lapic_timer.timer;
1170        if (hrtimer_cancel(timer))
1171                hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
1172}
1173
1174void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
1175{
1176        u32 data;
1177        void *vapic;
1178
1179        if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
1180                return;
1181
1182        vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
1183        data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
1184        kunmap_atomic(vapic, KM_USER0);
1185
1186        apic_set_tpr(vcpu->arch.apic, data & 0xff);
1187}
1188
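     /*
      * Mirror the TPR, the highest in-service priority class and the
      * highest pending vector into the guest-visible vapic page (bytes 0,
      * 1 and 3 of the word, respectively).
      */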
1189void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
1190{
1191        u32 data, tpr;
1192        int max_irr, max_isr;
1193        struct kvm_lapic *apic;
1194        void *vapic;
1195
1196        if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
1197                return;
1198
1199        apic = vcpu->arch.apic;
1200        tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff;
1201        max_irr = apic_find_highest_irr(apic);
1202        if (max_irr < 0)
1203                max_irr = 0;
1204        max_isr = apic_find_highest_isr(apic);
1205        if (max_isr < 0)
1206                max_isr = 0;
1207        data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
1208
1209        vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
1210        *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
1211        kunmap_atomic(vapic, KM_USER0);
1212}
1213
1214void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
1215{
1216        if (!irqchip_in_kernel(vcpu->kvm))
1217                return;
1218
1219        vcpu->arch.apic->vapic_addr = vapic_addr;
1220}
1221
1222int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1223{
1224        struct kvm_lapic *apic = vcpu->arch.apic;
1225        u32 reg = (msr - APIC_BASE_MSR) << 4;
1226
1227        if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
1228                return 1;
1229
 1230        /* if this is an ICR write, set the destination (ICR2) before the command (ICR) */
1231        if (msr == 0x830)
1232                apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
1233        return apic_reg_write(apic, reg, (u32)data);
1234}
1235
1236int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
1237{
1238        struct kvm_lapic *apic = vcpu->arch.apic;
1239        u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
1240
1241        if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
1242                return 1;
1243
1244        if (apic_reg_read(apic, reg, 4, &low))
1245                return 1;
1246        if (msr == 0x830)
1247                apic_reg_read(apic, APIC_ICR2, 4, &high);
1248
1249        *data = (((u64)high) << 32) | low;
1250
1251        return 0;
1252}
1253