linux/arch/x86/kernel/apic/apic.c
<<
>>
Prefs
   1/*
   2 *      Local APIC handling, local APIC timers
   3 *
   4 *      (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
   5 *
   6 *      Fixes
   7 *      Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
   8 *                                      thanks to Eric Gilmore
   9 *                                      and Rolf G. Tews
  10 *                                      for testing these extensively.
  11 *      Maciej W. Rozycki       :       Various updates and fixes.
  12 *      Mikael Pettersson       :       Power Management for UP-APIC.
  13 *      Pavel Machek and
  14 *      Mikael Pettersson       :       PM converted to driver model.
  15 */
  16
  17#include <linux/perf_event.h>
  18#include <linux/kernel_stat.h>
  19#include <linux/mc146818rtc.h>
  20#include <linux/acpi_pmtmr.h>
  21#include <linux/clockchips.h>
  22#include <linux/interrupt.h>
  23#include <linux/bootmem.h>
  24#include <linux/ftrace.h>
  25#include <linux/ioport.h>
  26#include <linux/module.h>
  27#include <linux/syscore_ops.h>
  28#include <linux/delay.h>
  29#include <linux/timex.h>
  30#include <linux/i8253.h>
  31#include <linux/dmar.h>
  32#include <linux/init.h>
  33#include <linux/cpu.h>
  34#include <linux/dmi.h>
  35#include <linux/smp.h>
  36#include <linux/mm.h>
  37
  38#include <asm/irq_remapping.h>
  39#include <asm/perf_event.h>
  40#include <asm/x86_init.h>
  41#include <asm/pgalloc.h>
  42#include <linux/atomic.h>
  43#include <asm/mpspec.h>
  44#include <asm/i8259.h>
  45#include <asm/proto.h>
  46#include <asm/apic.h>
  47#include <asm/io_apic.h>
  48#include <asm/desc.h>
  49#include <asm/hpet.h>
  50#include <asm/idle.h>
  51#include <asm/mtrr.h>
  52#include <asm/time.h>
  53#include <asm/smp.h>
  54#include <asm/mce.h>
  55#include <asm/tsc.h>
  56#include <asm/hypervisor.h>
  57
  58unsigned int num_processors;
  59
  60unsigned disabled_cpus __cpuinitdata;
  61
  62/* Processor that is doing the boot up */
  63unsigned int boot_cpu_physical_apicid = -1U;
  64
  65/*
  66 * The highest APIC ID seen during enumeration.
  67 */
  68unsigned int max_physical_apicid;
  69
  70/*
  71 * Bitmask of physically existing CPUs:
  72 */
  73physid_mask_t phys_cpu_present_map;
  74
  75/*
  76 * Map cpu index to physical APIC ID
  77 */
  78DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
  79DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
  80EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
  81EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
  82
  83#ifdef CONFIG_X86_32
  84
  85/*
  86 * On x86_32, the mapping between cpu and logical apicid may vary
  87 * depending on apic in use.  The following early percpu variable is
  88 * used for the mapping.  This is where the behaviors of x86_64 and 32
  89 * actually diverge.  Let's keep it ugly for now.
  90 */
  91DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
  92
  93/* Local APIC was disabled by the BIOS and enabled by the kernel */
  94static int enabled_via_apicbase;
  95
  96/*
  97 * Handle interrupt mode configuration register (IMCR).
  98 * This register controls whether the interrupt signals
  99 * that reach the BSP come from the master PIC or from the
 100 * local APIC. Before entering Symmetric I/O Mode, either
 101 * the BIOS or the operating system must switch out of
 102 * PIC Mode by changing the IMCR.
 103 */
 104static inline void imcr_pic_to_apic(void)
 105{
 106        /* select IMCR register */
 107        outb(0x70, 0x22);
 108        /* NMI and 8259 INTR go through APIC */
 109        outb(0x01, 0x23);
 110}
 111
 112static inline void imcr_apic_to_pic(void)
 113{
 114        /* select IMCR register */
 115        outb(0x70, 0x22);
 116        /* NMI and 8259 INTR go directly to BSP */
 117        outb(0x00, 0x23);
 118}
 119#endif
 120
 121/*
 122 * Knob to control our willingness to enable the local APIC.
 123 *
 124 * +1=force-enable
 125 */
 126static int force_enable_local_apic __initdata;
 127/*
 128 * APIC command line parameters
 129 */
 130static int __init parse_lapic(char *arg)
 131{
 132        if (config_enabled(CONFIG_X86_32) && !arg)
 133                force_enable_local_apic = 1;
 134        else if (arg && !strncmp(arg, "notscdeadline", 13))
 135                setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
 136        return 0;
 137}
 138early_param("lapic", parse_lapic);
 139
 140#ifdef CONFIG_X86_64
 141static int apic_calibrate_pmtmr __initdata;
 142static __init int setup_apicpmtimer(char *s)
 143{
 144        apic_calibrate_pmtmr = 1;
 145        notsc_setup(NULL);
 146        return 0;
 147}
 148__setup("apicpmtimer", setup_apicpmtimer);
 149#endif
 150
 151int x2apic_mode;
 152#ifdef CONFIG_X86_X2APIC
 153/* x2apic enabled before OS handover */
 154int x2apic_preenabled;
 155static int x2apic_disabled;
 156static int nox2apic;
 157static __init int setup_nox2apic(char *str)
 158{
 159        if (x2apic_enabled()) {
 160                int apicid = native_apic_msr_read(APIC_ID);
 161
 162                if (apicid >= 255) {
 163                        pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
 164                                   apicid);
 165                        return 0;
 166                }
 167
 168                pr_warning("x2apic already enabled. will disable it\n");
 169        } else
 170                setup_clear_cpu_cap(X86_FEATURE_X2APIC);
 171
 172        nox2apic = 1;
 173
 174        return 0;
 175}
 176early_param("nox2apic", setup_nox2apic);
 177#endif
 178
 179unsigned long mp_lapic_addr;
 180int disable_apic;
 181/* Disable local APIC timer from the kernel commandline or via dmi quirk */
 182static int disable_apic_timer __initdata;
 183/* Local APIC timer works in C2 */
 184int local_apic_timer_c2_ok;
 185EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 186
 187int first_system_vector = 0xfe;
 188
 189/*
 190 * Debug level, exported for io_apic.c
 191 */
 192unsigned int apic_verbosity;
 193
 194int pic_mode;
 195
 196/* Have we found an MP table */
 197int smp_found_config;
 198
 199static struct resource lapic_resource = {
 200        .name = "Local APIC",
 201        .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
 202};
 203
 204unsigned int lapic_timer_frequency = 0;
 205
 206static void apic_pm_activate(void);
 207
 208static unsigned long apic_phys;
 209
 210/*
 211 * Get the LAPIC version
 212 */
 213static inline int lapic_get_version(void)
 214{
 215        return GET_APIC_VERSION(apic_read(APIC_LVR));
 216}
 217
 218/*
 219 * Check, if the APIC is integrated or a separate chip
 220 */
 221static inline int lapic_is_integrated(void)
 222{
 223#ifdef CONFIG_X86_64
 224        return 1;
 225#else
 226        return APIC_INTEGRATED(lapic_get_version());
 227#endif
 228}
 229
 230/*
 231 * Check, whether this is a modern or a first generation APIC
 232 */
 233static int modern_apic(void)
 234{
 235        /* AMD systems use old APIC versions, so check the CPU */
 236        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
 237            boot_cpu_data.x86 >= 0xf)
 238                return 1;
 239        return lapic_get_version() >= 0x14;
 240}
 241
 242/*
 243 * right after this call apic become NOOP driven
 244 * so apic->write/read doesn't do anything
 245 */
 246static void __init apic_disable(void)
 247{
 248        pr_info("APIC: switched to apic NOOP\n");
 249        apic = &apic_noop;
 250}
 251
 252void native_apic_wait_icr_idle(void)
 253{
 254        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
 255                cpu_relax();
 256}
 257
 258u32 native_safe_apic_wait_icr_idle(void)
 259{
 260        u32 send_status;
 261        int timeout;
 262
 263        timeout = 0;
 264        do {
 265                send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
 266                if (!send_status)
 267                        break;
 268                inc_irq_stat(icr_read_retry_count);
 269                udelay(100);
 270        } while (timeout++ < 1000);
 271
 272        return send_status;
 273}
 274
 275void native_apic_icr_write(u32 low, u32 id)
 276{
 277        apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
 278        apic_write(APIC_ICR, low);
 279}
 280
 281u64 native_apic_icr_read(void)
 282{
 283        u32 icr1, icr2;
 284
 285        icr2 = apic_read(APIC_ICR2);
 286        icr1 = apic_read(APIC_ICR);
 287
 288        return icr1 | ((u64)icr2 << 32);
 289}
 290
 291#ifdef CONFIG_X86_32
 292/**
 293 * get_physical_broadcast - Get number of physical broadcast IDs
 294 */
 295int get_physical_broadcast(void)
 296{
 297        return modern_apic() ? 0xff : 0xf;
 298}
 299#endif
 300
 301/**
 302 * lapic_get_maxlvt - get the maximum number of local vector table entries
 303 */
 304int lapic_get_maxlvt(void)
 305{
 306        unsigned int v;
 307
 308        v = apic_read(APIC_LVR);
 309        /*
 310         * - we always have APIC integrated on 64bit mode
 311         * - 82489DXs do not report # of LVT entries
 312         */
 313        return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
 314}
 315
 316/*
 317 * Local APIC timer
 318 */
 319
 320/* Clock divisor */
 321#define APIC_DIVISOR 16
 322#define TSC_DIVISOR  32
 323
 324/*
 325 * This function sets up the local APIC timer, with a timeout of
 326 * 'clocks' APIC bus clock. During calibration we actually call
 327 * this function twice on the boot CPU, once with a bogus timeout
 328 * value, second time for real. The other (noncalibrating) CPUs
 329 * call this function only once, with the real, calibrated value.
 330 *
 331 * We do reads before writes even if unnecessary, to get around the
 332 * P5 APIC double write bug.
 333 */
 334static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 335{
 336        unsigned int lvtt_value, tmp_value;
 337
 338        lvtt_value = LOCAL_TIMER_VECTOR;
 339        if (!oneshot)
 340                lvtt_value |= APIC_LVT_TIMER_PERIODIC;
 341        else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 342                lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
 343
 344        if (!lapic_is_integrated())
 345                lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
 346
 347        if (!irqen)
 348                lvtt_value |= APIC_LVT_MASKED;
 349
 350        apic_write(APIC_LVTT, lvtt_value);
 351
 352        if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
 353                printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
 354                return;
 355        }
 356
 357        /*
 358         * Divide PICLK by 16
 359         */
 360        tmp_value = apic_read(APIC_TDCR);
 361        apic_write(APIC_TDCR,
 362                (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
 363                APIC_TDR_DIV_16);
 364
 365        if (!oneshot)
 366                apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
 367}
 368
/*
 * Setup extended LVT, AMD specific
 *
 * Software should use the LVT offsets the BIOS provides.  The offsets
 * are determined by the subsystems using it like those for MCE
 * threshold or IBS.  On K8 only offset 0 (APIC500) and MCE interrupts
 * are supported. Beginning with family 10h at least 4 offsets are
 * available.
 *
 * Since the offsets must be consistent for all cores, we keep track
 * of the LVT offsets in software and reserve the offset for the same
 * vector also to be used on other cores. An offset is freed by
 * setting the entry to APIC_EILVT_MASKED.
 *
 * If the BIOS is right, there should be no conflicts. Otherwise a
 * "[Firmware Bug]: ..." error message is generated. However, if
 * software does not properly determine the offsets, it is not
 * necessarily a BIOS bug.
 */
 388
 389static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
 390
 391static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
 392{
 393        return (old & APIC_EILVT_MASKED)
 394                || (new == APIC_EILVT_MASKED)
 395                || ((new & ~APIC_EILVT_MASKED) == old);
 396}
 397
 398static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
 399{
 400        unsigned int rsvd, vector;
 401
 402        if (offset >= APIC_EILVT_NR_MAX)
 403                return ~0;
 404
 405        rsvd = atomic_read(&eilvt_offsets[offset]);
 406        do {
 407                vector = rsvd & ~APIC_EILVT_MASKED;     /* 0: unassigned */
 408                if (vector && !eilvt_entry_is_changeable(vector, new))
 409                        /* may not change if vectors are different */
 410                        return rsvd;
 411                rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
 412        } while (rsvd != new);
 413
 414        rsvd &= ~APIC_EILVT_MASKED;
 415        if (rsvd && rsvd != vector)
 416                pr_info("LVT offset %d assigned for vector 0x%02x\n",
 417                        offset, rsvd);
 418
 419        return new;
 420}
 421
 422/*
 423 * If mask=1, the LVT entry does not generate interrupts while mask=0
 424 * enables the vector. See also the BKDGs. Must be called with
 425 * preemption disabled.
 426 */
 427
 428int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
 429{
 430        unsigned long reg = APIC_EILVTn(offset);
 431        unsigned int new, old, reserved;
 432
 433        new = (mask << 16) | (msg_type << 8) | vector;
 434        old = apic_read(reg);
 435        reserved = reserve_eilvt_offset(offset, new);
 436
 437        if (reserved != new) {
 438                pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
 439                       "vector 0x%x, but the register is already in use for "
 440                       "vector 0x%x on another cpu\n",
 441                       smp_processor_id(), reg, offset, new, reserved);
 442                return -EINVAL;
 443        }
 444
 445        if (!eilvt_entry_is_changeable(old, new)) {
 446                pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
 447                       "vector 0x%x, but the register is already in use for "
 448                       "vector 0x%x on this cpu\n",
 449                       smp_processor_id(), reg, offset, new, old);
 450                return -EBUSY;
 451        }
 452
 453        apic_write(reg, new);
 454
 455        return 0;
 456}
 457EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
 458
 459/*
 460 * Program the next event, relative to now
 461 */
 462static int lapic_next_event(unsigned long delta,
 463                            struct clock_event_device *evt)
 464{
 465        apic_write(APIC_TMICT, delta);
 466        return 0;
 467}
 468
 469static int lapic_next_deadline(unsigned long delta,
 470                               struct clock_event_device *evt)
 471{
 472        u64 tsc;
 473
 474        rdtscll(tsc);
 475        wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
 476        return 0;
 477}
 478
 479/*
 480 * Setup the lapic timer in periodic or oneshot mode
 481 */
 482static void lapic_timer_setup(enum clock_event_mode mode,
 483                              struct clock_event_device *evt)
 484{
 485        unsigned long flags;
 486        unsigned int v;
 487
 488        /* Lapic used as dummy for broadcast ? */
 489        if (evt->features & CLOCK_EVT_FEAT_DUMMY)
 490                return;
 491
 492        local_irq_save(flags);
 493
 494        switch (mode) {
 495        case CLOCK_EVT_MODE_PERIODIC:
 496        case CLOCK_EVT_MODE_ONESHOT:
 497                __setup_APIC_LVTT(lapic_timer_frequency,
 498                                  mode != CLOCK_EVT_MODE_PERIODIC, 1);
 499                break;
 500        case CLOCK_EVT_MODE_UNUSED:
 501        case CLOCK_EVT_MODE_SHUTDOWN:
 502                v = apic_read(APIC_LVTT);
 503                v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
 504                apic_write(APIC_LVTT, v);
 505                apic_write(APIC_TMICT, 0);
 506                break;
 507        case CLOCK_EVT_MODE_RESUME:
 508                /* Nothing to do here */
 509                break;
 510        }
 511
 512        local_irq_restore(flags);
 513}
 514
 515/*
 516 * Local APIC timer broadcast function
 517 */
 518static void lapic_timer_broadcast(const struct cpumask *mask)
 519{
 520#ifdef CONFIG_SMP
 521        apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
 522#endif
 523}
 524
 525
 526/*
 527 * The local apic timer can be used for any function which is CPU local.
 528 */
 529static struct clock_event_device lapic_clockevent = {
 530        .name           = "lapic",
 531        .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
 532                        | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
 533        .shift          = 32,
 534        .set_mode       = lapic_timer_setup,
 535        .set_next_event = lapic_next_event,
 536        .broadcast      = lapic_timer_broadcast,
 537        .rating         = 100,
 538        .irq            = -1,
 539};
 540static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 541
 542/*
 543 * Setup the local APIC timer for this CPU. Copy the initialized values
 544 * of the boot CPU and register the clock event in the framework.
 545 */
 546static void __cpuinit setup_APIC_timer(void)
 547{
 548        struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 549
 550        if (this_cpu_has(X86_FEATURE_ARAT)) {
 551                lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
 552                /* Make LAPIC timer preferrable over percpu HPET */
 553                lapic_clockevent.rating = 150;
 554        }
 555
 556        memcpy(levt, &lapic_clockevent, sizeof(*levt));
 557        levt->cpumask = cpumask_of(smp_processor_id());
 558
 559        if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
 560                levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
 561                                    CLOCK_EVT_FEAT_DUMMY);
 562                levt->set_next_event = lapic_next_deadline;
 563                clockevents_config_and_register(levt,
 564                                                (tsc_khz / TSC_DIVISOR) * 1000,
 565                                                0xF, ~0UL);
 566        } else
 567                clockevents_register_device(levt);
 568}
 569
/*
 * In this function we calibrate APIC bus clocks to the external timer.
 *
 * We want to do the calibration only once since we want to have local timer
 * irqs synchronized. CPUs connected by the same APIC bus have the very same
 * bus frequency.
 *
 * This was previously done by reading the PIT/HPET and waiting for a wrap
 * around to find out, that a tick has elapsed. I have a box, where the PIT
 * readout is broken, so it never gets out of the wait loop again. This was
 * also reported by others.
 *
 * Monitoring the jiffies value is inaccurate and the clockevents
 * infrastructure allows us to do a simple substitution of the interrupt
 * handler.
 *
 * The calibration routine also uses the pm_timer when possible, as the PIT
 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
 * back to normal later in the boot process).
 */
 590
 591#define LAPIC_CAL_LOOPS         (HZ/10)
 592
 593static __initdata int lapic_cal_loops = -1;
 594static __initdata long lapic_cal_t1, lapic_cal_t2;
 595static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
 596static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
 597static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
 598
 599/*
 600 * Temporary interrupt handler.
 601 */
 602static void __init lapic_cal_handler(struct clock_event_device *dev)
 603{
 604        unsigned long long tsc = 0;
 605        long tapic = apic_read(APIC_TMCCT);
 606        unsigned long pm = acpi_pm_read_early();
 607
 608        if (cpu_has_tsc)
 609                rdtscll(tsc);
 610
 611        switch (lapic_cal_loops++) {
 612        case 0:
 613                lapic_cal_t1 = tapic;
 614                lapic_cal_tsc1 = tsc;
 615                lapic_cal_pm1 = pm;
 616                lapic_cal_j1 = jiffies;
 617                break;
 618
 619        case LAPIC_CAL_LOOPS:
 620                lapic_cal_t2 = tapic;
 621                lapic_cal_tsc2 = tsc;
 622                if (pm < lapic_cal_pm1)
 623                        pm += ACPI_PM_OVRRUN;
 624                lapic_cal_pm2 = pm;
 625                lapic_cal_j2 = jiffies;
 626                break;
 627        }
 628}
 629
 630static int __init
 631calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
 632{
 633        const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
 634        const long pm_thresh = pm_100ms / 100;
 635        unsigned long mult;
 636        u64 res;
 637
 638#ifndef CONFIG_X86_PM_TIMER
 639        return -1;
 640#endif
 641
 642        apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
 643
 644        /* Check, if the PM timer is available */
 645        if (!deltapm)
 646                return -1;
 647
 648        mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
 649
 650        if (deltapm > (pm_100ms - pm_thresh) &&
 651            deltapm < (pm_100ms + pm_thresh)) {
 652                apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
 653                return 0;
 654        }
 655
 656        res = (((u64)deltapm) *  mult) >> 22;
 657        do_div(res, 1000000);
 658        pr_warning("APIC calibration not consistent "
 659                   "with PM-Timer: %ldms instead of 100ms\n",(long)res);
 660
 661        /* Correct the lapic counter value */
 662        res = (((u64)(*delta)) * pm_100ms);
 663        do_div(res, deltapm);
 664        pr_info("APIC delta adjusted to PM-Timer: "
 665                "%lu (%ld)\n", (unsigned long)res, *delta);
 666        *delta = (long)res;
 667
 668        /* Correct the tsc counter value */
 669        if (cpu_has_tsc) {
 670                res = (((u64)(*deltatsc)) * pm_100ms);
 671                do_div(res, deltapm);
 672                apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
 673                                          "PM-Timer: %lu (%ld)\n",
 674                                        (unsigned long)res, *deltatsc);
 675                *deltatsc = (long)res;
 676        }
 677
 678        return 0;
 679}
 680
 681static int __init calibrate_APIC_clock(void)
 682{
 683        struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 684        void (*real_handler)(struct clock_event_device *dev);
 685        unsigned long deltaj;
 686        long delta, deltatsc;
 687        int pm_referenced = 0;
 688
 689        /**
 690         * check if lapic timer has already been calibrated by platform
 691         * specific routine, such as tsc calibration code. if so, we just fill
 692         * in the clockevent structure and return.
 693         */
 694
 695        if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
 696                return 0;
 697        } else if (lapic_timer_frequency) {
 698                apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
 699                                lapic_timer_frequency);
 700                lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
 701                                        TICK_NSEC, lapic_clockevent.shift);
 702                lapic_clockevent.max_delta_ns =
 703                        clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
 704                lapic_clockevent.min_delta_ns =
 705                        clockevent_delta2ns(0xF, &lapic_clockevent);
 706                lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 707                return 0;
 708        }
 709
 710        apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
 711                    "calibrating APIC timer ...\n");
 712
 713        local_irq_disable();
 714
 715        /* Replace the global interrupt handler */
 716        real_handler = global_clock_event->event_handler;
 717        global_clock_event->event_handler = lapic_cal_handler;
 718
 719        /*
 720         * Setup the APIC counter to maximum. There is no way the lapic
 721         * can underflow in the 100ms detection time frame
 722         */
 723        __setup_APIC_LVTT(0xffffffff, 0, 0);
 724
 725        /* Let the interrupts run */
 726        local_irq_enable();
 727
 728        while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
 729                cpu_relax();
 730
 731        local_irq_disable();
 732
 733        /* Restore the real event handler */
 734        global_clock_event->event_handler = real_handler;
 735
 736        /* Build delta t1-t2 as apic timer counts down */
 737        delta = lapic_cal_t1 - lapic_cal_t2;
 738        apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
 739
 740        deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
 741
 742        /* we trust the PM based calibration if possible */
 743        pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
 744                                        &delta, &deltatsc);
 745
 746        /* Calculate the scaled math multiplication factor */
 747        lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
 748                                       lapic_clockevent.shift);
 749        lapic_clockevent.max_delta_ns =
 750                clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
 751        lapic_clockevent.min_delta_ns =
 752                clockevent_delta2ns(0xF, &lapic_clockevent);
 753
 754        lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
 755
 756        apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
 757        apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
 758        apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
 759                    lapic_timer_frequency);
 760
 761        if (cpu_has_tsc) {
 762                apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
 763                            "%ld.%04ld MHz.\n",
 764                            (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
 765                            (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
 766        }
 767
 768        apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
 769                    "%u.%04u MHz.\n",
 770                    lapic_timer_frequency / (1000000 / HZ),
 771                    lapic_timer_frequency % (1000000 / HZ));
 772
 773        /*
 774         * Do a sanity check on the APIC calibration result
 775         */
 776        if (lapic_timer_frequency < (1000000 / HZ)) {
 777                local_irq_enable();
 778                pr_warning("APIC frequency too slow, disabling apic timer\n");
 779                return -1;
 780        }
 781
 782        levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
 783
 784        /*
 785         * PM timer calibration failed or not turned on
 786         * so lets try APIC timer based calibration
 787         */
 788        if (!pm_referenced) {
 789                apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
 790
 791                /*
 792                 * Setup the apic timer manually
 793                 */
 794                levt->event_handler = lapic_cal_handler;
 795                lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
 796                lapic_cal_loops = -1;
 797
 798                /* Let the interrupts run */
 799                local_irq_enable();
 800
 801                while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
 802                        cpu_relax();
 803
 804                /* Stop the lapic timer */
 805                lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
 806
 807                /* Jiffies delta */
 808                deltaj = lapic_cal_j2 - lapic_cal_j1;
 809                apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
 810
 811                /* Check, if the jiffies result is consistent */
 812                if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
 813                        apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
 814                else
 815                        levt->features |= CLOCK_EVT_FEAT_DUMMY;
 816        } else
 817                local_irq_enable();
 818
 819        if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
 820                pr_warning("APIC timer disabled due to verification failure\n");
 821                        return -1;
 822        }
 823
 824        return 0;
 825}
 826
 827/*
 828 * Setup the boot APIC
 829 *
 830 * Calibrate and verify the result.
 831 */
 832void __init setup_boot_APIC_clock(void)
 833{
 834        /*
 835         * The local apic timer can be disabled via the kernel
 836         * commandline or from the CPU detection code. Register the lapic
 837         * timer as a dummy clock event source on SMP systems, so the
 838         * broadcast mechanism is used. On UP systems simply ignore it.
 839         */
 840        if (disable_apic_timer) {
 841                pr_info("Disabling APIC timer\n");
 842                /* No broadcast on UP ! */
 843                if (num_possible_cpus() > 1) {
 844                        lapic_clockevent.mult = 1;
 845                        setup_APIC_timer();
 846                }
 847                return;
 848        }
 849
 850        if (calibrate_APIC_clock()) {
 851                /* No broadcast on UP ! */
 852                if (num_possible_cpus() > 1)
 853                        setup_APIC_timer();
 854                return;
 855        }
 856
 857        /*
 858         * If nmi_watchdog is set to IO_APIC, we need the
 859         * PIT/HPET going.  Otherwise register lapic as a dummy
 860         * device.
 861         */
 862        lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 863
 864        /* Setup the lapic or request the broadcast */
 865        setup_APIC_timer();
 866}
 867
 868void __cpuinit setup_secondary_APIC_clock(void)
 869{
 870        setup_APIC_timer();
 871}
 872
 873/*
 874 * The guts of the apic timer interrupt
 875 */
 876static void local_apic_timer_interrupt(void)
 877{
 878        int cpu = smp_processor_id();
 879        struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
 880
 881        /*
 882         * Normally we should not be here till LAPIC has been initialized but
 883         * in some cases like kdump, its possible that there is a pending LAPIC
 884         * timer interrupt from previous kernel's context and is delivered in
 885         * new kernel the moment interrupts are enabled.
 886         *
 887         * Interrupts are enabled early and LAPIC is setup much later, hence
 888         * its possible that when we get here evt->event_handler is NULL.
 889         * Check for event_handler being NULL and discard the interrupt as
 890         * spurious.
 891         */
 892        if (!evt->event_handler) {
 893                pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
 894                /* Switch it off */
 895                lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
 896                return;
 897        }
 898
 899        /*
 900         * the NMI deadlock-detector uses this.
 901         */
 902        inc_irq_stat(apic_timer_irqs);
 903
 904        evt->event_handler(evt);
 905}
 906
 907/*
 908 * Local APIC timer interrupt. This is the most natural way for doing
 909 * local interrupts, but local timer interrupts can be emulated by
 910 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
 911 *
 912 * [ if a single-CPU system runs an SMP kernel then we call the local
 913 *   interrupt as well. Thus we cannot inline the local irq ... ]
 914 */
 915void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
 916{
 917        struct pt_regs *old_regs = set_irq_regs(regs);
 918
 919        /*
 920         * NOTE! We'd better ACK the irq immediately,
 921         * because timer handling can be slow.
 922         */
 923        ack_APIC_irq();
 924        /*
 925         * update_process_times() expects us to have done irq_enter().
 926         * Besides, if we don't timer interrupts ignore the global
 927         * interrupt lock, which is the WrongThing (tm) to do.
 928         */
 929        irq_enter();
 930        exit_idle();
 931        local_apic_timer_interrupt();
 932        irq_exit();
 933
 934        set_irq_regs(old_regs);
 935}
 936
 937int setup_profiling_timer(unsigned int multiplier)
 938{
 939        return -EINVAL;
 940}
 941
 942/*
 943 * Local APIC start and shutdown
 944 */
 945
 946/**
 947 * clear_local_APIC - shutdown the local APIC
 948 *
 949 * This is called, when a CPU is disabled and before rebooting, so the state of
 950 * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
 951 * leftovers during boot.
 952 */
 953void clear_local_APIC(void)
 954{
 955        int maxlvt;
 956        u32 v;
 957
 958        /* APIC hasn't been mapped yet */
 959        if (!x2apic_mode && !apic_phys)
 960                return;
 961
 962        maxlvt = lapic_get_maxlvt();
 963        /*
 964         * Masking an LVT entry can trigger a local APIC error
 965         * if the vector is zero. Mask LVTERR first to prevent this.
 966         */
 967        if (maxlvt >= 3) {
 968                v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
 969                apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
 970        }
 971        /*
 972         * Careful: we have to set masks only first to deassert
 973         * any level-triggered sources.
 974         */
 975        v = apic_read(APIC_LVTT);
 976        apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
 977        v = apic_read(APIC_LVT0);
 978        apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
 979        v = apic_read(APIC_LVT1);
 980        apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
 981        if (maxlvt >= 4) {
 982                v = apic_read(APIC_LVTPC);
 983                apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
 984        }
 985
 986        /* lets not touch this if we didn't frob it */
 987#ifdef CONFIG_X86_THERMAL_VECTOR
 988        if (maxlvt >= 5) {
 989                v = apic_read(APIC_LVTTHMR);
 990                apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
 991        }
 992#endif
 993#ifdef CONFIG_X86_MCE_INTEL
 994        if (maxlvt >= 6) {
 995                v = apic_read(APIC_LVTCMCI);
 996                if (!(v & APIC_LVT_MASKED))
 997                        apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
 998        }
 999#endif
1000
1001        /*
1002         * Clean APIC state for other OSs:
1003         */
1004        apic_write(APIC_LVTT, APIC_LVT_MASKED);
1005        apic_write(APIC_LVT0, APIC_LVT_MASKED);
1006        apic_write(APIC_LVT1, APIC_LVT_MASKED);
1007        if (maxlvt >= 3)
1008                apic_write(APIC_LVTERR, APIC_LVT_MASKED);
1009        if (maxlvt >= 4)
1010                apic_write(APIC_LVTPC, APIC_LVT_MASKED);
1011
1012        /* Integrated APIC (!82489DX) ? */
1013        if (lapic_is_integrated()) {
1014                if (maxlvt > 3)
1015                        /* Clear ESR due to Pentium errata 3AP and 11AP */
1016                        apic_write(APIC_ESR, 0);
1017                apic_read(APIC_ESR);
1018        }
1019}
1020
1021/**
1022 * disable_local_APIC - clear and disable the local APIC
1023 */
1024void disable_local_APIC(void)
1025{
1026        unsigned int value;
1027
1028        /* APIC hasn't been mapped yet */
1029        if (!x2apic_mode && !apic_phys)
1030                return;
1031
1032        clear_local_APIC();
1033
1034        /*
1035         * Disable APIC (implies clearing of registers
1036         * for 82489DX!).
1037         */
1038        value = apic_read(APIC_SPIV);
1039        value &= ~APIC_SPIV_APIC_ENABLED;
1040        apic_write(APIC_SPIV, value);
1041
1042#ifdef CONFIG_X86_32
1043        /*
1044         * When LAPIC was disabled by the BIOS and enabled by the kernel,
1045         * restore the disabled state.
1046         */
1047        if (enabled_via_apicbase) {
1048                unsigned int l, h;
1049
1050                rdmsr(MSR_IA32_APICBASE, l, h);
1051                l &= ~MSR_IA32_APICBASE_ENABLE;
1052                wrmsr(MSR_IA32_APICBASE, l, h);
1053        }
1054#endif
1055}
1056
1057/*
1058 * If Linux enabled the LAPIC against the BIOS default disable it down before
1059 * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
1060 * not power-off.  Additionally clear all LVT entries before disable_local_APIC
1061 * for the case where Linux didn't enable the LAPIC.
1062 */
1063void lapic_shutdown(void)
1064{
1065        unsigned long flags;
1066
1067        if (!cpu_has_apic && !apic_from_smp_config())
1068                return;
1069
1070        local_irq_save(flags);
1071
1072#ifdef CONFIG_X86_32
1073        if (!enabled_via_apicbase)
1074                clear_local_APIC();
1075        else
1076#endif
1077                disable_local_APIC();
1078
1079
1080        local_irq_restore(flags);
1081}
1082
1083/*
1084 * This is to verify that we're looking at a real local APIC.
1085 * Check these against your board if the CPUs aren't getting
1086 * started for no apparent reason.
1087 */
1088int __init verify_local_APIC(void)
1089{
1090        unsigned int reg0, reg1;
1091
1092        /*
1093         * The version register is read-only in a real APIC.
1094         */
1095        reg0 = apic_read(APIC_LVR);
1096        apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
1097        apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
1098        reg1 = apic_read(APIC_LVR);
1099        apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
1100
1101        /*
1102         * The two version reads above should print the same
1103         * numbers.  If the second one is different, then we
1104         * poke at a non-APIC.
1105         */
1106        if (reg1 != reg0)
1107                return 0;
1108
1109        /*
1110         * Check if the version looks reasonably.
1111         */
1112        reg1 = GET_APIC_VERSION(reg0);
1113        if (reg1 == 0x00 || reg1 == 0xff)
1114                return 0;
1115        reg1 = lapic_get_maxlvt();
1116        if (reg1 < 0x02 || reg1 == 0xff)
1117                return 0;
1118
1119        /*
1120         * The ID register is read/write in a real APIC.
1121         */
1122        reg0 = apic_read(APIC_ID);
1123        apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
1124        apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
1125        reg1 = apic_read(APIC_ID);
1126        apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
1127        apic_write(APIC_ID, reg0);
1128        if (reg1 != (reg0 ^ apic->apic_id_mask))
1129                return 0;
1130
1131        /*
1132         * The next two are just to see if we have sane values.
1133         * They're only really relevant if we're in Virtual Wire
1134         * compatibility mode, but most boxes are anymore.
1135         */
1136        reg0 = apic_read(APIC_LVT0);
1137        apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
1138        reg1 = apic_read(APIC_LVT1);
1139        apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
1140
1141        return 1;
1142}
1143
1144/**
1145 * sync_Arb_IDs - synchronize APIC bus arbitration IDs
1146 */
1147void __init sync_Arb_IDs(void)
1148{
1149        /*
1150         * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
1151         * needed on AMD.
1152         */
1153        if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
1154                return;
1155
1156        /*
1157         * Wait for idle.
1158         */
1159        apic_wait_icr_idle();
1160
1161        apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
1162        apic_write(APIC_ICR, APIC_DEST_ALLINC |
1163                        APIC_INT_LEVELTRIG | APIC_DM_INIT);
1164}
1165
1166/*
1167 * An initial setup of the virtual wire mode.
1168 */
1169void __init init_bsp_APIC(void)
1170{
1171        unsigned int value;
1172
1173        /*
1174         * Don't do the setup now if we have a SMP BIOS as the
1175         * through-I/O-APIC virtual wire mode might be active.
1176         */
1177        if (smp_found_config || !cpu_has_apic)
1178                return;
1179
1180        /*
1181         * Do not trust the local APIC being empty at bootup.
1182         */
1183        clear_local_APIC();
1184
1185        /*
1186         * Enable APIC.
1187         */
1188        value = apic_read(APIC_SPIV);
1189        value &= ~APIC_VECTOR_MASK;
1190        value |= APIC_SPIV_APIC_ENABLED;
1191
1192#ifdef CONFIG_X86_32
1193        /* This bit is reserved on P4/Xeon and should be cleared */
1194        if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
1195            (boot_cpu_data.x86 == 15))
1196                value &= ~APIC_SPIV_FOCUS_DISABLED;
1197        else
1198#endif
1199                value |= APIC_SPIV_FOCUS_DISABLED;
1200        value |= SPURIOUS_APIC_VECTOR;
1201        apic_write(APIC_SPIV, value);
1202
1203        /*
1204         * Set up the virtual wire mode.
1205         */
1206        apic_write(APIC_LVT0, APIC_DM_EXTINT);
1207        value = APIC_DM_NMI;
1208        if (!lapic_is_integrated())             /* 82489DX */
1209                value |= APIC_LVT_LEVEL_TRIGGER;
1210        apic_write(APIC_LVT1, value);
1211}
1212
1213static void __cpuinit lapic_setup_esr(void)
1214{
1215        unsigned int oldvalue, value, maxlvt;
1216
1217        if (!lapic_is_integrated()) {
1218                pr_info("No ESR for 82489DX.\n");
1219                return;
1220        }
1221
1222        if (apic->disable_esr) {
1223                /*
1224                 * Something untraceable is creating bad interrupts on
1225                 * secondary quads ... for the moment, just leave the
1226                 * ESR disabled - we can't do anything useful with the
1227                 * errors anyway - mbligh
1228                 */
1229                pr_info("Leaving ESR disabled.\n");
1230                return;
1231        }
1232
1233        maxlvt = lapic_get_maxlvt();
1234        if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
1235                apic_write(APIC_ESR, 0);
1236        oldvalue = apic_read(APIC_ESR);
1237
1238        /* enables sending errors */
1239        value = ERROR_APIC_VECTOR;
1240        apic_write(APIC_LVTERR, value);
1241
1242        /*
1243         * spec says clear errors after enabling vector.
1244         */
1245        if (maxlvt > 3)
1246                apic_write(APIC_ESR, 0);
1247        value = apic_read(APIC_ESR);
1248        if (value != oldvalue)
1249                apic_printk(APIC_VERBOSE, "ESR value before enabling "
1250                        "vector: 0x%08x  after: 0x%08x\n",
1251                        oldvalue, value);
1252}
1253
1254/**
1255 * setup_local_APIC - setup the local APIC
1256 *
1257 * Used to setup local APIC while initializing BSP or bringin up APs.
1258 * Always called with preemption disabled.
1259 */
1260void __cpuinit setup_local_APIC(void)
1261{
1262        int cpu = smp_processor_id();
1263        unsigned int value, queued;
1264        int i, j, acked = 0;
1265        unsigned long long tsc = 0, ntsc;
1266        long long max_loops = cpu_khz;
1267
1268        if (cpu_has_tsc)
1269                rdtscll(tsc);
1270
1271        if (disable_apic) {
1272                disable_ioapic_support();
1273                return;
1274        }
1275
1276#ifdef CONFIG_X86_32
1277        /* Pound the ESR really hard over the head with a big hammer - mbligh */
1278        if (lapic_is_integrated() && apic->disable_esr) {
1279                apic_write(APIC_ESR, 0);
1280                apic_write(APIC_ESR, 0);
1281                apic_write(APIC_ESR, 0);
1282                apic_write(APIC_ESR, 0);
1283        }
1284#endif
1285        perf_events_lapic_init();
1286
1287        /*
1288         * Double-check whether this APIC is really registered.
1289         * This is meaningless in clustered apic mode, so we skip it.
1290         */
1291        BUG_ON(!apic->apic_id_registered());
1292
1293        /*
1294         * Intel recommends to set DFR, LDR and TPR before enabling
1295         * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
1296         * document number 292116).  So here it goes...
1297         */
1298        apic->init_apic_ldr();
1299
1300#ifdef CONFIG_X86_32
1301        /*
1302         * APIC LDR is initialized.  If logical_apicid mapping was
1303         * initialized during get_smp_config(), make sure it matches the
1304         * actual value.
1305         */
1306        i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
1307        WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
1308        /* always use the value from LDR */
1309        early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
1310                logical_smp_processor_id();
1311
1312        /*
1313         * Some NUMA implementations (NUMAQ) don't initialize apicid to
1314         * node mapping during NUMA init.  Now that logical apicid is
1315         * guaranteed to be known, give it another chance.  This is already
1316         * a bit too late - percpu allocation has already happened without
1317         * proper NUMA affinity.
1318         */
1319        if (apic->x86_32_numa_cpu_node)
1320                set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
1321                                   apic->x86_32_numa_cpu_node(cpu));
1322#endif
1323
1324        /*
1325         * Set Task Priority to 'accept all'. We never change this
1326         * later on.
1327         */
1328        value = apic_read(APIC_TASKPRI);
1329        value &= ~APIC_TPRI_MASK;
1330        apic_write(APIC_TASKPRI, value);
1331
1332        /*
1333         * After a crash, we no longer service the interrupts and a pending
1334         * interrupt from previous kernel might still have ISR bit set.
1335         *
1336         * Most probably by now CPU has serviced that pending interrupt and
1337         * it might not have done the ack_APIC_irq() because it thought,
1338         * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
1339         * does not clear the ISR bit and cpu thinks it has already serivced
1340         * the interrupt. Hence a vector might get locked. It was noticed
1341         * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
1342         */
1343        do {
1344                queued = 0;
1345                for (i = APIC_ISR_NR - 1; i >= 0; i--)
1346                        queued |= apic_read(APIC_IRR + i*0x10);
1347
1348                for (i = APIC_ISR_NR - 1; i >= 0; i--) {
1349                        value = apic_read(APIC_ISR + i*0x10);
1350                        for (j = 31; j >= 0; j--) {
1351                                if (value & (1<<j)) {
1352                                        ack_APIC_irq();
1353                                        acked++;
1354                                }
1355                        }
1356                }
1357                if (acked > 256) {
1358                        printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n",
1359                               acked);
1360                        break;
1361                }
1362                if (queued) {
1363                        if (cpu_has_tsc) {
1364                                rdtscll(ntsc);
1365                                max_loops = (cpu_khz << 10) - (ntsc - tsc);
1366                        } else
1367                                max_loops--;
1368                }
1369        } while (queued && max_loops > 0);
1370        WARN_ON(max_loops <= 0);
1371
1372        /*
1373         * Now that we are all set up, enable the APIC
1374         */
1375        value = apic_read(APIC_SPIV);
1376        value &= ~APIC_VECTOR_MASK;
1377        /*
1378         * Enable APIC
1379         */
1380        value |= APIC_SPIV_APIC_ENABLED;
1381
1382#ifdef CONFIG_X86_32
1383        /*
1384         * Some unknown Intel IO/APIC (or APIC) errata is biting us with
1385         * certain networking cards. If high frequency interrupts are
1386         * happening on a particular IOAPIC pin, plus the IOAPIC routing
1387         * entry is masked/unmasked at a high rate as well then sooner or
1388         * later IOAPIC line gets 'stuck', no more interrupts are received
1389         * from the device. If focus CPU is disabled then the hang goes
1390         * away, oh well :-(
1391         *
1392         * [ This bug can be reproduced easily with a level-triggered
1393         *   PCI Ne2000 networking cards and PII/PIII processors, dual
1394         *   BX chipset. ]
1395         */
1396        /*
1397         * Actually disabling the focus CPU check just makes the hang less
1398         * frequent as it makes the interrupt distributon model be more
1399         * like LRU than MRU (the short-term load is more even across CPUs).
1400         * See also the comment in end_level_ioapic_irq().  --macro
1401         */
1402
1403        /*
1404         * - enable focus processor (bit==0)
1405         * - 64bit mode always use processor focus
1406         *   so no need to set it
1407         */
1408        value &= ~APIC_SPIV_FOCUS_DISABLED;
1409#endif
1410
1411        /*
1412         * Set spurious IRQ vector
1413         */
1414        value |= SPURIOUS_APIC_VECTOR;
1415        apic_write(APIC_SPIV, value);
1416
1417        /*
1418         * Set up LVT0, LVT1:
1419         *
1420         * set up through-local-APIC on the BP's LINT0. This is not
1421         * strictly necessary in pure symmetric-IO mode, but sometimes
1422         * we delegate interrupts to the 8259A.
1423         */
1424        /*
1425         * TODO: set up through-local-APIC from through-I/O-APIC? --macro
1426         */
1427        value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
1428        if (!cpu && (pic_mode || !value)) {
1429                value = APIC_DM_EXTINT;
1430                apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
1431        } else {
1432                value = APIC_DM_EXTINT | APIC_LVT_MASKED;
1433                apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
1434        }
1435        apic_write(APIC_LVT0, value);
1436
1437        /*
1438         * only the BP should see the LINT1 NMI signal, obviously.
1439         */
1440        if (!cpu)
1441                value = APIC_DM_NMI;
1442        else
1443                value = APIC_DM_NMI | APIC_LVT_MASKED;
1444        if (!lapic_is_integrated())             /* 82489DX */
1445                value |= APIC_LVT_LEVEL_TRIGGER;
1446        apic_write(APIC_LVT1, value);
1447
1448#ifdef CONFIG_X86_MCE_INTEL
1449        /* Recheck CMCI information after local APIC is up on CPU #0 */
1450        if (!cpu)
1451                cmci_recheck();
1452#endif
1453}
1454
1455void __cpuinit end_local_APIC_setup(void)
1456{
1457        lapic_setup_esr();
1458
1459#ifdef CONFIG_X86_32
1460        {
1461                unsigned int value;
1462                /* Disable the local apic timer */
1463                value = apic_read(APIC_LVTT);
1464                value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1465                apic_write(APIC_LVTT, value);
1466        }
1467#endif
1468
1469        apic_pm_activate();
1470}
1471
1472void __init bsp_end_local_APIC_setup(void)
1473{
1474        end_local_APIC_setup();
1475
1476        /*
1477         * Now that local APIC setup is completed for BP, configure the fault
1478         * handling for interrupt remapping.
1479         */
1480        irq_remap_enable_fault_handling();
1481
1482}
1483
1484#ifdef CONFIG_X86_X2APIC
1485/*
1486 * Need to disable xapic and x2apic at the same time and then enable xapic mode
1487 */
1488static inline void __disable_x2apic(u64 msr)
1489{
1490        wrmsrl(MSR_IA32_APICBASE,
1491               msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
1492        wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
1493}
1494
1495static __init void disable_x2apic(void)
1496{
1497        u64 msr;
1498
1499        if (!cpu_has_x2apic)
1500                return;
1501
1502        rdmsrl(MSR_IA32_APICBASE, msr);
1503        if (msr & X2APIC_ENABLE) {
1504                u32 x2apic_id = read_apic_id();
1505
1506                if (x2apic_id >= 255)
1507                        panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
1508
1509                pr_info("Disabling x2apic\n");
1510                __disable_x2apic(msr);
1511
1512                if (nox2apic) {
1513                        clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC);
1514                        setup_clear_cpu_cap(X86_FEATURE_X2APIC);
1515                }
1516
1517                x2apic_disabled = 1;
1518                x2apic_mode = 0;
1519
1520                register_lapic_address(mp_lapic_addr);
1521        }
1522}
1523
1524void check_x2apic(void)
1525{
1526        if (x2apic_enabled()) {
1527                pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
1528                x2apic_preenabled = x2apic_mode = 1;
1529        }
1530}
1531
1532void enable_x2apic(void)
1533{
1534        u64 msr;
1535
1536        rdmsrl(MSR_IA32_APICBASE, msr);
1537        if (x2apic_disabled) {
1538                __disable_x2apic(msr);
1539                return;
1540        }
1541
1542        if (!x2apic_mode)
1543                return;
1544
1545        if (!(msr & X2APIC_ENABLE)) {
1546                printk_once(KERN_INFO "Enabling x2apic\n");
1547                wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
1548        }
1549}
1550#endif /* CONFIG_X86_X2APIC */
1551
1552int __init enable_IR(void)
1553{
1554#ifdef CONFIG_IRQ_REMAP
1555        if (!irq_remapping_supported()) {
1556                pr_debug("intr-remapping not supported\n");
1557                return -1;
1558        }
1559
1560        if (!x2apic_preenabled && skip_ioapic_setup) {
1561                pr_info("Skipped enabling intr-remap because of skipping "
1562                        "io-apic setup\n");
1563                return -1;
1564        }
1565
1566        return irq_remapping_enable();
1567#endif
1568        return -1;
1569}
1570
1571void __init enable_IR_x2apic(void)
1572{
1573        unsigned long flags;
1574        int ret, x2apic_enabled = 0;
1575        int hardware_init_ret;
1576
1577        /* Make sure irq_remap_ops are initialized */
1578        setup_irq_remapping_ops();
1579
1580        hardware_init_ret = irq_remapping_prepare();
1581        if (hardware_init_ret && !x2apic_supported())
1582                return;
1583
1584        ret = save_ioapic_entries();
1585        if (ret) {
1586                pr_info("Saving IO-APIC state failed: %d\n", ret);
1587                return;
1588        }
1589
1590        local_irq_save(flags);
1591        legacy_pic->mask_all();
1592        mask_ioapic_entries();
1593
1594        if (x2apic_preenabled && nox2apic)
1595                disable_x2apic();
1596
1597        if (hardware_init_ret)
1598                ret = -1;
1599        else
1600                ret = enable_IR();
1601
1602        if (!x2apic_supported())
1603                goto skip_x2apic;
1604
1605        if (ret < 0) {
1606                /* IR is required if there is APIC ID > 255 even when running
1607                 * under KVM
1608                 */
1609                if (max_physical_apicid > 255 ||
1610                    !hypervisor_x2apic_available()) {
1611                        if (x2apic_preenabled)
1612                                disable_x2apic();
1613                        goto skip_x2apic;
1614                }
1615                /*
1616                 * without IR all CPUs can be addressed by IOAPIC/MSI
1617                 * only in physical mode
1618                 */
1619                x2apic_force_phys();
1620        }
1621
1622        if (ret == IRQ_REMAP_XAPIC_MODE) {
1623                pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
1624                goto skip_x2apic;
1625        }
1626
1627        x2apic_enabled = 1;
1628
1629        if (x2apic_supported() && !x2apic_mode) {
1630                x2apic_mode = 1;
1631                enable_x2apic();
1632                pr_info("Enabled x2apic\n");
1633        }
1634
1635skip_x2apic:
1636        if (ret < 0) /* IR enabling failed */
1637                restore_ioapic_entries();
1638        legacy_pic->restore_mask();
1639        local_irq_restore(flags);
1640}
1641
1642#ifdef CONFIG_X86_64
1643/*
1644 * Detect and enable local APICs on non-SMP boards.
1645 * Original code written by Keir Fraser.
1646 * On AMD64 we trust the BIOS - if it says no APIC it is likely
1647 * not correctly set up (usually the APIC timer won't work etc.)
1648 */
1649static int __init detect_init_APIC(void)
1650{
1651        if (!cpu_has_apic) {
1652                pr_info("No local APIC present\n");
1653                return -1;
1654        }
1655
1656        mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1657        return 0;
1658}
1659#else
1660
1661static int __init apic_verify(void)
1662{
1663        u32 features, h, l;
1664
1665        /*
1666         * The APIC feature bit should now be enabled
1667         * in `cpuid'
1668         */
1669        features = cpuid_edx(1);
1670        if (!(features & (1 << X86_FEATURE_APIC))) {
1671                pr_warning("Could not enable APIC!\n");
1672                return -1;
1673        }
1674        set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1675        mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1676
1677        /* The BIOS may have set up the APIC at some other address */
1678        if (boot_cpu_data.x86 >= 6) {
1679                rdmsr(MSR_IA32_APICBASE, l, h);
1680                if (l & MSR_IA32_APICBASE_ENABLE)
1681                        mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1682        }
1683
1684        pr_info("Found and enabled local APIC!\n");
1685        return 0;
1686}
1687
1688int __init apic_force_enable(unsigned long addr)
1689{
1690        u32 h, l;
1691
1692        if (disable_apic)
1693                return -1;
1694
1695        /*
1696         * Some BIOSes disable the local APIC in the APIC_BASE
1697         * MSR. This can only be done in software for Intel P6 or later
1698         * and AMD K7 (Model > 1) or later.
1699         */
1700        if (boot_cpu_data.x86 >= 6) {
1701                rdmsr(MSR_IA32_APICBASE, l, h);
1702                if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1703                        pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1704                        l &= ~MSR_IA32_APICBASE_BASE;
1705                        l |= MSR_IA32_APICBASE_ENABLE | addr;
1706                        wrmsr(MSR_IA32_APICBASE, l, h);
1707                        enabled_via_apicbase = 1;
1708                }
1709        }
1710        return apic_verify();
1711}
1712
1713/*
1714 * Detect and initialize APIC
1715 */
1716static int __init detect_init_APIC(void)
1717{
1718        /* Disabled by kernel option? */
1719        if (disable_apic)
1720                return -1;
1721
1722        switch (boot_cpu_data.x86_vendor) {
1723        case X86_VENDOR_AMD:
1724                if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
1725                    (boot_cpu_data.x86 >= 15))
1726                        break;
1727                goto no_apic;
1728        case X86_VENDOR_INTEL:
1729                if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
1730                    (boot_cpu_data.x86 == 5 && cpu_has_apic))
1731                        break;
1732                goto no_apic;
1733        default:
1734                goto no_apic;
1735        }
1736
1737        if (!cpu_has_apic) {
1738                /*
1739                 * Over-ride BIOS and try to enable the local APIC only if
1740                 * "lapic" specified.
1741                 */
1742                if (!force_enable_local_apic) {
1743                        pr_info("Local APIC disabled by BIOS -- "
1744                                "you can enable it with \"lapic\"\n");
1745                        return -1;
1746                }
1747                if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
1748                        return -1;
1749        } else {
1750                if (apic_verify())
1751                        return -1;
1752        }
1753
1754        apic_pm_activate();
1755
1756        return 0;
1757
1758no_apic:
1759        pr_info("No local APIC present or hardware disabled\n");
1760        return -1;
1761}
1762#endif
1763
1764/**
1765 * init_apic_mappings - initialize APIC mappings
1766 */
1767void __init init_apic_mappings(void)
1768{
1769        unsigned int new_apicid;
1770
1771        if (x2apic_mode) {
1772                boot_cpu_physical_apicid = read_apic_id();
1773                return;
1774        }
1775
1776        /* If no local APIC can be found return early */
1777        if (!smp_found_config && detect_init_APIC()) {
1778                /* lets NOP'ify apic operations */
1779                pr_info("APIC: disable apic facility\n");
1780                apic_disable();
1781        } else {
1782                apic_phys = mp_lapic_addr;
1783
1784                /*
1785                 * acpi lapic path already maps that address in
1786                 * acpi_register_lapic_address()
1787                 */
1788                if (!acpi_lapic && !smp_found_config)
1789                        register_lapic_address(apic_phys);
1790        }
1791
1792        /*
1793         * Fetch the APIC ID of the BSP in case we have a
1794         * default configuration (or the MP table is broken).
1795         */
1796        new_apicid = read_apic_id();
1797        if (boot_cpu_physical_apicid != new_apicid) {
1798                boot_cpu_physical_apicid = new_apicid;
1799                /*
1800                 * yeah -- we lie about apic_version
1801                 * in case if apic was disabled via boot option
1802                 * but it's not a problem for SMP compiled kernel
1803                 * since smp_sanity_check is prepared for such a case
1804                 * and disable smp mode
1805                 */
1806                apic_version[new_apicid] =
1807                         GET_APIC_VERSION(apic_read(APIC_LVR));
1808        }
1809}
1810
1811void __init register_lapic_address(unsigned long address)
1812{
1813        mp_lapic_addr = address;
1814
1815        if (!x2apic_mode) {
1816                set_fixmap_nocache(FIX_APIC_BASE, address);
1817                apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1818                            APIC_BASE, mp_lapic_addr);
1819        }
1820        if (boot_cpu_physical_apicid == -1U) {
1821                boot_cpu_physical_apicid  = read_apic_id();
1822                apic_version[boot_cpu_physical_apicid] =
1823                         GET_APIC_VERSION(apic_read(APIC_LVR));
1824        }
1825}
1826
/*
 * APIC version of each local APIC, indexed by physical APIC ID.
 */
int apic_version[MAX_LOCAL_APIC];

/*
 * This initializes the IO-APIC and APIC hardware if this is
 * a UP kernel.
 *
 * Returns 0 once the local APIC (and, if configured, the IO-APIC) has been
 * set up, or -1 when the APIC is disabled or was not detected.
 */
int __init APIC_init_uniprocessor(void)
{
        /* APIC use was switched off (disable_apic is set). */
        if (disable_apic) {
                pr_info("Apic disabled\n");
                return -1;
        }
#ifdef CONFIG_X86_64
        /* 64-bit: a CPU without the APIC feature bit means it is unusable. */
        if (!cpu_has_apic) {
                disable_apic = 1;
                pr_info("Apic disabled by BIOS\n");
                return -1;
        }
#else
        /* 32-bit: neither an MP configuration nor an APIC feature bit. */
        if (!smp_found_config && !cpu_has_apic)
                return -1;

        /*
         * Complain if the BIOS pretends there is one.
         */
        if (!cpu_has_apic &&
            APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
                pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
                        boot_cpu_physical_apicid);
                return -1;
        }
#endif

        default_setup_apic_routing();

        verify_local_APIC();
        connect_bsp_APIC();

#ifdef CONFIG_X86_64
        /* Program the recorded boot CPU APIC ID into the APIC ID register. */
        apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
#else
        /*
         * Hack: In case of kdump, after a crash, kernel might be booting
         * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
         * might be zero if read from MP tables. Get it from LAPIC.
         */
# ifdef CONFIG_CRASH_DUMP
        boot_cpu_physical_apicid = read_apic_id();
# endif
#endif
        physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
        setup_local_APIC();

#ifdef CONFIG_X86_IO_APIC
        /*
         * Now enable IO-APICs, actually call clear_IO_APIC
         * We need clear_IO_APIC before enabling error vector
         */
        if (!skip_ioapic_setup && nr_ioapics)
                enable_IO_APIC();
#endif

        bsp_end_local_APIC_setup();

#ifdef CONFIG_X86_IO_APIC
        /*
         * Full IO-APIC setup needs an MP configuration as well; otherwise
         * the IO-APIC count is reset to zero.
         */
        if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
                setup_IO_APIC();
        else {
                nr_ioapics = 0;
        }
#endif

        x86_init.timers.setup_percpu_clockev();
        return 0;
}
1902
1903/*
1904 * Local APIC interrupts
1905 */
1906
1907/*
1908 * This interrupt should _never_ happen with our APIC/SMP architecture
1909 */
1910void smp_spurious_interrupt(struct pt_regs *regs)
1911{
1912        u32 v;
1913
1914        irq_enter();
1915        exit_idle();
1916        /*
1917         * Check if this really is a spurious interrupt and ACK it
1918         * if it is a vectored one.  Just in case...
1919         * Spurious interrupts should not be ACKed.
1920         */
1921        v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
1922        if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
1923                ack_APIC_irq();
1924
1925        inc_irq_stat(irq_spurious_count);
1926
1927        /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1928        pr_info("spurious APIC interrupt on CPU#%d, "
1929                "should never happen.\n", smp_processor_id());
1930        irq_exit();
1931}
1932
1933/*
1934 * This interrupt should never happen with our APIC/SMP architecture
1935 */
1936void smp_error_interrupt(struct pt_regs *regs)
1937{
1938        u32 v0, v1;
1939        u32 i = 0;
1940        static const char * const error_interrupt_reason[] = {
1941                "Send CS error",                /* APIC Error Bit 0 */
1942                "Receive CS error",             /* APIC Error Bit 1 */
1943                "Send accept error",            /* APIC Error Bit 2 */
1944                "Receive accept error",         /* APIC Error Bit 3 */
1945                "Redirectable IPI",             /* APIC Error Bit 4 */
1946                "Send illegal vector",          /* APIC Error Bit 5 */
1947                "Received illegal vector",      /* APIC Error Bit 6 */
1948                "Illegal register address",     /* APIC Error Bit 7 */
1949        };
1950
1951        irq_enter();
1952        exit_idle();
1953        /* First tickle the hardware, only then report what went on. -- REW */
1954        v0 = apic_read(APIC_ESR);
1955        apic_write(APIC_ESR, 0);
1956        v1 = apic_read(APIC_ESR);
1957        ack_APIC_irq();
1958        atomic_inc(&irq_err_count);
1959
1960        apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)",
1961                    smp_processor_id(), v0 , v1);
1962
1963        v1 = v1 & 0xff;
1964        while (v1) {
1965                if (v1 & 0x1)
1966                        apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
1967                i++;
1968                v1 >>= 1;
1969        }
1970
1971        apic_printk(APIC_DEBUG, KERN_CONT "\n");
1972
1973        irq_exit();
1974}
1975
/**
 * connect_bsp_APIC - attach the APIC to the interrupt system
 *
 * On 32-bit kernels with pic_mode set, the local APIC is cleared and the
 * IMCR is switched from PIC to APIC mode.  In every configuration the APIC
 * driver's enable_apic_mode() hook is then called if the driver provides
 * one.
 */
void __init connect_bsp_APIC(void)
{
#ifdef CONFIG_X86_32
        if (pic_mode) {
                /*
                 * Do not trust the local APIC being empty at bootup.
                 */
                clear_local_APIC();
                /*
                 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
                 * local APIC to INT and NMI lines.
                 */
                apic_printk(APIC_VERBOSE, "leaving PIC mode, "
                                "enabling APIC mode.\n");
                imcr_pic_to_apic();
        }
#endif
        /* The hook is optional; drivers may leave it NULL. */
        if (apic->enable_apic_mode)
                apic->enable_apic_mode();
}
1999
2000/**
2001 * disconnect_bsp_APIC - detach the APIC from the interrupt system
2002 * @virt_wire_setup:    indicates, whether virtual wire mode is selected
2003 *
2004 * Virtual wire mode is necessary to deliver legacy interrupts even when the
2005 * APIC is disabled.
2006 */
2007void disconnect_bsp_APIC(int virt_wire_setup)
2008{
2009        unsigned int value;
2010
2011#ifdef CONFIG_X86_32
2012        if (pic_mode) {
2013                /*
2014                 * Put the board back into PIC mode (has an effect only on
2015                 * certain older boards).  Note that APIC interrupts, including
2016                 * IPIs, won't work beyond this point!  The only exception are
2017                 * INIT IPIs.
2018                 */
2019                apic_printk(APIC_VERBOSE, "disabling APIC mode, "
2020                                "entering PIC mode.\n");
2021                imcr_apic_to_pic();
2022                return;
2023        }
2024#endif
2025
2026        /* Go back to Virtual Wire compatibility mode */
2027
2028        /* For the spurious interrupt use vector F, and enable it */
2029        value = apic_read(APIC_SPIV);
2030        value &= ~APIC_VECTOR_MASK;
2031        value |= APIC_SPIV_APIC_ENABLED;
2032        value |= 0xf;
2033        apic_write(APIC_SPIV, value);
2034
2035        if (!virt_wire_setup) {
2036                /*
2037                 * For LVT0 make it edge triggered, active high,
2038                 * external and enabled
2039                 */
2040                value = apic_read(APIC_LVT0);
2041                value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
2042                        APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
2043                        APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
2044                value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
2045                value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
2046                apic_write(APIC_LVT0, value);
2047        } else {
2048                /* Disable LVT0 */
2049                apic_write(APIC_LVT0, APIC_LVT_MASKED);
2050        }
2051
2052        /*
2053         * For LVT1 make it edge triggered, active high,
2054         * nmi and enabled
2055         */
2056        value = apic_read(APIC_LVT1);
2057        value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
2058                        APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
2059                        APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
2060        value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
2061        value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
2062        apic_write(APIC_LVT1, value);
2063}
2064
2065void __cpuinit generic_processor_info(int apicid, int version)
2066{
2067        int cpu, max = nr_cpu_ids;
2068        bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
2069                                phys_cpu_present_map);
2070
2071        /*
2072         * If boot cpu has not been detected yet, then only allow upto
2073         * nr_cpu_ids - 1 processors and keep one slot free for boot cpu
2074         */
2075        if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 &&
2076            apicid != boot_cpu_physical_apicid) {
2077                int thiscpu = max + disabled_cpus - 1;
2078
2079                pr_warning(
2080                        "ACPI: NR_CPUS/possible_cpus limit of %i almost"
2081                        " reached. Keeping one slot for boot cpu."
2082                        "  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
2083
2084                disabled_cpus++;
2085                return;
2086        }
2087
2088        if (num_processors >= nr_cpu_ids) {
2089                int thiscpu = max + disabled_cpus;
2090
2091                pr_warning(
2092                        "ACPI: NR_CPUS/possible_cpus limit of %i reached."
2093                        "  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
2094
2095                disabled_cpus++;
2096                return;
2097        }
2098
2099        num_processors++;
2100        if (apicid == boot_cpu_physical_apicid) {
2101                /*
2102                 * x86_bios_cpu_apicid is required to have processors listed
2103                 * in same order as logical cpu numbers. Hence the first
2104                 * entry is BSP, and so on.
2105                 * boot_cpu_init() already hold bit 0 in cpu_present_mask
2106                 * for BSP.
2107                 */
2108                cpu = 0;
2109        } else
2110                cpu = cpumask_next_zero(-1, cpu_present_mask);
2111
2112        /*
2113         * Validate version
2114         */
2115        if (version == 0x0) {
2116                pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
2117                           cpu, apicid);
2118                version = 0x10;
2119        }
2120        apic_version[apicid] = version;
2121
2122        if (version != apic_version[boot_cpu_physical_apicid]) {
2123                pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
2124                        apic_version[boot_cpu_physical_apicid], cpu, version);
2125        }
2126
2127        physid_set(apicid, phys_cpu_present_map);
2128        if (apicid > max_physical_apicid)
2129                max_physical_apicid = apicid;
2130
2131#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
2132        early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
2133        early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
2134#endif
2135#ifdef CONFIG_X86_32
2136        early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
2137                apic->x86_32_early_logical_apicid(cpu);
2138#endif
2139        set_cpu_possible(cpu, true);
2140        set_cpu_present(cpu, true);
2141}
2142
/*
 * Return the APIC ID obtained from read_apic_id().
 */
int hard_smp_processor_id(void)
{
        return read_apic_id();
}
2147
2148void default_init_apic_ldr(void)
2149{
2150        unsigned long val;
2151
2152        apic_write(APIC_DFR, APIC_DFR_VALUE);
2153        val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
2154        val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
2155        apic_write(APIC_LDR, val);
2156}
2157
2158int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
2159                                   const struct cpumask *andmask,
2160                                   unsigned int *apicid)
2161{
2162        unsigned int cpu;
2163
2164        for_each_cpu_and(cpu, cpumask, andmask) {
2165                if (cpumask_test_cpu(cpu, cpu_online_mask))
2166                        break;
2167        }
2168
2169        if (likely(cpu < nr_cpu_ids)) {
2170                *apicid = per_cpu(x86_cpu_to_apicid, cpu);
2171                return 0;
2172        }
2173
2174        return -EINVAL;
2175}
2176
2177/*
2178 * Override the generic EOI implementation with an optimized version.
2179 * Only called during early boot when only one CPU is active and with
2180 * interrupts disabled, so we know this does not race with actual APIC driver
2181 * use.
2182 */
2183void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
2184{
2185        struct apic **drv;
2186
2187        for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
2188                /* Should happen once for each apic */
2189                WARN_ON((*drv)->eoi_write == eoi_write);
2190                (*drv)->eoi_write = eoi_write;
2191        }
2192}
2193
2194/*
2195 * Power management
2196 */
2197#ifdef CONFIG_PM
2198
/*
 * Local APIC register contents preserved across suspend: filled in by
 * lapic_suspend() and written back by lapic_resume().
 */
static struct {
        /*
         * 'active' is true if the local APIC was enabled by us and
         * not the BIOS; this signifies that we are also responsible
         * for disabling it before entering apm/acpi suspend
         */
        int active;
        /* r/w apic fields */
        unsigned int apic_id;           /* APIC_ID */
        unsigned int apic_taskpri;      /* APIC_TASKPRI */
        unsigned int apic_ldr;          /* APIC_LDR */
        unsigned int apic_dfr;          /* APIC_DFR */
        unsigned int apic_spiv;         /* APIC_SPIV */
        unsigned int apic_lvtt;         /* APIC_LVTT */
        unsigned int apic_lvtpc;        /* APIC_LVTPC, only if maxlvt >= 4 */
        unsigned int apic_lvt0;         /* APIC_LVT0 */
        unsigned int apic_lvt1;         /* APIC_LVT1 */
        unsigned int apic_lvterr;       /* APIC_LVTERR */
        unsigned int apic_tmict;        /* APIC_TMICT */
        unsigned int apic_tdcr;         /* APIC_TDCR */
        unsigned int apic_thmr;         /* APIC_LVTTHMR, only if maxlvt >= 5 */
} apic_pm_state;
2221
/*
 * Syscore suspend hook: save the local APIC registers into apic_pm_state,
 * then disable the local APIC and interrupt remapping with interrupts off.
 *
 * Does nothing unless apic_pm_state.active is set.  Always returns 0.
 */
static int lapic_suspend(void)
{
        unsigned long flags;
        int maxlvt;

        if (!apic_pm_state.active)
                return 0;

        maxlvt = lapic_get_maxlvt();

        apic_pm_state.apic_id = apic_read(APIC_ID);
        apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
        apic_pm_state.apic_ldr = apic_read(APIC_LDR);
        apic_pm_state.apic_dfr = apic_read(APIC_DFR);
        apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
        apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
        /* The performance counter LVT is only read when maxlvt >= 4. */
        if (maxlvt >= 4)
                apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
        apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
        apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
        apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
        apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
        apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
#ifdef CONFIG_X86_THERMAL_VECTOR
        /* The thermal LVT is only read when maxlvt >= 5. */
        if (maxlvt >= 5)
                apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
#endif

        local_irq_save(flags);
        disable_local_APIC();

        irq_remapping_disable();

        local_irq_restore(flags);
        return 0;
}
2258
2259static void lapic_resume(void)
2260{
2261        unsigned int l, h;
2262        unsigned long flags;
2263        int maxlvt;
2264
2265        if (!apic_pm_state.active)
2266                return;
2267
2268        local_irq_save(flags);
2269
2270        /*
2271         * IO-APIC and PIC have their own resume routines.
2272         * We just mask them here to make sure the interrupt
2273         * subsystem is completely quiet while we enable x2apic
2274         * and interrupt-remapping.
2275         */
2276        mask_ioapic_entries();
2277        legacy_pic->mask_all();
2278
2279        if (x2apic_mode)
2280                enable_x2apic();
2281        else {
2282                /*
2283                 * Make sure the APICBASE points to the right address
2284                 *
2285                 * FIXME! This will be wrong if we ever support suspend on
2286                 * SMP! We'll need to do this as part of the CPU restore!
2287                 */
2288                if (boot_cpu_data.x86 >= 6) {
2289                        rdmsr(MSR_IA32_APICBASE, l, h);
2290                        l &= ~MSR_IA32_APICBASE_BASE;
2291                        l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
2292                        wrmsr(MSR_IA32_APICBASE, l, h);
2293                }
2294        }
2295
2296        maxlvt = lapic_get_maxlvt();
2297        apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
2298        apic_write(APIC_ID, apic_pm_state.apic_id);
2299        apic_write(APIC_DFR, apic_pm_state.apic_dfr);
2300        apic_write(APIC_LDR, apic_pm_state.apic_ldr);
2301        apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
2302        apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
2303        apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
2304        apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
2305#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
2306        if (maxlvt >= 5)
2307                apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
2308#endif
2309        if (maxlvt >= 4)
2310                apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
2311        apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
2312        apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
2313        apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
2314        apic_write(APIC_ESR, 0);
2315        apic_read(APIC_ESR);
2316        apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
2317        apic_write(APIC_ESR, 0);
2318        apic_read(APIC_ESR);
2319
2320        irq_remapping_reenable(x2apic_mode);
2321
2322        local_irq_restore(flags);
2323}
2324
/*
 * Syscore hooks for the local APIC.
 *
 * This device has no shutdown method - fully functioning local APICs
 * are needed on every CPU up until machine_halt/restart/poweroff.
 */

static struct syscore_ops lapic_syscore_ops = {
        .resume         = lapic_resume,
        .suspend        = lapic_suspend,
};
2334
/*
 * Mark the local APIC as enabled by us, so that lapic_suspend() and
 * lapic_resume() actually save and restore its state.
 */
static void __cpuinit apic_pm_activate(void)
{
        apic_pm_state.active = 1;
}
2339
2340static int __init init_lapic_sysfs(void)
2341{
2342        /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
2343        if (cpu_has_apic)
2344                register_syscore_ops(&lapic_syscore_ops);
2345
2346        return 0;
2347}
2348
2349/* local apic needs to resume before other devices access its registers. */
2350core_initcall(init_lapic_sysfs);
2351
2352#else   /* CONFIG_PM */
2353
/* Without CONFIG_PM there is no suspend/resume state to activate. */
static void apic_pm_activate(void) { }
2355
2356#endif  /* CONFIG_PM */
2357
2358#ifdef CONFIG_X86_64
2359
2360static int __cpuinit apic_cluster_num(void)
2361{
2362        int i, clusters, zeros;
2363        unsigned id;
2364        u16 *bios_cpu_apicid;
2365        DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
2366
2367        bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
2368        bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
2369
2370        for (i = 0; i < nr_cpu_ids; i++) {
2371                /* are we being called early in kernel startup? */
2372                if (bios_cpu_apicid) {
2373                        id = bios_cpu_apicid[i];
2374                } else if (i < nr_cpu_ids) {
2375                        if (cpu_present(i))
2376                                id = per_cpu(x86_bios_cpu_apicid, i);
2377                        else
2378                                continue;
2379                } else
2380                        break;
2381
2382                if (id != BAD_APICID)
2383                        __set_bit(APIC_CLUSTERID(id), clustermap);
2384        }
2385
2386        /* Problem:  Partially populated chassis may not have CPUs in some of
2387         * the APIC clusters they have been allocated.  Only present CPUs have
2388         * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
2389         * Since clusters are allocated sequentially, count zeros only if
2390         * they are bounded by ones.
2391         */
2392        clusters = 0;
2393        zeros = 0;
2394        for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
2395                if (test_bit(i, clustermap)) {
2396                        clusters += 1 + zeros;
2397                        zeros = 0;
2398                } else
2399                        ++zeros;
2400        }
2401
2402        return clusters;
2403}
2404
/* Set once the DMI table has been scanned by dmi_check_multi(). */
static int __cpuinitdata multi_checked;
/* Set by set_multi() when a multi-chassis system was matched via DMI. */
static int __cpuinitdata multi;
2407
2408static int __cpuinit set_multi(const struct dmi_system_id *d)
2409{
2410        if (multi)
2411                return 0;
2412        pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
2413        multi = 1;
2414        return 0;
2415}
2416
/*
 * DMI signatures of multi-chassis systems; a match invokes set_multi().
 * The empty entry terminates the table.
 */
static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = {
        {
                .callback = set_multi,
                .ident = "IBM System Summit2",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
                },
        },
        {}
};
2428
2429static void __cpuinit dmi_check_multi(void)
2430{
2431        if (multi_checked)
2432                return;
2433
2434        dmi_check_system(multi_dmi_table);
2435        multi_checked = 1;
2436}
2437
2438/*
2439 * apic_is_clustered_box() -- Check if we can expect good TSC
2440 *
2441 * Thus far, the major user of this is IBM's Summit2 series:
2442 * Clustered boxes may have unsynced TSC problems if they are
2443 * multi-chassis.
2444 * Use DMI to check them
2445 */
2446__cpuinit int apic_is_clustered_box(void)
2447{
2448        dmi_check_multi();
2449        if (multi)
2450                return 1;
2451
2452        if (!is_vsmp_box())
2453                return 0;
2454
2455        /*
2456         * ScaleMP vSMPowered boxes have one cluster per board and TSCs are
2457         * not guaranteed to be synced between boards
2458         */
2459        if (apic_cluster_num() > 1)
2460                return 1;
2461
2462        return 0;
2463}
2464#endif
2465
2466/*
2467 * APIC command line parameters
2468 */
/*
 * "disableapic" early parameter: set disable_apic and clear the APIC CPU
 * feature bit.  @arg is not used.  Always returns 0.
 */
static int __init setup_disableapic(char *arg)
{
        disable_apic = 1;
        setup_clear_cpu_cap(X86_FEATURE_APIC);
        return 0;
}
early_param("disableapic", setup_disableapic);
2476
/* "nolapic": same as disableapic, for compatibility */
static int __init setup_nolapic(char *arg)
{
        return setup_disableapic(arg);
}
early_param("nolapic", setup_nolapic);
2483
/*
 * "lapic_timer_c2_ok" early parameter: set local_apic_timer_c2_ok.
 * @arg is not used.  Always returns 0.
 */
static int __init parse_lapic_timer_c2_ok(char *arg)
{
        local_apic_timer_c2_ok = 1;
        return 0;
}
early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
2490
/*
 * "noapictimer" early parameter: set disable_apic_timer.
 * @arg is not used.  Always returns 0.
 */
static int __init parse_disable_apic_timer(char *arg)
{
        disable_apic_timer = 1;
        return 0;
}
early_param("noapictimer", parse_disable_apic_timer);
2497
2498static int __init parse_nolapic_timer(char *arg)
2499{
2500        disable_apic_timer = 1;
2501        return 0;
2502}
2503early_param("nolapic_timer", parse_nolapic_timer);
2504
2505static int __init apic_set_verbosity(char *arg)
2506{
2507        if (!arg)  {
2508#ifdef CONFIG_X86_64
2509                skip_ioapic_setup = 0;
2510                return 0;
2511#endif
2512                return -EINVAL;
2513        }
2514
2515        if (strcmp("debug", arg) == 0)
2516                apic_verbosity = APIC_DEBUG;
2517        else if (strcmp("verbose", arg) == 0)
2518                apic_verbosity = APIC_VERBOSE;
2519        else {
2520                pr_warning("APIC Verbosity level %s not recognised"
2521                        " use apic=verbose or apic=debug\n", arg);
2522                return -EINVAL;
2523        }
2524
2525        return 0;
2526}
2527early_param("apic", apic_set_verbosity);
2528
2529static int __init lapic_insert_resource(void)
2530{
2531        if (!apic_phys)
2532                return -1;
2533
2534        /* Put local APIC into the resource map. */
2535        lapic_resource.start = apic_phys;
2536        lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
2537        insert_resource(&iomem_resource, &lapic_resource);
2538
2539        return 0;
2540}
2541
2542/*
2543 * need call insert after e820_reserve_resources()
2544 * that is using request_resource
2545 */
2546late_initcall(lapic_insert_resource);
2547