linux/arch/x86/kernel/apic/apic.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *      Local APIC handling, local APIC timers
   4 *
   5 *      (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
   6 *
   7 *      Fixes
   8 *      Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
   9 *                                      thanks to Eric Gilmore
  10 *                                      and Rolf G. Tews
  11 *                                      for testing these extensively.
  12 *      Maciej W. Rozycki       :       Various updates and fixes.
  13 *      Mikael Pettersson       :       Power Management for UP-APIC.
  14 *      Pavel Machek and
  15 *      Mikael Pettersson       :       PM converted to driver model.
  16 */
  17
  18#include <linux/perf_event.h>
  19#include <linux/kernel_stat.h>
  20#include <linux/mc146818rtc.h>
  21#include <linux/acpi_pmtmr.h>
  22#include <linux/clockchips.h>
  23#include <linux/interrupt.h>
  24#include <linux/memblock.h>
  25#include <linux/ftrace.h>
  26#include <linux/ioport.h>
  27#include <linux/export.h>
  28#include <linux/syscore_ops.h>
  29#include <linux/delay.h>
  30#include <linux/timex.h>
  31#include <linux/i8253.h>
  32#include <linux/dmar.h>
  33#include <linux/init.h>
  34#include <linux/cpu.h>
  35#include <linux/dmi.h>
  36#include <linux/smp.h>
  37#include <linux/mm.h>
  38
  39#include <asm/trace/irq_vectors.h>
  40#include <asm/irq_remapping.h>
  41#include <asm/perf_event.h>
  42#include <asm/x86_init.h>
  43#include <asm/pgalloc.h>
  44#include <linux/atomic.h>
  45#include <asm/mpspec.h>
  46#include <asm/i8259.h>
  47#include <asm/proto.h>
  48#include <asm/traps.h>
  49#include <asm/apic.h>
  50#include <asm/io_apic.h>
  51#include <asm/desc.h>
  52#include <asm/hpet.h>
  53#include <asm/mtrr.h>
  54#include <asm/time.h>
  55#include <asm/smp.h>
  56#include <asm/mce.h>
  57#include <asm/tsc.h>
  58#include <asm/hypervisor.h>
  59#include <asm/cpu_device_id.h>
  60#include <asm/intel-family.h>
  61#include <asm/irq_regs.h>
  62
  63unsigned int num_processors;
  64
  65unsigned disabled_cpus;
  66
  67/* Processor that is doing the boot up */
  68unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
  69EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
  70
  71u8 boot_cpu_apic_version __ro_after_init;
  72
  73/*
  74 * The highest APIC ID seen during enumeration.
  75 */
  76static unsigned int max_physical_apicid;
  77
  78/*
  79 * Bitmask of physically existing CPUs:
  80 */
  81physid_mask_t phys_cpu_present_map;
  82
  83/*
  84 * Processor to be disabled specified by kernel parameter
  85 * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
  86 * avoid undefined behaviour caused by sending INIT from AP to BSP.
  87 */
  88static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;
  89
  90/*
  91 * This variable controls which CPUs receive external NMIs.  By default,
  92 * external NMIs are delivered only to the BSP.
  93 */
  94static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;
  95
  96/*
  97 * Map cpu index to physical APIC ID
  98 */
  99DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
 100DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
 101DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
 102EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 103EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 104EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
 105
 106#ifdef CONFIG_X86_32
 107
 108/*
 109 * On x86_32, the mapping between cpu and logical apicid may vary
 110 * depending on apic in use.  The following early percpu variable is
 111 * used for the mapping.  This is where the behaviors of x86_64 and 32
 112 * actually diverge.  Let's keep it ugly for now.
 113 */
 114DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
 115
 116/* Local APIC was disabled by the BIOS and enabled by the kernel */
 117static int enabled_via_apicbase __ro_after_init;
 118
 119/*
 120 * Handle interrupt mode configuration register (IMCR).
 121 * This register controls whether the interrupt signals
 122 * that reach the BSP come from the master PIC or from the
 123 * local APIC. Before entering Symmetric I/O Mode, either
 124 * the BIOS or the operating system must switch out of
 125 * PIC Mode by changing the IMCR.
 126 */
 127static inline void imcr_pic_to_apic(void)
 128{
 129        /* select IMCR register */
 130        outb(0x70, 0x22);
 131        /* NMI and 8259 INTR go through APIC */
 132        outb(0x01, 0x23);
 133}
 134
 135static inline void imcr_apic_to_pic(void)
 136{
 137        /* select IMCR register */
 138        outb(0x70, 0x22);
 139        /* NMI and 8259 INTR go directly to BSP */
 140        outb(0x00, 0x23);
 141}
 142#endif
 143
 144/*
 145 * Knob to control our willingness to enable the local APIC.
 146 *
 147 * +1=force-enable
 148 */
 149static int force_enable_local_apic __initdata;
 150
 151/*
 152 * APIC command line parameters
 153 */
 154static int __init parse_lapic(char *arg)
 155{
 156        if (IS_ENABLED(CONFIG_X86_32) && !arg)
 157                force_enable_local_apic = 1;
 158        else if (arg && !strncmp(arg, "notscdeadline", 13))
 159                setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
 160        return 0;
 161}
 162early_param("lapic", parse_lapic);
 163
 164#ifdef CONFIG_X86_64
 165static int apic_calibrate_pmtmr __initdata;
 166static __init int setup_apicpmtimer(char *s)
 167{
 168        apic_calibrate_pmtmr = 1;
 169        notsc_setup(NULL);
 170        return 0;
 171}
 172__setup("apicpmtimer", setup_apicpmtimer);
 173#endif
 174
 175unsigned long mp_lapic_addr __ro_after_init;
 176int disable_apic __ro_after_init;
 177/* Disable local APIC timer from the kernel commandline or via dmi quirk */
 178static int disable_apic_timer __initdata;
 179/* Local APIC timer works in C2 */
 180int local_apic_timer_c2_ok __ro_after_init;
 181EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 182
 183/*
 184 * Debug level, exported for io_apic.c
 185 */
 186int apic_verbosity __ro_after_init;
 187
 188int pic_mode __ro_after_init;
 189
 190/* Have we found an MP table */
 191int smp_found_config __ro_after_init;
 192
 193static struct resource lapic_resource = {
 194        .name = "Local APIC",
 195        .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
 196};
 197
 198unsigned int lapic_timer_period = 0;
 199
 200static void apic_pm_activate(void);
 201
 202static unsigned long apic_phys __ro_after_init;
 203
 204/*
 205 * Get the LAPIC version
 206 */
 207static inline int lapic_get_version(void)
 208{
 209        return GET_APIC_VERSION(apic_read(APIC_LVR));
 210}
 211
 212/*
 213 * Check, if the APIC is integrated or a separate chip
 214 */
 215static inline int lapic_is_integrated(void)
 216{
 217        return APIC_INTEGRATED(lapic_get_version());
 218}
 219
 220/*
 221 * Check, whether this is a modern or a first generation APIC
 222 */
 223static int modern_apic(void)
 224{
 225        /* AMD systems use old APIC versions, so check the CPU */
 226        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
 227            boot_cpu_data.x86 >= 0xf)
 228                return 1;
 229
 230        /* Hygon systems use modern APIC */
 231        if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
 232                return 1;
 233
 234        return lapic_get_version() >= 0x14;
 235}
 236
 237/*
 238 * right after this call apic become NOOP driven
 239 * so apic->write/read doesn't do anything
 240 */
 241static void __init apic_disable(void)
 242{
 243        pr_info("APIC: switched to apic NOOP\n");
 244        apic = &apic_noop;
 245}
 246
 247void native_apic_wait_icr_idle(void)
 248{
 249        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
 250                cpu_relax();
 251}
 252
 253u32 native_safe_apic_wait_icr_idle(void)
 254{
 255        u32 send_status;
 256        int timeout;
 257
 258        timeout = 0;
 259        do {
 260                send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
 261                if (!send_status)
 262                        break;
 263                inc_irq_stat(icr_read_retry_count);
 264                udelay(100);
 265        } while (timeout++ < 1000);
 266
 267        return send_status;
 268}
 269
 270void native_apic_icr_write(u32 low, u32 id)
 271{
 272        unsigned long flags;
 273
 274        local_irq_save(flags);
 275        apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
 276        apic_write(APIC_ICR, low);
 277        local_irq_restore(flags);
 278}
 279
 280u64 native_apic_icr_read(void)
 281{
 282        u32 icr1, icr2;
 283
 284        icr2 = apic_read(APIC_ICR2);
 285        icr1 = apic_read(APIC_ICR);
 286
 287        return icr1 | ((u64)icr2 << 32);
 288}
 289
 290#ifdef CONFIG_X86_32
 291/**
 292 * get_physical_broadcast - Get number of physical broadcast IDs
 293 */
 294int get_physical_broadcast(void)
 295{
 296        return modern_apic() ? 0xff : 0xf;
 297}
 298#endif
 299
 300/**
 301 * lapic_get_maxlvt - get the maximum number of local vector table entries
 302 */
 303int lapic_get_maxlvt(void)
 304{
 305        /*
 306         * - we always have APIC integrated on 64bit mode
 307         * - 82489DXs do not report # of LVT entries
 308         */
 309        return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
 310}
 311
 312/*
 313 * Local APIC timer
 314 */
 315
 316/* Clock divisor */
 317#define APIC_DIVISOR 16
 318#define TSC_DIVISOR  8
 319
 320/*
 321 * This function sets up the local APIC timer, with a timeout of
 322 * 'clocks' APIC bus clock. During calibration we actually call
 323 * this function twice on the boot CPU, once with a bogus timeout
 324 * value, second time for real. The other (noncalibrating) CPUs
 325 * call this function only once, with the real, calibrated value.
 326 *
 327 * We do reads before writes even if unnecessary, to get around the
 328 * P5 APIC double write bug.
 329 */
 330static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 331{
 332        unsigned int lvtt_value, tmp_value;
 333
 334        lvtt_value = LOCAL_TIMER_VECTOR;
 335        if (!oneshot)
 336                lvtt_value |= APIC_LVT_TIMER_PERIODIC;
 337        else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 338                lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
 339
 340        if (!lapic_is_integrated())
 341                lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
 342
 343        if (!irqen)
 344                lvtt_value |= APIC_LVT_MASKED;
 345
 346        apic_write(APIC_LVTT, lvtt_value);
 347
 348        if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
 349                /*
 350                 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
 351                 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
 352                 * According to Intel, MFENCE can do the serialization here.
 353                 */
 354                asm volatile("mfence" : : : "memory");
 355                return;
 356        }
 357
 358        /*
 359         * Divide PICLK by 16
 360         */
 361        tmp_value = apic_read(APIC_TDCR);
 362        apic_write(APIC_TDCR,
 363                (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
 364                APIC_TDR_DIV_16);
 365
 366        if (!oneshot)
 367                apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
 368}
 369
 370/*
 371 * Setup extended LVT, AMD specific
 372 *
 373 * Software should use the LVT offsets the BIOS provides.  The offsets
 374 * are determined by the subsystems using it like those for MCE
 375 * threshold or IBS.  On K8 only offset 0 (APIC500) and MCE interrupts
 376 * are supported. Beginning with family 10h at least 4 offsets are
 377 * available.
 378 *
 379 * Since the offsets must be consistent for all cores, we keep track
 380 * of the LVT offsets in software and reserve the offset for the same
 381 * vector also to be used on other cores. An offset is freed by
 382 * setting the entry to APIC_EILVT_MASKED.
 383 *
 384 * If the BIOS is right, there should be no conflicts. Otherwise a
 385 * "[Firmware Bug]: ..." error message is generated. However, if
 386 * software does not properly determines the offsets, it is not
 387 * necessarily a BIOS bug.
 388 */
 389
 390static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
 391
 392static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
 393{
 394        return (old & APIC_EILVT_MASKED)
 395                || (new == APIC_EILVT_MASKED)
 396                || ((new & ~APIC_EILVT_MASKED) == old);
 397}
 398
 399static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
 400{
 401        unsigned int rsvd, vector;
 402
 403        if (offset >= APIC_EILVT_NR_MAX)
 404                return ~0;
 405
 406        rsvd = atomic_read(&eilvt_offsets[offset]);
 407        do {
 408                vector = rsvd & ~APIC_EILVT_MASKED;     /* 0: unassigned */
 409                if (vector && !eilvt_entry_is_changeable(vector, new))
 410                        /* may not change if vectors are different */
 411                        return rsvd;
 412                rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
 413        } while (rsvd != new);
 414
 415        rsvd &= ~APIC_EILVT_MASKED;
 416        if (rsvd && rsvd != vector)
 417                pr_info("LVT offset %d assigned for vector 0x%02x\n",
 418                        offset, rsvd);
 419
 420        return new;
 421}
 422
 423/*
 424 * If mask=1, the LVT entry does not generate interrupts while mask=0
 425 * enables the vector. See also the BKDGs. Must be called with
 426 * preemption disabled.
 427 */
 428
 429int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
 430{
 431        unsigned long reg = APIC_EILVTn(offset);
 432        unsigned int new, old, reserved;
 433
 434        new = (mask << 16) | (msg_type << 8) | vector;
 435        old = apic_read(reg);
 436        reserved = reserve_eilvt_offset(offset, new);
 437
 438        if (reserved != new) {
 439                pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
 440                       "vector 0x%x, but the register is already in use for "
 441                       "vector 0x%x on another cpu\n",
 442                       smp_processor_id(), reg, offset, new, reserved);
 443                return -EINVAL;
 444        }
 445
 446        if (!eilvt_entry_is_changeable(old, new)) {
 447                pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
 448                       "vector 0x%x, but the register is already in use for "
 449                       "vector 0x%x on this cpu\n",
 450                       smp_processor_id(), reg, offset, new, old);
 451                return -EBUSY;
 452        }
 453
 454        apic_write(reg, new);
 455
 456        return 0;
 457}
 458EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
 459
 460/*
 461 * Program the next event, relative to now
 462 */
 463static int lapic_next_event(unsigned long delta,
 464                            struct clock_event_device *evt)
 465{
 466        apic_write(APIC_TMICT, delta);
 467        return 0;
 468}
 469
 470static int lapic_next_deadline(unsigned long delta,
 471                               struct clock_event_device *evt)
 472{
 473        u64 tsc;
 474
 475        tsc = rdtsc();
 476        wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
 477        return 0;
 478}
 479
 480static int lapic_timer_shutdown(struct clock_event_device *evt)
 481{
 482        unsigned int v;
 483
 484        /* Lapic used as dummy for broadcast ? */
 485        if (evt->features & CLOCK_EVT_FEAT_DUMMY)
 486                return 0;
 487
 488        v = apic_read(APIC_LVTT);
 489        v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
 490        apic_write(APIC_LVTT, v);
 491        apic_write(APIC_TMICT, 0);
 492        return 0;
 493}
 494
 495static inline int
 496lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
 497{
 498        /* Lapic used as dummy for broadcast ? */
 499        if (evt->features & CLOCK_EVT_FEAT_DUMMY)
 500                return 0;
 501
 502        __setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
 503        return 0;
 504}
 505
 506static int lapic_timer_set_periodic(struct clock_event_device *evt)
 507{
 508        return lapic_timer_set_periodic_oneshot(evt, false);
 509}
 510
 511static int lapic_timer_set_oneshot(struct clock_event_device *evt)
 512{
 513        return lapic_timer_set_periodic_oneshot(evt, true);
 514}
 515
 516/*
 517 * Local APIC timer broadcast function
 518 */
 519static void lapic_timer_broadcast(const struct cpumask *mask)
 520{
 521#ifdef CONFIG_SMP
 522        apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
 523#endif
 524}
 525
 526
 527/*
 528 * The local apic timer can be used for any function which is CPU local.
 529 */
 530static struct clock_event_device lapic_clockevent = {
 531        .name                           = "lapic",
 532        .features                       = CLOCK_EVT_FEAT_PERIODIC |
 533                                          CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
 534                                          | CLOCK_EVT_FEAT_DUMMY,
 535        .shift                          = 32,
 536        .set_state_shutdown             = lapic_timer_shutdown,
 537        .set_state_periodic             = lapic_timer_set_periodic,
 538        .set_state_oneshot              = lapic_timer_set_oneshot,
 539        .set_state_oneshot_stopped      = lapic_timer_shutdown,
 540        .set_next_event                 = lapic_next_event,
 541        .broadcast                      = lapic_timer_broadcast,
 542        .rating                         = 100,
 543        .irq                            = -1,
 544};
 545static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 546
 547static __init u32 hsx_deadline_rev(void)
 548{
 549        switch (boot_cpu_data.x86_stepping) {
 550        case 0x02: return 0x3a; /* EP */
 551        case 0x04: return 0x0f; /* EX */
 552        }
 553
 554        return ~0U;
 555}
 556
 557static __init u32 bdx_deadline_rev(void)
 558{
 559        switch (boot_cpu_data.x86_stepping) {
 560        case 0x02: return 0x00000011;
 561        case 0x03: return 0x0700000e;
 562        case 0x04: return 0x0f00000c;
 563        case 0x05: return 0x0e000003;
 564        }
 565
 566        return ~0U;
 567}
 568
 569static __init u32 skx_deadline_rev(void)
 570{
 571        switch (boot_cpu_data.x86_stepping) {
 572        case 0x03: return 0x01000136;
 573        case 0x04: return 0x02000014;
 574        }
 575
 576        if (boot_cpu_data.x86_stepping > 4)
 577                return 0;
 578
 579        return ~0U;
 580}
 581
 582static const struct x86_cpu_id deadline_match[] __initconst = {
 583        X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X,          &hsx_deadline_rev),
 584        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X,        0x0b000020),
 585        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D,        &bdx_deadline_rev),
 586        X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_X,          &skx_deadline_rev),
 587
 588        X86_MATCH_INTEL_FAM6_MODEL( HASWELL,            0x22),
 589        X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L,          0x20),
 590        X86_MATCH_INTEL_FAM6_MODEL( HASWELL_G,          0x17),
 591
 592        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL,          0x25),
 593        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_G,        0x17),
 594
 595        X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_L,          0xb2),
 596        X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE,            0xb2),
 597
 598        X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE_L,         0x52),
 599        X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE,           0x52),
 600
 601        {},
 602};
 603
 604static __init bool apic_validate_deadline_timer(void)
 605{
 606        const struct x86_cpu_id *m;
 607        u32 rev;
 608
 609        if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 610                return false;
 611        if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
 612                return true;
 613
 614        m = x86_match_cpu(deadline_match);
 615        if (!m)
 616                return true;
 617
 618        /*
 619         * Function pointers will have the MSB set due to address layout,
 620         * immediate revisions will not.
 621         */
 622        if ((long)m->driver_data < 0)
 623                rev = ((u32 (*)(void))(m->driver_data))();
 624        else
 625                rev = (u32)m->driver_data;
 626
 627        if (boot_cpu_data.microcode >= rev)
 628                return true;
 629
 630        setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
 631        pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
 632               "please update microcode to version: 0x%x (or later)\n", rev);
 633        return false;
 634}
 635
 636/*
 637 * Setup the local APIC timer for this CPU. Copy the initialized values
 638 * of the boot CPU and register the clock event in the framework.
 639 */
 640static void setup_APIC_timer(void)
 641{
 642        struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
 643
 644        if (this_cpu_has(X86_FEATURE_ARAT)) {
 645                lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
 646                /* Make LAPIC timer preferrable over percpu HPET */
 647                lapic_clockevent.rating = 150;
 648        }
 649
 650        memcpy(levt, &lapic_clockevent, sizeof(*levt));
 651        levt->cpumask = cpumask_of(smp_processor_id());
 652
 653        if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
 654                levt->name = "lapic-deadline";
 655                levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
 656                                    CLOCK_EVT_FEAT_DUMMY);
 657                levt->set_next_event = lapic_next_deadline;
 658                clockevents_config_and_register(levt,
 659                                                tsc_khz * (1000 / TSC_DIVISOR),
 660                                                0xF, ~0UL);
 661        } else
 662                clockevents_register_device(levt);
 663}
 664
 665/*
 666 * Install the updated TSC frequency from recalibration at the TSC
 667 * deadline clockevent devices.
 668 */
 669static void __lapic_update_tsc_freq(void *info)
 670{
 671        struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
 672
 673        if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 674                return;
 675
 676        clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
 677}
 678
 679void lapic_update_tsc_freq(void)
 680{
 681        /*
 682         * The clockevent device's ->mult and ->shift can both be
 683         * changed. In order to avoid races, schedule the frequency
 684         * update code on each CPU.
 685         */
 686        on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
 687}
 688
 689/*
  690 * In this function we calibrate the APIC bus clock against an external timer.
  691 *
  692 * We want to do the calibration only once since we want to have the local
  693 * timer irqs synchronous. CPUs connected by the same APIC bus have the very
  694 * same bus frequency.
 695 *
 696 * This was previously done by reading the PIT/HPET and waiting for a wrap
 697 * around to find out, that a tick has elapsed. I have a box, where the PIT
 698 * readout is broken, so it never gets out of the wait loop again. This was
 699 * also reported by others.
 700 *
 701 * Monitoring the jiffies value is inaccurate and the clockevents
 702 * infrastructure allows us to do a simple substitution of the interrupt
 703 * handler.
 704 *
 705 * The calibration routine also uses the pm_timer when possible, as the PIT
 706 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
 707 * back to normal later in the boot process).
 708 */
 709
 710#define LAPIC_CAL_LOOPS         (HZ/10)
 711
 712static __initdata int lapic_cal_loops = -1;
 713static __initdata long lapic_cal_t1, lapic_cal_t2;
 714static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
 715static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
 716static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
 717
 718/*
 719 * Temporary interrupt handler and polled calibration function.
 720 */
 721static void __init lapic_cal_handler(struct clock_event_device *dev)
 722{
 723        unsigned long long tsc = 0;
 724        long tapic = apic_read(APIC_TMCCT);
 725        unsigned long pm = acpi_pm_read_early();
 726
 727        if (boot_cpu_has(X86_FEATURE_TSC))
 728                tsc = rdtsc();
 729
 730        switch (lapic_cal_loops++) {
 731        case 0:
 732                lapic_cal_t1 = tapic;
 733                lapic_cal_tsc1 = tsc;
 734                lapic_cal_pm1 = pm;
 735                lapic_cal_j1 = jiffies;
 736                break;
 737
 738        case LAPIC_CAL_LOOPS:
 739                lapic_cal_t2 = tapic;
 740                lapic_cal_tsc2 = tsc;
 741                if (pm < lapic_cal_pm1)
 742                        pm += ACPI_PM_OVRRUN;
 743                lapic_cal_pm2 = pm;
 744                lapic_cal_j2 = jiffies;
 745                break;
 746        }
 747}
 748
 749static int __init
 750calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
 751{
 752        const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
 753        const long pm_thresh = pm_100ms / 100;
 754        unsigned long mult;
 755        u64 res;
 756
 757#ifndef CONFIG_X86_PM_TIMER
 758        return -1;
 759#endif
 760
 761        apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
 762
 763        /* Check, if the PM timer is available */
 764        if (!deltapm)
 765                return -1;
 766
 767        mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
 768
 769        if (deltapm > (pm_100ms - pm_thresh) &&
 770            deltapm < (pm_100ms + pm_thresh)) {
 771                apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
 772                return 0;
 773        }
 774
 775        res = (((u64)deltapm) *  mult) >> 22;
 776        do_div(res, 1000000);
 777        pr_warn("APIC calibration not consistent "
 778                "with PM-Timer: %ldms instead of 100ms\n", (long)res);
 779
 780        /* Correct the lapic counter value */
 781        res = (((u64)(*delta)) * pm_100ms);
 782        do_div(res, deltapm);
 783        pr_info("APIC delta adjusted to PM-Timer: "
 784                "%lu (%ld)\n", (unsigned long)res, *delta);
 785        *delta = (long)res;
 786
 787        /* Correct the tsc counter value */
 788        if (boot_cpu_has(X86_FEATURE_TSC)) {
 789                res = (((u64)(*deltatsc)) * pm_100ms);
 790                do_div(res, deltapm);
 791                apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
 792                                          "PM-Timer: %lu (%ld)\n",
 793                                        (unsigned long)res, *deltatsc);
 794                *deltatsc = (long)res;
 795        }
 796
 797        return 0;
 798}
 799
 800static int __init lapic_init_clockevent(void)
 801{
 802        if (!lapic_timer_period)
 803                return -1;
 804
 805        /* Calculate the scaled math multiplication factor */
 806        lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
 807                                        TICK_NSEC, lapic_clockevent.shift);
 808        lapic_clockevent.max_delta_ns =
 809                clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
 810        lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
 811        lapic_clockevent.min_delta_ns =
 812                clockevent_delta2ns(0xF, &lapic_clockevent);
 813        lapic_clockevent.min_delta_ticks = 0xF;
 814
 815        return 0;
 816}
 817
 818bool __init apic_needs_pit(void)
 819{
 820        /*
 821         * If the frequencies are not known, PIT is required for both TSC
 822         * and apic timer calibration.
 823         */
 824        if (!tsc_khz || !cpu_khz)
 825                return true;
 826
 827        /* Is there an APIC at all or is it disabled? */
 828        if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic)
 829                return true;
 830
 831        /*
 832         * If interrupt delivery mode is legacy PIC or virtual wire without
 833         * configuration, the local APIC timer wont be set up. Make sure
 834         * that the PIT is initialized.
 835         */
 836        if (apic_intr_mode == APIC_PIC ||
 837            apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
 838                return true;
 839
 840        /* Virt guests may lack ARAT, but still have DEADLINE */
 841        if (!boot_cpu_has(X86_FEATURE_ARAT))
 842                return true;
 843
 844        /* Deadline timer is based on TSC so no further PIT action required */
 845        if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 846                return false;
 847
 848        /* APIC timer disabled? */
 849        if (disable_apic_timer)
 850                return true;
 851        /*
 852         * The APIC timer frequency is known already, no PIT calibration
 853         * required. If unknown, let the PIT be initialized.
 854         */
 855        return lapic_timer_period == 0;
 856}
 857
 858static int __init calibrate_APIC_clock(void)
 859{
 860        struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
 861        u64 tsc_perj = 0, tsc_start = 0;
 862        unsigned long jif_start;
 863        unsigned long deltaj;
 864        long delta, deltatsc;
 865        int pm_referenced = 0;
 866
 867        if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 868                return 0;
 869
 870        /*
 871         * Check if lapic timer has already been calibrated by platform
 872         * specific routine, such as tsc calibration code. If so just fill
 873         * in the clockevent structure and return.
 874         */
 875        if (!lapic_init_clockevent()) {
 876                apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
 877                            lapic_timer_period);
 878                /*
 879                 * Direct calibration methods must have an always running
 880                 * local APIC timer, no need for broadcast timer.
 881                 */
 882                lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 883                return 0;
 884        }
 885
 886        apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
 887                    "calibrating APIC timer ...\n");
 888
 889        /*
 890         * There are platforms w/o global clockevent devices. Instead of
 891         * making the calibration conditional on that, use a polling based
 892         * approach everywhere.
 893         */
 894        local_irq_disable();
 895
 896        /*
 897         * Setup the APIC counter to maximum. There is no way the lapic
 898         * can underflow in the 100ms detection time frame
 899         */
 900        __setup_APIC_LVTT(0xffffffff, 0, 0);
 901
 902        /*
 903         * Methods to terminate the calibration loop:
 904         *  1) Global clockevent if available (jiffies)
 905         *  2) TSC if available and frequency is known
 906         */
 907        jif_start = READ_ONCE(jiffies);
 908
 909        if (tsc_khz) {
 910                tsc_start = rdtsc();
 911                tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
 912        }
 913
 914        /*
 915         * Enable interrupts so the tick can fire, if a global
 916         * clockevent device is available
 917         */
 918        local_irq_enable();
 919
 920        while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
 921                /* Wait for a tick to elapse */
 922                while (1) {
 923                        if (tsc_khz) {
 924                                u64 tsc_now = rdtsc();
 925                                if ((tsc_now - tsc_start) >= tsc_perj) {
 926                                        tsc_start += tsc_perj;
 927                                        break;
 928                                }
 929                        } else {
 930                                unsigned long jif_now = READ_ONCE(jiffies);
 931
 932                                if (time_after(jif_now, jif_start)) {
 933                                        jif_start = jif_now;
 934                                        break;
 935                                }
 936                        }
 937                        cpu_relax();
 938                }
 939
 940                /* Invoke the calibration routine */
 941                local_irq_disable();
 942                lapic_cal_handler(NULL);
 943                local_irq_enable();
 944        }
 945
 946        local_irq_disable();
 947
 948        /* Build delta t1-t2 as apic timer counts down */
 949        delta = lapic_cal_t1 - lapic_cal_t2;
 950        apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
 951
 952        deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
 953
 954        /* we trust the PM based calibration if possible */
 955        pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
 956                                        &delta, &deltatsc);
 957
 958        lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
 959        lapic_init_clockevent();
 960
 961        apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
 962        apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
 963        apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
 964                    lapic_timer_period);
 965
 966        if (boot_cpu_has(X86_FEATURE_TSC)) {
 967                apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
 968                            "%ld.%04ld MHz.\n",
 969                            (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
 970                            (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
 971        }
 972
 973        apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
 974                    "%u.%04u MHz.\n",
 975                    lapic_timer_period / (1000000 / HZ),
 976                    lapic_timer_period % (1000000 / HZ));
 977
 978        /*
 979         * Do a sanity check on the APIC calibration result
 980         */
 981        if (lapic_timer_period < (1000000 / HZ)) {
 982                local_irq_enable();
 983                pr_warn("APIC frequency too slow, disabling apic timer\n");
 984                return -1;
 985        }
 986
 987        levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
 988
 989        /*
 990         * PM timer calibration failed or not turned on so lets try APIC
 991         * timer based calibration, if a global clockevent device is
 992         * available.
 993         */
 994        if (!pm_referenced && global_clock_event) {
 995                apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
 996
 997                /*
 998                 * Setup the apic timer manually
 999                 */
1000                levt->event_handler = lapic_cal_handler;
1001                lapic_timer_set_periodic(levt);
1002                lapic_cal_loops = -1;
1003
1004                /* Let the interrupts run */
1005                local_irq_enable();
1006
1007                while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
1008                        cpu_relax();
1009
1010                /* Stop the lapic timer */
1011                local_irq_disable();
1012                lapic_timer_shutdown(levt);
1013
1014                /* Jiffies delta */
1015                deltaj = lapic_cal_j2 - lapic_cal_j1;
1016                apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
1017
1018                /* Check, if the jiffies result is consistent */
1019                if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
1020                        apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
1021                else
1022                        levt->features |= CLOCK_EVT_FEAT_DUMMY;
1023        }
1024        local_irq_enable();
1025
1026        if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
1027                pr_warn("APIC timer disabled due to verification failure\n");
1028                return -1;
1029        }
1030
1031        return 0;
1032}
1033
1034/*
1035 * Setup the boot APIC
1036 *
1037 * Calibrate and verify the result.
1038 */
1039void __init setup_boot_APIC_clock(void)
1040{
1041        /*
1042         * The local apic timer can be disabled via the kernel
1043         * commandline or from the CPU detection code. Register the lapic
1044         * timer as a dummy clock event source on SMP systems, so the
1045         * broadcast mechanism is used. On UP systems simply ignore it.
1046         */
1047        if (disable_apic_timer) {
1048                pr_info("Disabling APIC timer\n");
1049                /* No broadcast on UP ! */
1050                if (num_possible_cpus() > 1) {
1051                        lapic_clockevent.mult = 1;
1052                        setup_APIC_timer();
1053                }
1054                return;
1055        }
1056
1057        if (calibrate_APIC_clock()) {
1058                /* No broadcast on UP ! */
1059                if (num_possible_cpus() > 1)
1060                        setup_APIC_timer();
1061                return;
1062        }
1063
1064        /*
1065         * If nmi_watchdog is set to IO_APIC, we need the
1066         * PIT/HPET going.  Otherwise register lapic as a dummy
1067         * device.
1068         */
1069        lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
1070
1071        /* Setup the lapic or request the broadcast */
1072        setup_APIC_timer();
1073        amd_e400_c1e_apic_setup();
1074}
1075
/*
 * Register the local APIC timer on the calling CPU. No calibration is
 * done here; this only runs the same two registration steps that
 * setup_boot_APIC_clock() ends with.
 */
void setup_secondary_APIC_clock(void)
{
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}
1081
1082/*
1083 * The guts of the apic timer interrupt
1084 */
1085static void local_apic_timer_interrupt(void)
1086{
1087        struct clock_event_device *evt = this_cpu_ptr(&lapic_events);
1088
1089        /*
1090         * Normally we should not be here till LAPIC has been initialized but
1091         * in some cases like kdump, its possible that there is a pending LAPIC
1092         * timer interrupt from previous kernel's context and is delivered in
1093         * new kernel the moment interrupts are enabled.
1094         *
1095         * Interrupts are enabled early and LAPIC is setup much later, hence
1096         * its possible that when we get here evt->event_handler is NULL.
1097         * Check for event_handler being NULL and discard the interrupt as
1098         * spurious.
1099         */
1100        if (!evt->event_handler) {
1101                pr_warn("Spurious LAPIC timer interrupt on cpu %d\n",
1102                        smp_processor_id());
1103                /* Switch it off */
1104                lapic_timer_shutdown(evt);
1105                return;
1106        }
1107
1108        /*
1109         * the NMI deadlock-detector uses this.
1110         */
1111        inc_irq_stat(apic_timer_irqs);
1112
1113        evt->event_handler(evt);
1114}
1115
1116/*
1117 * Local APIC timer interrupt. This is the most natural way for doing
1118 * local interrupts, but local timer interrupts can be emulated by
1119 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
1120 *
1121 * [ if a single-CPU system runs an SMP kernel then we call the local
1122 *   interrupt as well. Thus we cannot inline the local irq ... ]
1123 */
1124__visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
1125{
1126        struct pt_regs *old_regs = set_irq_regs(regs);
1127
1128        /*
1129         * NOTE! We'd better ACK the irq immediately,
1130         * because timer handling can be slow.
1131         *
1132         * update_process_times() expects us to have done irq_enter().
1133         * Besides, if we don't timer interrupts ignore the global
1134         * interrupt lock, which is the WrongThing (tm) to do.
1135         */
1136        entering_ack_irq();
1137        trace_local_timer_entry(LOCAL_TIMER_VECTOR);
1138        local_apic_timer_interrupt();
1139        trace_local_timer_exit(LOCAL_TIMER_VECTOR);
1140        exiting_irq();
1141
1142        set_irq_regs(old_regs);
1143}
1144
/*
 * Changing the profiling timer multiplier is not implemented here: the
 * requested @multiplier is ignored and -EINVAL is always returned.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	return -EINVAL;
}
1149
1150/*
1151 * Local APIC start and shutdown
1152 */
1153
1154/**
1155 * clear_local_APIC - shutdown the local APIC
1156 *
1157 * This is called, when a CPU is disabled and before rebooting, so the state of
1158 * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
1159 * leftovers during boot.
1160 */
1161void clear_local_APIC(void)
1162{
1163        int maxlvt;
1164        u32 v;
1165
1166        /* APIC hasn't been mapped yet */
1167        if (!x2apic_mode && !apic_phys)
1168                return;
1169
1170        maxlvt = lapic_get_maxlvt();
1171        /*
1172         * Masking an LVT entry can trigger a local APIC error
1173         * if the vector is zero. Mask LVTERR first to prevent this.
1174         */
1175        if (maxlvt >= 3) {
1176                v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
1177                apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
1178        }
1179        /*
1180         * Careful: we have to set masks only first to deassert
1181         * any level-triggered sources.
1182         */
1183        v = apic_read(APIC_LVTT);
1184        apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
1185        v = apic_read(APIC_LVT0);
1186        apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
1187        v = apic_read(APIC_LVT1);
1188        apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
1189        if (maxlvt >= 4) {
1190                v = apic_read(APIC_LVTPC);
1191                apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
1192        }
1193
1194        /* lets not touch this if we didn't frob it */
1195#ifdef CONFIG_X86_THERMAL_VECTOR
1196        if (maxlvt >= 5) {
1197                v = apic_read(APIC_LVTTHMR);
1198                apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
1199        }
1200#endif
1201#ifdef CONFIG_X86_MCE_INTEL
1202        if (maxlvt >= 6) {
1203                v = apic_read(APIC_LVTCMCI);
1204                if (!(v & APIC_LVT_MASKED))
1205                        apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
1206        }
1207#endif
1208
1209        /*
1210         * Clean APIC state for other OSs:
1211         */
1212        apic_write(APIC_LVTT, APIC_LVT_MASKED);
1213        apic_write(APIC_LVT0, APIC_LVT_MASKED);
1214        apic_write(APIC_LVT1, APIC_LVT_MASKED);
1215        if (maxlvt >= 3)
1216                apic_write(APIC_LVTERR, APIC_LVT_MASKED);
1217        if (maxlvt >= 4)
1218                apic_write(APIC_LVTPC, APIC_LVT_MASKED);
1219
1220        /* Integrated APIC (!82489DX) ? */
1221        if (lapic_is_integrated()) {
1222                if (maxlvt > 3)
1223                        /* Clear ESR due to Pentium errata 3AP and 11AP */
1224                        apic_write(APIC_ESR, 0);
1225                apic_read(APIC_ESR);
1226        }
1227}
1228
1229/**
1230 * apic_soft_disable - Clears and software disables the local APIC on hotplug
1231 *
1232 * Contrary to disable_local_APIC() this does not touch the enable bit in
1233 * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
1234 * bus would require a hardware reset as the APIC would lose track of bus
1235 * arbitration. On systems with FSB delivery APICBASE could be disabled,
1236 * but it has to be guaranteed that no interrupt is sent to the APIC while
1237 * in that state and it's not clear from the SDM whether it still responds
1238 * to INIT/SIPI messages. Stay on the safe side and use software disable.
1239 */
1240void apic_soft_disable(void)
1241{
1242        u32 value;
1243
1244        clear_local_APIC();
1245
1246        /* Soft disable APIC (implies clearing of registers for 82489DX!). */
1247        value = apic_read(APIC_SPIV);
1248        value &= ~APIC_SPIV_APIC_ENABLED;
1249        apic_write(APIC_SPIV, value);
1250}
1251
1252/**
1253 * disable_local_APIC - clear and disable the local APIC
1254 */
1255void disable_local_APIC(void)
1256{
1257        /* APIC hasn't been mapped yet */
1258        if (!x2apic_mode && !apic_phys)
1259                return;
1260
1261        apic_soft_disable();
1262
1263#ifdef CONFIG_X86_32
1264        /*
1265         * When LAPIC was disabled by the BIOS and enabled by the kernel,
1266         * restore the disabled state.
1267         */
1268        if (enabled_via_apicbase) {
1269                unsigned int l, h;
1270
1271                rdmsr(MSR_IA32_APICBASE, l, h);
1272                l &= ~MSR_IA32_APICBASE_ENABLE;
1273                wrmsr(MSR_IA32_APICBASE, l, h);
1274        }
1275#endif
1276}
1277
1278/*
1279 * If Linux enabled the LAPIC against the BIOS default disable it down before
1280 * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
1281 * not power-off.  Additionally clear all LVT entries before disable_local_APIC
1282 * for the case where Linux didn't enable the LAPIC.
1283 */
1284void lapic_shutdown(void)
1285{
1286        unsigned long flags;
1287
1288        if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
1289                return;
1290
1291        local_irq_save(flags);
1292
1293#ifdef CONFIG_X86_32
1294        if (!enabled_via_apicbase)
1295                clear_local_APIC();
1296        else
1297#endif
1298                disable_local_APIC();
1299
1300
1301        local_irq_restore(flags);
1302}
1303
1304/**
1305 * sync_Arb_IDs - synchronize APIC bus arbitration IDs
1306 */
1307void __init sync_Arb_IDs(void)
1308{
1309        /*
1310         * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
1311         * needed on AMD.
1312         */
1313        if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
1314                return;
1315
1316        /*
1317         * Wait for idle.
1318         */
1319        apic_wait_icr_idle();
1320
1321        apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
1322        apic_write(APIC_ICR, APIC_DEST_ALLINC |
1323                        APIC_INT_LEVELTRIG | APIC_DM_INIT);
1324}
1325
1326enum apic_intr_mode_id apic_intr_mode __ro_after_init;
1327
1328static int __init __apic_intr_mode_select(void)
1329{
1330        /* Check kernel option */
1331        if (disable_apic) {
1332                pr_info("APIC disabled via kernel command line\n");
1333                return APIC_PIC;
1334        }
1335
1336        /* Check BIOS */
1337#ifdef CONFIG_X86_64
1338        /* On 64-bit, the APIC must be integrated, Check local APIC only */
1339        if (!boot_cpu_has(X86_FEATURE_APIC)) {
1340                disable_apic = 1;
1341                pr_info("APIC disabled by BIOS\n");
1342                return APIC_PIC;
1343        }
1344#else
1345        /* On 32-bit, the APIC may be integrated APIC or 82489DX */
1346
1347        /* Neither 82489DX nor integrated APIC ? */
1348        if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
1349                disable_apic = 1;
1350                return APIC_PIC;
1351        }
1352
1353        /* If the BIOS pretends there is an integrated APIC ? */
1354        if (!boot_cpu_has(X86_FEATURE_APIC) &&
1355                APIC_INTEGRATED(boot_cpu_apic_version)) {
1356                disable_apic = 1;
1357                pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
1358                                       boot_cpu_physical_apicid);
1359                return APIC_PIC;
1360        }
1361#endif
1362
1363        /* Check MP table or ACPI MADT configuration */
1364        if (!smp_found_config) {
1365                disable_ioapic_support();
1366                if (!acpi_lapic) {
1367                        pr_info("APIC: ACPI MADT or MP tables are not detected\n");
1368                        return APIC_VIRTUAL_WIRE_NO_CONFIG;
1369                }
1370                return APIC_VIRTUAL_WIRE;
1371        }
1372
1373#ifdef CONFIG_SMP
1374        /* If SMP should be disabled, then really disable it! */
1375        if (!setup_max_cpus) {
1376                pr_info("APIC: SMP mode deactivated\n");
1377                return APIC_SYMMETRIC_IO_NO_ROUTING;
1378        }
1379
1380        if (read_apic_id() != boot_cpu_physical_apicid) {
1381                panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1382                     read_apic_id(), boot_cpu_physical_apicid);
1383                /* Or can we switch back to PIC here? */
1384        }
1385#endif
1386
1387        return APIC_SYMMETRIC_IO;
1388}
1389
1390/* Select the interrupt delivery mode for the BSP */
1391void __init apic_intr_mode_select(void)
1392{
1393        apic_intr_mode = __apic_intr_mode_select();
1394}
1395
1396/*
1397 * An initial setup of the virtual wire mode.
1398 */
1399void __init init_bsp_APIC(void)
1400{
1401        unsigned int value;
1402
1403        /*
1404         * Don't do the setup now if we have a SMP BIOS as the
1405         * through-I/O-APIC virtual wire mode might be active.
1406         */
1407        if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
1408                return;
1409
1410        /*
1411         * Do not trust the local APIC being empty at bootup.
1412         */
1413        clear_local_APIC();
1414
1415        /*
1416         * Enable APIC.
1417         */
1418        value = apic_read(APIC_SPIV);
1419        value &= ~APIC_VECTOR_MASK;
1420        value |= APIC_SPIV_APIC_ENABLED;
1421
1422#ifdef CONFIG_X86_32
1423        /* This bit is reserved on P4/Xeon and should be cleared */
1424        if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
1425            (boot_cpu_data.x86 == 15))
1426                value &= ~APIC_SPIV_FOCUS_DISABLED;
1427        else
1428#endif
1429                value |= APIC_SPIV_FOCUS_DISABLED;
1430        value |= SPURIOUS_APIC_VECTOR;
1431        apic_write(APIC_SPIV, value);
1432
1433        /*
1434         * Set up the virtual wire mode.
1435         */
1436        apic_write(APIC_LVT0, APIC_DM_EXTINT);
1437        value = APIC_DM_NMI;
1438        if (!lapic_is_integrated())             /* 82489DX */
1439                value |= APIC_LVT_LEVEL_TRIGGER;
1440        if (apic_extnmi == APIC_EXTNMI_NONE)
1441                value |= APIC_LVT_MASKED;
1442        apic_write(APIC_LVT1, value);
1443}
1444
1445static void __init apic_bsp_setup(bool upmode);
1446
1447/* Init the interrupt delivery mode for the BSP */
1448void __init apic_intr_mode_init(void)
1449{
1450        bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
1451
1452        switch (apic_intr_mode) {
1453        case APIC_PIC:
1454                pr_info("APIC: Keep in PIC mode(8259)\n");
1455                return;
1456        case APIC_VIRTUAL_WIRE:
1457                pr_info("APIC: Switch to virtual wire mode setup\n");
1458                default_setup_apic_routing();
1459                break;
1460        case APIC_VIRTUAL_WIRE_NO_CONFIG:
1461                pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
1462                upmode = true;
1463                default_setup_apic_routing();
1464                break;
1465        case APIC_SYMMETRIC_IO:
1466                pr_info("APIC: Switch to symmetric I/O mode setup\n");
1467                default_setup_apic_routing();
1468                break;
1469        case APIC_SYMMETRIC_IO_NO_ROUTING:
1470                pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
1471                break;
1472        }
1473
1474        apic_bsp_setup(upmode);
1475}
1476
1477static void lapic_setup_esr(void)
1478{
1479        unsigned int oldvalue, value, maxlvt;
1480
1481        if (!lapic_is_integrated()) {
1482                pr_info("No ESR for 82489DX.\n");
1483                return;
1484        }
1485
1486        if (apic->disable_esr) {
1487                /*
1488                 * Something untraceable is creating bad interrupts on
1489                 * secondary quads ... for the moment, just leave the
1490                 * ESR disabled - we can't do anything useful with the
1491                 * errors anyway - mbligh
1492                 */
1493                pr_info("Leaving ESR disabled.\n");
1494                return;
1495        }
1496
1497        maxlvt = lapic_get_maxlvt();
1498        if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
1499                apic_write(APIC_ESR, 0);
1500        oldvalue = apic_read(APIC_ESR);
1501
1502        /* enables sending errors */
1503        value = ERROR_APIC_VECTOR;
1504        apic_write(APIC_LVTERR, value);
1505
1506        /*
1507         * spec says clear errors after enabling vector.
1508         */
1509        if (maxlvt > 3)
1510                apic_write(APIC_ESR, 0);
1511        value = apic_read(APIC_ESR);
1512        if (value != oldvalue)
1513                apic_printk(APIC_VERBOSE, "ESR value before enabling "
1514                        "vector: 0x%08x  after: 0x%08x\n",
1515                        oldvalue, value);
1516}
1517
1518#define APIC_IR_REGS            APIC_ISR_NR
1519#define APIC_IR_BITS            (APIC_IR_REGS * 32)
1520#define APIC_IR_MAPSIZE         (APIC_IR_BITS / BITS_PER_LONG)
1521
1522union apic_ir {
1523        unsigned long   map[APIC_IR_MAPSIZE];
1524        u32             regs[APIC_IR_REGS];
1525};
1526
1527static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
1528{
1529        int i, bit;
1530
1531        /* Read the IRRs */
1532        for (i = 0; i < APIC_IR_REGS; i++)
1533                irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
1534
1535        /* Read the ISRs */
1536        for (i = 0; i < APIC_IR_REGS; i++)
1537                isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
1538
1539        /*
1540         * If the ISR map is not empty. ACK the APIC and run another round
1541         * to verify whether a pending IRR has been unblocked and turned
1542         * into a ISR.
1543         */
1544        if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
1545                /*
1546                 * There can be multiple ISR bits set when a high priority
1547                 * interrupt preempted a lower priority one. Issue an ACK
1548                 * per set bit.
1549                 */
1550                for_each_set_bit(bit, isr->map, APIC_IR_BITS)
1551                        ack_APIC_irq();
1552                return true;
1553        }
1554
1555        return !bitmap_empty(irr->map, APIC_IR_BITS);
1556}
1557
1558/*
1559 * After a crash, we no longer service the interrupts and a pending
1560 * interrupt from previous kernel might still have ISR bit set.
1561 *
1562 * Most probably by now the CPU has serviced that pending interrupt and it
1563 * might not have done the ack_APIC_irq() because it thought, interrupt
1564 * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
1565 * the ISR bit and cpu thinks it has already serivced the interrupt. Hence
1566 * a vector might get locked. It was noticed for timer irq (vector
1567 * 0x31). Issue an extra EOI to clear ISR.
1568 *
1569 * If there are pending IRR bits they turn into ISR bits after a higher
1570 * priority ISR bit has been acked.
1571 */
1572static void apic_pending_intr_clear(void)
1573{
1574        union apic_ir irr, isr;
1575        unsigned int i;
1576
1577        /* 512 loops are way oversized and give the APIC a chance to obey. */
1578        for (i = 0; i < 512; i++) {
1579                if (!apic_check_and_ack(&irr, &isr))
1580                        return;
1581        }
1582        /* Dump the IRR/ISR content if that failed */
1583        pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
1584}
1585
1586/**
1587 * setup_local_APIC - setup the local APIC
1588 *
1589 * Used to setup local APIC while initializing BSP or bringing up APs.
1590 * Always called with preemption disabled.
1591 */
1592static void setup_local_APIC(void)
1593{
1594        int cpu = smp_processor_id();
1595        unsigned int value;
1596
1597        if (disable_apic) {
1598                disable_ioapic_support();
1599                return;
1600        }
1601
1602        /*
1603         * If this comes from kexec/kcrash the APIC might be enabled in
1604         * SPIV. Soft disable it before doing further initialization.
1605         */
1606        value = apic_read(APIC_SPIV);
1607        value &= ~APIC_SPIV_APIC_ENABLED;
1608        apic_write(APIC_SPIV, value);
1609
1610#ifdef CONFIG_X86_32
1611        /* Pound the ESR really hard over the head with a big hammer - mbligh */
1612        if (lapic_is_integrated() && apic->disable_esr) {
1613                apic_write(APIC_ESR, 0);
1614                apic_write(APIC_ESR, 0);
1615                apic_write(APIC_ESR, 0);
1616                apic_write(APIC_ESR, 0);
1617        }
1618#endif
1619        /*
1620         * Double-check whether this APIC is really registered.
1621         * This is meaningless in clustered apic mode, so we skip it.
1622         */
1623        BUG_ON(!apic->apic_id_registered());
1624
1625        /*
1626         * Intel recommends to set DFR, LDR and TPR before enabling
1627         * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
1628         * document number 292116).  So here it goes...
1629         */
1630        apic->init_apic_ldr();
1631
1632#ifdef CONFIG_X86_32
1633        if (apic->dest_logical) {
1634                int logical_apicid, ldr_apicid;
1635
1636                /*
1637                 * APIC LDR is initialized.  If logical_apicid mapping was
1638                 * initialized during get_smp_config(), make sure it matches
1639                 * the actual value.
1640                 */
1641                logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
1642                ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
1643                if (logical_apicid != BAD_APICID)
1644                        WARN_ON(logical_apicid != ldr_apicid);
1645                /* Always use the value from LDR. */
1646                early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
1647        }
1648#endif
1649
1650        /*
1651         * Set Task Priority to 'accept all except vectors 0-31'.  An APIC
1652         * vector in the 16-31 range could be delivered if TPR == 0, but we
1653         * would think it's an exception and terrible things will happen.  We
1654         * never change this later on.
1655         */
1656        value = apic_read(APIC_TASKPRI);
1657        value &= ~APIC_TPRI_MASK;
1658        value |= 0x10;
1659        apic_write(APIC_TASKPRI, value);
1660
1661        /* Clear eventually stale ISR/IRR bits */
1662        apic_pending_intr_clear();
1663
1664        /*
1665         * Now that we are all set up, enable the APIC
1666         */
1667        value = apic_read(APIC_SPIV);
1668        value &= ~APIC_VECTOR_MASK;
1669        /*
1670         * Enable APIC
1671         */
1672        value |= APIC_SPIV_APIC_ENABLED;
1673
1674#ifdef CONFIG_X86_32
1675        /*
1676         * Some unknown Intel IO/APIC (or APIC) errata is biting us with
1677         * certain networking cards. If high frequency interrupts are
1678         * happening on a particular IOAPIC pin, plus the IOAPIC routing
1679         * entry is masked/unmasked at a high rate as well then sooner or
1680         * later IOAPIC line gets 'stuck', no more interrupts are received
1681         * from the device. If focus CPU is disabled then the hang goes
1682         * away, oh well :-(
1683         *
1684         * [ This bug can be reproduced easily with a level-triggered
1685         *   PCI Ne2000 networking cards and PII/PIII processors, dual
1686         *   BX chipset. ]
1687         */
1688        /*
1689         * Actually disabling the focus CPU check just makes the hang less
1690         * frequent as it makes the interrupt distributon model be more
1691         * like LRU than MRU (the short-term load is more even across CPUs).
1692         */
1693
1694        /*
1695         * - enable focus processor (bit==0)
1696         * - 64bit mode always use processor focus
1697         *   so no need to set it
1698         */
1699        value &= ~APIC_SPIV_FOCUS_DISABLED;
1700#endif
1701
1702        /*
1703         * Set spurious IRQ vector
1704         */
1705        value |= SPURIOUS_APIC_VECTOR;
1706        apic_write(APIC_SPIV, value);
1707
1708        perf_events_lapic_init();
1709
1710        /*
1711         * Set up LVT0, LVT1:
1712         *
1713         * set up through-local-APIC on the boot CPU's LINT0. This is not
1714         * strictly necessary in pure symmetric-IO mode, but sometimes
1715         * we delegate interrupts to the 8259A.
1716         */
1717        /*
1718         * TODO: set up through-local-APIC from through-I/O-APIC? --macro
1719         */
1720        value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
1721        if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
1722                value = APIC_DM_EXTINT;
1723                apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
1724        } else {
1725                value = APIC_DM_EXTINT | APIC_LVT_MASKED;
1726                apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
1727        }
1728        apic_write(APIC_LVT0, value);
1729
1730        /*
1731         * Only the BSP sees the LINT1 NMI signal by default. This can be
1732         * modified by apic_extnmi= boot option.
1733         */
1734        if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
1735            apic_extnmi == APIC_EXTNMI_ALL)
1736                value = APIC_DM_NMI;
1737        else
1738                value = APIC_DM_NMI | APIC_LVT_MASKED;
1739
1740        /* Is 82489DX ? */
1741        if (!lapic_is_integrated())
1742                value |= APIC_LVT_LEVEL_TRIGGER;
1743        apic_write(APIC_LVT1, value);
1744
1745#ifdef CONFIG_X86_MCE_INTEL
1746        /* Recheck CMCI information after local APIC is up on CPU #0 */
1747        if (!cpu)
1748                cmci_recheck();
1749#endif
1750}
1751
/*
 * Final steps of the local APIC setup: enable error reporting, mask the
 * local APIC timer entry (32-bit only) and activate APIC power management.
 */
static void end_local_APIC_setup(void)
{
	lapic_setup_esr();

#ifdef CONFIG_X86_32
	/* Disable the local apic timer */
	apic_write(APIC_LVTT,
		   apic_read(APIC_LVTT) | APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
#endif

	apic_pm_activate();
}
1768
/*
 * APIC setup function for application processors. Called from smpboot.c
 *
 * Runs the full local APIC setup followed by the finishing steps in
 * end_local_APIC_setup().
 */
void apic_ap_setup(void)
{
	setup_local_APIC();
	end_local_APIC_setup();
}
1777
1778#ifdef CONFIG_X86_X2APIC
/* Cleared when x2apic gets disabled (see x2apic_disable()/"nox2apic") */
int x2apic_mode;

/* Values of x2apic_state */
enum {
	X2APIC_OFF,
	X2APIC_ON,
	/* Set by x2apic_disable() and by the "nox2apic" parameter */
	X2APIC_DISABLED,
};

/* Current x2apic state; zero-initialized, i.e. starts as X2APIC_OFF */
static int x2apic_state;
1787
1788static void __x2apic_disable(void)
1789{
1790        u64 msr;
1791
1792        if (!boot_cpu_has(X86_FEATURE_APIC))
1793                return;
1794
1795        rdmsrl(MSR_IA32_APICBASE, msr);
1796        if (!(msr & X2APIC_ENABLE))
1797                return;
1798        /* Disable xapic and x2apic first and then reenable xapic mode */
1799        wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
1800        wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
1801        printk_once(KERN_INFO "x2apic disabled\n");
1802}
1803
1804static void __x2apic_enable(void)
1805{
1806        u64 msr;
1807
1808        rdmsrl(MSR_IA32_APICBASE, msr);
1809        if (msr & X2APIC_ENABLE)
1810                return;
1811        wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
1812        printk_once(KERN_INFO "x2apic enabled\n");
1813}
1814
1815static int __init setup_nox2apic(char *str)
1816{
1817        if (x2apic_enabled()) {
1818                int apicid = native_apic_msr_read(APIC_ID);
1819
1820                if (apicid >= 255) {
1821                        pr_warn("Apicid: %08x, cannot enforce nox2apic\n",
1822                                apicid);
1823                        return 0;
1824                }
1825                pr_warn("x2apic already enabled.\n");
1826                __x2apic_disable();
1827        }
1828        setup_clear_cpu_cap(X86_FEATURE_X2APIC);
1829        x2apic_state = X2APIC_DISABLED;
1830        x2apic_mode = 0;
1831        return 0;
1832}
1833early_param("nox2apic", setup_nox2apic);
1834
1835/* Called from cpu_init() to enable x2apic on (secondary) cpus */
1836void x2apic_setup(void)
1837{
1838        /*
1839         * If x2apic is not in ON state, disable it if already enabled
1840         * from BIOS.
1841         */
1842        if (x2apic_state != X2APIC_ON) {
1843                __x2apic_disable();
1844                return;
1845        }
1846        __x2apic_enable();
1847}
1848
1849static __init void x2apic_disable(void)
1850{
1851        u32 x2apic_id, state = x2apic_state;
1852
1853        x2apic_mode = 0;
1854        x2apic_state = X2APIC_DISABLED;
1855
1856        if (state != X2APIC_ON)
1857                return;
1858
1859        x2apic_id = read_apic_id();
1860        if (x2apic_id >= 255)
1861                panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
1862
1863        __x2apic_disable();
1864        register_lapic_address(mp_lapic_addr);
1865}
1866
1867static __init void x2apic_enable(void)
1868{
1869        if (x2apic_state != X2APIC_OFF)
1870                return;
1871
1872        x2apic_mode = 1;
1873        x2apic_state = X2APIC_ON;
1874        __x2apic_enable();
1875}
1876
1877static __init void try_to_enable_x2apic(int remap_mode)
1878{
1879        if (x2apic_state == X2APIC_DISABLED)
1880                return;
1881
1882        if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
1883                /* IR is required if there is APIC ID > 255 even when running
1884                 * under KVM
1885                 */
1886                if (max_physical_apicid > 255 ||
1887                    !x86_init.hyper.x2apic_available()) {
1888                        pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
1889                        x2apic_disable();
1890                        return;
1891                }
1892
1893                /*
1894                 * without IR all CPUs can be addressed by IOAPIC/MSI
1895                 * only in physical mode
1896                 */
1897                x2apic_phys = 1;
1898        }
1899        x2apic_enable();
1900}
1901
1902void __init check_x2apic(void)
1903{
1904        if (x2apic_enabled()) {
1905                pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
1906                x2apic_mode = 1;
1907                x2apic_state = X2APIC_ON;
1908        } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
1909                x2apic_state = X2APIC_DISABLED;
1910        }
1911}
1912#else /* CONFIG_X86_X2APIC */
1913static int __init validate_x2apic(void)
1914{
1915        if (!apic_is_x2apic_enabled())
1916                return 0;
1917        /*
1918         * Checkme: Can we simply turn off x2apic here instead of panic?
1919         */
1920        panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n");
1921}
1922early_initcall(validate_x2apic);
1923
/* No-op stubs so callers need no #ifdef when CONFIG_X86_X2APIC is off */
static inline void try_to_enable_x2apic(int remap_mode) { }
static inline void __x2apic_enable(void) { }
1926#endif /* !CONFIG_X86_X2APIC */
1927
1928void __init enable_IR_x2apic(void)
1929{
1930        unsigned long flags;
1931        int ret, ir_stat;
1932
1933        if (skip_ioapic_setup) {
1934                pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
1935                return;
1936        }
1937
1938        ir_stat = irq_remapping_prepare();
1939        if (ir_stat < 0 && !x2apic_supported())
1940                return;
1941
1942        ret = save_ioapic_entries();
1943        if (ret) {
1944                pr_info("Saving IO-APIC state failed: %d\n", ret);
1945                return;
1946        }
1947
1948        local_irq_save(flags);
1949        legacy_pic->mask_all();
1950        mask_ioapic_entries();
1951
1952        /* If irq_remapping_prepare() succeeded, try to enable it */
1953        if (ir_stat >= 0)
1954                ir_stat = irq_remapping_enable();
1955        /* ir_stat contains the remap mode or an error code */
1956        try_to_enable_x2apic(ir_stat);
1957
1958        if (ir_stat < 0)
1959                restore_ioapic_entries();
1960        legacy_pic->restore_mask();
1961        local_irq_restore(flags);
1962}
1963
1964#ifdef CONFIG_X86_64
1965/*
1966 * Detect and enable local APICs on non-SMP boards.
1967 * Original code written by Keir Fraser.
1968 * On AMD64 we trust the BIOS - if it says no APIC it is likely
1969 * not correctly set up (usually the APIC timer won't work etc.)
1970 */
1971static int __init detect_init_APIC(void)
1972{
1973        if (!boot_cpu_has(X86_FEATURE_APIC)) {
1974                pr_info("No local APIC present\n");
1975                return -1;
1976        }
1977
1978        mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1979        return 0;
1980}
1981#else
1982
1983static int __init apic_verify(void)
1984{
1985        u32 features, h, l;
1986
1987        /*
1988         * The APIC feature bit should now be enabled
1989         * in `cpuid'
1990         */
1991        features = cpuid_edx(1);
1992        if (!(features & (1 << X86_FEATURE_APIC))) {
1993                pr_warn("Could not enable APIC!\n");
1994                return -1;
1995        }
1996        set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1997        mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1998
1999        /* The BIOS may have set up the APIC at some other address */
2000        if (boot_cpu_data.x86 >= 6) {
2001                rdmsr(MSR_IA32_APICBASE, l, h);
2002                if (l & MSR_IA32_APICBASE_ENABLE)
2003                        mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
2004        }
2005
2006        pr_info("Found and enabled local APIC!\n");
2007        return 0;
2008}
2009
2010int __init apic_force_enable(unsigned long addr)
2011{
2012        u32 h, l;
2013
2014        if (disable_apic)
2015                return -1;
2016
2017        /*
2018         * Some BIOSes disable the local APIC in the APIC_BASE
2019         * MSR. This can only be done in software for Intel P6 or later
2020         * and AMD K7 (Model > 1) or later.
2021         */
2022        if (boot_cpu_data.x86 >= 6) {
2023                rdmsr(MSR_IA32_APICBASE, l, h);
2024                if (!(l & MSR_IA32_APICBASE_ENABLE)) {
2025                        pr_info("Local APIC disabled by BIOS -- reenabling.\n");
2026                        l &= ~MSR_IA32_APICBASE_BASE;
2027                        l |= MSR_IA32_APICBASE_ENABLE | addr;
2028                        wrmsr(MSR_IA32_APICBASE, l, h);
2029                        enabled_via_apicbase = 1;
2030                }
2031        }
2032        return apic_verify();
2033}
2034
2035/*
2036 * Detect and initialize APIC
2037 */
2038static int __init detect_init_APIC(void)
2039{
2040        /* Disabled by kernel option? */
2041        if (disable_apic)
2042                return -1;
2043
2044        switch (boot_cpu_data.x86_vendor) {
2045        case X86_VENDOR_AMD:
2046                if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
2047                    (boot_cpu_data.x86 >= 15))
2048                        break;
2049                goto no_apic;
2050        case X86_VENDOR_HYGON:
2051                break;
2052        case X86_VENDOR_INTEL:
2053                if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
2054                    (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
2055                        break;
2056                goto no_apic;
2057        default:
2058                goto no_apic;
2059        }
2060
2061        if (!boot_cpu_has(X86_FEATURE_APIC)) {
2062                /*
2063                 * Over-ride BIOS and try to enable the local APIC only if
2064                 * "lapic" specified.
2065                 */
2066                if (!force_enable_local_apic) {
2067                        pr_info("Local APIC disabled by BIOS -- "
2068                                "you can enable it with \"lapic\"\n");
2069                        return -1;
2070                }
2071                if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
2072                        return -1;
2073        } else {
2074                if (apic_verify())
2075                        return -1;
2076        }
2077
2078        apic_pm_activate();
2079
2080        return 0;
2081
2082no_apic:
2083        pr_info("No local APIC present or hardware disabled\n");
2084        return -1;
2085}
2086#endif
2087
2088/**
2089 * init_apic_mappings - initialize APIC mappings
2090 */
2091void __init init_apic_mappings(void)
2092{
2093        unsigned int new_apicid;
2094
2095        if (apic_validate_deadline_timer())
2096                pr_debug("TSC deadline timer available\n");
2097
2098        if (x2apic_mode) {
2099                boot_cpu_physical_apicid = read_apic_id();
2100                return;
2101        }
2102
2103        /* If no local APIC can be found return early */
2104        if (!smp_found_config && detect_init_APIC()) {
2105                /* lets NOP'ify apic operations */
2106                pr_info("APIC: disable apic facility\n");
2107                apic_disable();
2108        } else {
2109                apic_phys = mp_lapic_addr;
2110
2111                /*
2112                 * If the system has ACPI MADT tables or MP info, the LAPIC
2113                 * address is already registered.
2114                 */
2115                if (!acpi_lapic && !smp_found_config)
2116                        register_lapic_address(apic_phys);
2117        }
2118
2119        /*
2120         * Fetch the APIC ID of the BSP in case we have a
2121         * default configuration (or the MP table is broken).
2122         */
2123        new_apicid = read_apic_id();
2124        if (boot_cpu_physical_apicid != new_apicid) {
2125                boot_cpu_physical_apicid = new_apicid;
2126                /*
2127                 * yeah -- we lie about apic_version
2128                 * in case if apic was disabled via boot option
2129                 * but it's not a problem for SMP compiled kernel
2130                 * since apic_intr_mode_select is prepared for such
2131                 * a case and disable smp mode
2132                 */
2133                boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
2134        }
2135}
2136
2137void __init register_lapic_address(unsigned long address)
2138{
2139        mp_lapic_addr = address;
2140
2141        if (!x2apic_mode) {
2142                set_fixmap_nocache(FIX_APIC_BASE, address);
2143                apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
2144                            APIC_BASE, address);
2145        }
2146        if (boot_cpu_physical_apicid == -1U) {
2147                boot_cpu_physical_apicid  = read_apic_id();
2148                boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
2149        }
2150}
2151
2152/*
2153 * Local APIC interrupts
2154 */
2155
2156/*
2157 * This interrupt should _never_ happen with our APIC/SMP architecture
2158 */
2159__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
2160{
2161        u8 vector = ~regs->orig_ax;
2162        u32 v;
2163
2164        entering_irq();
2165        trace_spurious_apic_entry(vector);
2166
2167        inc_irq_stat(irq_spurious_count);
2168
2169        /*
2170         * If this is a spurious interrupt then do not acknowledge
2171         */
2172        if (vector == SPURIOUS_APIC_VECTOR) {
2173                /* See SDM vol 3 */
2174                pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
2175                        smp_processor_id());
2176                goto out;
2177        }
2178
2179        /*
2180         * If it is a vectored one, verify it's set in the ISR. If set,
2181         * acknowledge it.
2182         */
2183        v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
2184        if (v & (1 << (vector & 0x1f))) {
2185                pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
2186                        vector, smp_processor_id());
2187                ack_APIC_irq();
2188        } else {
2189                pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
2190                        vector, smp_processor_id());
2191        }
2192out:
2193        trace_spurious_apic_exit(vector);
2194        exiting_irq();
2195}
2196
2197/*
2198 * This interrupt should never happen with our APIC/SMP architecture
2199 */
2200__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
2201{
2202        static const char * const error_interrupt_reason[] = {
2203                "Send CS error",                /* APIC Error Bit 0 */
2204                "Receive CS error",             /* APIC Error Bit 1 */
2205                "Send accept error",            /* APIC Error Bit 2 */
2206                "Receive accept error",         /* APIC Error Bit 3 */
2207                "Redirectable IPI",             /* APIC Error Bit 4 */
2208                "Send illegal vector",          /* APIC Error Bit 5 */
2209                "Received illegal vector",      /* APIC Error Bit 6 */
2210                "Illegal register address",     /* APIC Error Bit 7 */
2211        };
2212        u32 v, i = 0;
2213
2214        entering_irq();
2215        trace_error_apic_entry(ERROR_APIC_VECTOR);
2216
2217        /* First tickle the hardware, only then report what went on. -- REW */
2218        if (lapic_get_maxlvt() > 3)     /* Due to the Pentium erratum 3AP. */
2219                apic_write(APIC_ESR, 0);
2220        v = apic_read(APIC_ESR);
2221        ack_APIC_irq();
2222        atomic_inc(&irq_err_count);
2223
2224        apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
2225                    smp_processor_id(), v);
2226
2227        v &= 0xff;
2228        while (v) {
2229                if (v & 0x1)
2230                        apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
2231                i++;
2232                v >>= 1;
2233        }
2234
2235        apic_printk(APIC_DEBUG, KERN_CONT "\n");
2236
2237        trace_error_apic_exit(ERROR_APIC_VECTOR);
2238        exiting_irq();
2239}
2240
2241/**
2242 * connect_bsp_APIC - attach the APIC to the interrupt system
2243 */
2244static void __init connect_bsp_APIC(void)
2245{
2246#ifdef CONFIG_X86_32
2247        if (pic_mode) {
2248                /*
2249                 * Do not trust the local APIC being empty at bootup.
2250                 */
2251                clear_local_APIC();
2252                /*
2253                 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
2254                 * local APIC to INT and NMI lines.
2255                 */
2256                apic_printk(APIC_VERBOSE, "leaving PIC mode, "
2257                                "enabling APIC mode.\n");
2258                imcr_pic_to_apic();
2259        }
2260#endif
2261}
2262
2263/**
2264 * disconnect_bsp_APIC - detach the APIC from the interrupt system
2265 * @virt_wire_setup:    indicates, whether virtual wire mode is selected
2266 *
2267 * Virtual wire mode is necessary to deliver legacy interrupts even when the
2268 * APIC is disabled.
2269 */
2270void disconnect_bsp_APIC(int virt_wire_setup)
2271{
2272        unsigned int value;
2273
2274#ifdef CONFIG_X86_32
2275        if (pic_mode) {
2276                /*
2277                 * Put the board back into PIC mode (has an effect only on
2278                 * certain older boards).  Note that APIC interrupts, including
2279                 * IPIs, won't work beyond this point!  The only exception are
2280                 * INIT IPIs.
2281                 */
2282                apic_printk(APIC_VERBOSE, "disabling APIC mode, "
2283                                "entering PIC mode.\n");
2284                imcr_apic_to_pic();
2285                return;
2286        }
2287#endif
2288
2289        /* Go back to Virtual Wire compatibility mode */
2290
2291        /* For the spurious interrupt use vector F, and enable it */
2292        value = apic_read(APIC_SPIV);
2293        value &= ~APIC_VECTOR_MASK;
2294        value |= APIC_SPIV_APIC_ENABLED;
2295        value |= 0xf;
2296        apic_write(APIC_SPIV, value);
2297
2298        if (!virt_wire_setup) {
2299                /*
2300                 * For LVT0 make it edge triggered, active high,
2301                 * external and enabled
2302                 */
2303                value = apic_read(APIC_LVT0);
2304                value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
2305                        APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
2306                        APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
2307                value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
2308                value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
2309                apic_write(APIC_LVT0, value);
2310        } else {
2311                /* Disable LVT0 */
2312                apic_write(APIC_LVT0, APIC_LVT_MASKED);
2313        }
2314
2315        /*
2316         * For LVT1 make it edge triggered, active high,
2317         * nmi and enabled
2318         */
2319        value = apic_read(APIC_LVT1);
2320        value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
2321                        APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
2322                        APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
2323        value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
2324        value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
2325        apic_write(APIC_LVT1, value);
2326}
2327
/*
 * The number of allocated logical CPU IDs. Since logical CPU IDs are
 * allocated contiguously, it equals the currently allocated maximum
 * logical CPU ID plus 1. All allocated CPU IDs are in the
 * [0, nr_logical_cpuids) range, so the maximum of nr_logical_cpuids is
 * nr_cpu_ids.
 *
 * NOTE: ID 0 is reserved for the BSP, hence the initial value of 1.
 */
static int nr_logical_cpuids = 1;

/*
 * Mapping from logical CPU ID to APIC ID. Entries that have not been
 * assigned yet hold -1.
 */
static int cpuid_to_apicid[] = {
	[0 ... NR_CPUS - 1] = -1,
};
2344
2345#ifdef CONFIG_SMP
2346/**
2347 * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
2348 * @apicid: APIC ID to check
2349 */
2350bool apic_id_is_primary_thread(unsigned int apicid)
2351{
2352        u32 mask;
2353
2354        if (smp_num_siblings == 1)
2355                return true;
2356        /* Isolate the SMT bit(s) in the APICID and check for 0 */
2357        mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
2358        return !(apicid & mask);
2359}
2360#endif
2361
2362/*
2363 * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids
2364 * and cpuid_to_apicid[] synchronized.
2365 */
2366static int allocate_logical_cpuid(int apicid)
2367{
2368        int i;
2369
2370        /*
2371         * cpuid <-> apicid mapping is persistent, so when a cpu is up,
2372         * check if the kernel has allocated a cpuid for it.
2373         */
2374        for (i = 0; i < nr_logical_cpuids; i++) {
2375                if (cpuid_to_apicid[i] == apicid)
2376                        return i;
2377        }
2378
2379        /* Allocate a new cpuid. */
2380        if (nr_logical_cpuids >= nr_cpu_ids) {
2381                WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. "
2382                             "Processor %d/0x%x and the rest are ignored.\n",
2383                             nr_cpu_ids, nr_logical_cpuids, apicid);
2384                return -EINVAL;
2385        }
2386
2387        cpuid_to_apicid[nr_logical_cpuids] = apicid;
2388        return nr_logical_cpuids++;
2389}
2390
2391int generic_processor_info(int apicid, int version)
2392{
2393        int cpu, max = nr_cpu_ids;
2394        bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
2395                                phys_cpu_present_map);
2396
2397        /*
2398         * boot_cpu_physical_apicid is designed to have the apicid
2399         * returned by read_apic_id(), i.e, the apicid of the
2400         * currently booting-up processor. However, on some platforms,
2401         * it is temporarily modified by the apicid reported as BSP
2402         * through MP table. Concretely:
2403         *
2404         * - arch/x86/kernel/mpparse.c: MP_processor_info()
2405         * - arch/x86/mm/amdtopology.c: amd_numa_init()
2406         *
2407         * This function is executed with the modified
2408         * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel
2409         * parameter doesn't work to disable APs on kdump 2nd kernel.
2410         *
2411         * Since fixing handling of boot_cpu_physical_apicid requires
2412         * another discussion and tests on each platform, we leave it
2413         * for now and here we use read_apic_id() directly in this
2414         * function, generic_processor_info().
2415         */
2416        if (disabled_cpu_apicid != BAD_APICID &&
2417            disabled_cpu_apicid != read_apic_id() &&
2418            disabled_cpu_apicid == apicid) {
2419                int thiscpu = num_processors + disabled_cpus;
2420
2421                pr_warn("APIC: Disabling requested cpu."
2422                        " Processor %d/0x%x ignored.\n", thiscpu, apicid);
2423
2424                disabled_cpus++;
2425                return -ENODEV;
2426        }
2427
2428        /*
2429         * If boot cpu has not been detected yet, then only allow upto
2430         * nr_cpu_ids - 1 processors and keep one slot free for boot cpu
2431         */
2432        if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 &&
2433            apicid != boot_cpu_physical_apicid) {
2434                int thiscpu = max + disabled_cpus - 1;
2435
2436                pr_warn("APIC: NR_CPUS/possible_cpus limit of %i almost"
2437                        " reached. Keeping one slot for boot cpu."
2438                        "  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
2439
2440                disabled_cpus++;
2441                return -ENODEV;
2442        }
2443
2444        if (num_processors >= nr_cpu_ids) {
2445                int thiscpu = max + disabled_cpus;
2446
2447                pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. "
2448                        "Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
2449
2450                disabled_cpus++;
2451                return -EINVAL;
2452        }
2453
2454        if (apicid == boot_cpu_physical_apicid) {
2455                /*
2456                 * x86_bios_cpu_apicid is required to have processors listed
2457                 * in same order as logical cpu numbers. Hence the first
2458                 * entry is BSP, and so on.
2459                 * boot_cpu_init() already hold bit 0 in cpu_present_mask
2460                 * for BSP.
2461                 */
2462                cpu = 0;
2463
2464                /* Logical cpuid 0 is reserved for BSP. */
2465                cpuid_to_apicid[0] = apicid;
2466        } else {
2467                cpu = allocate_logical_cpuid(apicid);
2468                if (cpu < 0) {
2469                        disabled_cpus++;
2470                        return -EINVAL;
2471                }
2472        }
2473
2474        /*
2475         * Validate version
2476         */
2477        if (version == 0x0) {
2478                pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
2479                        cpu, apicid);
2480                version = 0x10;
2481        }
2482
2483        if (version != boot_cpu_apic_version) {
2484                pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
2485                        boot_cpu_apic_version, cpu, version);
2486        }
2487
2488        if (apicid > max_physical_apicid)
2489                max_physical_apicid = apicid;
2490
2491#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
2492        early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
2493        early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
2494#endif
2495#ifdef CONFIG_X86_32
2496        early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
2497                apic->x86_32_early_logical_apicid(cpu);
2498#endif
2499        set_cpu_possible(cpu, true);
2500        physid_set(apicid, phys_cpu_present_map);
2501        set_cpu_present(cpu, true);
2502        num_processors++;
2503
2504        return cpu;
2505}
2506
/* Return the APIC ID of the executing CPU as read from its local APIC. */
int hard_smp_processor_id(void)
{
	return read_apic_id();
}
2511
2512/*
2513 * Override the generic EOI implementation with an optimized version.
2514 * Only called during early boot when only one CPU is active and with
2515 * interrupts disabled, so we know this does not race with actual APIC driver
2516 * use.
2517 */
2518void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
2519{
2520        struct apic **drv;
2521
2522        for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
2523                /* Should happen once for each apic */
2524                WARN_ON((*drv)->eoi_write == eoi_write);
2525                (*drv)->native_eoi_write = (*drv)->eoi_write;
2526                (*drv)->eoi_write = eoi_write;
2527        }
2528}
2529
2530static void __init apic_bsp_up_setup(void)
2531{
2532#ifdef CONFIG_X86_64
2533        apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid));
2534#else
2535        /*
2536         * Hack: In case of kdump, after a crash, kernel might be booting
2537         * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
2538         * might be zero if read from MP tables. Get it from LAPIC.
2539         */
2540# ifdef CONFIG_CRASH_DUMP
2541        boot_cpu_physical_apicid = read_apic_id();
2542# endif
2543#endif
2544        physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
2545}
2546
2547/**
2548 * apic_bsp_setup - Setup function for local apic and io-apic
2549 * @upmode:             Force UP mode (for APIC_init_uniprocessor)
2550 */
2551static void __init apic_bsp_setup(bool upmode)
2552{
2553        connect_bsp_APIC();
2554        if (upmode)
2555                apic_bsp_up_setup();
2556        setup_local_APIC();
2557
2558        enable_IO_APIC();
2559        end_local_APIC_setup();
2560        irq_remap_enable_fault_handling();
2561        setup_IO_APIC();
2562}
2563
2564#ifdef CONFIG_UP_LATE_INIT
2565void __init up_late_init(void)
2566{
2567        if (apic_intr_mode == APIC_PIC)
2568                return;
2569
2570        /* Setup local timer */
2571        x86_init.timers.setup_percpu_clockev();
2572}
2573#endif
2574
2575/*
2576 * Power management
2577 */
2578#ifdef CONFIG_PM
2579
/*
 * Local APIC register state saved by lapic_suspend() and written back by
 * lapic_resume().
 */
static struct {
	/*
	 * 'active' is true if the local APIC was enabled by us and
	 * not the BIOS; this signifies that we are also responsible
	 * for disabling it before entering apm/acpi suspend
	 *
	 * Set by apic_pm_activate(); suspend and resume do nothing
	 * while it is zero.
	 */
	int active;
	/* r/w apic fields */
	unsigned int apic_id;
	unsigned int apic_taskpri;
	unsigned int apic_ldr;
	unsigned int apic_dfr;
	unsigned int apic_spiv;
	unsigned int apic_lvtt;
	unsigned int apic_lvtpc;	/* saved only if maxlvt >= 4 */
	unsigned int apic_lvt0;
	unsigned int apic_lvt1;
	unsigned int apic_lvterr;
	unsigned int apic_tmict;
	unsigned int apic_tdcr;
	unsigned int apic_thmr;		/* saved only if maxlvt >= 5 */
	unsigned int apic_cmci;		/* saved only if maxlvt >= 6 */
} apic_pm_state;
2603
2604static int lapic_suspend(void)
2605{
2606        unsigned long flags;
2607        int maxlvt;
2608
2609        if (!apic_pm_state.active)
2610                return 0;
2611
2612        maxlvt = lapic_get_maxlvt();
2613
2614        apic_pm_state.apic_id = apic_read(APIC_ID);
2615        apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
2616        apic_pm_state.apic_ldr = apic_read(APIC_LDR);
2617        apic_pm_state.apic_dfr = apic_read(APIC_DFR);
2618        apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
2619        apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
2620        if (maxlvt >= 4)
2621                apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
2622        apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
2623        apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
2624        apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
2625        apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
2626        apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
2627#ifdef CONFIG_X86_THERMAL_VECTOR
2628        if (maxlvt >= 5)
2629                apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
2630#endif
2631#ifdef CONFIG_X86_MCE_INTEL
2632        if (maxlvt >= 6)
2633                apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI);
2634#endif
2635
2636        local_irq_save(flags);
2637
2638        /*
2639         * Mask IOAPIC before disabling the local APIC to prevent stale IRR
2640         * entries on some implementations.
2641         */
2642        mask_ioapic_entries();
2643
2644        disable_local_APIC();
2645
2646        irq_remapping_disable();
2647
2648        local_irq_restore(flags);
2649        return 0;
2650}
2651
2652static void lapic_resume(void)
2653{
2654        unsigned int l, h;
2655        unsigned long flags;
2656        int maxlvt;
2657
2658        if (!apic_pm_state.active)
2659                return;
2660
2661        local_irq_save(flags);
2662
2663        /*
2664         * IO-APIC and PIC have their own resume routines.
2665         * We just mask them here to make sure the interrupt
2666         * subsystem is completely quiet while we enable x2apic
2667         * and interrupt-remapping.
2668         */
2669        mask_ioapic_entries();
2670        legacy_pic->mask_all();
2671
2672        if (x2apic_mode) {
2673                __x2apic_enable();
2674        } else {
2675                /*
2676                 * Make sure the APICBASE points to the right address
2677                 *
2678                 * FIXME! This will be wrong if we ever support suspend on
2679                 * SMP! We'll need to do this as part of the CPU restore!
2680                 */
2681                if (boot_cpu_data.x86 >= 6) {
2682                        rdmsr(MSR_IA32_APICBASE, l, h);
2683                        l &= ~MSR_IA32_APICBASE_BASE;
2684                        l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
2685                        wrmsr(MSR_IA32_APICBASE, l, h);
2686                }
2687        }
2688
2689        maxlvt = lapic_get_maxlvt();
2690        apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
2691        apic_write(APIC_ID, apic_pm_state.apic_id);
2692        apic_write(APIC_DFR, apic_pm_state.apic_dfr);
2693        apic_write(APIC_LDR, apic_pm_state.apic_ldr);
2694        apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
2695        apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
2696        apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
2697        apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
2698#ifdef CONFIG_X86_THERMAL_VECTOR
2699        if (maxlvt >= 5)
2700                apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
2701#endif
2702#ifdef CONFIG_X86_MCE_INTEL
2703        if (maxlvt >= 6)
2704                apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci);
2705#endif
2706        if (maxlvt >= 4)
2707                apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
2708        apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
2709        apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
2710        apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
2711        apic_write(APIC_ESR, 0);
2712        apic_read(APIC_ESR);
2713        apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
2714        apic_write(APIC_ESR, 0);
2715        apic_read(APIC_ESR);
2716
2717        irq_remapping_reenable(x2apic_mode);
2718
2719        local_irq_restore(flags);
2720}
2721
2722/*
2723 * This device has no shutdown method - fully functioning local APICs
2724 * are needed on every CPU up until machine_halt/restart/poweroff.
2725 */
2726
2727static struct syscore_ops lapic_syscore_ops = {
2728        .resume         = lapic_resume,
2729        .suspend        = lapic_suspend,
2730};
2731
2732static void apic_pm_activate(void)
2733{
2734        apic_pm_state.active = 1;
2735}
2736
2737static int __init init_lapic_sysfs(void)
2738{
2739        /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
2740        if (boot_cpu_has(X86_FEATURE_APIC))
2741                register_syscore_ops(&lapic_syscore_ops);
2742
2743        return 0;
2744}
2745
2746/* local apic needs to resume before other devices access its registers. */
2747core_initcall(init_lapic_sysfs);
2748
2749#else   /* CONFIG_PM */
2750
/* Without CONFIG_PM there is nothing to activate; this is a no-op stub. */
static void apic_pm_activate(void)
{
}
2752
2753#endif  /* CONFIG_PM */
2754
2755#ifdef CONFIG_X86_64
2756
/* Set to 1 by dmi_check_multi() once multi_dmi_table has been checked. */
static int multi_checked;
/* Set to 1 by set_multi(), the callback used by multi_dmi_table entries. */
static int multi;
2759
2760static int set_multi(const struct dmi_system_id *d)
2761{
2762        if (multi)
2763                return 0;
2764        pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
2765        multi = 1;
2766        return 0;
2767}
2768
2769static const struct dmi_system_id multi_dmi_table[] = {
2770        {
2771                .callback = set_multi,
2772                .ident = "IBM System Summit2",
2773                .matches = {
2774                        DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
2775                        DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
2776                },
2777        },
2778        {}
2779};
2780
2781static void dmi_check_multi(void)
2782{
2783        if (multi_checked)
2784                return;
2785
2786        dmi_check_system(multi_dmi_table);
2787        multi_checked = 1;
2788}
2789
2790/*
2791 * apic_is_clustered_box() -- Check if we can expect good TSC
2792 *
2793 * Thus far, the major user of this is IBM's Summit2 series:
2794 * Clustered boxes may have unsynced TSC problems if they are
2795 * multi-chassis.
2796 * Use DMI to check them
2797 */
2798int apic_is_clustered_box(void)
2799{
2800        dmi_check_multi();
2801        return multi;
2802}
2803#endif
2804
2805/*
2806 * APIC command line parameters
2807 */
2808static int __init setup_disableapic(char *arg)
2809{
2810        disable_apic = 1;
2811        setup_clear_cpu_cap(X86_FEATURE_APIC);
2812        return 0;
2813}
2814early_param("disableapic", setup_disableapic);
2815
2816/* same as disableapic, for compatibility */
2817static int __init setup_nolapic(char *arg)
2818{
2819        return setup_disableapic(arg);
2820}
2821early_param("nolapic", setup_nolapic);
2822
2823static int __init parse_lapic_timer_c2_ok(char *arg)
2824{
2825        local_apic_timer_c2_ok = 1;
2826        return 0;
2827}
2828early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
2829
2830static int __init parse_disable_apic_timer(char *arg)
2831{
2832        disable_apic_timer = 1;
2833        return 0;
2834}
2835early_param("noapictimer", parse_disable_apic_timer);
2836
2837static int __init parse_nolapic_timer(char *arg)
2838{
2839        disable_apic_timer = 1;
2840        return 0;
2841}
2842early_param("nolapic_timer", parse_nolapic_timer);
2843
2844static int __init apic_set_verbosity(char *arg)
2845{
2846        if (!arg)  {
2847#ifdef CONFIG_X86_64
2848                skip_ioapic_setup = 0;
2849                return 0;
2850#endif
2851                return -EINVAL;
2852        }
2853
2854        if (strcmp("debug", arg) == 0)
2855                apic_verbosity = APIC_DEBUG;
2856        else if (strcmp("verbose", arg) == 0)
2857                apic_verbosity = APIC_VERBOSE;
2858#ifdef CONFIG_X86_64
2859        else {
2860                pr_warn("APIC Verbosity level %s not recognised"
2861                        " use apic=verbose or apic=debug\n", arg);
2862                return -EINVAL;
2863        }
2864#endif
2865
2866        return 0;
2867}
2868early_param("apic", apic_set_verbosity);
2869
2870static int __init lapic_insert_resource(void)
2871{
2872        if (!apic_phys)
2873                return -1;
2874
2875        /* Put local APIC into the resource map. */
2876        lapic_resource.start = apic_phys;
2877        lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
2878        insert_resource(&iomem_resource, &lapic_resource);
2879
2880        return 0;
2881}
2882
2883/*
2884 * need call insert after e820__reserve_resources()
2885 * that is using request_resource
2886 */
2887late_initcall(lapic_insert_resource);
2888
2889static int __init apic_set_disabled_cpu_apicid(char *arg)
2890{
2891        if (!arg || !get_option(&arg, &disabled_cpu_apicid))
2892                return -EINVAL;
2893
2894        return 0;
2895}
2896early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);
2897
2898static int __init apic_set_extnmi(char *arg)
2899{
2900        if (!arg)
2901                return -EINVAL;
2902
2903        if (!strncmp("all", arg, 3))
2904                apic_extnmi = APIC_EXTNMI_ALL;
2905        else if (!strncmp("none", arg, 4))
2906                apic_extnmi = APIC_EXTNMI_NONE;
2907        else if (!strncmp("bsp", arg, 3))
2908                apic_extnmi = APIC_EXTNMI_BSP;
2909        else {
2910                pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg);
2911                return -EINVAL;
2912        }
2913
2914        return 0;
2915}
2916early_param("apic_extnmi", apic_set_extnmi);
2917