linux/arch/x86/kernel/apic/apic.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *      Local APIC handling, local APIC timers
   4 *
   5 *      (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
   6 *
   7 *      Fixes
   8 *      Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
   9 *                                      thanks to Eric Gilmore
  10 *                                      and Rolf G. Tews
  11 *                                      for testing these extensively.
  12 *      Maciej W. Rozycki       :       Various updates and fixes.
  13 *      Mikael Pettersson       :       Power Management for UP-APIC.
  14 *      Pavel Machek and
  15 *      Mikael Pettersson       :       PM converted to driver model.
  16 */
  17
  18#include <linux/perf_event.h>
  19#include <linux/kernel_stat.h>
  20#include <linux/mc146818rtc.h>
  21#include <linux/acpi_pmtmr.h>
  22#include <linux/clockchips.h>
  23#include <linux/interrupt.h>
  24#include <linux/memblock.h>
  25#include <linux/ftrace.h>
  26#include <linux/ioport.h>
  27#include <linux/export.h>
  28#include <linux/syscore_ops.h>
  29#include <linux/delay.h>
  30#include <linux/timex.h>
  31#include <linux/i8253.h>
  32#include <linux/dmar.h>
  33#include <linux/init.h>
  34#include <linux/cpu.h>
  35#include <linux/dmi.h>
  36#include <linux/smp.h>
  37#include <linux/mm.h>
  38
  39#include <asm/trace/irq_vectors.h>
  40#include <asm/irq_remapping.h>
  41#include <asm/perf_event.h>
  42#include <asm/x86_init.h>
  43#include <asm/pgalloc.h>
  44#include <linux/atomic.h>
  45#include <asm/mpspec.h>
  46#include <asm/i8259.h>
  47#include <asm/proto.h>
  48#include <asm/traps.h>
  49#include <asm/apic.h>
  50#include <asm/io_apic.h>
  51#include <asm/desc.h>
  52#include <asm/hpet.h>
  53#include <asm/mtrr.h>
  54#include <asm/time.h>
  55#include <asm/smp.h>
  56#include <asm/mce.h>
  57#include <asm/tsc.h>
  58#include <asm/hypervisor.h>
  59#include <asm/cpu_device_id.h>
  60#include <asm/intel-family.h>
  61#include <asm/irq_regs.h>
  62
  63unsigned int num_processors;
  64
  65unsigned disabled_cpus;
  66
  67/* Processor that is doing the boot up */
  68unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
  69EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
  70
  71u8 boot_cpu_apic_version __ro_after_init;
  72
  73/*
  74 * The highest APIC ID seen during enumeration.
  75 */
  76static unsigned int max_physical_apicid;
  77
  78/*
  79 * Bitmask of physically existing CPUs:
  80 */
  81physid_mask_t phys_cpu_present_map;
  82
  83/*
  84 * Processor to be disabled specified by kernel parameter
  85 * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
  86 * avoid undefined behaviour caused by sending INIT from AP to BSP.
  87 */
  88static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;
  89
  90/*
  91 * This variable controls which CPUs receive external NMIs.  By default,
  92 * external NMIs are delivered only to the BSP.
  93 */
  94static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;
  95
  96/*
  97 * Map cpu index to physical APIC ID
  98 */
  99DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
 100DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
 101DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
 102EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 103EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 104EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
 105
 106#ifdef CONFIG_X86_32
 107
 108/*
 109 * On x86_32, the mapping between cpu and logical apicid may vary
 110 * depending on apic in use.  The following early percpu variable is
 111 * used for the mapping.  This is where the behaviors of x86_64 and 32
 112 * actually diverge.  Let's keep it ugly for now.
 113 */
 114DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
 115
 116/* Local APIC was disabled by the BIOS and enabled by the kernel */
 117static int enabled_via_apicbase __ro_after_init;
 118
 119/*
 120 * Handle interrupt mode configuration register (IMCR).
 121 * This register controls whether the interrupt signals
 122 * that reach the BSP come from the master PIC or from the
 123 * local APIC. Before entering Symmetric I/O Mode, either
 124 * the BIOS or the operating system must switch out of
 125 * PIC Mode by changing the IMCR.
 126 */
 127static inline void imcr_pic_to_apic(void)
 128{
 129        /* select IMCR register */
 130        outb(0x70, 0x22);
 131        /* NMI and 8259 INTR go through APIC */
 132        outb(0x01, 0x23);
 133}
 134
 135static inline void imcr_apic_to_pic(void)
 136{
 137        /* select IMCR register */
 138        outb(0x70, 0x22);
 139        /* NMI and 8259 INTR go directly to BSP */
 140        outb(0x00, 0x23);
 141}
 142#endif
 143
 144/*
 145 * Knob to control our willingness to enable the local APIC.
 146 *
 147 * +1=force-enable
 148 */
 149static int force_enable_local_apic __initdata;
 150
 151/*
 152 * APIC command line parameters
 153 */
 154static int __init parse_lapic(char *arg)
 155{
 156        if (IS_ENABLED(CONFIG_X86_32) && !arg)
 157                force_enable_local_apic = 1;
 158        else if (arg && !strncmp(arg, "notscdeadline", 13))
 159                setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
 160        return 0;
 161}
 162early_param("lapic", parse_lapic);
 163
 164#ifdef CONFIG_X86_64
 165static int apic_calibrate_pmtmr __initdata;
 166static __init int setup_apicpmtimer(char *s)
 167{
 168        apic_calibrate_pmtmr = 1;
 169        notsc_setup(NULL);
 170        return 0;
 171}
 172__setup("apicpmtimer", setup_apicpmtimer);
 173#endif
 174
 175unsigned long mp_lapic_addr __ro_after_init;
 176int disable_apic __ro_after_init;
 177/* Disable local APIC timer from the kernel commandline or via dmi quirk */
 178static int disable_apic_timer __initdata;
 179/* Local APIC timer works in C2 */
 180int local_apic_timer_c2_ok __ro_after_init;
 181EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 182
 183/*
 184 * Debug level, exported for io_apic.c
 185 */
 186int apic_verbosity __ro_after_init;
 187
 188int pic_mode __ro_after_init;
 189
 190/* Have we found an MP table */
 191int smp_found_config __ro_after_init;
 192
 193static struct resource lapic_resource = {
 194        .name = "Local APIC",
 195        .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
 196};
 197
 198unsigned int lapic_timer_period = 0;
 199
 200static void apic_pm_activate(void);
 201
 202static unsigned long apic_phys __ro_after_init;
 203
 204/*
 205 * Get the LAPIC version
 206 */
 207static inline int lapic_get_version(void)
 208{
 209        return GET_APIC_VERSION(apic_read(APIC_LVR));
 210}
 211
 212/*
 213 * Check, if the APIC is integrated or a separate chip
 214 */
 215static inline int lapic_is_integrated(void)
 216{
 217        return APIC_INTEGRATED(lapic_get_version());
 218}
 219
 220/*
 221 * Check, whether this is a modern or a first generation APIC
 222 */
 223static int modern_apic(void)
 224{
 225        /* AMD systems use old APIC versions, so check the CPU */
 226        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
 227            boot_cpu_data.x86 >= 0xf)
 228                return 1;
 229
 230        /* Hygon systems use modern APIC */
 231        if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
 232                return 1;
 233
 234        return lapic_get_version() >= 0x14;
 235}
 236
 237/*
 238 * right after this call apic become NOOP driven
 239 * so apic->write/read doesn't do anything
 240 */
 241static void __init apic_disable(void)
 242{
 243        pr_info("APIC: switched to apic NOOP\n");
 244        apic = &apic_noop;
 245}
 246
 247void native_apic_wait_icr_idle(void)
 248{
 249        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
 250                cpu_relax();
 251}
 252
 253u32 native_safe_apic_wait_icr_idle(void)
 254{
 255        u32 send_status;
 256        int timeout;
 257
 258        timeout = 0;
 259        do {
 260                send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
 261                if (!send_status)
 262                        break;
 263                inc_irq_stat(icr_read_retry_count);
 264                udelay(100);
 265        } while (timeout++ < 1000);
 266
 267        return send_status;
 268}
 269
 270void native_apic_icr_write(u32 low, u32 id)
 271{
 272        unsigned long flags;
 273
 274        local_irq_save(flags);
 275        apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
 276        apic_write(APIC_ICR, low);
 277        local_irq_restore(flags);
 278}
 279
 280u64 native_apic_icr_read(void)
 281{
 282        u32 icr1, icr2;
 283
 284        icr2 = apic_read(APIC_ICR2);
 285        icr1 = apic_read(APIC_ICR);
 286
 287        return icr1 | ((u64)icr2 << 32);
 288}
 289
 290#ifdef CONFIG_X86_32
 291/**
 292 * get_physical_broadcast - Get number of physical broadcast IDs
 293 */
 294int get_physical_broadcast(void)
 295{
 296        return modern_apic() ? 0xff : 0xf;
 297}
 298#endif
 299
 300/**
 301 * lapic_get_maxlvt - get the maximum number of local vector table entries
 302 */
 303int lapic_get_maxlvt(void)
 304{
 305        /*
 306         * - we always have APIC integrated on 64bit mode
 307         * - 82489DXs do not report # of LVT entries
 308         */
 309        return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
 310}
 311
 312/*
 313 * Local APIC timer
 314 */
 315
 316/* Clock divisor */
 317#define APIC_DIVISOR 16
 318#define TSC_DIVISOR  8
 319
 320/*
 321 * This function sets up the local APIC timer, with a timeout of
 322 * 'clocks' APIC bus clock. During calibration we actually call
 323 * this function twice on the boot CPU, once with a bogus timeout
 324 * value, second time for real. The other (noncalibrating) CPUs
 325 * call this function only once, with the real, calibrated value.
 326 *
 327 * We do reads before writes even if unnecessary, to get around the
 328 * P5 APIC double write bug.
 329 */
 330static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 331{
 332        unsigned int lvtt_value, tmp_value;
 333
 334        lvtt_value = LOCAL_TIMER_VECTOR;
 335        if (!oneshot)
 336                lvtt_value |= APIC_LVT_TIMER_PERIODIC;
 337        else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 338                lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
 339
 340        if (!lapic_is_integrated())
 341                lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
 342
 343        if (!irqen)
 344                lvtt_value |= APIC_LVT_MASKED;
 345
 346        apic_write(APIC_LVTT, lvtt_value);
 347
 348        if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
 349                /*
 350                 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
 351                 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
 352                 * According to Intel, MFENCE can do the serialization here.
 353                 */
 354                asm volatile("mfence" : : : "memory");
 355                return;
 356        }
 357
 358        /*
 359         * Divide PICLK by 16
 360         */
 361        tmp_value = apic_read(APIC_TDCR);
 362        apic_write(APIC_TDCR,
 363                (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
 364                APIC_TDR_DIV_16);
 365
 366        if (!oneshot)
 367                apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
 368}
 369
/*
 * Setup extended LVT, AMD specific
 *
 * Software should use the LVT offsets the BIOS provides.  The offsets
 * are determined by the subsystems using them, like those for MCE
 * threshold or IBS.  On K8 only offset 0 (APIC500) and MCE interrupts
 * are supported. Beginning with family 10h at least 4 offsets are
 * available.
 *
 * Since the offsets must be consistent for all cores, we keep track
 * of the LVT offsets in software and reserve the offset for the same
 * vector also to be used on other cores. An offset is freed by
 * setting the entry to APIC_EILVT_MASKED.
 *
 * If the BIOS is right, there should be no conflicts. Otherwise a
 * "[Firmware Bug]: ..." error message is generated. However, if
 * software does not properly determine the offsets, it is not
 * necessarily a BIOS bug.
 */
 389
 390static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
 391
 392static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
 393{
 394        return (old & APIC_EILVT_MASKED)
 395                || (new == APIC_EILVT_MASKED)
 396                || ((new & ~APIC_EILVT_MASKED) == old);
 397}
 398
 399static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
 400{
 401        unsigned int rsvd, vector;
 402
 403        if (offset >= APIC_EILVT_NR_MAX)
 404                return ~0;
 405
 406        rsvd = atomic_read(&eilvt_offsets[offset]);
 407        do {
 408                vector = rsvd & ~APIC_EILVT_MASKED;     /* 0: unassigned */
 409                if (vector && !eilvt_entry_is_changeable(vector, new))
 410                        /* may not change if vectors are different */
 411                        return rsvd;
 412                rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
 413        } while (rsvd != new);
 414
 415        rsvd &= ~APIC_EILVT_MASKED;
 416        if (rsvd && rsvd != vector)
 417                pr_info("LVT offset %d assigned for vector 0x%02x\n",
 418                        offset, rsvd);
 419
 420        return new;
 421}
 422
 423/*
 424 * If mask=1, the LVT entry does not generate interrupts while mask=0
 425 * enables the vector. See also the BKDGs. Must be called with
 426 * preemption disabled.
 427 */
 428
 429int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
 430{
 431        unsigned long reg = APIC_EILVTn(offset);
 432        unsigned int new, old, reserved;
 433
 434        new = (mask << 16) | (msg_type << 8) | vector;
 435        old = apic_read(reg);
 436        reserved = reserve_eilvt_offset(offset, new);
 437
 438        if (reserved != new) {
 439                pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
 440                       "vector 0x%x, but the register is already in use for "
 441                       "vector 0x%x on another cpu\n",
 442                       smp_processor_id(), reg, offset, new, reserved);
 443                return -EINVAL;
 444        }
 445
 446        if (!eilvt_entry_is_changeable(old, new)) {
 447                pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
 448                       "vector 0x%x, but the register is already in use for "
 449                       "vector 0x%x on this cpu\n",
 450                       smp_processor_id(), reg, offset, new, old);
 451                return -EBUSY;
 452        }
 453
 454        apic_write(reg, new);
 455
 456        return 0;
 457}
 458EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
 459
 460/*
 461 * Program the next event, relative to now
 462 */
 463static int lapic_next_event(unsigned long delta,
 464                            struct clock_event_device *evt)
 465{
 466        apic_write(APIC_TMICT, delta);
 467        return 0;
 468}
 469
 470static int lapic_next_deadline(unsigned long delta,
 471                               struct clock_event_device *evt)
 472{
 473        u64 tsc;
 474
 475        tsc = rdtsc();
 476        wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
 477        return 0;
 478}
 479
 480static int lapic_timer_shutdown(struct clock_event_device *evt)
 481{
 482        unsigned int v;
 483
 484        /* Lapic used as dummy for broadcast ? */
 485        if (evt->features & CLOCK_EVT_FEAT_DUMMY)
 486                return 0;
 487
 488        v = apic_read(APIC_LVTT);
 489        v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
 490        apic_write(APIC_LVTT, v);
 491        apic_write(APIC_TMICT, 0);
 492        return 0;
 493}
 494
 495static inline int
 496lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
 497{
 498        /* Lapic used as dummy for broadcast ? */
 499        if (evt->features & CLOCK_EVT_FEAT_DUMMY)
 500                return 0;
 501
 502        __setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
 503        return 0;
 504}
 505
 506static int lapic_timer_set_periodic(struct clock_event_device *evt)
 507{
 508        return lapic_timer_set_periodic_oneshot(evt, false);
 509}
 510
 511static int lapic_timer_set_oneshot(struct clock_event_device *evt)
 512{
 513        return lapic_timer_set_periodic_oneshot(evt, true);
 514}
 515
 516/*
 517 * Local APIC timer broadcast function
 518 */
 519static void lapic_timer_broadcast(const struct cpumask *mask)
 520{
 521#ifdef CONFIG_SMP
 522        apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
 523#endif
 524}
 525
 526
 527/*
 528 * The local apic timer can be used for any function which is CPU local.
 529 */
 530static struct clock_event_device lapic_clockevent = {
 531        .name                           = "lapic",
 532        .features                       = CLOCK_EVT_FEAT_PERIODIC |
 533                                          CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
 534                                          | CLOCK_EVT_FEAT_DUMMY,
 535        .shift                          = 32,
 536        .set_state_shutdown             = lapic_timer_shutdown,
 537        .set_state_periodic             = lapic_timer_set_periodic,
 538        .set_state_oneshot              = lapic_timer_set_oneshot,
 539        .set_state_oneshot_stopped      = lapic_timer_shutdown,
 540        .set_next_event                 = lapic_next_event,
 541        .broadcast                      = lapic_timer_broadcast,
 542        .rating                         = 100,
 543        .irq                            = -1,
 544};
 545static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 546
 547static const struct x86_cpu_id deadline_match[] __initconst = {
 548        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */
 549        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */
 550
 551        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X,        0x0b000020),
 552
 553        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011),
 554        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e),
 555        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c),
 556        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003),
 557
 558        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136),
 559        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014),
 560        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x5, 0xf), 0),
 561
 562        X86_MATCH_INTEL_FAM6_MODEL( HASWELL,            0x22),
 563        X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L,          0x20),
 564        X86_MATCH_INTEL_FAM6_MODEL( HASWELL_G,          0x17),
 565
 566        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL,          0x25),
 567        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_G,        0x17),
 568
 569        X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_L,          0xb2),
 570        X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE,            0xb2),
 571
 572        X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE_L,         0x52),
 573        X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE,           0x52),
 574
 575        {},
 576};
 577
 578static __init bool apic_validate_deadline_timer(void)
 579{
 580        const struct x86_cpu_id *m;
 581        u32 rev;
 582
 583        if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 584                return false;
 585        if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
 586                return true;
 587
 588        m = x86_match_cpu(deadline_match);
 589        if (!m)
 590                return true;
 591
 592        rev = (u32)m->driver_data;
 593
 594        if (boot_cpu_data.microcode >= rev)
 595                return true;
 596
 597        setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
 598        pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
 599               "please update microcode to version: 0x%x (or later)\n", rev);
 600        return false;
 601}
 602
 603/*
 604 * Setup the local APIC timer for this CPU. Copy the initialized values
 605 * of the boot CPU and register the clock event in the framework.
 606 */
 607static void setup_APIC_timer(void)
 608{
 609        struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
 610
 611        if (this_cpu_has(X86_FEATURE_ARAT)) {
 612                lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
 613                /* Make LAPIC timer preferrable over percpu HPET */
 614                lapic_clockevent.rating = 150;
 615        }
 616
 617        memcpy(levt, &lapic_clockevent, sizeof(*levt));
 618        levt->cpumask = cpumask_of(smp_processor_id());
 619
 620        if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
 621                levt->name = "lapic-deadline";
 622                levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
 623                                    CLOCK_EVT_FEAT_DUMMY);
 624                levt->set_next_event = lapic_next_deadline;
 625                clockevents_config_and_register(levt,
 626                                                tsc_khz * (1000 / TSC_DIVISOR),
 627                                                0xF, ~0UL);
 628        } else
 629                clockevents_register_device(levt);
 630}
 631
 632/*
 633 * Install the updated TSC frequency from recalibration at the TSC
 634 * deadline clockevent devices.
 635 */
 636static void __lapic_update_tsc_freq(void *info)
 637{
 638        struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
 639
 640        if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 641                return;
 642
 643        clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
 644}
 645
 646void lapic_update_tsc_freq(void)
 647{
 648        /*
 649         * The clockevent device's ->mult and ->shift can both be
 650         * changed. In order to avoid races, schedule the frequency
 651         * update code on each CPU.
 652         */
 653        on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
 654}
 655
/*
 * In this function we calibrate APIC bus clocks to the external timer.
 *
 * We want to do the calibration only once since we want to have local timer
 * irqs in sync. CPUs connected by the same APIC bus have the very same bus
 * frequency.
 *
 * This was previously done by reading the PIT/HPET and waiting for a
 * wraparound to find out that a tick has elapsed. I have a box where the
 * PIT readout is broken, so it never gets out of the wait loop again. This
 * was also reported by others.
 *
 * Monitoring the jiffies value is inaccurate and the clockevents
 * infrastructure allows us to do a simple substitution of the interrupt
 * handler.
 *
 * The calibration routine also uses the pm_timer when possible, as the PIT
 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
 * back to normal later in the boot process).
 */
 676
 677#define LAPIC_CAL_LOOPS         (HZ/10)
 678
 679static __initdata int lapic_cal_loops = -1;
 680static __initdata long lapic_cal_t1, lapic_cal_t2;
 681static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
 682static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
 683static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
 684
 685/*
 686 * Temporary interrupt handler and polled calibration function.
 687 */
 688static void __init lapic_cal_handler(struct clock_event_device *dev)
 689{
 690        unsigned long long tsc = 0;
 691        long tapic = apic_read(APIC_TMCCT);
 692        unsigned long pm = acpi_pm_read_early();
 693
 694        if (boot_cpu_has(X86_FEATURE_TSC))
 695                tsc = rdtsc();
 696
 697        switch (lapic_cal_loops++) {
 698        case 0:
 699                lapic_cal_t1 = tapic;
 700                lapic_cal_tsc1 = tsc;
 701                lapic_cal_pm1 = pm;
 702                lapic_cal_j1 = jiffies;
 703                break;
 704
 705        case LAPIC_CAL_LOOPS:
 706                lapic_cal_t2 = tapic;
 707                lapic_cal_tsc2 = tsc;
 708                if (pm < lapic_cal_pm1)
 709                        pm += ACPI_PM_OVRRUN;
 710                lapic_cal_pm2 = pm;
 711                lapic_cal_j2 = jiffies;
 712                break;
 713        }
 714}
 715
 716static int __init
 717calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
 718{
 719        const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
 720        const long pm_thresh = pm_100ms / 100;
 721        unsigned long mult;
 722        u64 res;
 723
 724#ifndef CONFIG_X86_PM_TIMER
 725        return -1;
 726#endif
 727
 728        apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
 729
 730        /* Check, if the PM timer is available */
 731        if (!deltapm)
 732                return -1;
 733
 734        mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
 735
 736        if (deltapm > (pm_100ms - pm_thresh) &&
 737            deltapm < (pm_100ms + pm_thresh)) {
 738                apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
 739                return 0;
 740        }
 741
 742        res = (((u64)deltapm) *  mult) >> 22;
 743        do_div(res, 1000000);
 744        pr_warn("APIC calibration not consistent "
 745                "with PM-Timer: %ldms instead of 100ms\n", (long)res);
 746
 747        /* Correct the lapic counter value */
 748        res = (((u64)(*delta)) * pm_100ms);
 749        do_div(res, deltapm);
 750        pr_info("APIC delta adjusted to PM-Timer: "
 751                "%lu (%ld)\n", (unsigned long)res, *delta);
 752        *delta = (long)res;
 753
 754        /* Correct the tsc counter value */
 755        if (boot_cpu_has(X86_FEATURE_TSC)) {
 756                res = (((u64)(*deltatsc)) * pm_100ms);
 757                do_div(res, deltapm);
 758                apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
 759                                          "PM-Timer: %lu (%ld)\n",
 760                                        (unsigned long)res, *deltatsc);
 761                *deltatsc = (long)res;
 762        }
 763
 764        return 0;
 765}
 766
 767static int __init lapic_init_clockevent(void)
 768{
 769        if (!lapic_timer_period)
 770                return -1;
 771
 772        /* Calculate the scaled math multiplication factor */
 773        lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
 774                                        TICK_NSEC, lapic_clockevent.shift);
 775        lapic_clockevent.max_delta_ns =
 776                clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
 777        lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
 778        lapic_clockevent.min_delta_ns =
 779                clockevent_delta2ns(0xF, &lapic_clockevent);
 780        lapic_clockevent.min_delta_ticks = 0xF;
 781
 782        return 0;
 783}
 784
 785bool __init apic_needs_pit(void)
 786{
 787        /*
 788         * If the frequencies are not known, PIT is required for both TSC
 789         * and apic timer calibration.
 790         */
 791        if (!tsc_khz || !cpu_khz)
 792                return true;
 793
 794        /* Is there an APIC at all or is it disabled? */
 795        if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic)
 796                return true;
 797
 798        /*
 799         * If interrupt delivery mode is legacy PIC or virtual wire without
 800         * configuration, the local APIC timer wont be set up. Make sure
 801         * that the PIT is initialized.
 802         */
 803        if (apic_intr_mode == APIC_PIC ||
 804            apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
 805                return true;
 806
 807        /* Virt guests may lack ARAT, but still have DEADLINE */
 808        if (!boot_cpu_has(X86_FEATURE_ARAT))
 809                return true;
 810
 811        /* Deadline timer is based on TSC so no further PIT action required */
 812        if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 813                return false;
 814
 815        /* APIC timer disabled? */
 816        if (disable_apic_timer)
 817                return true;
 818        /*
 819         * The APIC timer frequency is known already, no PIT calibration
 820         * required. If unknown, let the PIT be initialized.
 821         */
 822        return lapic_timer_period == 0;
 823}
 824
 825static int __init calibrate_APIC_clock(void)
 826{
 827        struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
 828        u64 tsc_perj = 0, tsc_start = 0;
 829        unsigned long jif_start;
 830        unsigned long deltaj;
 831        long delta, deltatsc;
 832        int pm_referenced = 0;
 833
 834        if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 835                return 0;
 836
 837        /*
 838         * Check if lapic timer has already been calibrated by platform
 839         * specific routine, such as tsc calibration code. If so just fill
 840         * in the clockevent structure and return.
 841         */
 842        if (!lapic_init_clockevent()) {
 843                apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
 844                            lapic_timer_period);
 845                /*
 846                 * Direct calibration methods must have an always running
 847                 * local APIC timer, no need for broadcast timer.
 848                 */
 849                lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 850                return 0;
 851        }
 852
 853        apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
 854                    "calibrating APIC timer ...\n");
 855
 856        /*
 857         * There are platforms w/o global clockevent devices. Instead of
 858         * making the calibration conditional on that, use a polling based
 859         * approach everywhere.
 860         */
 861        local_irq_disable();
 862
 863        /*
 864         * Setup the APIC counter to maximum. There is no way the lapic
 865         * can underflow in the 100ms detection time frame
 866         */
 867        __setup_APIC_LVTT(0xffffffff, 0, 0);
 868
 869        /*
 870         * Methods to terminate the calibration loop:
 871         *  1) Global clockevent if available (jiffies)
 872         *  2) TSC if available and frequency is known
 873         */
 874        jif_start = READ_ONCE(jiffies);
 875
 876        if (tsc_khz) {
 877                tsc_start = rdtsc();
 878                tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
 879        }
 880
 881        /*
 882         * Enable interrupts so the tick can fire, if a global
 883         * clockevent device is available
 884         */
 885        local_irq_enable();
 886
 887        while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
 888                /* Wait for a tick to elapse */
 889                while (1) {
 890                        if (tsc_khz) {
 891                                u64 tsc_now = rdtsc();
 892                                if ((tsc_now - tsc_start) >= tsc_perj) {
 893                                        tsc_start += tsc_perj;
 894                                        break;
 895                                }
 896                        } else {
 897                                unsigned long jif_now = READ_ONCE(jiffies);
 898
 899                                if (time_after(jif_now, jif_start)) {
 900                                        jif_start = jif_now;
 901                                        break;
 902                                }
 903                        }
 904                        cpu_relax();
 905                }
 906
 907                /* Invoke the calibration routine */
 908                local_irq_disable();
 909                lapic_cal_handler(NULL);
 910                local_irq_enable();
 911        }
 912
 913        local_irq_disable();
 914
 915        /* Build delta t1-t2 as apic timer counts down */
 916        delta = lapic_cal_t1 - lapic_cal_t2;
 917        apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
 918
 919        deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
 920
 921        /* we trust the PM based calibration if possible */
 922        pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
 923                                        &delta, &deltatsc);
 924
 925        lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
 926        lapic_init_clockevent();
 927
 928        apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
 929        apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
 930        apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
 931                    lapic_timer_period);
 932
 933        if (boot_cpu_has(X86_FEATURE_TSC)) {
 934                apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
 935                            "%ld.%04ld MHz.\n",
 936                            (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
 937                            (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
 938        }
 939
 940        apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
 941                    "%u.%04u MHz.\n",
 942                    lapic_timer_period / (1000000 / HZ),
 943                    lapic_timer_period % (1000000 / HZ));
 944
 945        /*
 946         * Do a sanity check on the APIC calibration result
 947         */
 948        if (lapic_timer_period < (1000000 / HZ)) {
 949                local_irq_enable();
 950                pr_warn("APIC frequency too slow, disabling apic timer\n");
 951                return -1;
 952        }
 953
 954        levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
 955
 956        /*
 957         * PM timer calibration failed or not turned on so lets try APIC
 958         * timer based calibration, if a global clockevent device is
 959         * available.
 960         */
 961        if (!pm_referenced && global_clock_event) {
 962                apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
 963
 964                /*
 965                 * Setup the apic timer manually
 966                 */
 967                levt->event_handler = lapic_cal_handler;
 968                lapic_timer_set_periodic(levt);
 969                lapic_cal_loops = -1;
 970
 971                /* Let the interrupts run */
 972                local_irq_enable();
 973
 974                while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
 975                        cpu_relax();
 976
 977                /* Stop the lapic timer */
 978                local_irq_disable();
 979                lapic_timer_shutdown(levt);
 980
 981                /* Jiffies delta */
 982                deltaj = lapic_cal_j2 - lapic_cal_j1;
 983                apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
 984
 985                /* Check, if the jiffies result is consistent */
 986                if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
 987                        apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
 988                else
 989                        levt->features |= CLOCK_EVT_FEAT_DUMMY;
 990        }
 991        local_irq_enable();
 992
 993        if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
 994                pr_warn("APIC timer disabled due to verification failure\n");
 995                return -1;
 996        }
 997
 998        return 0;
 999}
1000
1001/*
1002 * Setup the boot APIC
1003 *
1004 * Calibrate and verify the result.
1005 */
1006void __init setup_boot_APIC_clock(void)
1007{
1008        /*
1009         * The local apic timer can be disabled via the kernel
1010         * commandline or from the CPU detection code. Register the lapic
1011         * timer as a dummy clock event source on SMP systems, so the
1012         * broadcast mechanism is used. On UP systems simply ignore it.
1013         */
1014        if (disable_apic_timer) {
1015                pr_info("Disabling APIC timer\n");
1016                /* No broadcast on UP ! */
1017                if (num_possible_cpus() > 1) {
1018                        lapic_clockevent.mult = 1;
1019                        setup_APIC_timer();
1020                }
1021                return;
1022        }
1023
1024        if (calibrate_APIC_clock()) {
1025                /* No broadcast on UP ! */
1026                if (num_possible_cpus() > 1)
1027                        setup_APIC_timer();
1028                return;
1029        }
1030
1031        /*
1032         * If nmi_watchdog is set to IO_APIC, we need the
1033         * PIT/HPET going.  Otherwise register lapic as a dummy
1034         * device.
1035         */
1036        lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
1037
1038        /* Setup the lapic or request the broadcast */
1039        setup_APIC_timer();
1040        amd_e400_c1e_apic_setup();
1041}
1042
/*
 * Local APIC timer setup for a CPU other than the boot CPU. No
 * calibration is done here; the timer is registered via
 * setup_APIC_timer() and then amd_e400_c1e_apic_setup() is invoked,
 * exactly as at the end of the boot CPU path.
 */
void setup_secondary_APIC_clock(void)
{
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}
1048
1049/*
1050 * The guts of the apic timer interrupt
1051 */
1052static void local_apic_timer_interrupt(void)
1053{
1054        struct clock_event_device *evt = this_cpu_ptr(&lapic_events);
1055
1056        /*
1057         * Normally we should not be here till LAPIC has been initialized but
1058         * in some cases like kdump, its possible that there is a pending LAPIC
1059         * timer interrupt from previous kernel's context and is delivered in
1060         * new kernel the moment interrupts are enabled.
1061         *
1062         * Interrupts are enabled early and LAPIC is setup much later, hence
1063         * its possible that when we get here evt->event_handler is NULL.
1064         * Check for event_handler being NULL and discard the interrupt as
1065         * spurious.
1066         */
1067        if (!evt->event_handler) {
1068                pr_warn("Spurious LAPIC timer interrupt on cpu %d\n",
1069                        smp_processor_id());
1070                /* Switch it off */
1071                lapic_timer_shutdown(evt);
1072                return;
1073        }
1074
1075        /*
1076         * the NMI deadlock-detector uses this.
1077         */
1078        inc_irq_stat(apic_timer_irqs);
1079
1080        evt->event_handler(evt);
1081}
1082
1083/*
1084 * Local APIC timer interrupt. This is the most natural way for doing
1085 * local interrupts, but local timer interrupts can be emulated by
1086 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
1087 *
1088 * [ if a single-CPU system runs an SMP kernel then we call the local
1089 *   interrupt as well. Thus we cannot inline the local irq ... ]
1090 */
1091DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
1092{
1093        struct pt_regs *old_regs = set_irq_regs(regs);
1094
1095        ack_APIC_irq();
1096        trace_local_timer_entry(LOCAL_TIMER_VECTOR);
1097        local_apic_timer_interrupt();
1098        trace_local_timer_exit(LOCAL_TIMER_VECTOR);
1099
1100        set_irq_regs(old_regs);
1101}
1102
/*
 * Changing the profiling timer multiplier is not supported here:
 * @multiplier is ignored and -EINVAL is returned unconditionally.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	const int unsupported = -EINVAL;

	return unsupported;
}
1107
1108/*
1109 * Local APIC start and shutdown
1110 */
1111
1112/**
1113 * clear_local_APIC - shutdown the local APIC
1114 *
1115 * This is called, when a CPU is disabled and before rebooting, so the state of
1116 * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
1117 * leftovers during boot.
1118 */
1119void clear_local_APIC(void)
1120{
1121        int maxlvt;
1122        u32 v;
1123
1124        /* APIC hasn't been mapped yet */
1125        if (!x2apic_mode && !apic_phys)
1126                return;
1127
1128        maxlvt = lapic_get_maxlvt();
1129        /*
1130         * Masking an LVT entry can trigger a local APIC error
1131         * if the vector is zero. Mask LVTERR first to prevent this.
1132         */
1133        if (maxlvt >= 3) {
1134                v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
1135                apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
1136        }
1137        /*
1138         * Careful: we have to set masks only first to deassert
1139         * any level-triggered sources.
1140         */
1141        v = apic_read(APIC_LVTT);
1142        apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
1143        v = apic_read(APIC_LVT0);
1144        apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
1145        v = apic_read(APIC_LVT1);
1146        apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
1147        if (maxlvt >= 4) {
1148                v = apic_read(APIC_LVTPC);
1149                apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
1150        }
1151
1152        /* lets not touch this if we didn't frob it */
1153#ifdef CONFIG_X86_THERMAL_VECTOR
1154        if (maxlvt >= 5) {
1155                v = apic_read(APIC_LVTTHMR);
1156                apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
1157        }
1158#endif
1159#ifdef CONFIG_X86_MCE_INTEL
1160        if (maxlvt >= 6) {
1161                v = apic_read(APIC_LVTCMCI);
1162                if (!(v & APIC_LVT_MASKED))
1163                        apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
1164        }
1165#endif
1166
1167        /*
1168         * Clean APIC state for other OSs:
1169         */
1170        apic_write(APIC_LVTT, APIC_LVT_MASKED);
1171        apic_write(APIC_LVT0, APIC_LVT_MASKED);
1172        apic_write(APIC_LVT1, APIC_LVT_MASKED);
1173        if (maxlvt >= 3)
1174                apic_write(APIC_LVTERR, APIC_LVT_MASKED);
1175        if (maxlvt >= 4)
1176                apic_write(APIC_LVTPC, APIC_LVT_MASKED);
1177
1178        /* Integrated APIC (!82489DX) ? */
1179        if (lapic_is_integrated()) {
1180                if (maxlvt > 3)
1181                        /* Clear ESR due to Pentium errata 3AP and 11AP */
1182                        apic_write(APIC_ESR, 0);
1183                apic_read(APIC_ESR);
1184        }
1185}
1186
1187/**
1188 * apic_soft_disable - Clears and software disables the local APIC on hotplug
1189 *
1190 * Contrary to disable_local_APIC() this does not touch the enable bit in
1191 * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
1192 * bus would require a hardware reset as the APIC would lose track of bus
1193 * arbitration. On systems with FSB delivery APICBASE could be disabled,
1194 * but it has to be guaranteed that no interrupt is sent to the APIC while
1195 * in that state and it's not clear from the SDM whether it still responds
1196 * to INIT/SIPI messages. Stay on the safe side and use software disable.
1197 */
1198void apic_soft_disable(void)
1199{
1200        u32 value;
1201
1202        clear_local_APIC();
1203
1204        /* Soft disable APIC (implies clearing of registers for 82489DX!). */
1205        value = apic_read(APIC_SPIV);
1206        value &= ~APIC_SPIV_APIC_ENABLED;
1207        apic_write(APIC_SPIV, value);
1208}
1209
1210/**
1211 * disable_local_APIC - clear and disable the local APIC
1212 */
1213void disable_local_APIC(void)
1214{
1215        /* APIC hasn't been mapped yet */
1216        if (!x2apic_mode && !apic_phys)
1217                return;
1218
1219        apic_soft_disable();
1220
1221#ifdef CONFIG_X86_32
1222        /*
1223         * When LAPIC was disabled by the BIOS and enabled by the kernel,
1224         * restore the disabled state.
1225         */
1226        if (enabled_via_apicbase) {
1227                unsigned int l, h;
1228
1229                rdmsr(MSR_IA32_APICBASE, l, h);
1230                l &= ~MSR_IA32_APICBASE_ENABLE;
1231                wrmsr(MSR_IA32_APICBASE, l, h);
1232        }
1233#endif
1234}
1235
1236/*
1237 * If Linux enabled the LAPIC against the BIOS default disable it down before
1238 * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
1239 * not power-off.  Additionally clear all LVT entries before disable_local_APIC
1240 * for the case where Linux didn't enable the LAPIC.
1241 */
1242void lapic_shutdown(void)
1243{
1244        unsigned long flags;
1245
1246        if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
1247                return;
1248
1249        local_irq_save(flags);
1250
1251#ifdef CONFIG_X86_32
1252        if (!enabled_via_apicbase)
1253                clear_local_APIC();
1254        else
1255#endif
1256                disable_local_APIC();
1257
1258
1259        local_irq_restore(flags);
1260}
1261
1262/**
1263 * sync_Arb_IDs - synchronize APIC bus arbitration IDs
1264 */
1265void __init sync_Arb_IDs(void)
1266{
1267        /*
1268         * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
1269         * needed on AMD.
1270         */
1271        if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
1272                return;
1273
1274        /*
1275         * Wait for idle.
1276         */
1277        apic_wait_icr_idle();
1278
1279        apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
1280        apic_write(APIC_ICR, APIC_DEST_ALLINC |
1281                        APIC_INT_LEVELTRIG | APIC_DM_INIT);
1282}
1283
1284enum apic_intr_mode_id apic_intr_mode __ro_after_init;
1285
1286static int __init __apic_intr_mode_select(void)
1287{
1288        /* Check kernel option */
1289        if (disable_apic) {
1290                pr_info("APIC disabled via kernel command line\n");
1291                return APIC_PIC;
1292        }
1293
1294        /* Check BIOS */
1295#ifdef CONFIG_X86_64
1296        /* On 64-bit, the APIC must be integrated, Check local APIC only */
1297        if (!boot_cpu_has(X86_FEATURE_APIC)) {
1298                disable_apic = 1;
1299                pr_info("APIC disabled by BIOS\n");
1300                return APIC_PIC;
1301        }
1302#else
1303        /* On 32-bit, the APIC may be integrated APIC or 82489DX */
1304
1305        /* Neither 82489DX nor integrated APIC ? */
1306        if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
1307                disable_apic = 1;
1308                return APIC_PIC;
1309        }
1310
1311        /* If the BIOS pretends there is an integrated APIC ? */
1312        if (!boot_cpu_has(X86_FEATURE_APIC) &&
1313                APIC_INTEGRATED(boot_cpu_apic_version)) {
1314                disable_apic = 1;
1315                pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
1316                                       boot_cpu_physical_apicid);
1317                return APIC_PIC;
1318        }
1319#endif
1320
1321        /* Check MP table or ACPI MADT configuration */
1322        if (!smp_found_config) {
1323                disable_ioapic_support();
1324                if (!acpi_lapic) {
1325                        pr_info("APIC: ACPI MADT or MP tables are not detected\n");
1326                        return APIC_VIRTUAL_WIRE_NO_CONFIG;
1327                }
1328                return APIC_VIRTUAL_WIRE;
1329        }
1330
1331#ifdef CONFIG_SMP
1332        /* If SMP should be disabled, then really disable it! */
1333        if (!setup_max_cpus) {
1334                pr_info("APIC: SMP mode deactivated\n");
1335                return APIC_SYMMETRIC_IO_NO_ROUTING;
1336        }
1337
1338        if (read_apic_id() != boot_cpu_physical_apicid) {
1339                panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1340                     read_apic_id(), boot_cpu_physical_apicid);
1341                /* Or can we switch back to PIC here? */
1342        }
1343#endif
1344
1345        return APIC_SYMMETRIC_IO;
1346}
1347
1348/* Select the interrupt delivery mode for the BSP */
1349void __init apic_intr_mode_select(void)
1350{
1351        apic_intr_mode = __apic_intr_mode_select();
1352}
1353
1354/*
1355 * An initial setup of the virtual wire mode.
1356 */
1357void __init init_bsp_APIC(void)
1358{
1359        unsigned int value;
1360
1361        /*
1362         * Don't do the setup now if we have a SMP BIOS as the
1363         * through-I/O-APIC virtual wire mode might be active.
1364         */
1365        if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
1366                return;
1367
1368        /*
1369         * Do not trust the local APIC being empty at bootup.
1370         */
1371        clear_local_APIC();
1372
1373        /*
1374         * Enable APIC.
1375         */
1376        value = apic_read(APIC_SPIV);
1377        value &= ~APIC_VECTOR_MASK;
1378        value |= APIC_SPIV_APIC_ENABLED;
1379
1380#ifdef CONFIG_X86_32
1381        /* This bit is reserved on P4/Xeon and should be cleared */
1382        if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
1383            (boot_cpu_data.x86 == 15))
1384                value &= ~APIC_SPIV_FOCUS_DISABLED;
1385        else
1386#endif
1387                value |= APIC_SPIV_FOCUS_DISABLED;
1388        value |= SPURIOUS_APIC_VECTOR;
1389        apic_write(APIC_SPIV, value);
1390
1391        /*
1392         * Set up the virtual wire mode.
1393         */
1394        apic_write(APIC_LVT0, APIC_DM_EXTINT);
1395        value = APIC_DM_NMI;
1396        if (!lapic_is_integrated())             /* 82489DX */
1397                value |= APIC_LVT_LEVEL_TRIGGER;
1398        if (apic_extnmi == APIC_EXTNMI_NONE)
1399                value |= APIC_LVT_MASKED;
1400        apic_write(APIC_LVT1, value);
1401}
1402
1403static void __init apic_bsp_setup(bool upmode);
1404
1405/* Init the interrupt delivery mode for the BSP */
1406void __init apic_intr_mode_init(void)
1407{
1408        bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
1409
1410        switch (apic_intr_mode) {
1411        case APIC_PIC:
1412                pr_info("APIC: Keep in PIC mode(8259)\n");
1413                return;
1414        case APIC_VIRTUAL_WIRE:
1415                pr_info("APIC: Switch to virtual wire mode setup\n");
1416                default_setup_apic_routing();
1417                break;
1418        case APIC_VIRTUAL_WIRE_NO_CONFIG:
1419                pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
1420                upmode = true;
1421                default_setup_apic_routing();
1422                break;
1423        case APIC_SYMMETRIC_IO:
1424                pr_info("APIC: Switch to symmetric I/O mode setup\n");
1425                default_setup_apic_routing();
1426                break;
1427        case APIC_SYMMETRIC_IO_NO_ROUTING:
1428                pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
1429                break;
1430        }
1431
1432        apic_bsp_setup(upmode);
1433}
1434
1435static void lapic_setup_esr(void)
1436{
1437        unsigned int oldvalue, value, maxlvt;
1438
1439        if (!lapic_is_integrated()) {
1440                pr_info("No ESR for 82489DX.\n");
1441                return;
1442        }
1443
1444        if (apic->disable_esr) {
1445                /*
1446                 * Something untraceable is creating bad interrupts on
1447                 * secondary quads ... for the moment, just leave the
1448                 * ESR disabled - we can't do anything useful with the
1449                 * errors anyway - mbligh
1450                 */
1451                pr_info("Leaving ESR disabled.\n");
1452                return;
1453        }
1454
1455        maxlvt = lapic_get_maxlvt();
1456        if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
1457                apic_write(APIC_ESR, 0);
1458        oldvalue = apic_read(APIC_ESR);
1459
1460        /* enables sending errors */
1461        value = ERROR_APIC_VECTOR;
1462        apic_write(APIC_LVTERR, value);
1463
1464        /*
1465         * spec says clear errors after enabling vector.
1466         */
1467        if (maxlvt > 3)
1468                apic_write(APIC_ESR, 0);
1469        value = apic_read(APIC_ESR);
1470        if (value != oldvalue)
1471                apic_printk(APIC_VERBOSE, "ESR value before enabling "
1472                        "vector: 0x%08x  after: 0x%08x\n",
1473                        oldvalue, value);
1474}
1475
1476#define APIC_IR_REGS            APIC_ISR_NR
1477#define APIC_IR_BITS            (APIC_IR_REGS * 32)
1478#define APIC_IR_MAPSIZE         (APIC_IR_BITS / BITS_PER_LONG)
1479
1480union apic_ir {
1481        unsigned long   map[APIC_IR_MAPSIZE];
1482        u32             regs[APIC_IR_REGS];
1483};
1484
1485static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
1486{
1487        int i, bit;
1488
1489        /* Read the IRRs */
1490        for (i = 0; i < APIC_IR_REGS; i++)
1491                irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
1492
1493        /* Read the ISRs */
1494        for (i = 0; i < APIC_IR_REGS; i++)
1495                isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
1496
1497        /*
1498         * If the ISR map is not empty. ACK the APIC and run another round
1499         * to verify whether a pending IRR has been unblocked and turned
1500         * into a ISR.
1501         */
1502        if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
1503                /*
1504                 * There can be multiple ISR bits set when a high priority
1505                 * interrupt preempted a lower priority one. Issue an ACK
1506                 * per set bit.
1507                 */
1508                for_each_set_bit(bit, isr->map, APIC_IR_BITS)
1509                        ack_APIC_irq();
1510                return true;
1511        }
1512
1513        return !bitmap_empty(irr->map, APIC_IR_BITS);
1514}
1515
1516/*
1517 * After a crash, we no longer service the interrupts and a pending
1518 * interrupt from previous kernel might still have ISR bit set.
1519 *
1520 * Most probably by now the CPU has serviced that pending interrupt and it
1521 * might not have done the ack_APIC_irq() because it thought, interrupt
1522 * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
1523 * the ISR bit and cpu thinks it has already serivced the interrupt. Hence
1524 * a vector might get locked. It was noticed for timer irq (vector
1525 * 0x31). Issue an extra EOI to clear ISR.
1526 *
1527 * If there are pending IRR bits they turn into ISR bits after a higher
1528 * priority ISR bit has been acked.
1529 */
1530static void apic_pending_intr_clear(void)
1531{
1532        union apic_ir irr, isr;
1533        unsigned int i;
1534
1535        /* 512 loops are way oversized and give the APIC a chance to obey. */
1536        for (i = 0; i < 512; i++) {
1537                if (!apic_check_and_ack(&irr, &isr))
1538                        return;
1539        }
1540        /* Dump the IRR/ISR content if that failed */
1541        pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
1542}
1543
1544/**
1545 * setup_local_APIC - setup the local APIC
1546 *
1547 * Used to setup local APIC while initializing BSP or bringing up APs.
1548 * Always called with preemption disabled.
1549 */
1550static void setup_local_APIC(void)
1551{
1552        int cpu = smp_processor_id();
1553        unsigned int value;
1554
1555        if (disable_apic) {
1556                disable_ioapic_support();
1557                return;
1558        }
1559
1560        /*
1561         * If this comes from kexec/kcrash the APIC might be enabled in
1562         * SPIV. Soft disable it before doing further initialization.
1563         */
1564        value = apic_read(APIC_SPIV);
1565        value &= ~APIC_SPIV_APIC_ENABLED;
1566        apic_write(APIC_SPIV, value);
1567
1568#ifdef CONFIG_X86_32
1569        /* Pound the ESR really hard over the head with a big hammer - mbligh */
1570        if (lapic_is_integrated() && apic->disable_esr) {
1571                apic_write(APIC_ESR, 0);
1572                apic_write(APIC_ESR, 0);
1573                apic_write(APIC_ESR, 0);
1574                apic_write(APIC_ESR, 0);
1575        }
1576#endif
1577        /*
1578         * Double-check whether this APIC is really registered.
1579         * This is meaningless in clustered apic mode, so we skip it.
1580         */
1581        BUG_ON(!apic->apic_id_registered());
1582
1583        /*
1584         * Intel recommends to set DFR, LDR and TPR before enabling
1585         * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
1586         * document number 292116).  So here it goes...
1587         */
1588        apic->init_apic_ldr();
1589
1590#ifdef CONFIG_X86_32
1591        if (apic->dest_logical) {
1592                int logical_apicid, ldr_apicid;
1593
1594                /*
1595                 * APIC LDR is initialized.  If logical_apicid mapping was
1596                 * initialized during get_smp_config(), make sure it matches
1597                 * the actual value.
1598                 */
1599                logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
1600                ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
1601                if (logical_apicid != BAD_APICID)
1602                        WARN_ON(logical_apicid != ldr_apicid);
1603                /* Always use the value from LDR. */
1604                early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
1605        }
1606#endif
1607
1608        /*
1609         * Set Task Priority to 'accept all except vectors 0-31'.  An APIC
1610         * vector in the 16-31 range could be delivered if TPR == 0, but we
1611         * would think it's an exception and terrible things will happen.  We
1612         * never change this later on.
1613         */
1614        value = apic_read(APIC_TASKPRI);
1615        value &= ~APIC_TPRI_MASK;
1616        value |= 0x10;
1617        apic_write(APIC_TASKPRI, value);
1618
1619        /* Clear eventually stale ISR/IRR bits */
1620        apic_pending_intr_clear();
1621
1622        /*
1623         * Now that we are all set up, enable the APIC
1624         */
1625        value = apic_read(APIC_SPIV);
1626        value &= ~APIC_VECTOR_MASK;
1627        /*
1628         * Enable APIC
1629         */
1630        value |= APIC_SPIV_APIC_ENABLED;
1631
1632#ifdef CONFIG_X86_32
1633        /*
1634         * Some unknown Intel IO/APIC (or APIC) errata is biting us with
1635         * certain networking cards. If high frequency interrupts are
1636         * happening on a particular IOAPIC pin, plus the IOAPIC routing
1637         * entry is masked/unmasked at a high rate as well then sooner or
1638         * later IOAPIC line gets 'stuck', no more interrupts are received
1639         * from the device. If focus CPU is disabled then the hang goes
1640         * away, oh well :-(
1641         *
1642         * [ This bug can be reproduced easily with a level-triggered
1643         *   PCI Ne2000 networking cards and PII/PIII processors, dual
1644         *   BX chipset. ]
1645         */
1646        /*
1647         * Actually disabling the focus CPU check just makes the hang less
1648         * frequent as it makes the interrupt distributon model be more
1649         * like LRU than MRU (the short-term load is more even across CPUs).
1650         */
1651
1652        /*
1653         * - enable focus processor (bit==0)
1654         * - 64bit mode always use processor focus
1655         *   so no need to set it
1656         */
1657        value &= ~APIC_SPIV_FOCUS_DISABLED;
1658#endif
1659
1660        /*
1661         * Set spurious IRQ vector
1662         */
1663        value |= SPURIOUS_APIC_VECTOR;
1664        apic_write(APIC_SPIV, value);
1665
1666        perf_events_lapic_init();
1667
1668        /*
1669         * Set up LVT0, LVT1:
1670         *
1671         * set up through-local-APIC on the boot CPU's LINT0. This is not
1672         * strictly necessary in pure symmetric-IO mode, but sometimes
1673         * we delegate interrupts to the 8259A.
1674         */
1675        /*
1676         * TODO: set up through-local-APIC from through-I/O-APIC? --macro
1677         */
1678        value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
1679        if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
1680                value = APIC_DM_EXTINT;
1681                apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
1682        } else {
1683                value = APIC_DM_EXTINT | APIC_LVT_MASKED;
1684                apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
1685        }
1686        apic_write(APIC_LVT0, value);
1687
1688        /*
1689         * Only the BSP sees the LINT1 NMI signal by default. This can be
1690         * modified by apic_extnmi= boot option.
1691         */
1692        if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
1693            apic_extnmi == APIC_EXTNMI_ALL)
1694                value = APIC_DM_NMI;
1695        else
1696                value = APIC_DM_NMI | APIC_LVT_MASKED;
1697
1698        /* Is 82489DX ? */
1699        if (!lapic_is_integrated())
1700                value |= APIC_LVT_LEVEL_TRIGGER;
1701        apic_write(APIC_LVT1, value);
1702
1703#ifdef CONFIG_X86_MCE_INTEL
1704        /* Recheck CMCI information after local APIC is up on CPU #0 */
1705        if (!cpu)
1706                cmci_recheck();
1707#endif
1708}
1709
/*
 * Final steps of the local APIC setup: enable error reporting, mask the
 * local timer on 32-bit kernels and activate APIC power management.
 */
static void end_local_APIC_setup(void)
{
	lapic_setup_esr();

#ifdef CONFIG_X86_32
	/* Disable the local apic timer */
	apic_write(APIC_LVTT,
		   apic_read(APIC_LVTT) | (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR));
#endif

	apic_pm_activate();
}
1726
/*
 * APIC setup function for application processors. Called from smpboot.c.
 * Runs the full local APIC setup followed by its finishing steps.
 */
void apic_ap_setup(void)
{
	setup_local_APIC();
	end_local_APIC_setup();
}
1735
1736#ifdef CONFIG_X86_X2APIC
/* Set to 1 by x2apic_enable(); reset to 0 by x2apic_disable() and "nox2apic". */
int x2apic_mode;

/* Values held by x2apic_state */
enum {
	X2APIC_OFF,		/* x2apic_enable() acts only in this state */
	X2APIC_ON,		/* entered through x2apic_enable() */
	X2APIC_DISABLED,	/* entered through x2apic_disable() or "nox2apic" */
};
static int x2apic_state;
1745
1746static void __x2apic_disable(void)
1747{
1748        u64 msr;
1749
1750        if (!boot_cpu_has(X86_FEATURE_APIC))
1751                return;
1752
1753        rdmsrl(MSR_IA32_APICBASE, msr);
1754        if (!(msr & X2APIC_ENABLE))
1755                return;
1756        /* Disable xapic and x2apic first and then reenable xapic mode */
1757        wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
1758        wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
1759        printk_once(KERN_INFO "x2apic disabled\n");
1760}
1761
1762static void __x2apic_enable(void)
1763{
1764        u64 msr;
1765
1766        rdmsrl(MSR_IA32_APICBASE, msr);
1767        if (msr & X2APIC_ENABLE)
1768                return;
1769        wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
1770        printk_once(KERN_INFO "x2apic enabled\n");
1771}
1772
1773static int __init setup_nox2apic(char *str)
1774{
1775        if (x2apic_enabled()) {
1776                int apicid = native_apic_msr_read(APIC_ID);
1777
1778                if (apicid >= 255) {
1779                        pr_warn("Apicid: %08x, cannot enforce nox2apic\n",
1780                                apicid);
1781                        return 0;
1782                }
1783                pr_warn("x2apic already enabled.\n");
1784                __x2apic_disable();
1785        }
1786        setup_clear_cpu_cap(X86_FEATURE_X2APIC);
1787        x2apic_state = X2APIC_DISABLED;
1788        x2apic_mode = 0;
1789        return 0;
1790}
1791early_param("nox2apic", setup_nox2apic);
1792
1793/* Called from cpu_init() to enable x2apic on (secondary) cpus */
1794void x2apic_setup(void)
1795{
1796        /*
1797         * If x2apic is not in ON state, disable it if already enabled
1798         * from BIOS.
1799         */
1800        if (x2apic_state != X2APIC_ON) {
1801                __x2apic_disable();
1802                return;
1803        }
1804        __x2apic_enable();
1805}
1806
1807static __init void x2apic_disable(void)
1808{
1809        u32 x2apic_id, state = x2apic_state;
1810
1811        x2apic_mode = 0;
1812        x2apic_state = X2APIC_DISABLED;
1813
1814        if (state != X2APIC_ON)
1815                return;
1816
1817        x2apic_id = read_apic_id();
1818        if (x2apic_id >= 255)
1819                panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
1820
1821        __x2apic_disable();
1822        register_lapic_address(mp_lapic_addr);
1823}
1824
1825static __init void x2apic_enable(void)
1826{
1827        if (x2apic_state != X2APIC_OFF)
1828                return;
1829
1830        x2apic_mode = 1;
1831        x2apic_state = X2APIC_ON;
1832        __x2apic_enable();
1833}
1834
1835static __init void try_to_enable_x2apic(int remap_mode)
1836{
1837        if (x2apic_state == X2APIC_DISABLED)
1838                return;
1839
1840        if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
1841                /* IR is required if there is APIC ID > 255 even when running
1842                 * under KVM
1843                 */
1844                if (max_physical_apicid > 255 ||
1845                    !x86_init.hyper.x2apic_available()) {
1846                        pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
1847                        x2apic_disable();
1848                        return;
1849                }
1850
1851                /*
1852                 * without IR all CPUs can be addressed by IOAPIC/MSI
1853                 * only in physical mode
1854                 */
1855                x2apic_phys = 1;
1856        }
1857        x2apic_enable();
1858}
1859
1860void __init check_x2apic(void)
1861{
1862        if (x2apic_enabled()) {
1863                pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
1864                x2apic_mode = 1;
1865                x2apic_state = X2APIC_ON;
1866        } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
1867                x2apic_state = X2APIC_DISABLED;
1868        }
1869}
1870#else /* CONFIG_X86_X2APIC */
1871static int __init validate_x2apic(void)
1872{
1873        if (!apic_is_x2apic_enabled())
1874                return 0;
1875        /*
1876         * Checkme: Can we simply turn off x2apic here instead of panic?
1877         */
1878        panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n");
1879}
1880early_initcall(validate_x2apic);
1881
/* No-op stand-ins used when CONFIG_X86_X2APIC is not set. */
static inline void try_to_enable_x2apic(int remap_mode)
{
}

static inline void __x2apic_enable(void)
{
}
1884#endif /* !CONFIG_X86_X2APIC */
1885
1886void __init enable_IR_x2apic(void)
1887{
1888        unsigned long flags;
1889        int ret, ir_stat;
1890
1891        if (skip_ioapic_setup) {
1892                pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
1893                return;
1894        }
1895
1896        ir_stat = irq_remapping_prepare();
1897        if (ir_stat < 0 && !x2apic_supported())
1898                return;
1899
1900        ret = save_ioapic_entries();
1901        if (ret) {
1902                pr_info("Saving IO-APIC state failed: %d\n", ret);
1903                return;
1904        }
1905
1906        local_irq_save(flags);
1907        legacy_pic->mask_all();
1908        mask_ioapic_entries();
1909
1910        /* If irq_remapping_prepare() succeeded, try to enable it */
1911        if (ir_stat >= 0)
1912                ir_stat = irq_remapping_enable();
1913        /* ir_stat contains the remap mode or an error code */
1914        try_to_enable_x2apic(ir_stat);
1915
1916        if (ir_stat < 0)
1917                restore_ioapic_entries();
1918        legacy_pic->restore_mask();
1919        local_irq_restore(flags);
1920}
1921
1922#ifdef CONFIG_X86_64
1923/*
1924 * Detect and enable local APICs on non-SMP boards.
1925 * Original code written by Keir Fraser.
1926 * On AMD64 we trust the BIOS - if it says no APIC it is likely
1927 * not correctly set up (usually the APIC timer won't work etc.)
1928 */
1929static int __init detect_init_APIC(void)
1930{
1931        if (!boot_cpu_has(X86_FEATURE_APIC)) {
1932                pr_info("No local APIC present\n");
1933                return -1;
1934        }
1935
1936        mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1937        return 0;
1938}
1939#else
1940
1941static int __init apic_verify(void)
1942{
1943        u32 features, h, l;
1944
1945        /*
1946         * The APIC feature bit should now be enabled
1947         * in `cpuid'
1948         */
1949        features = cpuid_edx(1);
1950        if (!(features & (1 << X86_FEATURE_APIC))) {
1951                pr_warn("Could not enable APIC!\n");
1952                return -1;
1953        }
1954        set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1955        mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1956
1957        /* The BIOS may have set up the APIC at some other address */
1958        if (boot_cpu_data.x86 >= 6) {
1959                rdmsr(MSR_IA32_APICBASE, l, h);
1960                if (l & MSR_IA32_APICBASE_ENABLE)
1961                        mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1962        }
1963
1964        pr_info("Found and enabled local APIC!\n");
1965        return 0;
1966}
1967
1968int __init apic_force_enable(unsigned long addr)
1969{
1970        u32 h, l;
1971
1972        if (disable_apic)
1973                return -1;
1974
1975        /*
1976         * Some BIOSes disable the local APIC in the APIC_BASE
1977         * MSR. This can only be done in software for Intel P6 or later
1978         * and AMD K7 (Model > 1) or later.
1979         */
1980        if (boot_cpu_data.x86 >= 6) {
1981                rdmsr(MSR_IA32_APICBASE, l, h);
1982                if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1983                        pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1984                        l &= ~MSR_IA32_APICBASE_BASE;
1985                        l |= MSR_IA32_APICBASE_ENABLE | addr;
1986                        wrmsr(MSR_IA32_APICBASE, l, h);
1987                        enabled_via_apicbase = 1;
1988                }
1989        }
1990        return apic_verify();
1991}
1992
1993/*
1994 * Detect and initialize APIC
1995 */
1996static int __init detect_init_APIC(void)
1997{
1998        /* Disabled by kernel option? */
1999        if (disable_apic)
2000                return -1;
2001
2002        switch (boot_cpu_data.x86_vendor) {
2003        case X86_VENDOR_AMD:
2004                if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
2005                    (boot_cpu_data.x86 >= 15))
2006                        break;
2007                goto no_apic;
2008        case X86_VENDOR_HYGON:
2009                break;
2010        case X86_VENDOR_INTEL:
2011                if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
2012                    (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
2013                        break;
2014                goto no_apic;
2015        default:
2016                goto no_apic;
2017        }
2018
2019        if (!boot_cpu_has(X86_FEATURE_APIC)) {
2020                /*
2021                 * Over-ride BIOS and try to enable the local APIC only if
2022                 * "lapic" specified.
2023                 */
2024                if (!force_enable_local_apic) {
2025                        pr_info("Local APIC disabled by BIOS -- "
2026                                "you can enable it with \"lapic\"\n");
2027                        return -1;
2028                }
2029                if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
2030                        return -1;
2031        } else {
2032                if (apic_verify())
2033                        return -1;
2034        }
2035
2036        apic_pm_activate();
2037
2038        return 0;
2039
2040no_apic:
2041        pr_info("No local APIC present or hardware disabled\n");
2042        return -1;
2043}
2044#endif
2045
2046/**
2047 * init_apic_mappings - initialize APIC mappings
2048 */
2049void __init init_apic_mappings(void)
2050{
2051        unsigned int new_apicid;
2052
2053        if (apic_validate_deadline_timer())
2054                pr_info("TSC deadline timer available\n");
2055
2056        if (x2apic_mode) {
2057                boot_cpu_physical_apicid = read_apic_id();
2058                return;
2059        }
2060
2061        /* If no local APIC can be found return early */
2062        if (!smp_found_config && detect_init_APIC()) {
2063                /* lets NOP'ify apic operations */
2064                pr_info("APIC: disable apic facility\n");
2065                apic_disable();
2066        } else {
2067                apic_phys = mp_lapic_addr;
2068
2069                /*
2070                 * If the system has ACPI MADT tables or MP info, the LAPIC
2071                 * address is already registered.
2072                 */
2073                if (!acpi_lapic && !smp_found_config)
2074                        register_lapic_address(apic_phys);
2075        }
2076
2077        /*
2078         * Fetch the APIC ID of the BSP in case we have a
2079         * default configuration (or the MP table is broken).
2080         */
2081        new_apicid = read_apic_id();
2082        if (boot_cpu_physical_apicid != new_apicid) {
2083                boot_cpu_physical_apicid = new_apicid;
2084                /*
2085                 * yeah -- we lie about apic_version
2086                 * in case if apic was disabled via boot option
2087                 * but it's not a problem for SMP compiled kernel
2088                 * since apic_intr_mode_select is prepared for such
2089                 * a case and disable smp mode
2090                 */
2091                boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
2092        }
2093}
2094
2095void __init register_lapic_address(unsigned long address)
2096{
2097        mp_lapic_addr = address;
2098
2099        if (!x2apic_mode) {
2100                set_fixmap_nocache(FIX_APIC_BASE, address);
2101                apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
2102                            APIC_BASE, address);
2103        }
2104        if (boot_cpu_physical_apicid == -1U) {
2105                boot_cpu_physical_apicid  = read_apic_id();
2106                boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
2107        }
2108}
2109
2110/*
2111 * Local APIC interrupts
2112 */
2113
2114/**
2115 * spurious_interrupt - Catch all for interrupts raised on unused vectors
2116 * @regs:       Pointer to pt_regs on stack
2117 * @vector:     The vector number
2118 *
2119 * This is invoked from ASM entry code to catch all interrupts which
2120 * trigger on an entry which is routed to the common_spurious idtentry
2121 * point.
2122 *
2123 * Also called from sysvec_spurious_apic_interrupt().
2124 */
2125DEFINE_IDTENTRY_IRQ(spurious_interrupt)
2126{
2127        u32 v;
2128
2129        trace_spurious_apic_entry(vector);
2130
2131        inc_irq_stat(irq_spurious_count);
2132
2133        /*
2134         * If this is a spurious interrupt then do not acknowledge
2135         */
2136        if (vector == SPURIOUS_APIC_VECTOR) {
2137                /* See SDM vol 3 */
2138                pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
2139                        smp_processor_id());
2140                goto out;
2141        }
2142
2143        /*
2144         * If it is a vectored one, verify it's set in the ISR. If set,
2145         * acknowledge it.
2146         */
2147        v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
2148        if (v & (1 << (vector & 0x1f))) {
2149                pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
2150                        vector, smp_processor_id());
2151                ack_APIC_irq();
2152        } else {
2153                pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
2154                        vector, smp_processor_id());
2155        }
2156out:
2157        trace_spurious_apic_exit(vector);
2158}
2159
2160DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt)
2161{
2162        __spurious_interrupt(regs, SPURIOUS_APIC_VECTOR);
2163}
2164
2165/*
2166 * This interrupt should never happen with our APIC/SMP architecture
2167 */
2168DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)
2169{
2170        static const char * const error_interrupt_reason[] = {
2171                "Send CS error",                /* APIC Error Bit 0 */
2172                "Receive CS error",             /* APIC Error Bit 1 */
2173                "Send accept error",            /* APIC Error Bit 2 */
2174                "Receive accept error",         /* APIC Error Bit 3 */
2175                "Redirectable IPI",             /* APIC Error Bit 4 */
2176                "Send illegal vector",          /* APIC Error Bit 5 */
2177                "Received illegal vector",      /* APIC Error Bit 6 */
2178                "Illegal register address",     /* APIC Error Bit 7 */
2179        };
2180        u32 v, i = 0;
2181
2182        trace_error_apic_entry(ERROR_APIC_VECTOR);
2183
2184        /* First tickle the hardware, only then report what went on. -- REW */
2185        if (lapic_get_maxlvt() > 3)     /* Due to the Pentium erratum 3AP. */
2186                apic_write(APIC_ESR, 0);
2187        v = apic_read(APIC_ESR);
2188        ack_APIC_irq();
2189        atomic_inc(&irq_err_count);
2190
2191        apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
2192                    smp_processor_id(), v);
2193
2194        v &= 0xff;
2195        while (v) {
2196                if (v & 0x1)
2197                        apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
2198                i++;
2199                v >>= 1;
2200        }
2201
2202        apic_printk(APIC_DEBUG, KERN_CONT "\n");
2203
2204        trace_error_apic_exit(ERROR_APIC_VECTOR);
2205}
2206
2207/**
2208 * connect_bsp_APIC - attach the APIC to the interrupt system
2209 */
2210static void __init connect_bsp_APIC(void)
2211{
2212#ifdef CONFIG_X86_32
2213        if (pic_mode) {
2214                /*
2215                 * Do not trust the local APIC being empty at bootup.
2216                 */
2217                clear_local_APIC();
2218                /*
2219                 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
2220                 * local APIC to INT and NMI lines.
2221                 */
2222                apic_printk(APIC_VERBOSE, "leaving PIC mode, "
2223                                "enabling APIC mode.\n");
2224                imcr_pic_to_apic();
2225        }
2226#endif
2227}
2228
2229/**
2230 * disconnect_bsp_APIC - detach the APIC from the interrupt system
2231 * @virt_wire_setup:    indicates, whether virtual wire mode is selected
2232 *
2233 * Virtual wire mode is necessary to deliver legacy interrupts even when the
2234 * APIC is disabled.
2235 */
2236void disconnect_bsp_APIC(int virt_wire_setup)
2237{
2238        unsigned int value;
2239
2240#ifdef CONFIG_X86_32
2241        if (pic_mode) {
2242                /*
2243                 * Put the board back into PIC mode (has an effect only on
2244                 * certain older boards).  Note that APIC interrupts, including
2245                 * IPIs, won't work beyond this point!  The only exception are
2246                 * INIT IPIs.
2247                 */
2248                apic_printk(APIC_VERBOSE, "disabling APIC mode, "
2249                                "entering PIC mode.\n");
2250                imcr_apic_to_pic();
2251                return;
2252        }
2253#endif
2254
2255        /* Go back to Virtual Wire compatibility mode */
2256
2257        /* For the spurious interrupt use vector F, and enable it */
2258        value = apic_read(APIC_SPIV);
2259        value &= ~APIC_VECTOR_MASK;
2260        value |= APIC_SPIV_APIC_ENABLED;
2261        value |= 0xf;
2262        apic_write(APIC_SPIV, value);
2263
2264        if (!virt_wire_setup) {
2265                /*
2266                 * For LVT0 make it edge triggered, active high,
2267                 * external and enabled
2268                 */
2269                value = apic_read(APIC_LVT0);
2270                value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
2271                        APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
2272                        APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
2273                value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
2274                value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
2275                apic_write(APIC_LVT0, value);
2276        } else {
2277                /* Disable LVT0 */
2278                apic_write(APIC_LVT0, APIC_LVT_MASKED);
2279        }
2280
2281        /*
2282         * For LVT1 make it edge triggered, active high,
2283         * nmi and enabled
2284         */
2285        value = apic_read(APIC_LVT1);
2286        value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
2287                        APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
2288                        APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
2289        value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
2290        value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
2291        apic_write(APIC_LVT1, value);
2292}
2293
2294/*
2295 * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated
2296 * contiguously, it equals to current allocated max logical CPU ID plus 1.
2297 * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range,
2298 * so the maximum of nr_logical_cpuids is nr_cpu_ids.
2299 *
2300 * NOTE: Reserve 0 for BSP.
2301 */
2302static int nr_logical_cpuids = 1;
2303
2304/*
2305 * Used to store mapping between logical CPU IDs and APIC IDs.
2306 */
2307static int cpuid_to_apicid[] = {
2308        [0 ... NR_CPUS - 1] = -1,
2309};
2310
2311#ifdef CONFIG_SMP
2312/**
2313 * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
2314 * @apicid: APIC ID to check
2315 */
2316bool apic_id_is_primary_thread(unsigned int apicid)
2317{
2318        u32 mask;
2319
2320        if (smp_num_siblings == 1)
2321                return true;
2322        /* Isolate the SMT bit(s) in the APICID and check for 0 */
2323        mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
2324        return !(apicid & mask);
2325}
2326#endif
2327
2328/*
2329 * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids
2330 * and cpuid_to_apicid[] synchronized.
2331 */
2332static int allocate_logical_cpuid(int apicid)
2333{
2334        int i;
2335
2336        /*
2337         * cpuid <-> apicid mapping is persistent, so when a cpu is up,
2338         * check if the kernel has allocated a cpuid for it.
2339         */
2340        for (i = 0; i < nr_logical_cpuids; i++) {
2341                if (cpuid_to_apicid[i] == apicid)
2342                        return i;
2343        }
2344
2345        /* Allocate a new cpuid. */
2346        if (nr_logical_cpuids >= nr_cpu_ids) {
2347                WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. "
2348                             "Processor %d/0x%x and the rest are ignored.\n",
2349                             nr_cpu_ids, nr_logical_cpuids, apicid);
2350                return -EINVAL;
2351        }
2352
2353        cpuid_to_apicid[nr_logical_cpuids] = apicid;
2354        return nr_logical_cpuids++;
2355}
2356
2357int generic_processor_info(int apicid, int version)
2358{
2359        int cpu, max = nr_cpu_ids;
2360        bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
2361                                phys_cpu_present_map);
2362
2363        /*
2364         * boot_cpu_physical_apicid is designed to have the apicid
2365         * returned by read_apic_id(), i.e, the apicid of the
2366         * currently booting-up processor. However, on some platforms,
2367         * it is temporarily modified by the apicid reported as BSP
2368         * through MP table. Concretely:
2369         *
2370         * - arch/x86/kernel/mpparse.c: MP_processor_info()
2371         * - arch/x86/mm/amdtopology.c: amd_numa_init()
2372         *
2373         * This function is executed with the modified
2374         * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel
2375         * parameter doesn't work to disable APs on kdump 2nd kernel.
2376         *
2377         * Since fixing handling of boot_cpu_physical_apicid requires
2378         * another discussion and tests on each platform, we leave it
2379         * for now and here we use read_apic_id() directly in this
2380         * function, generic_processor_info().
2381         */
2382        if (disabled_cpu_apicid != BAD_APICID &&
2383            disabled_cpu_apicid != read_apic_id() &&
2384            disabled_cpu_apicid == apicid) {
2385                int thiscpu = num_processors + disabled_cpus;
2386
2387                pr_warn("APIC: Disabling requested cpu."
2388                        " Processor %d/0x%x ignored.\n", thiscpu, apicid);
2389
2390                disabled_cpus++;
2391                return -ENODEV;
2392        }
2393
2394        /*
2395         * If boot cpu has not been detected yet, then only allow upto
2396         * nr_cpu_ids - 1 processors and keep one slot free for boot cpu
2397         */
2398        if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 &&
2399            apicid != boot_cpu_physical_apicid) {
2400                int thiscpu = max + disabled_cpus - 1;
2401
2402                pr_warn("APIC: NR_CPUS/possible_cpus limit of %i almost"
2403                        " reached. Keeping one slot for boot cpu."
2404                        "  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
2405
2406                disabled_cpus++;
2407                return -ENODEV;
2408        }
2409
2410        if (num_processors >= nr_cpu_ids) {
2411                int thiscpu = max + disabled_cpus;
2412
2413                pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. "
2414                        "Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
2415
2416                disabled_cpus++;
2417                return -EINVAL;
2418        }
2419
2420        if (apicid == boot_cpu_physical_apicid) {
2421                /*
2422                 * x86_bios_cpu_apicid is required to have processors listed
2423                 * in same order as logical cpu numbers. Hence the first
2424                 * entry is BSP, and so on.
2425                 * boot_cpu_init() already hold bit 0 in cpu_present_mask
2426                 * for BSP.
2427                 */
2428                cpu = 0;
2429
2430                /* Logical cpuid 0 is reserved for BSP. */
2431                cpuid_to_apicid[0] = apicid;
2432        } else {
2433                cpu = allocate_logical_cpuid(apicid);
2434                if (cpu < 0) {
2435                        disabled_cpus++;
2436                        return -EINVAL;
2437                }
2438        }
2439
2440        /*
2441         * Validate version
2442         */
2443        if (version == 0x0) {
2444                pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
2445                        cpu, apicid);
2446                version = 0x10;
2447        }
2448
2449        if (version != boot_cpu_apic_version) {
2450                pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
2451                        boot_cpu_apic_version, cpu, version);
2452        }
2453
2454        if (apicid > max_physical_apicid)
2455                max_physical_apicid = apicid;
2456
2457#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
2458        early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
2459        early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
2460#endif
2461#ifdef CONFIG_X86_32
2462        early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
2463                apic->x86_32_early_logical_apicid(cpu);
2464#endif
2465        set_cpu_possible(cpu, true);
2466        physid_set(apicid, phys_cpu_present_map);
2467        set_cpu_present(cpu, true);
2468        num_processors++;
2469
2470        return cpu;
2471}
2472
/* Return the APIC ID read from the local APIC of the executing CPU. */
int hard_smp_processor_id(void)
{
	return read_apic_id();
}
2477
2478/*
2479 * Override the generic EOI implementation with an optimized version.
2480 * Only called during early boot when only one CPU is active and with
2481 * interrupts disabled, so we know this does not race with actual APIC driver
2482 * use.
2483 */
2484void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
2485{
2486        struct apic **drv;
2487
2488        for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
2489                /* Should happen once for each apic */
2490                WARN_ON((*drv)->eoi_write == eoi_write);
2491                (*drv)->native_eoi_write = (*drv)->eoi_write;
2492                (*drv)->eoi_write = eoi_write;
2493        }
2494}
2495
2496static void __init apic_bsp_up_setup(void)
2497{
2498#ifdef CONFIG_X86_64
2499        apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid));
2500#else
2501        /*
2502         * Hack: In case of kdump, after a crash, kernel might be booting
2503         * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
2504         * might be zero if read from MP tables. Get it from LAPIC.
2505         */
2506# ifdef CONFIG_CRASH_DUMP
2507        boot_cpu_physical_apicid = read_apic_id();
2508# endif
2509#endif
2510        physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
2511}
2512
2513/**
2514 * apic_bsp_setup - Setup function for local apic and io-apic
2515 * @upmode:             Force UP mode (for APIC_init_uniprocessor)
2516 */
2517static void __init apic_bsp_setup(bool upmode)
2518{
2519        connect_bsp_APIC();
2520        if (upmode)
2521                apic_bsp_up_setup();
2522        setup_local_APIC();
2523
2524        enable_IO_APIC();
2525        end_local_APIC_setup();
2526        irq_remap_enable_fault_handling();
2527        setup_IO_APIC();
2528}
2529
2530#ifdef CONFIG_UP_LATE_INIT
2531void __init up_late_init(void)
2532{
2533        if (apic_intr_mode == APIC_PIC)
2534                return;
2535
2536        /* Setup local timer */
2537        x86_init.timers.setup_percpu_clockev();
2538}
2539#endif
2540
2541/*
2542 * Power management
2543 */
2544#ifdef CONFIG_PM
2545
/* Local APIC register values saved across suspend/resume. */
static struct {
	/*
	 * 'active' is true if the local APIC was enabled by us and
	 * not the BIOS; this signifies that we are also responsible
	 * for disabling it before entering apm/acpi suspend.
	 * Suspend and resume do nothing while it is zero.
	 */
	int active;
	/* r/w apic fields, one per register saved in lapic_suspend() */
	unsigned int apic_id;
	unsigned int apic_taskpri;
	unsigned int apic_ldr;
	unsigned int apic_dfr;
	unsigned int apic_spiv;
	unsigned int apic_lvtt;
	unsigned int apic_lvtpc;
	unsigned int apic_lvt0;
	unsigned int apic_lvt1;
	unsigned int apic_lvterr;
	unsigned int apic_tmict;
	unsigned int apic_tdcr;
	unsigned int apic_thmr;
	unsigned int apic_cmci;
} apic_pm_state;
2569
2570static int lapic_suspend(void)
2571{
2572        unsigned long flags;
2573        int maxlvt;
2574
2575        if (!apic_pm_state.active)
2576                return 0;
2577
2578        maxlvt = lapic_get_maxlvt();
2579
2580        apic_pm_state.apic_id = apic_read(APIC_ID);
2581        apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
2582        apic_pm_state.apic_ldr = apic_read(APIC_LDR);
2583        apic_pm_state.apic_dfr = apic_read(APIC_DFR);
2584        apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
2585        apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
2586        if (maxlvt >= 4)
2587                apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
2588        apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
2589        apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
2590        apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
2591        apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
2592        apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
2593#ifdef CONFIG_X86_THERMAL_VECTOR
2594        if (maxlvt >= 5)
2595                apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
2596#endif
2597#ifdef CONFIG_X86_MCE_INTEL
2598        if (maxlvt >= 6)
2599                apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI);
2600#endif
2601
2602        local_irq_save(flags);
2603
2604        /*
2605         * Mask IOAPIC before disabling the local APIC to prevent stale IRR
2606         * entries on some implementations.
2607         */
2608        mask_ioapic_entries();
2609
2610        disable_local_APIC();
2611
2612        irq_remapping_disable();
2613
2614        local_irq_restore(flags);
2615        return 0;
2616}
2617
2618static void lapic_resume(void)
2619{
2620        unsigned int l, h;
2621        unsigned long flags;
2622        int maxlvt;
2623
2624        if (!apic_pm_state.active)
2625                return;
2626
2627        local_irq_save(flags);
2628
2629        /*
2630         * IO-APIC and PIC have their own resume routines.
2631         * We just mask them here to make sure the interrupt
2632         * subsystem is completely quiet while we enable x2apic
2633         * and interrupt-remapping.
2634         */
2635        mask_ioapic_entries();
2636        legacy_pic->mask_all();
2637
2638        if (x2apic_mode) {
2639                __x2apic_enable();
2640        } else {
2641                /*
2642                 * Make sure the APICBASE points to the right address
2643                 *
2644                 * FIXME! This will be wrong if we ever support suspend on
2645                 * SMP! We'll need to do this as part of the CPU restore!
2646                 */
2647                if (boot_cpu_data.x86 >= 6) {
2648                        rdmsr(MSR_IA32_APICBASE, l, h);
2649                        l &= ~MSR_IA32_APICBASE_BASE;
2650                        l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
2651                        wrmsr(MSR_IA32_APICBASE, l, h);
2652                }
2653        }
2654
2655        maxlvt = lapic_get_maxlvt();
2656        apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
2657        apic_write(APIC_ID, apic_pm_state.apic_id);
2658        apic_write(APIC_DFR, apic_pm_state.apic_dfr);
2659        apic_write(APIC_LDR, apic_pm_state.apic_ldr);
2660        apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
2661        apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
2662        apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
2663        apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
2664#ifdef CONFIG_X86_THERMAL_VECTOR
2665        if (maxlvt >= 5)
2666                apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
2667#endif
2668#ifdef CONFIG_X86_MCE_INTEL
2669        if (maxlvt >= 6)
2670                apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci);
2671#endif
2672        if (maxlvt >= 4)
2673                apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
2674        apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
2675        apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
2676        apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
2677        apic_write(APIC_ESR, 0);
2678        apic_read(APIC_ESR);
2679        apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
2680        apic_write(APIC_ESR, 0);
2681        apic_read(APIC_ESR);
2682
2683        irq_remapping_reenable(x2apic_mode);
2684
2685        local_irq_restore(flags);
2686}
2687
2688/*
2689 * This device has no shutdown method - fully functioning local APICs
2690 * are needed on every CPU up until machine_halt/restart/poweroff.
2691 */
2692
2693static struct syscore_ops lapic_syscore_ops = {
2694        .resume         = lapic_resume,
2695        .suspend        = lapic_suspend,
2696};
2697
2698static void apic_pm_activate(void)
2699{
2700        apic_pm_state.active = 1;
2701}
2702
2703static int __init init_lapic_sysfs(void)
2704{
2705        /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
2706        if (boot_cpu_has(X86_FEATURE_APIC))
2707                register_syscore_ops(&lapic_syscore_ops);
2708
2709        return 0;
2710}
2711
2712/* local apic needs to resume before other devices access its registers. */
2713core_initcall(init_lapic_sysfs);
2714
2715#else   /* CONFIG_PM */
2716
/* !CONFIG_PM: there is no power-management state to flag, so do nothing. */
static void apic_pm_activate(void)
{
}
2718
2719#endif  /* CONFIG_PM */
2720
2721#ifdef CONFIG_X86_64
2722
/* Non-zero once the DMI table has been scanned by dmi_check_multi(). */
static int multi_checked;
/* Non-zero when a multi-chassis system was matched by set_multi(). */
static int multi;
2725
2726static int set_multi(const struct dmi_system_id *d)
2727{
2728        if (multi)
2729                return 0;
2730        pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
2731        multi = 1;
2732        return 0;
2733}
2734
2735static const struct dmi_system_id multi_dmi_table[] = {
2736        {
2737                .callback = set_multi,
2738                .ident = "IBM System Summit2",
2739                .matches = {
2740                        DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
2741                        DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
2742                },
2743        },
2744        {}
2745};
2746
2747static void dmi_check_multi(void)
2748{
2749        if (multi_checked)
2750                return;
2751
2752        dmi_check_system(multi_dmi_table);
2753        multi_checked = 1;
2754}
2755
2756/*
2757 * apic_is_clustered_box() -- Check if we can expect good TSC
2758 *
2759 * Thus far, the major user of this is IBM's Summit2 series:
2760 * Clustered boxes may have unsynced TSC problems if they are
2761 * multi-chassis.
2762 * Use DMI to check them
2763 */
2764int apic_is_clustered_box(void)
2765{
2766        dmi_check_multi();
2767        return multi;
2768}
2769#endif
2770
2771/*
2772 * APIC command line parameters
2773 */
2774static int __init setup_disableapic(char *arg)
2775{
2776        disable_apic = 1;
2777        setup_clear_cpu_cap(X86_FEATURE_APIC);
2778        return 0;
2779}
2780early_param("disableapic", setup_disableapic);
2781
2782/* same as disableapic, for compatibility */
2783static int __init setup_nolapic(char *arg)
2784{
2785        return setup_disableapic(arg);
2786}
2787early_param("nolapic", setup_nolapic);
2788
2789static int __init parse_lapic_timer_c2_ok(char *arg)
2790{
2791        local_apic_timer_c2_ok = 1;
2792        return 0;
2793}
2794early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
2795
2796static int __init parse_disable_apic_timer(char *arg)
2797{
2798        disable_apic_timer = 1;
2799        return 0;
2800}
2801early_param("noapictimer", parse_disable_apic_timer);
2802
2803static int __init parse_nolapic_timer(char *arg)
2804{
2805        disable_apic_timer = 1;
2806        return 0;
2807}
2808early_param("nolapic_timer", parse_nolapic_timer);
2809
2810static int __init apic_set_verbosity(char *arg)
2811{
2812        if (!arg)  {
2813#ifdef CONFIG_X86_64
2814                skip_ioapic_setup = 0;
2815                return 0;
2816#endif
2817                return -EINVAL;
2818        }
2819
2820        if (strcmp("debug", arg) == 0)
2821                apic_verbosity = APIC_DEBUG;
2822        else if (strcmp("verbose", arg) == 0)
2823                apic_verbosity = APIC_VERBOSE;
2824#ifdef CONFIG_X86_64
2825        else {
2826                pr_warn("APIC Verbosity level %s not recognised"
2827                        " use apic=verbose or apic=debug\n", arg);
2828                return -EINVAL;
2829        }
2830#endif
2831
2832        return 0;
2833}
2834early_param("apic", apic_set_verbosity);
2835
2836static int __init lapic_insert_resource(void)
2837{
2838        if (!apic_phys)
2839                return -1;
2840
2841        /* Put local APIC into the resource map. */
2842        lapic_resource.start = apic_phys;
2843        lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
2844        insert_resource(&iomem_resource, &lapic_resource);
2845
2846        return 0;
2847}
2848
2849/*
2850 * need call insert after e820__reserve_resources()
2851 * that is using request_resource
2852 */
2853late_initcall(lapic_insert_resource);
2854
2855static int __init apic_set_disabled_cpu_apicid(char *arg)
2856{
2857        if (!arg || !get_option(&arg, &disabled_cpu_apicid))
2858                return -EINVAL;
2859
2860        return 0;
2861}
2862early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);
2863
2864static int __init apic_set_extnmi(char *arg)
2865{
2866        if (!arg)
2867                return -EINVAL;
2868
2869        if (!strncmp("all", arg, 3))
2870                apic_extnmi = APIC_EXTNMI_ALL;
2871        else if (!strncmp("none", arg, 4))
2872                apic_extnmi = APIC_EXTNMI_NONE;
2873        else if (!strncmp("bsp", arg, 3))
2874                apic_extnmi = APIC_EXTNMI_BSP;
2875        else {
2876                pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg);
2877                return -EINVAL;
2878        }
2879
2880        return 0;
2881}
2882early_param("apic_extnmi", apic_set_extnmi);
2883