linux/arch/x86/kernel/apic/apic.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *      Local APIC handling, local APIC timers
   4 *
   5 *      (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
   6 *
   7 *      Fixes
   8 *      Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
   9 *                                      thanks to Eric Gilmore
  10 *                                      and Rolf G. Tews
  11 *                                      for testing these extensively.
  12 *      Maciej W. Rozycki       :       Various updates and fixes.
  13 *      Mikael Pettersson       :       Power Management for UP-APIC.
  14 *      Pavel Machek and
  15 *      Mikael Pettersson       :       PM converted to driver model.
  16 */
  17
  18#include <linux/perf_event.h>
  19#include <linux/kernel_stat.h>
  20#include <linux/mc146818rtc.h>
  21#include <linux/acpi_pmtmr.h>
  22#include <linux/clockchips.h>
  23#include <linux/interrupt.h>
  24#include <linux/memblock.h>
  25#include <linux/ftrace.h>
  26#include <linux/ioport.h>
  27#include <linux/export.h>
  28#include <linux/syscore_ops.h>
  29#include <linux/delay.h>
  30#include <linux/timex.h>
  31#include <linux/i8253.h>
  32#include <linux/dmar.h>
  33#include <linux/init.h>
  34#include <linux/cpu.h>
  35#include <linux/dmi.h>
  36#include <linux/smp.h>
  37#include <linux/mm.h>
  38
  39#include <asm/trace/irq_vectors.h>
  40#include <asm/irq_remapping.h>
  41#include <asm/pc-conf-reg.h>
  42#include <asm/perf_event.h>
  43#include <asm/x86_init.h>
  44#include <linux/atomic.h>
  45#include <asm/barrier.h>
  46#include <asm/mpspec.h>
  47#include <asm/i8259.h>
  48#include <asm/proto.h>
  49#include <asm/traps.h>
  50#include <asm/apic.h>
  51#include <asm/acpi.h>
  52#include <asm/io_apic.h>
  53#include <asm/desc.h>
  54#include <asm/hpet.h>
  55#include <asm/mtrr.h>
  56#include <asm/time.h>
  57#include <asm/smp.h>
  58#include <asm/mce.h>
  59#include <asm/tsc.h>
  60#include <asm/hypervisor.h>
  61#include <asm/cpu_device_id.h>
  62#include <asm/intel-family.h>
  63#include <asm/irq_regs.h>
  64
  65unsigned int num_processors;
  66
  67unsigned disabled_cpus;
  68
  69/* Processor that is doing the boot up */
  70unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
  71EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
  72
  73u8 boot_cpu_apic_version __ro_after_init;
  74
  75/*
  76 * The highest APIC ID seen during enumeration.
  77 */
  78static unsigned int max_physical_apicid;
  79
  80/*
  81 * Bitmask of physically existing CPUs:
  82 */
  83physid_mask_t phys_cpu_present_map;
  84
  85/*
  86 * Processor to be disabled specified by kernel parameter
  87 * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
  88 * avoid undefined behaviour caused by sending INIT from AP to BSP.
  89 */
  90static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;
  91
  92/*
  93 * This variable controls which CPUs receive external NMIs.  By default,
  94 * external NMIs are delivered only to the BSP.
  95 */
  96static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;
  97
  98/*
  99 * Hypervisor supports 15 bits of APIC ID in MSI Extended Destination ID
 100 */
 101static bool virt_ext_dest_id __ro_after_init;
 102
 103/*
 104 * Map cpu index to physical APIC ID
 105 */
 106DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
 107DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
 108DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
 109EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 110EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 111EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
 112
 113#ifdef CONFIG_X86_32
 114
 115/*
 116 * On x86_32, the mapping between cpu and logical apicid may vary
 117 * depending on apic in use.  The following early percpu variable is
 118 * used for the mapping.  This is where the behaviors of x86_64 and 32
 119 * actually diverge.  Let's keep it ugly for now.
 120 */
 121DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
 122
 123/* Local APIC was disabled by the BIOS and enabled by the kernel */
 124static int enabled_via_apicbase __ro_after_init;
 125
 126/*
 127 * Handle interrupt mode configuration register (IMCR).
 128 * This register controls whether the interrupt signals
 129 * that reach the BSP come from the master PIC or from the
 130 * local APIC. Before entering Symmetric I/O Mode, either
 131 * the BIOS or the operating system must switch out of
 132 * PIC Mode by changing the IMCR.
 133 */
 134static inline void imcr_pic_to_apic(void)
 135{
 136        /* NMI and 8259 INTR go through APIC */
 137        pc_conf_set(PC_CONF_MPS_IMCR, 0x01);
 138}
 139
 140static inline void imcr_apic_to_pic(void)
 141{
 142        /* NMI and 8259 INTR go directly to BSP */
 143        pc_conf_set(PC_CONF_MPS_IMCR, 0x00);
 144}
 145#endif
 146
 147/*
 148 * Knob to control our willingness to enable the local APIC.
 149 *
 150 * +1=force-enable
 151 */
 152static int force_enable_local_apic __initdata;
 153
 154/*
 155 * APIC command line parameters
 156 */
 157static int __init parse_lapic(char *arg)
 158{
 159        if (IS_ENABLED(CONFIG_X86_32) && !arg)
 160                force_enable_local_apic = 1;
 161        else if (arg && !strncmp(arg, "notscdeadline", 13))
 162                setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
 163        return 0;
 164}
 165early_param("lapic", parse_lapic);
 166
 167#ifdef CONFIG_X86_64
 168static int apic_calibrate_pmtmr __initdata;
 169static __init int setup_apicpmtimer(char *s)
 170{
 171        apic_calibrate_pmtmr = 1;
 172        notsc_setup(NULL);
 173        return 0;
 174}
 175__setup("apicpmtimer", setup_apicpmtimer);
 176#endif
 177
 178unsigned long mp_lapic_addr __ro_after_init;
 179int disable_apic __ro_after_init;
 180/* Disable local APIC timer from the kernel commandline or via dmi quirk */
 181static int disable_apic_timer __initdata;
 182/* Local APIC timer works in C2 */
 183int local_apic_timer_c2_ok __ro_after_init;
 184EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 185
 186/*
 187 * Debug level, exported for io_apic.c
 188 */
 189int apic_verbosity __ro_after_init;
 190
 191int pic_mode __ro_after_init;
 192
 193/* Have we found an MP table */
 194int smp_found_config __ro_after_init;
 195
 196static struct resource lapic_resource = {
 197        .name = "Local APIC",
 198        .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
 199};
 200
 201unsigned int lapic_timer_period = 0;
 202
 203static void apic_pm_activate(void);
 204
 205static unsigned long apic_phys __ro_after_init;
 206
 207/*
 208 * Get the LAPIC version
 209 */
 210static inline int lapic_get_version(void)
 211{
 212        return GET_APIC_VERSION(apic_read(APIC_LVR));
 213}
 214
 215/*
 216 * Check, if the APIC is integrated or a separate chip
 217 */
 218static inline int lapic_is_integrated(void)
 219{
 220        return APIC_INTEGRATED(lapic_get_version());
 221}
 222
 223/*
 224 * Check, whether this is a modern or a first generation APIC
 225 */
 226static int modern_apic(void)
 227{
 228        /* AMD systems use old APIC versions, so check the CPU */
 229        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
 230            boot_cpu_data.x86 >= 0xf)
 231                return 1;
 232
 233        /* Hygon systems use modern APIC */
 234        if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
 235                return 1;
 236
 237        return lapic_get_version() >= 0x14;
 238}
 239
 240/*
 241 * right after this call apic become NOOP driven
 242 * so apic->write/read doesn't do anything
 243 */
 244static void __init apic_disable(void)
 245{
 246        pr_info("APIC: switched to apic NOOP\n");
 247        apic = &apic_noop;
 248}
 249
 250void native_apic_wait_icr_idle(void)
 251{
 252        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
 253                cpu_relax();
 254}
 255
 256u32 native_safe_apic_wait_icr_idle(void)
 257{
 258        u32 send_status;
 259        int timeout;
 260
 261        timeout = 0;
 262        do {
 263                send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
 264                if (!send_status)
 265                        break;
 266                inc_irq_stat(icr_read_retry_count);
 267                udelay(100);
 268        } while (timeout++ < 1000);
 269
 270        return send_status;
 271}
 272
 273void native_apic_icr_write(u32 low, u32 id)
 274{
 275        unsigned long flags;
 276
 277        local_irq_save(flags);
 278        apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
 279        apic_write(APIC_ICR, low);
 280        local_irq_restore(flags);
 281}
 282
 283u64 native_apic_icr_read(void)
 284{
 285        u32 icr1, icr2;
 286
 287        icr2 = apic_read(APIC_ICR2);
 288        icr1 = apic_read(APIC_ICR);
 289
 290        return icr1 | ((u64)icr2 << 32);
 291}
 292
 293#ifdef CONFIG_X86_32
 294/**
 295 * get_physical_broadcast - Get number of physical broadcast IDs
 296 */
 297int get_physical_broadcast(void)
 298{
 299        return modern_apic() ? 0xff : 0xf;
 300}
 301#endif
 302
 303/**
 304 * lapic_get_maxlvt - get the maximum number of local vector table entries
 305 */
 306int lapic_get_maxlvt(void)
 307{
 308        /*
 309         * - we always have APIC integrated on 64bit mode
 310         * - 82489DXs do not report # of LVT entries
 311         */
 312        return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
 313}
 314
 315/*
 316 * Local APIC timer
 317 */
 318
 319/* Clock divisor */
 320#define APIC_DIVISOR 16
 321#define TSC_DIVISOR  8
 322
 323/*
 324 * This function sets up the local APIC timer, with a timeout of
 325 * 'clocks' APIC bus clock. During calibration we actually call
 326 * this function twice on the boot CPU, once with a bogus timeout
 327 * value, second time for real. The other (noncalibrating) CPUs
 328 * call this function only once, with the real, calibrated value.
 329 *
 330 * We do reads before writes even if unnecessary, to get around the
 331 * P5 APIC double write bug.
 332 */
 333static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 334{
 335        unsigned int lvtt_value, tmp_value;
 336
 337        lvtt_value = LOCAL_TIMER_VECTOR;
 338        if (!oneshot)
 339                lvtt_value |= APIC_LVT_TIMER_PERIODIC;
 340        else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 341                lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
 342
 343        if (!lapic_is_integrated())
 344                lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
 345
 346        if (!irqen)
 347                lvtt_value |= APIC_LVT_MASKED;
 348
 349        apic_write(APIC_LVTT, lvtt_value);
 350
 351        if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
 352                /*
 353                 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
 354                 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
 355                 * According to Intel, MFENCE can do the serialization here.
 356                 */
 357                asm volatile("mfence" : : : "memory");
 358                return;
 359        }
 360
 361        /*
 362         * Divide PICLK by 16
 363         */
 364        tmp_value = apic_read(APIC_TDCR);
 365        apic_write(APIC_TDCR,
 366                (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
 367                APIC_TDR_DIV_16);
 368
 369        if (!oneshot)
 370                apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
 371}
 372
 373/*
 374 * Setup extended LVT, AMD specific
 375 *
 376 * Software should use the LVT offsets the BIOS provides.  The offsets
 377 * are determined by the subsystems using it like those for MCE
 378 * threshold or IBS.  On K8 only offset 0 (APIC500) and MCE interrupts
 379 * are supported. Beginning with family 10h at least 4 offsets are
 380 * available.
 381 *
 382 * Since the offsets must be consistent for all cores, we keep track
 383 * of the LVT offsets in software and reserve the offset for the same
 384 * vector also to be used on other cores. An offset is freed by
 385 * setting the entry to APIC_EILVT_MASKED.
 386 *
 387 * If the BIOS is right, there should be no conflicts. Otherwise a
 388 * "[Firmware Bug]: ..." error message is generated. However, if
 389 * software does not properly determine the offsets, it is not
 390 * necessarily a BIOS bug.
 391 */
 392
 393static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
 394
 395static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
 396{
 397        return (old & APIC_EILVT_MASKED)
 398                || (new == APIC_EILVT_MASKED)
 399                || ((new & ~APIC_EILVT_MASKED) == old);
 400}
 401
 402static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
 403{
 404        unsigned int rsvd, vector;
 405
 406        if (offset >= APIC_EILVT_NR_MAX)
 407                return ~0;
 408
 409        rsvd = atomic_read(&eilvt_offsets[offset]);
 410        do {
 411                vector = rsvd & ~APIC_EILVT_MASKED;     /* 0: unassigned */
 412                if (vector && !eilvt_entry_is_changeable(vector, new))
 413                        /* may not change if vectors are different */
 414                        return rsvd;
 415                rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
 416        } while (rsvd != new);
 417
 418        rsvd &= ~APIC_EILVT_MASKED;
 419        if (rsvd && rsvd != vector)
 420                pr_info("LVT offset %d assigned for vector 0x%02x\n",
 421                        offset, rsvd);
 422
 423        return new;
 424}
 425
 426/*
 427 * If mask=1, the LVT entry does not generate interrupts while mask=0
 428 * enables the vector. See also the BKDGs. Must be called with
 429 * preemption disabled.
 430 */
 431
 432int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
 433{
 434        unsigned long reg = APIC_EILVTn(offset);
 435        unsigned int new, old, reserved;
 436
 437        new = (mask << 16) | (msg_type << 8) | vector;
 438        old = apic_read(reg);
 439        reserved = reserve_eilvt_offset(offset, new);
 440
 441        if (reserved != new) {
 442                pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
 443                       "vector 0x%x, but the register is already in use for "
 444                       "vector 0x%x on another cpu\n",
 445                       smp_processor_id(), reg, offset, new, reserved);
 446                return -EINVAL;
 447        }
 448
 449        if (!eilvt_entry_is_changeable(old, new)) {
 450                pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
 451                       "vector 0x%x, but the register is already in use for "
 452                       "vector 0x%x on this cpu\n",
 453                       smp_processor_id(), reg, offset, new, old);
 454                return -EBUSY;
 455        }
 456
 457        apic_write(reg, new);
 458
 459        return 0;
 460}
 461EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
 462
 463/*
 464 * Program the next event, relative to now
 465 */
 466static int lapic_next_event(unsigned long delta,
 467                            struct clock_event_device *evt)
 468{
 469        apic_write(APIC_TMICT, delta);
 470        return 0;
 471}
 472
 473static int lapic_next_deadline(unsigned long delta,
 474                               struct clock_event_device *evt)
 475{
 476        u64 tsc;
 477
 478        /* This MSR is special and need a special fence: */
 479        weak_wrmsr_fence();
 480
 481        tsc = rdtsc();
 482        wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
 483        return 0;
 484}
 485
 486static int lapic_timer_shutdown(struct clock_event_device *evt)
 487{
 488        unsigned int v;
 489
 490        /* Lapic used as dummy for broadcast ? */
 491        if (evt->features & CLOCK_EVT_FEAT_DUMMY)
 492                return 0;
 493
 494        v = apic_read(APIC_LVTT);
 495        v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
 496        apic_write(APIC_LVTT, v);
 497        apic_write(APIC_TMICT, 0);
 498        return 0;
 499}
 500
 501static inline int
 502lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
 503{
 504        /* Lapic used as dummy for broadcast ? */
 505        if (evt->features & CLOCK_EVT_FEAT_DUMMY)
 506                return 0;
 507
 508        __setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
 509        return 0;
 510}
 511
 512static int lapic_timer_set_periodic(struct clock_event_device *evt)
 513{
 514        return lapic_timer_set_periodic_oneshot(evt, false);
 515}
 516
 517static int lapic_timer_set_oneshot(struct clock_event_device *evt)
 518{
 519        return lapic_timer_set_periodic_oneshot(evt, true);
 520}
 521
 522/*
 523 * Local APIC timer broadcast function
 524 */
 525static void lapic_timer_broadcast(const struct cpumask *mask)
 526{
 527#ifdef CONFIG_SMP
 528        apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
 529#endif
 530}
 531
 532
 533/*
 534 * The local apic timer can be used for any function which is CPU local.
 535 */
 536static struct clock_event_device lapic_clockevent = {
 537        .name                           = "lapic",
 538        .features                       = CLOCK_EVT_FEAT_PERIODIC |
 539                                          CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
 540                                          | CLOCK_EVT_FEAT_DUMMY,
 541        .shift                          = 32,
 542        .set_state_shutdown             = lapic_timer_shutdown,
 543        .set_state_periodic             = lapic_timer_set_periodic,
 544        .set_state_oneshot              = lapic_timer_set_oneshot,
 545        .set_state_oneshot_stopped      = lapic_timer_shutdown,
 546        .set_next_event                 = lapic_next_event,
 547        .broadcast                      = lapic_timer_broadcast,
 548        .rating                         = 100,
 549        .irq                            = -1,
 550};
 551static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 552
 553static const struct x86_cpu_id deadline_match[] __initconst = {
 554        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */
 555        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */
 556
 557        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X,        0x0b000020),
 558
 559        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011),
 560        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e),
 561        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c),
 562        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003),
 563
 564        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136),
 565        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014),
 566        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x5, 0xf), 0),
 567
 568        X86_MATCH_INTEL_FAM6_MODEL( HASWELL,            0x22),
 569        X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L,          0x20),
 570        X86_MATCH_INTEL_FAM6_MODEL( HASWELL_G,          0x17),
 571
 572        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL,          0x25),
 573        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_G,        0x17),
 574
 575        X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_L,          0xb2),
 576        X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE,            0xb2),
 577
 578        X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE_L,         0x52),
 579        X86_MATCH_INTEL_FAM6_MODEL( KABYLAKE,           0x52),
 580
 581        {},
 582};
 583
 584static __init bool apic_validate_deadline_timer(void)
 585{
 586        const struct x86_cpu_id *m;
 587        u32 rev;
 588
 589        if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 590                return false;
 591        if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
 592                return true;
 593
 594        m = x86_match_cpu(deadline_match);
 595        if (!m)
 596                return true;
 597
 598        rev = (u32)m->driver_data;
 599
 600        if (boot_cpu_data.microcode >= rev)
 601                return true;
 602
 603        setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
 604        pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
 605               "please update microcode to version: 0x%x (or later)\n", rev);
 606        return false;
 607}
 608
 609/*
 610 * Setup the local APIC timer for this CPU. Copy the initialized values
 611 * of the boot CPU and register the clock event in the framework.
 612 */
 613static void setup_APIC_timer(void)
 614{
 615        struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
 616
 617        if (this_cpu_has(X86_FEATURE_ARAT)) {
 618                lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
 619                /* Make LAPIC timer preferable over percpu HPET */
 620                lapic_clockevent.rating = 150;
 621        }
 622
 623        memcpy(levt, &lapic_clockevent, sizeof(*levt));
 624        levt->cpumask = cpumask_of(smp_processor_id());
 625
 626        if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
 627                levt->name = "lapic-deadline";
 628                levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
 629                                    CLOCK_EVT_FEAT_DUMMY);
 630                levt->set_next_event = lapic_next_deadline;
 631                clockevents_config_and_register(levt,
 632                                                tsc_khz * (1000 / TSC_DIVISOR),
 633                                                0xF, ~0UL);
 634        } else
 635                clockevents_register_device(levt);
 636}
 637
 638/*
 639 * Install the updated TSC frequency from recalibration at the TSC
 640 * deadline clockevent devices.
 641 */
 642static void __lapic_update_tsc_freq(void *info)
 643{
 644        struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
 645
 646        if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 647                return;
 648
 649        clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
 650}
 651
 652void lapic_update_tsc_freq(void)
 653{
 654        /*
 655         * The clockevent device's ->mult and ->shift can both be
 656         * changed. In order to avoid races, schedule the frequency
 657         * update code on each CPU.
 658         */
 659        on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
 660}
 661
 662/*
 663 * In these functions we calibrate APIC bus clocks to the external timer.
 664 *
 665 * We want to do the calibration only once since we want to have local timer
 666 * irqs synchronous. CPUs connected by the same APIC bus have the very same bus
 667 * frequency.
 668 *
 669 * This was previously done by reading the PIT/HPET and waiting for a wrap
 670 * around to find out, that a tick has elapsed. I have a box, where the PIT
 671 * readout is broken, so it never gets out of the wait loop again. This was
 672 * also reported by others.
 673 *
 674 * Monitoring the jiffies value is inaccurate and the clockevents
 675 * infrastructure allows us to do a simple substitution of the interrupt
 676 * handler.
 677 *
 678 * The calibration routine also uses the pm_timer when possible, as the PIT
 679 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
 680 * back to normal later in the boot process).
 681 */
 682
 683#define LAPIC_CAL_LOOPS         (HZ/10)
 684
 685static __initdata int lapic_cal_loops = -1;
 686static __initdata long lapic_cal_t1, lapic_cal_t2;
 687static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
 688static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
 689static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
 690
 691/*
 692 * Temporary interrupt handler and polled calibration function.
 693 */
 694static void __init lapic_cal_handler(struct clock_event_device *dev)
 695{
 696        unsigned long long tsc = 0;
 697        long tapic = apic_read(APIC_TMCCT);
 698        unsigned long pm = acpi_pm_read_early();
 699
 700        if (boot_cpu_has(X86_FEATURE_TSC))
 701                tsc = rdtsc();
 702
 703        switch (lapic_cal_loops++) {
 704        case 0:
 705                lapic_cal_t1 = tapic;
 706                lapic_cal_tsc1 = tsc;
 707                lapic_cal_pm1 = pm;
 708                lapic_cal_j1 = jiffies;
 709                break;
 710
 711        case LAPIC_CAL_LOOPS:
 712                lapic_cal_t2 = tapic;
 713                lapic_cal_tsc2 = tsc;
 714                if (pm < lapic_cal_pm1)
 715                        pm += ACPI_PM_OVRRUN;
 716                lapic_cal_pm2 = pm;
 717                lapic_cal_j2 = jiffies;
 718                break;
 719        }
 720}
 721
 722static int __init
 723calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
 724{
 725        const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
 726        const long pm_thresh = pm_100ms / 100;
 727        unsigned long mult;
 728        u64 res;
 729
 730#ifndef CONFIG_X86_PM_TIMER
 731        return -1;
 732#endif
 733
 734        apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
 735
 736        /* Check, if the PM timer is available */
 737        if (!deltapm)
 738                return -1;
 739
 740        mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
 741
 742        if (deltapm > (pm_100ms - pm_thresh) &&
 743            deltapm < (pm_100ms + pm_thresh)) {
 744                apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
 745                return 0;
 746        }
 747
 748        res = (((u64)deltapm) *  mult) >> 22;
 749        do_div(res, 1000000);
 750        pr_warn("APIC calibration not consistent "
 751                "with PM-Timer: %ldms instead of 100ms\n", (long)res);
 752
 753        /* Correct the lapic counter value */
 754        res = (((u64)(*delta)) * pm_100ms);
 755        do_div(res, deltapm);
 756        pr_info("APIC delta adjusted to PM-Timer: "
 757                "%lu (%ld)\n", (unsigned long)res, *delta);
 758        *delta = (long)res;
 759
 760        /* Correct the tsc counter value */
 761        if (boot_cpu_has(X86_FEATURE_TSC)) {
 762                res = (((u64)(*deltatsc)) * pm_100ms);
 763                do_div(res, deltapm);
 764                apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
 765                                          "PM-Timer: %lu (%ld)\n",
 766                                        (unsigned long)res, *deltatsc);
 767                *deltatsc = (long)res;
 768        }
 769
 770        return 0;
 771}
 772
 773static int __init lapic_init_clockevent(void)
 774{
 775        if (!lapic_timer_period)
 776                return -1;
 777
 778        /* Calculate the scaled math multiplication factor */
 779        lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
 780                                        TICK_NSEC, lapic_clockevent.shift);
 781        lapic_clockevent.max_delta_ns =
 782                clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
 783        lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
 784        lapic_clockevent.min_delta_ns =
 785                clockevent_delta2ns(0xF, &lapic_clockevent);
 786        lapic_clockevent.min_delta_ticks = 0xF;
 787
 788        return 0;
 789}
 790
 791bool __init apic_needs_pit(void)
 792{
 793        /*
 794         * If the frequencies are not known, PIT is required for both TSC
 795         * and apic timer calibration.
 796         */
 797        if (!tsc_khz || !cpu_khz)
 798                return true;
 799
 800        /* Is there an APIC at all or is it disabled? */
 801        if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic)
 802                return true;
 803
 804        /*
 805         * If interrupt delivery mode is legacy PIC or virtual wire without
 806         * configuration, the local APIC timer wont be set up. Make sure
 807         * that the PIT is initialized.
 808         */
 809        if (apic_intr_mode == APIC_PIC ||
 810            apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
 811                return true;
 812
 813        /* Virt guests may lack ARAT, but still have DEADLINE */
 814        if (!boot_cpu_has(X86_FEATURE_ARAT))
 815                return true;
 816
 817        /* Deadline timer is based on TSC so no further PIT action required */
 818        if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 819                return false;
 820
 821        /* APIC timer disabled? */
 822        if (disable_apic_timer)
 823                return true;
 824        /*
 825         * The APIC timer frequency is known already, no PIT calibration
 826         * required. If unknown, let the PIT be initialized.
 827         */
 828        return lapic_timer_period == 0;
 829}
 830
 831static int __init calibrate_APIC_clock(void)
 832{
 833        struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
 834        u64 tsc_perj = 0, tsc_start = 0;
 835        unsigned long jif_start;
 836        unsigned long deltaj;
 837        long delta, deltatsc;
 838        int pm_referenced = 0;
 839
 840        if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 841                return 0;
 842
 843        /*
 844         * Check if lapic timer has already been calibrated by platform
 845         * specific routine, such as tsc calibration code. If so just fill
 846         * in the clockevent structure and return.
 847         */
 848        if (!lapic_init_clockevent()) {
 849                apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
 850                            lapic_timer_period);
 851                /*
 852                 * Direct calibration methods must have an always running
 853                 * local APIC timer, no need for broadcast timer.
 854                 */
 855                lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 856                return 0;
 857        }
 858
 859        apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
 860                    "calibrating APIC timer ...\n");
 861
 862        /*
 863         * There are platforms w/o global clockevent devices. Instead of
 864         * making the calibration conditional on that, use a polling based
 865         * approach everywhere.
 866         */
 867        local_irq_disable();
 868
 869        /*
 870         * Setup the APIC counter to maximum. There is no way the lapic
 871         * can underflow in the 100ms detection time frame
 872         */
 873        __setup_APIC_LVTT(0xffffffff, 0, 0);
 874
 875        /*
 876         * Methods to terminate the calibration loop:
 877         *  1) Global clockevent if available (jiffies)
 878         *  2) TSC if available and frequency is known
 879         */
 880        jif_start = READ_ONCE(jiffies);
 881
 882        if (tsc_khz) {
 883                tsc_start = rdtsc();
 884                tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
 885        }
 886
 887        /*
 888         * Enable interrupts so the tick can fire, if a global
 889         * clockevent device is available
 890         */
 891        local_irq_enable();
 892
 893        while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
 894                /* Wait for a tick to elapse */
 895                while (1) {
 896                        if (tsc_khz) {
 897                                u64 tsc_now = rdtsc();
 898                                if ((tsc_now - tsc_start) >= tsc_perj) {
 899                                        tsc_start += tsc_perj;
 900                                        break;
 901                                }
 902                        } else {
 903                                unsigned long jif_now = READ_ONCE(jiffies);
 904
 905                                if (time_after(jif_now, jif_start)) {
 906                                        jif_start = jif_now;
 907                                        break;
 908                                }
 909                        }
 910                        cpu_relax();
 911                }
 912
 913                /* Invoke the calibration routine */
 914                local_irq_disable();
 915                lapic_cal_handler(NULL);
 916                local_irq_enable();
 917        }
 918
 919        local_irq_disable();
 920
 921        /* Build delta t1-t2 as apic timer counts down */
 922        delta = lapic_cal_t1 - lapic_cal_t2;
 923        apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
 924
 925        deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
 926
 927        /* we trust the PM based calibration if possible */
 928        pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
 929                                        &delta, &deltatsc);
 930
 931        lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
 932        lapic_init_clockevent();
 933
 934        apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
 935        apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
 936        apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
 937                    lapic_timer_period);
 938
 939        if (boot_cpu_has(X86_FEATURE_TSC)) {
 940                apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
 941                            "%ld.%04ld MHz.\n",
 942                            (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
 943                            (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
 944        }
 945
 946        apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
 947                    "%u.%04u MHz.\n",
 948                    lapic_timer_period / (1000000 / HZ),
 949                    lapic_timer_period % (1000000 / HZ));
 950
 951        /*
 952         * Do a sanity check on the APIC calibration result
 953         */
 954        if (lapic_timer_period < (1000000 / HZ)) {
 955                local_irq_enable();
 956                pr_warn("APIC frequency too slow, disabling apic timer\n");
 957                return -1;
 958        }
 959
 960        levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
 961
 962        /*
 963         * PM timer calibration failed or not turned on so lets try APIC
 964         * timer based calibration, if a global clockevent device is
 965         * available.
 966         */
 967        if (!pm_referenced && global_clock_event) {
 968                apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
 969
 970                /*
 971                 * Setup the apic timer manually
 972                 */
 973                levt->event_handler = lapic_cal_handler;
 974                lapic_timer_set_periodic(levt);
 975                lapic_cal_loops = -1;
 976
 977                /* Let the interrupts run */
 978                local_irq_enable();
 979
 980                while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
 981                        cpu_relax();
 982
 983                /* Stop the lapic timer */
 984                local_irq_disable();
 985                lapic_timer_shutdown(levt);
 986
 987                /* Jiffies delta */
 988                deltaj = lapic_cal_j2 - lapic_cal_j1;
 989                apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
 990
 991                /* Check, if the jiffies result is consistent */
 992                if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
 993                        apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
 994                else
 995                        levt->features |= CLOCK_EVT_FEAT_DUMMY;
 996        }
 997        local_irq_enable();
 998
 999        if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
1000                pr_warn("APIC timer disabled due to verification failure\n");
1001                return -1;
1002        }
1003
1004        return 0;
1005}
1006
1007/*
1008 * Setup the boot APIC
1009 *
1010 * Calibrate and verify the result.
1011 */
1012void __init setup_boot_APIC_clock(void)
1013{
1014        /*
1015         * The local apic timer can be disabled via the kernel
1016         * commandline or from the CPU detection code. Register the lapic
1017         * timer as a dummy clock event source on SMP systems, so the
1018         * broadcast mechanism is used. On UP systems simply ignore it.
1019         */
1020        if (disable_apic_timer) {
1021                pr_info("Disabling APIC timer\n");
1022                /* No broadcast on UP ! */
1023                if (num_possible_cpus() > 1) {
1024                        lapic_clockevent.mult = 1;
1025                        setup_APIC_timer();
1026                }
1027                return;
1028        }
1029
1030        if (calibrate_APIC_clock()) {
1031                /* No broadcast on UP ! */
1032                if (num_possible_cpus() > 1)
1033                        setup_APIC_timer();
1034                return;
1035        }
1036
1037        /*
1038         * If nmi_watchdog is set to IO_APIC, we need the
1039         * PIT/HPET going.  Otherwise register lapic as a dummy
1040         * device.
1041         */
1042        lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
1043
1044        /* Setup the lapic or request the broadcast */
1045        setup_APIC_timer();
1046        amd_e400_c1e_apic_setup();
1047}
1048
/*
 * Set up the local APIC timer without calibrating it (presumably for
 * secondary CPUs, going by the name): the timer is set up and the
 * AMD E400 C1E hook is run, in that order.
 */
void setup_secondary_APIC_clock(void)
{
	setup_APIC_timer();

	amd_e400_c1e_apic_setup();
}
1054
1055/*
1056 * The guts of the apic timer interrupt
1057 */
1058static void local_apic_timer_interrupt(void)
1059{
1060        struct clock_event_device *evt = this_cpu_ptr(&lapic_events);
1061
1062        /*
1063         * Normally we should not be here till LAPIC has been initialized but
1064         * in some cases like kdump, its possible that there is a pending LAPIC
1065         * timer interrupt from previous kernel's context and is delivered in
1066         * new kernel the moment interrupts are enabled.
1067         *
1068         * Interrupts are enabled early and LAPIC is setup much later, hence
1069         * its possible that when we get here evt->event_handler is NULL.
1070         * Check for event_handler being NULL and discard the interrupt as
1071         * spurious.
1072         */
1073        if (!evt->event_handler) {
1074                pr_warn("Spurious LAPIC timer interrupt on cpu %d\n",
1075                        smp_processor_id());
1076                /* Switch it off */
1077                lapic_timer_shutdown(evt);
1078                return;
1079        }
1080
1081        /*
1082         * the NMI deadlock-detector uses this.
1083         */
1084        inc_irq_stat(apic_timer_irqs);
1085
1086        evt->event_handler(evt);
1087}
1088
1089/*
1090 * Local APIC timer interrupt. This is the most natural way for doing
1091 * local interrupts, but local timer interrupts can be emulated by
1092 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
1093 *
1094 * [ if a single-CPU system runs an SMP kernel then we call the local
1095 *   interrupt as well. Thus we cannot inline the local irq ... ]
1096 */
1097DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
1098{
1099        struct pt_regs *old_regs = set_irq_regs(regs);
1100
1101        ack_APIC_irq();
1102        trace_local_timer_entry(LOCAL_TIMER_VECTOR);
1103        local_apic_timer_interrupt();
1104        trace_local_timer_exit(LOCAL_TIMER_VECTOR);
1105
1106        set_irq_regs(old_regs);
1107}
1108
/*
 * Changing the profiling timer multiplier is not supported: every request
 * is rejected with -EINVAL, whatever @multiplier is.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	return -EINVAL;
}
1113
1114/*
1115 * Local APIC start and shutdown
1116 */
1117
1118/**
1119 * clear_local_APIC - shutdown the local APIC
1120 *
1121 * This is called, when a CPU is disabled and before rebooting, so the state of
1122 * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
1123 * leftovers during boot.
1124 */
1125void clear_local_APIC(void)
1126{
1127        int maxlvt;
1128        u32 v;
1129
1130        /* APIC hasn't been mapped yet */
1131        if (!x2apic_mode && !apic_phys)
1132                return;
1133
1134        maxlvt = lapic_get_maxlvt();
1135        /*
1136         * Masking an LVT entry can trigger a local APIC error
1137         * if the vector is zero. Mask LVTERR first to prevent this.
1138         */
1139        if (maxlvt >= 3) {
1140                v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
1141                apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
1142        }
1143        /*
1144         * Careful: we have to set masks only first to deassert
1145         * any level-triggered sources.
1146         */
1147        v = apic_read(APIC_LVTT);
1148        apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
1149        v = apic_read(APIC_LVT0);
1150        apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
1151        v = apic_read(APIC_LVT1);
1152        apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
1153        if (maxlvt >= 4) {
1154                v = apic_read(APIC_LVTPC);
1155                apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
1156        }
1157
1158        /* lets not touch this if we didn't frob it */
1159#ifdef CONFIG_X86_THERMAL_VECTOR
1160        if (maxlvt >= 5) {
1161                v = apic_read(APIC_LVTTHMR);
1162                apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
1163        }
1164#endif
1165#ifdef CONFIG_X86_MCE_INTEL
1166        if (maxlvt >= 6) {
1167                v = apic_read(APIC_LVTCMCI);
1168                if (!(v & APIC_LVT_MASKED))
1169                        apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
1170        }
1171#endif
1172
1173        /*
1174         * Clean APIC state for other OSs:
1175         */
1176        apic_write(APIC_LVTT, APIC_LVT_MASKED);
1177        apic_write(APIC_LVT0, APIC_LVT_MASKED);
1178        apic_write(APIC_LVT1, APIC_LVT_MASKED);
1179        if (maxlvt >= 3)
1180                apic_write(APIC_LVTERR, APIC_LVT_MASKED);
1181        if (maxlvt >= 4)
1182                apic_write(APIC_LVTPC, APIC_LVT_MASKED);
1183
1184        /* Integrated APIC (!82489DX) ? */
1185        if (lapic_is_integrated()) {
1186                if (maxlvt > 3)
1187                        /* Clear ESR due to Pentium errata 3AP and 11AP */
1188                        apic_write(APIC_ESR, 0);
1189                apic_read(APIC_ESR);
1190        }
1191}
1192
1193/**
1194 * apic_soft_disable - Clears and software disables the local APIC on hotplug
1195 *
1196 * Contrary to disable_local_APIC() this does not touch the enable bit in
1197 * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
1198 * bus would require a hardware reset as the APIC would lose track of bus
1199 * arbitration. On systems with FSB delivery APICBASE could be disabled,
1200 * but it has to be guaranteed that no interrupt is sent to the APIC while
1201 * in that state and it's not clear from the SDM whether it still responds
1202 * to INIT/SIPI messages. Stay on the safe side and use software disable.
1203 */
1204void apic_soft_disable(void)
1205{
1206        u32 value;
1207
1208        clear_local_APIC();
1209
1210        /* Soft disable APIC (implies clearing of registers for 82489DX!). */
1211        value = apic_read(APIC_SPIV);
1212        value &= ~APIC_SPIV_APIC_ENABLED;
1213        apic_write(APIC_SPIV, value);
1214}
1215
1216/**
1217 * disable_local_APIC - clear and disable the local APIC
1218 */
1219void disable_local_APIC(void)
1220{
1221        /* APIC hasn't been mapped yet */
1222        if (!x2apic_mode && !apic_phys)
1223                return;
1224
1225        apic_soft_disable();
1226
1227#ifdef CONFIG_X86_32
1228        /*
1229         * When LAPIC was disabled by the BIOS and enabled by the kernel,
1230         * restore the disabled state.
1231         */
1232        if (enabled_via_apicbase) {
1233                unsigned int l, h;
1234
1235                rdmsr(MSR_IA32_APICBASE, l, h);
1236                l &= ~MSR_IA32_APICBASE_ENABLE;
1237                wrmsr(MSR_IA32_APICBASE, l, h);
1238        }
1239#endif
1240}
1241
1242/*
1243 * If Linux enabled the LAPIC against the BIOS default disable it down before
1244 * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
1245 * not power-off.  Additionally clear all LVT entries before disable_local_APIC
1246 * for the case where Linux didn't enable the LAPIC.
1247 */
1248void lapic_shutdown(void)
1249{
1250        unsigned long flags;
1251
1252        if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
1253                return;
1254
1255        local_irq_save(flags);
1256
1257#ifdef CONFIG_X86_32
1258        if (!enabled_via_apicbase)
1259                clear_local_APIC();
1260        else
1261#endif
1262                disable_local_APIC();
1263
1264
1265        local_irq_restore(flags);
1266}
1267
1268/**
1269 * sync_Arb_IDs - synchronize APIC bus arbitration IDs
1270 */
1271void __init sync_Arb_IDs(void)
1272{
1273        /*
1274         * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
1275         * needed on AMD.
1276         */
1277        if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
1278                return;
1279
1280        /*
1281         * Wait for idle.
1282         */
1283        apic_wait_icr_idle();
1284
1285        apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
1286        apic_write(APIC_ICR, APIC_DEST_ALLINC |
1287                        APIC_INT_LEVELTRIG | APIC_DM_INIT);
1288}
1289
1290enum apic_intr_mode_id apic_intr_mode __ro_after_init;
1291
1292static int __init __apic_intr_mode_select(void)
1293{
1294        /* Check kernel option */
1295        if (disable_apic) {
1296                pr_info("APIC disabled via kernel command line\n");
1297                return APIC_PIC;
1298        }
1299
1300        /* Check BIOS */
1301#ifdef CONFIG_X86_64
1302        /* On 64-bit, the APIC must be integrated, Check local APIC only */
1303        if (!boot_cpu_has(X86_FEATURE_APIC)) {
1304                disable_apic = 1;
1305                pr_info("APIC disabled by BIOS\n");
1306                return APIC_PIC;
1307        }
1308#else
1309        /* On 32-bit, the APIC may be integrated APIC or 82489DX */
1310
1311        /* Neither 82489DX nor integrated APIC ? */
1312        if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
1313                disable_apic = 1;
1314                return APIC_PIC;
1315        }
1316
1317        /* If the BIOS pretends there is an integrated APIC ? */
1318        if (!boot_cpu_has(X86_FEATURE_APIC) &&
1319                APIC_INTEGRATED(boot_cpu_apic_version)) {
1320                disable_apic = 1;
1321                pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
1322                                       boot_cpu_physical_apicid);
1323                return APIC_PIC;
1324        }
1325#endif
1326
1327        /* Check MP table or ACPI MADT configuration */
1328        if (!smp_found_config) {
1329                disable_ioapic_support();
1330                if (!acpi_lapic) {
1331                        pr_info("APIC: ACPI MADT or MP tables are not detected\n");
1332                        return APIC_VIRTUAL_WIRE_NO_CONFIG;
1333                }
1334                return APIC_VIRTUAL_WIRE;
1335        }
1336
1337#ifdef CONFIG_SMP
1338        /* If SMP should be disabled, then really disable it! */
1339        if (!setup_max_cpus) {
1340                pr_info("APIC: SMP mode deactivated\n");
1341                return APIC_SYMMETRIC_IO_NO_ROUTING;
1342        }
1343
1344        if (read_apic_id() != boot_cpu_physical_apicid) {
1345                panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1346                     read_apic_id(), boot_cpu_physical_apicid);
1347                /* Or can we switch back to PIC here? */
1348        }
1349#endif
1350
1351        return APIC_SYMMETRIC_IO;
1352}
1353
1354/* Select the interrupt delivery mode for the BSP */
1355void __init apic_intr_mode_select(void)
1356{
1357        apic_intr_mode = __apic_intr_mode_select();
1358}
1359
1360/*
1361 * An initial setup of the virtual wire mode.
1362 */
1363void __init init_bsp_APIC(void)
1364{
1365        unsigned int value;
1366
1367        /*
1368         * Don't do the setup now if we have a SMP BIOS as the
1369         * through-I/O-APIC virtual wire mode might be active.
1370         */
1371        if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
1372                return;
1373
1374        /*
1375         * Do not trust the local APIC being empty at bootup.
1376         */
1377        clear_local_APIC();
1378
1379        /*
1380         * Enable APIC.
1381         */
1382        value = apic_read(APIC_SPIV);
1383        value &= ~APIC_VECTOR_MASK;
1384        value |= APIC_SPIV_APIC_ENABLED;
1385
1386#ifdef CONFIG_X86_32
1387        /* This bit is reserved on P4/Xeon and should be cleared */
1388        if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
1389            (boot_cpu_data.x86 == 15))
1390                value &= ~APIC_SPIV_FOCUS_DISABLED;
1391        else
1392#endif
1393                value |= APIC_SPIV_FOCUS_DISABLED;
1394        value |= SPURIOUS_APIC_VECTOR;
1395        apic_write(APIC_SPIV, value);
1396
1397        /*
1398         * Set up the virtual wire mode.
1399         */
1400        apic_write(APIC_LVT0, APIC_DM_EXTINT);
1401        value = APIC_DM_NMI;
1402        if (!lapic_is_integrated())             /* 82489DX */
1403                value |= APIC_LVT_LEVEL_TRIGGER;
1404        if (apic_extnmi == APIC_EXTNMI_NONE)
1405                value |= APIC_LVT_MASKED;
1406        apic_write(APIC_LVT1, value);
1407}
1408
1409static void __init apic_bsp_setup(bool upmode);
1410
1411/* Init the interrupt delivery mode for the BSP */
1412void __init apic_intr_mode_init(void)
1413{
1414        bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
1415
1416        switch (apic_intr_mode) {
1417        case APIC_PIC:
1418                pr_info("APIC: Keep in PIC mode(8259)\n");
1419                return;
1420        case APIC_VIRTUAL_WIRE:
1421                pr_info("APIC: Switch to virtual wire mode setup\n");
1422                default_setup_apic_routing();
1423                break;
1424        case APIC_VIRTUAL_WIRE_NO_CONFIG:
1425                pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
1426                upmode = true;
1427                default_setup_apic_routing();
1428                break;
1429        case APIC_SYMMETRIC_IO:
1430                pr_info("APIC: Switch to symmetric I/O mode setup\n");
1431                default_setup_apic_routing();
1432                break;
1433        case APIC_SYMMETRIC_IO_NO_ROUTING:
1434                pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
1435                break;
1436        }
1437
1438        if (x86_platform.apic_post_init)
1439                x86_platform.apic_post_init();
1440
1441        apic_bsp_setup(upmode);
1442}
1443
1444static void lapic_setup_esr(void)
1445{
1446        unsigned int oldvalue, value, maxlvt;
1447
1448        if (!lapic_is_integrated()) {
1449                pr_info("No ESR for 82489DX.\n");
1450                return;
1451        }
1452
1453        if (apic->disable_esr) {
1454                /*
1455                 * Something untraceable is creating bad interrupts on
1456                 * secondary quads ... for the moment, just leave the
1457                 * ESR disabled - we can't do anything useful with the
1458                 * errors anyway - mbligh
1459                 */
1460                pr_info("Leaving ESR disabled.\n");
1461                return;
1462        }
1463
1464        maxlvt = lapic_get_maxlvt();
1465        if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
1466                apic_write(APIC_ESR, 0);
1467        oldvalue = apic_read(APIC_ESR);
1468
1469        /* enables sending errors */
1470        value = ERROR_APIC_VECTOR;
1471        apic_write(APIC_LVTERR, value);
1472
1473        /*
1474         * spec says clear errors after enabling vector.
1475         */
1476        if (maxlvt > 3)
1477                apic_write(APIC_ESR, 0);
1478        value = apic_read(APIC_ESR);
1479        if (value != oldvalue)
1480                apic_printk(APIC_VERBOSE, "ESR value before enabling "
1481                        "vector: 0x%08x  after: 0x%08x\n",
1482                        oldvalue, value);
1483}
1484
1485#define APIC_IR_REGS            APIC_ISR_NR
1486#define APIC_IR_BITS            (APIC_IR_REGS * 32)
1487#define APIC_IR_MAPSIZE         (APIC_IR_BITS / BITS_PER_LONG)
1488
1489union apic_ir {
1490        unsigned long   map[APIC_IR_MAPSIZE];
1491        u32             regs[APIC_IR_REGS];
1492};
1493
1494static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
1495{
1496        int i, bit;
1497
1498        /* Read the IRRs */
1499        for (i = 0; i < APIC_IR_REGS; i++)
1500                irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
1501
1502        /* Read the ISRs */
1503        for (i = 0; i < APIC_IR_REGS; i++)
1504                isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
1505
1506        /*
1507         * If the ISR map is not empty. ACK the APIC and run another round
1508         * to verify whether a pending IRR has been unblocked and turned
1509         * into a ISR.
1510         */
1511        if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
1512                /*
1513                 * There can be multiple ISR bits set when a high priority
1514                 * interrupt preempted a lower priority one. Issue an ACK
1515                 * per set bit.
1516                 */
1517                for_each_set_bit(bit, isr->map, APIC_IR_BITS)
1518                        ack_APIC_irq();
1519                return true;
1520        }
1521
1522        return !bitmap_empty(irr->map, APIC_IR_BITS);
1523}
1524
1525/*
1526 * After a crash, we no longer service the interrupts and a pending
1527 * interrupt from previous kernel might still have ISR bit set.
1528 *
1529 * Most probably by now the CPU has serviced that pending interrupt and it
1530 * might not have done the ack_APIC_irq() because it thought, interrupt
1531 * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
1532 * the ISR bit and cpu thinks it has already serviced the interrupt. Hence
1533 * a vector might get locked. It was noticed for timer irq (vector
1534 * 0x31). Issue an extra EOI to clear ISR.
1535 *
1536 * If there are pending IRR bits they turn into ISR bits after a higher
1537 * priority ISR bit has been acked.
1538 */
1539static void apic_pending_intr_clear(void)
1540{
1541        union apic_ir irr, isr;
1542        unsigned int i;
1543
1544        /* 512 loops are way oversized and give the APIC a chance to obey. */
1545        for (i = 0; i < 512; i++) {
1546                if (!apic_check_and_ack(&irr, &isr))
1547                        return;
1548        }
1549        /* Dump the IRR/ISR content if that failed */
1550        pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
1551}
1552
1553/**
1554 * setup_local_APIC - setup the local APIC
1555 *
1556 * Used to setup local APIC while initializing BSP or bringing up APs.
1557 * Always called with preemption disabled.
1558 */
1559static void setup_local_APIC(void)
1560{
1561        int cpu = smp_processor_id();
1562        unsigned int value;
1563
1564        if (disable_apic) {
1565                disable_ioapic_support();
1566                return;
1567        }
1568
1569        /*
1570         * If this comes from kexec/kcrash the APIC might be enabled in
1571         * SPIV. Soft disable it before doing further initialization.
1572         */
1573        value = apic_read(APIC_SPIV);
1574        value &= ~APIC_SPIV_APIC_ENABLED;
1575        apic_write(APIC_SPIV, value);
1576
1577#ifdef CONFIG_X86_32
1578        /* Pound the ESR really hard over the head with a big hammer - mbligh */
1579        if (lapic_is_integrated() && apic->disable_esr) {
1580                apic_write(APIC_ESR, 0);
1581                apic_write(APIC_ESR, 0);
1582                apic_write(APIC_ESR, 0);
1583                apic_write(APIC_ESR, 0);
1584        }
1585#endif
1586        /*
1587         * Double-check whether this APIC is really registered.
1588         * This is meaningless in clustered apic mode, so we skip it.
1589         */
1590        BUG_ON(!apic->apic_id_registered());
1591
1592        /*
1593         * Intel recommends to set DFR, LDR and TPR before enabling
1594         * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
1595         * document number 292116).  So here it goes...
1596         */
1597        apic->init_apic_ldr();
1598
1599#ifdef CONFIG_X86_32
1600        if (apic->dest_mode_logical) {
1601                int logical_apicid, ldr_apicid;
1602
1603                /*
1604                 * APIC LDR is initialized.  If logical_apicid mapping was
1605                 * initialized during get_smp_config(), make sure it matches
1606                 * the actual value.
1607                 */
1608                logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
1609                ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
1610                if (logical_apicid != BAD_APICID)
1611                        WARN_ON(logical_apicid != ldr_apicid);
1612                /* Always use the value from LDR. */
1613                early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
1614        }
1615#endif
1616
1617        /*
1618         * Set Task Priority to 'accept all except vectors 0-31'.  An APIC
1619         * vector in the 16-31 range could be delivered if TPR == 0, but we
1620         * would think it's an exception and terrible things will happen.  We
1621         * never change this later on.
1622         */
1623        value = apic_read(APIC_TASKPRI);
1624        value &= ~APIC_TPRI_MASK;
1625        value |= 0x10;
1626        apic_write(APIC_TASKPRI, value);
1627
1628        /* Clear eventually stale ISR/IRR bits */
1629        apic_pending_intr_clear();
1630
1631        /*
1632         * Now that we are all set up, enable the APIC
1633         */
1634        value = apic_read(APIC_SPIV);
1635        value &= ~APIC_VECTOR_MASK;
1636        /*
1637         * Enable APIC
1638         */
1639        value |= APIC_SPIV_APIC_ENABLED;
1640
1641#ifdef CONFIG_X86_32
1642        /*
1643         * Some unknown Intel IO/APIC (or APIC) errata is biting us with
1644         * certain networking cards. If high frequency interrupts are
1645         * happening on a particular IOAPIC pin, plus the IOAPIC routing
1646         * entry is masked/unmasked at a high rate as well then sooner or
1647         * later IOAPIC line gets 'stuck', no more interrupts are received
1648         * from the device. If focus CPU is disabled then the hang goes
1649         * away, oh well :-(
1650         *
1651         * [ This bug can be reproduced easily with a level-triggered
1652         *   PCI Ne2000 networking cards and PII/PIII processors, dual
1653         *   BX chipset. ]
1654         */
1655        /*
1656         * Actually disabling the focus CPU check just makes the hang less
1657         * frequent as it makes the interrupt distribution model be more
1658         * like LRU than MRU (the short-term load is more even across CPUs).
1659         */
1660
1661        /*
1662         * - enable focus processor (bit==0)
1663         * - 64bit mode always use processor focus
1664         *   so no need to set it
1665         */
1666        value &= ~APIC_SPIV_FOCUS_DISABLED;
1667#endif
1668
1669        /*
1670         * Set spurious IRQ vector
1671         */
1672        value |= SPURIOUS_APIC_VECTOR;
1673        apic_write(APIC_SPIV, value);
1674
1675        perf_events_lapic_init();
1676
1677        /*
1678         * Set up LVT0, LVT1:
1679         *
1680         * set up through-local-APIC on the boot CPU's LINT0. This is not
1681         * strictly necessary in pure symmetric-IO mode, but sometimes
1682         * we delegate interrupts to the 8259A.
1683         */
1684        /*
1685         * TODO: set up through-local-APIC from through-I/O-APIC? --macro
1686         */
1687        value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
1688        if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
1689                value = APIC_DM_EXTINT;
1690                apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
1691        } else {
1692                value = APIC_DM_EXTINT | APIC_LVT_MASKED;
1693                apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
1694        }
1695        apic_write(APIC_LVT0, value);
1696
1697        /*
1698         * Only the BSP sees the LINT1 NMI signal by default. This can be
1699         * modified by apic_extnmi= boot option.
1700         */
1701        if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
1702            apic_extnmi == APIC_EXTNMI_ALL)
1703                value = APIC_DM_NMI;
1704        else
1705                value = APIC_DM_NMI | APIC_LVT_MASKED;
1706
1707        /* Is 82489DX ? */
1708        if (!lapic_is_integrated())
1709                value |= APIC_LVT_LEVEL_TRIGGER;
1710        apic_write(APIC_LVT1, value);
1711
1712#ifdef CONFIG_X86_MCE_INTEL
1713        /* Recheck CMCI information after local APIC is up on CPU #0 */
1714        if (!cpu)
1715                cmci_recheck();
1716#endif
1717}
1718
/*
 * Final steps of the local APIC setup: set up the ESR, on 32-bit leave
 * the lapic timer LVT masked with its vector set, and call
 * apic_pm_activate().
 */
static void end_local_APIC_setup(void)
{
	lapic_setup_esr();

#ifdef CONFIG_X86_32
	/* Disable the local apic timer */
	apic_write(APIC_LVTT,
		   apic_read(APIC_LVTT) | APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
#endif

	apic_pm_activate();
}
1735
/*
 * Local APIC setup for application processors, called from smpboot.c:
 * the full local APIC setup followed by the final setup steps.
 */
void apic_ap_setup(void)
{
	setup_local_APIC();

	end_local_APIC_setup();
}
1744
1745#ifdef CONFIG_X86_X2APIC
/* Set to 1 by x2apic_enable(), reset to 0 when x2apic gets disabled */
int x2apic_mode;
EXPORT_SYMBOL_GPL(x2apic_mode);

/* Values of x2apic_state */
enum {
	/* Initial state; the only one x2apic_enable() acts on */
	X2APIC_OFF,
	/* Entered in x2apic_enable() */
	X2APIC_ON,
	/* Entered via "nox2apic" or x2apic_disable() */
	X2APIC_DISABLED,
};
static int x2apic_state;
1755
1756static void __x2apic_disable(void)
1757{
1758        u64 msr;
1759
1760        if (!boot_cpu_has(X86_FEATURE_APIC))
1761                return;
1762
1763        rdmsrl(MSR_IA32_APICBASE, msr);
1764        if (!(msr & X2APIC_ENABLE))
1765                return;
1766        /* Disable xapic and x2apic first and then reenable xapic mode */
1767        wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
1768        wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
1769        printk_once(KERN_INFO "x2apic disabled\n");
1770}
1771
1772static void __x2apic_enable(void)
1773{
1774        u64 msr;
1775
1776        rdmsrl(MSR_IA32_APICBASE, msr);
1777        if (msr & X2APIC_ENABLE)
1778                return;
1779        wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
1780        printk_once(KERN_INFO "x2apic enabled\n");
1781}
1782
1783static int __init setup_nox2apic(char *str)
1784{
1785        if (x2apic_enabled()) {
1786                int apicid = native_apic_msr_read(APIC_ID);
1787
1788                if (apicid >= 255) {
1789                        pr_warn("Apicid: %08x, cannot enforce nox2apic\n",
1790                                apicid);
1791                        return 0;
1792                }
1793                pr_warn("x2apic already enabled.\n");
1794                __x2apic_disable();
1795        }
1796        setup_clear_cpu_cap(X86_FEATURE_X2APIC);
1797        x2apic_state = X2APIC_DISABLED;
1798        x2apic_mode = 0;
1799        return 0;
1800}
1801early_param("nox2apic", setup_nox2apic);
1802
1803/* Called from cpu_init() to enable x2apic on (secondary) cpus */
1804void x2apic_setup(void)
1805{
1806        /*
1807         * If x2apic is not in ON state, disable it if already enabled
1808         * from BIOS.
1809         */
1810        if (x2apic_state != X2APIC_ON) {
1811                __x2apic_disable();
1812                return;
1813        }
1814        __x2apic_enable();
1815}
1816
1817static __init void x2apic_disable(void)
1818{
1819        u32 x2apic_id, state = x2apic_state;
1820
1821        x2apic_mode = 0;
1822        x2apic_state = X2APIC_DISABLED;
1823
1824        if (state != X2APIC_ON)
1825                return;
1826
1827        x2apic_id = read_apic_id();
1828        if (x2apic_id >= 255)
1829                panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
1830
1831        __x2apic_disable();
1832        register_lapic_address(mp_lapic_addr);
1833}
1834
1835static __init void x2apic_enable(void)
1836{
1837        if (x2apic_state != X2APIC_OFF)
1838                return;
1839
1840        x2apic_mode = 1;
1841        x2apic_state = X2APIC_ON;
1842        __x2apic_enable();
1843}
1844
1845static __init void try_to_enable_x2apic(int remap_mode)
1846{
1847        if (x2apic_state == X2APIC_DISABLED)
1848                return;
1849
1850        if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
1851                u32 apic_limit = 255;
1852
1853                /*
1854                 * Using X2APIC without IR is not architecturally supported
1855                 * on bare metal but may be supported in guests.
1856                 */
1857                if (!x86_init.hyper.x2apic_available()) {
1858                        pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
1859                        x2apic_disable();
1860                        return;
1861                }
1862
1863                /*
1864                 * If the hypervisor supports extended destination ID in
1865                 * MSI, that increases the maximum APIC ID that can be
1866                 * used for non-remapped IRQ domains.
1867                 */
1868                if (x86_init.hyper.msi_ext_dest_id()) {
1869                        virt_ext_dest_id = 1;
1870                        apic_limit = 32767;
1871                }
1872
1873                /*
1874                 * Without IR, all CPUs can be addressed by IOAPIC/MSI only
1875                 * in physical mode, and CPUs with an APIC ID that cannot
1876                 * be addressed must not be brought online.
1877                 */
1878                x2apic_set_max_apicid(apic_limit);
1879                x2apic_phys = 1;
1880        }
1881        x2apic_enable();
1882}
1883
1884void __init check_x2apic(void)
1885{
1886        if (x2apic_enabled()) {
1887                pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
1888                x2apic_mode = 1;
1889                x2apic_state = X2APIC_ON;
1890        } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
1891                x2apic_state = X2APIC_DISABLED;
1892        }
1893}
1894#else /* CONFIG_X86_X2APIC */
1895static int __init validate_x2apic(void)
1896{
1897        if (!apic_is_x2apic_enabled())
1898                return 0;
1899        /*
1900         * Checkme: Can we simply turn off x2apic here instead of panic?
1901         */
1902        panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n");
1903}
1904early_initcall(validate_x2apic);
1905
/* No-op stand-ins so that callers build without CONFIG_X86_X2APIC */
static inline void try_to_enable_x2apic(int remap_mode) { }
static inline void __x2apic_enable(void) { }
1908#endif /* !CONFIG_X86_X2APIC */
1909
/*
 * Try to enable interrupt remapping and, depending on the result, x2apic.
 *
 * The IO-APIC entries are saved first. The switch itself runs with local
 * interrupts off and with the legacy PIC and the IO-APIC entries masked.
 * If remapping did not get enabled, the saved IO-APIC entries are
 * restored afterwards.
 */
void __init enable_IR_x2apic(void)
{
	unsigned long flags;
	int ret, ir_stat;

	if (skip_ioapic_setup) {
		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
		return;
	}

	/* Nothing to do if remapping cannot be prepared and there is no x2apic */
	ir_stat = irq_remapping_prepare();
	if (ir_stat < 0 && !x2apic_supported())
		return;

	ret = save_ioapic_entries();
	if (ret) {
		pr_info("Saving IO-APIC state failed: %d\n", ret);
		return;
	}

	local_irq_save(flags);
	legacy_pic->mask_all();
	mask_ioapic_entries();

	/* If irq_remapping_prepare() succeeded, try to enable it */
	if (ir_stat >= 0)
		ir_stat = irq_remapping_enable();
	/* ir_stat contains the remap mode or an error code */
	try_to_enable_x2apic(ir_stat);

	/* Remapping is not in use: put the saved IO-APIC entries back */
	if (ir_stat < 0)
		restore_ioapic_entries();
	legacy_pic->restore_mask();
	local_irq_restore(flags);
}
1945
1946#ifdef CONFIG_X86_64
1947/*
1948 * Detect and enable local APICs on non-SMP boards.
1949 * Original code written by Keir Fraser.
1950 * On AMD64 we trust the BIOS - if it says no APIC it is likely
1951 * not correctly set up (usually the APIC timer won't work etc.)
1952 */
1953static int __init detect_init_APIC(void)
1954{
1955        if (!boot_cpu_has(X86_FEATURE_APIC)) {
1956                pr_info("No local APIC present\n");
1957                return -1;
1958        }
1959
1960        mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1961        return 0;
1962}
1963#else
1964
1965static int __init apic_verify(void)
1966{
1967        u32 features, h, l;
1968
1969        /*
1970         * The APIC feature bit should now be enabled
1971         * in `cpuid'
1972         */
1973        features = cpuid_edx(1);
1974        if (!(features & (1 << X86_FEATURE_APIC))) {
1975                pr_warn("Could not enable APIC!\n");
1976                return -1;
1977        }
1978        set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1979        mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1980
1981        /* The BIOS may have set up the APIC at some other address */
1982        if (boot_cpu_data.x86 >= 6) {
1983                rdmsr(MSR_IA32_APICBASE, l, h);
1984                if (l & MSR_IA32_APICBASE_ENABLE)
1985                        mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1986        }
1987
1988        pr_info("Found and enabled local APIC!\n");
1989        return 0;
1990}
1991
1992int __init apic_force_enable(unsigned long addr)
1993{
1994        u32 h, l;
1995
1996        if (disable_apic)
1997                return -1;
1998
1999        /*
2000         * Some BIOSes disable the local APIC in the APIC_BASE
2001         * MSR. This can only be done in software for Intel P6 or later
2002         * and AMD K7 (Model > 1) or later.
2003         */
2004        if (boot_cpu_data.x86 >= 6) {
2005                rdmsr(MSR_IA32_APICBASE, l, h);
2006                if (!(l & MSR_IA32_APICBASE_ENABLE)) {
2007                        pr_info("Local APIC disabled by BIOS -- reenabling.\n");
2008                        l &= ~MSR_IA32_APICBASE_BASE;
2009                        l |= MSR_IA32_APICBASE_ENABLE | addr;
2010                        wrmsr(MSR_IA32_APICBASE, l, h);
2011                        enabled_via_apicbase = 1;
2012                }
2013        }
2014        return apic_verify();
2015}
2016
2017/*
2018 * Detect and initialize APIC
2019 */
2020static int __init detect_init_APIC(void)
2021{
2022        /* Disabled by kernel option? */
2023        if (disable_apic)
2024                return -1;
2025
2026        switch (boot_cpu_data.x86_vendor) {
2027        case X86_VENDOR_AMD:
2028                if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
2029                    (boot_cpu_data.x86 >= 15))
2030                        break;
2031                goto no_apic;
2032        case X86_VENDOR_HYGON:
2033                break;
2034        case X86_VENDOR_INTEL:
2035                if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
2036                    (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
2037                        break;
2038                goto no_apic;
2039        default:
2040                goto no_apic;
2041        }
2042
2043        if (!boot_cpu_has(X86_FEATURE_APIC)) {
2044                /*
2045                 * Over-ride BIOS and try to enable the local APIC only if
2046                 * "lapic" specified.
2047                 */
2048                if (!force_enable_local_apic) {
2049                        pr_info("Local APIC disabled by BIOS -- "
2050                                "you can enable it with \"lapic\"\n");
2051                        return -1;
2052                }
2053                if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
2054                        return -1;
2055        } else {
2056                if (apic_verify())
2057                        return -1;
2058        }
2059
2060        apic_pm_activate();
2061
2062        return 0;
2063
2064no_apic:
2065        pr_info("No local APIC present or hardware disabled\n");
2066        return -1;
2067}
2068#endif
2069
/**
 * init_apic_mappings - initialize APIC mappings
 *
 * In x2apic mode only the boot CPU APIC ID is read. Otherwise the local
 * APIC is detected if no MP configuration was found, its address is
 * registered where that has not happened yet, and the boot CPU APIC ID
 * and version are refreshed from the hardware.
 */
void __init init_apic_mappings(void)
{
	unsigned int new_apicid;

	if (apic_validate_deadline_timer())
		pr_info("TSC deadline timer available\n");

	/* No address registration is done in x2apic mode; only record the ID */
	if (x2apic_mode) {
		boot_cpu_physical_apicid = read_apic_id();
		return;
	}

	/* No MP configuration and no detectable local APIC: disable the APIC */
	if (!smp_found_config && detect_init_APIC()) {
		/* lets NOP'ify apic operations */
		pr_info("APIC: disable apic facility\n");
		apic_disable();
	} else {
		apic_phys = mp_lapic_addr;

		/*
		 * If the system has ACPI MADT tables or MP info, the LAPIC
		 * address is already registered.
		 */
		if (!acpi_lapic && !smp_found_config)
			register_lapic_address(apic_phys);
	}

	/*
	 * Fetch the APIC ID of the BSP in case we have a
	 * default configuration (or the MP table is broken).
	 */
	new_apicid = read_apic_id();
	if (boot_cpu_physical_apicid != new_apicid) {
		boot_cpu_physical_apicid = new_apicid;
		/*
		 * The apic_version recorded here is a lie if the APIC was
		 * disabled via boot option. That is not a problem for an
		 * SMP kernel, since apic_intr_mode_select() is prepared
		 * for this case and disables SMP mode.
		 */
		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
	}
}
2118
2119void __init register_lapic_address(unsigned long address)
2120{
2121        mp_lapic_addr = address;
2122
2123        if (!x2apic_mode) {
2124                set_fixmap_nocache(FIX_APIC_BASE, address);
2125                apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
2126                            APIC_BASE, address);
2127        }
2128        if (boot_cpu_physical_apicid == -1U) {
2129                boot_cpu_physical_apicid  = read_apic_id();
2130                boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
2131        }
2132}
2133
2134/*
2135 * Local APIC interrupts
2136 */
2137
2138/*
2139 * Common handling code for spurious_interrupt and spurious_vector entry
2140 * points below. No point in allowing the compiler to inline it twice.
2141 */
2142static noinline void handle_spurious_interrupt(u8 vector)
2143{
2144        u32 v;
2145
2146        trace_spurious_apic_entry(vector);
2147
2148        inc_irq_stat(irq_spurious_count);
2149
2150        /*
2151         * If this is a spurious interrupt then do not acknowledge
2152         */
2153        if (vector == SPURIOUS_APIC_VECTOR) {
2154                /* See SDM vol 3 */
2155                pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
2156                        smp_processor_id());
2157                goto out;
2158        }
2159
2160        /*
2161         * If it is a vectored one, verify it's set in the ISR. If set,
2162         * acknowledge it.
2163         */
2164        v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
2165        if (v & (1 << (vector & 0x1f))) {
2166                pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
2167                        vector, smp_processor_id());
2168                ack_APIC_irq();
2169        } else {
2170                pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
2171                        vector, smp_processor_id());
2172        }
2173out:
2174        trace_spurious_apic_exit(vector);
2175}
2176
/**
 * spurious_interrupt - Catch all for interrupts raised on unused vectors
 * @regs:	Pointer to pt_regs on stack
 * @vector:	The vector number
 *
 * This is invoked from ASM entry code to catch all interrupts which
 * trigger on an entry which is routed to the common_spurious idtentry
 * point.
 *
 * All work is delegated to handle_spurious_interrupt().
 */
DEFINE_IDTENTRY_IRQ(spurious_interrupt)
{
	handle_spurious_interrupt(vector);
}
2190
/*
 * Entry point that always reports SPURIOUS_APIC_VECTOR to
 * handle_spurious_interrupt(), which logs such an event without
 * acknowledging it.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt)
{
	handle_spurious_interrupt(SPURIOUS_APIC_VECTOR);
}
2195
2196/*
2197 * This interrupt should never happen with our APIC/SMP architecture
2198 */
2199DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt)
2200{
2201        static const char * const error_interrupt_reason[] = {
2202                "Send CS error",                /* APIC Error Bit 0 */
2203                "Receive CS error",             /* APIC Error Bit 1 */
2204                "Send accept error",            /* APIC Error Bit 2 */
2205                "Receive accept error",         /* APIC Error Bit 3 */
2206                "Redirectable IPI",             /* APIC Error Bit 4 */
2207                "Send illegal vector",          /* APIC Error Bit 5 */
2208                "Received illegal vector",      /* APIC Error Bit 6 */
2209                "Illegal register address",     /* APIC Error Bit 7 */
2210        };
2211        u32 v, i = 0;
2212
2213        trace_error_apic_entry(ERROR_APIC_VECTOR);
2214
2215        /* First tickle the hardware, only then report what went on. -- REW */
2216        if (lapic_get_maxlvt() > 3)     /* Due to the Pentium erratum 3AP. */
2217                apic_write(APIC_ESR, 0);
2218        v = apic_read(APIC_ESR);
2219        ack_APIC_irq();
2220        atomic_inc(&irq_err_count);
2221
2222        apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
2223                    smp_processor_id(), v);
2224
2225        v &= 0xff;
2226        while (v) {
2227                if (v & 0x1)
2228                        apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
2229                i++;
2230                v >>= 1;
2231        }
2232
2233        apic_printk(APIC_DEBUG, KERN_CONT "\n");
2234
2235        trace_error_apic_exit(ERROR_APIC_VECTOR);
2236}
2237
2238/**
2239 * connect_bsp_APIC - attach the APIC to the interrupt system
2240 */
2241static void __init connect_bsp_APIC(void)
2242{
2243#ifdef CONFIG_X86_32
2244        if (pic_mode) {
2245                /*
2246                 * Do not trust the local APIC being empty at bootup.
2247                 */
2248                clear_local_APIC();
2249                /*
2250                 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
2251                 * local APIC to INT and NMI lines.
2252                 */
2253                apic_printk(APIC_VERBOSE, "leaving PIC mode, "
2254                                "enabling APIC mode.\n");
2255                imcr_pic_to_apic();
2256        }
2257#endif
2258}
2259
2260/**
2261 * disconnect_bsp_APIC - detach the APIC from the interrupt system
2262 * @virt_wire_setup:    indicates, whether virtual wire mode is selected
2263 *
2264 * Virtual wire mode is necessary to deliver legacy interrupts even when the
2265 * APIC is disabled.
2266 */
2267void disconnect_bsp_APIC(int virt_wire_setup)
2268{
2269        unsigned int value;
2270
2271#ifdef CONFIG_X86_32
2272        if (pic_mode) {
2273                /*
2274                 * Put the board back into PIC mode (has an effect only on
2275                 * certain older boards).  Note that APIC interrupts, including
2276                 * IPIs, won't work beyond this point!  The only exception are
2277                 * INIT IPIs.
2278                 */
2279                apic_printk(APIC_VERBOSE, "disabling APIC mode, "
2280                                "entering PIC mode.\n");
2281                imcr_apic_to_pic();
2282                return;
2283        }
2284#endif
2285
2286        /* Go back to Virtual Wire compatibility mode */
2287
2288        /* For the spurious interrupt use vector F, and enable it */
2289        value = apic_read(APIC_SPIV);
2290        value &= ~APIC_VECTOR_MASK;
2291        value |= APIC_SPIV_APIC_ENABLED;
2292        value |= 0xf;
2293        apic_write(APIC_SPIV, value);
2294
2295        if (!virt_wire_setup) {
2296                /*
2297                 * For LVT0 make it edge triggered, active high,
2298                 * external and enabled
2299                 */
2300                value = apic_read(APIC_LVT0);
2301                value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
2302                        APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
2303                        APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
2304                value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
2305                value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
2306                apic_write(APIC_LVT0, value);
2307        } else {
2308                /* Disable LVT0 */
2309                apic_write(APIC_LVT0, APIC_LVT_MASKED);
2310        }
2311
2312        /*
2313         * For LVT1 make it edge triggered, active high,
2314         * nmi and enabled
2315         */
2316        value = apic_read(APIC_LVT1);
2317        value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
2318                        APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
2319                        APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
2320        value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
2321        value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
2322        apic_write(APIC_LVT1, value);
2323}
2324
/*
 * The number of allocated logical CPU IDs. Since logical CPU IDs are
 * allocated contiguously, it equals the highest allocated logical CPU ID
 * plus 1. All allocated CPU IDs are in the [0, nr_logical_cpuids) range,
 * so nr_logical_cpuids can be at most nr_cpu_ids.
 *
 * NOTE: Reserve 0 for BSP.
 */
static int nr_logical_cpuids = 1;

/*
 * Used to store mapping between logical CPU IDs and APIC IDs.
 * Entries that have not been assigned hold -1.
 */
static int cpuid_to_apicid[] = {
	[0 ... NR_CPUS - 1] = -1,
};
2341
/*
 * Check whether @phys_id equals the APIC ID recorded for logical CPU @cpu
 * in cpuid_to_apicid[]. @cpu is used as an array index without any range
 * check.
 */
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
	return phys_id == cpuid_to_apicid[cpu];
}
2346
2347#ifdef CONFIG_SMP
2348/**
2349 * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
2350 * @apicid: APIC ID to check
2351 */
2352bool apic_id_is_primary_thread(unsigned int apicid)
2353{
2354        u32 mask;
2355
2356        if (smp_num_siblings == 1)
2357                return true;
2358        /* Isolate the SMT bit(s) in the APICID and check for 0 */
2359        mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
2360        return !(apicid & mask);
2361}
2362#endif
2363
2364/*
2365 * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids
2366 * and cpuid_to_apicid[] synchronized.
2367 */
2368static int allocate_logical_cpuid(int apicid)
2369{
2370        int i;
2371
2372        /*
2373         * cpuid <-> apicid mapping is persistent, so when a cpu is up,
2374         * check if the kernel has allocated a cpuid for it.
2375         */
2376        for (i = 0; i < nr_logical_cpuids; i++) {
2377                if (cpuid_to_apicid[i] == apicid)
2378                        return i;
2379        }
2380
2381        /* Allocate a new cpuid. */
2382        if (nr_logical_cpuids >= nr_cpu_ids) {
2383                WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. "
2384                             "Processor %d/0x%x and the rest are ignored.\n",
2385                             nr_cpu_ids, nr_logical_cpuids, apicid);
2386                return -EINVAL;
2387        }
2388
2389        cpuid_to_apicid[nr_logical_cpuids] = apicid;
2390        return nr_logical_cpuids++;
2391}
2392
/*
 * Register a processor identified by @apicid with APIC version @version.
 *
 * On success the processor gets a logical CPU number, is marked possible
 * and present, and num_processors is incremented.
 *
 * Returns the logical CPU number, -ENODEV when the processor is skipped
 * (disabled via disabled_cpu_apicid, or the last slot is kept for the
 * boot CPU), or -EINVAL when no CPU slot is left. Every failure also
 * increments disabled_cpus.
 */
int generic_processor_info(int apicid, int version)
{
	int cpu, max = nr_cpu_ids;
	bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
				phys_cpu_present_map);

	/*
	 * boot_cpu_physical_apicid is designed to have the apicid
	 * returned by read_apic_id(), i.e, the apicid of the
	 * currently booting-up processor. However, on some platforms,
	 * it is temporarily modified by the apicid reported as BSP
	 * through MP table. Concretely:
	 *
	 * - arch/x86/kernel/mpparse.c: MP_processor_info()
	 * - arch/x86/mm/amdtopology.c: amd_numa_init()
	 *
	 * This function is executed with the modified
	 * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel
	 * parameter doesn't work to disable APs on kdump 2nd kernel.
	 *
	 * Since fixing handling of boot_cpu_physical_apicid requires
	 * another discussion and tests on each platform, we leave it
	 * for now and here we use read_apic_id() directly in this
	 * function, generic_processor_info().
	 */
	if (disabled_cpu_apicid != BAD_APICID &&
	    disabled_cpu_apicid != read_apic_id() &&
	    disabled_cpu_apicid == apicid) {
		int thiscpu = num_processors + disabled_cpus;

		pr_warn("APIC: Disabling requested cpu."
			" Processor %d/0x%x ignored.\n", thiscpu, apicid);

		disabled_cpus++;
		return -ENODEV;
	}

	/*
	 * If the boot cpu has not been detected yet, then only allow up to
	 * nr_cpu_ids - 1 processors and keep one slot free for the boot cpu.
	 */
	if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 &&
	    apicid != boot_cpu_physical_apicid) {
		int thiscpu = max + disabled_cpus - 1;

		pr_warn("APIC: NR_CPUS/possible_cpus limit of %i almost"
			" reached. Keeping one slot for boot cpu."
			"  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);

		disabled_cpus++;
		return -ENODEV;
	}

	/* All CPU slots are taken */
	if (num_processors >= nr_cpu_ids) {
		int thiscpu = max + disabled_cpus;

		pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. "
			"Processor %d/0x%x ignored.\n", max, thiscpu, apicid);

		disabled_cpus++;
		return -EINVAL;
	}

	if (apicid == boot_cpu_physical_apicid) {
		/*
		 * x86_bios_cpu_apicid is required to have processors listed
		 * in same order as logical cpu numbers. Hence the first
		 * entry is BSP, and so on.
		 * boot_cpu_init() already hold bit 0 in cpu_present_mask
		 * for BSP.
		 */
		cpu = 0;

		/* Logical cpuid 0 is reserved for BSP. */
		cpuid_to_apicid[0] = apicid;
	} else {
		cpu = allocate_logical_cpuid(apicid);
		if (cpu < 0) {
			disabled_cpus++;
			return -EINVAL;
		}
	}

	/*
	 * Validate version
	 */
	if (version == 0x0) {
		pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
			cpu, apicid);
		version = 0x10;
	}

	/* A mismatch with the boot CPU version is reported, not rejected */
	if (version != boot_cpu_apic_version) {
		pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
			boot_cpu_apic_version, cpu, version);
	}

	/* Track the highest APIC ID registered so far */
	if (apicid > max_physical_apicid)
		max_physical_apicid = apicid;

#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
	early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
	early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
#endif
#ifdef CONFIG_X86_32
	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
		apic->x86_32_early_logical_apicid(cpu);
#endif
	set_cpu_possible(cpu, true);
	physid_set(apicid, phys_cpu_present_map);
	set_cpu_present(cpu, true);
	num_processors++;

	return cpu;
}
2508
/* Return the APIC ID of the executing CPU as reported by read_apic_id() */
int hard_smp_processor_id(void)
{
	return read_apic_id();
}
2513
2514void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg,
2515                           bool dmar)
2516{
2517        memset(msg, 0, sizeof(*msg));
2518
2519        msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
2520        msg->arch_addr_lo.dest_mode_logical = apic->dest_mode_logical;
2521        msg->arch_addr_lo.destid_0_7 = cfg->dest_apicid & 0xFF;
2522
2523        msg->arch_data.delivery_mode = APIC_DELIVERY_MODE_FIXED;
2524        msg->arch_data.vector = cfg->vector;
2525
2526        msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
2527        /*
2528         * Only the IOMMU itself can use the trick of putting destination
2529         * APIC ID into the high bits of the address. Anything else would
2530         * just be writing to memory if it tried that, and needs IR to
2531         * address APICs which can't be addressed in the normal 32-bit
2532         * address range at 0xFFExxxxx. That is typically just 8 bits, but
2533         * some hypervisors allow the extended destination ID field in bits
2534         * 5-11 to be used, giving support for 15 bits of APIC IDs in total.
2535         */
2536        if (dmar)
2537                msg->arch_addr_hi.destid_8_31 = cfg->dest_apicid >> 8;
2538        else if (virt_ext_dest_id && cfg->dest_apicid < 0x8000)
2539                msg->arch_addr_lo.virt_destid_8_14 = cfg->dest_apicid >> 8;
2540        else
2541                WARN_ON_ONCE(cfg->dest_apicid > 0xFF);
2542}
2543
2544u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid)
2545{
2546        u32 dest = msg->arch_addr_lo.destid_0_7;
2547
2548        if (extid)
2549                dest |= msg->arch_addr_hi.destid_8_31 << 8;
2550        return dest;
2551}
2552EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid);
2553
2554/*
2555 * Override the generic EOI implementation with an optimized version.
2556 * Only called during early boot when only one CPU is active and with
2557 * interrupts disabled, so we know this does not race with actual APIC driver
2558 * use.
2559 */
2560void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
2561{
2562        struct apic **drv;
2563
2564        for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
2565                /* Should happen once for each apic */
2566                WARN_ON((*drv)->eoi_write == eoi_write);
2567                (*drv)->native_eoi_write = (*drv)->eoi_write;
2568                (*drv)->eoi_write = eoi_write;
2569        }
2570}
2571
/*
 * Boot CPU preparation for UP mode, called from apic_bsp_setup() when
 * @upmode is set. It ends by making boot_cpu_physical_apicid the only
 * APIC ID in phys_cpu_present_map.
 */
static void __init apic_bsp_up_setup(void)
{
#ifdef CONFIG_X86_64
	/* Write the recorded boot CPU APIC ID into the APIC ID register */
	apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid));
#else
	/*
	 * Hack: In case of kdump, after a crash, kernel might be booting
	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
	 * might be zero if read from MP tables. Get it from LAPIC.
	 */
# ifdef CONFIG_CRASH_DUMP
	boot_cpu_physical_apicid = read_apic_id();
# endif
#endif
	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
}
2588
/**
 * apic_bsp_setup - Setup function for local apic and io-apic
 * @upmode:		Force UP mode (for APIC_init_uniprocessor)
 *
 * Runs the local APIC and IO-APIC setup steps for the boot CPU in a fixed
 * order; apic_bsp_up_setup() is only included when @upmode is set.
 */
static void __init apic_bsp_setup(bool upmode)
{
	connect_bsp_APIC();
	if (upmode)
		apic_bsp_up_setup();
	setup_local_APIC();

	enable_IO_APIC();
	end_local_APIC_setup();
	irq_remap_enable_fault_handling();
	setup_IO_APIC();
	lapic_update_legacy_vectors();
}
2606
2607#ifdef CONFIG_UP_LATE_INIT
2608void __init up_late_init(void)
2609{
2610        if (apic_intr_mode == APIC_PIC)
2611                return;
2612
2613        /* Setup local timer */
2614        x86_init.timers.setup_percpu_clockev();
2615}
2616#endif
2617
2618/*
2619 * Power management
2620 */
2621#ifdef CONFIG_PM
2622
/*
 * Local APIC register values captured by lapic_suspend(), plus the flag
 * that decides whether suspend/resume handling is done at all.
 */
static struct {
	/*
	 * 'active' is true if the local APIC was enabled by us and
	 * not the BIOS; this signifies that we are also responsible
	 * for disabling it before entering apm/acpi suspend
	 */
	int active;
	/* r/w apic fields */
	unsigned int apic_id;
	unsigned int apic_taskpri;
	unsigned int apic_ldr;
	unsigned int apic_dfr;
	unsigned int apic_spiv;
	unsigned int apic_lvtt;
	unsigned int apic_lvtpc;	/* only saved when maxlvt >= 4 */
	unsigned int apic_lvt0;
	unsigned int apic_lvt1;
	unsigned int apic_lvterr;
	unsigned int apic_tmict;
	unsigned int apic_tdcr;
	unsigned int apic_thmr;		/* only saved when maxlvt >= 5 */
	unsigned int apic_cmci;		/* only saved when maxlvt >= 6 */
} apic_pm_state;
2646
/*
 * Save the local APIC register state into apic_pm_state and shut the
 * local APIC down for suspend.
 *
 * Does nothing unless apic_pm_state.active is set. Always returns 0.
 */
static int lapic_suspend(void)
{
	unsigned long flags;
	int maxlvt;

	if (!apic_pm_state.active)
		return 0;

	maxlvt = lapic_get_maxlvt();

	/* Optional LVT entries are only read when maxlvt says they exist */
	apic_pm_state.apic_id = apic_read(APIC_ID);
	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
	if (maxlvt >= 4)
		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
#ifdef CONFIG_X86_THERMAL_VECTOR
	if (maxlvt >= 5)
		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
#endif
#ifdef CONFIG_X86_MCE_INTEL
	if (maxlvt >= 6)
		apic_pm_state.apic_cmci = apic_read(APIC_LVTCMCI);
#endif

	/* The shutdown sequence below runs with local interrupts off */
	local_irq_save(flags);

	/*
	 * Mask IOAPIC before disabling the local APIC to prevent stale IRR
	 * entries on some implementations.
	 */
	mask_ioapic_entries();

	disable_local_APIC();

	irq_remapping_disable();

	local_irq_restore(flags);
	return 0;
}
2694
2695static void lapic_resume(void)
2696{
2697        unsigned int l, h;
2698        unsigned long flags;
2699        int maxlvt;
2700
2701        if (!apic_pm_state.active)
2702                return;
2703
2704        local_irq_save(flags);
2705
2706        /*
2707         * IO-APIC and PIC have their own resume routines.
2708         * We just mask them here to make sure the interrupt
2709         * subsystem is completely quiet while we enable x2apic
2710         * and interrupt-remapping.
2711         */
2712        mask_ioapic_entries();
2713        legacy_pic->mask_all();
2714
2715        if (x2apic_mode) {
2716                __x2apic_enable();
2717        } else {
2718                /*
2719                 * Make sure the APICBASE points to the right address
2720                 *
2721                 * FIXME! This will be wrong if we ever support suspend on
2722                 * SMP! We'll need to do this as part of the CPU restore!
2723                 */
2724                if (boot_cpu_data.x86 >= 6) {
2725                        rdmsr(MSR_IA32_APICBASE, l, h);
2726                        l &= ~MSR_IA32_APICBASE_BASE;
2727                        l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
2728                        wrmsr(MSR_IA32_APICBASE, l, h);
2729                }
2730        }
2731
2732        maxlvt = lapic_get_maxlvt();
2733        apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
2734        apic_write(APIC_ID, apic_pm_state.apic_id);
2735        apic_write(APIC_DFR, apic_pm_state.apic_dfr);
2736        apic_write(APIC_LDR, apic_pm_state.apic_ldr);
2737        apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
2738        apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
2739        apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
2740        apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
2741#ifdef CONFIG_X86_THERMAL_VECTOR
2742        if (maxlvt >= 5)
2743                apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
2744#endif
2745#ifdef CONFIG_X86_MCE_INTEL
2746        if (maxlvt >= 6)
2747                apic_write(APIC_LVTCMCI, apic_pm_state.apic_cmci);
2748#endif
2749        if (maxlvt >= 4)
2750                apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
2751        apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
2752        apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
2753        apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
2754        apic_write(APIC_ESR, 0);
2755        apic_read(APIC_ESR);
2756        apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
2757        apic_write(APIC_ESR, 0);
2758        apic_read(APIC_ESR);
2759
2760        irq_remapping_reenable(x2apic_mode);
2761
2762        local_irq_restore(flags);
2763}
2764
2765/*
2766 * This device has no shutdown method - fully functioning local APICs
2767 * are needed on every CPU up until machine_halt/restart/poweroff.
2768 */
2769
2770static struct syscore_ops lapic_syscore_ops = {
2771        .resume         = lapic_resume,
2772        .suspend        = lapic_suspend,
2773};
2774
2775static void apic_pm_activate(void)
2776{
2777        apic_pm_state.active = 1;
2778}
2779
2780static int __init init_lapic_sysfs(void)
2781{
2782        /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
2783        if (boot_cpu_has(X86_FEATURE_APIC))
2784                register_syscore_ops(&lapic_syscore_ops);
2785
2786        return 0;
2787}
2788
2789/* local apic needs to resume before other devices access its registers. */
2790core_initcall(init_lapic_sysfs);
2791
2792#else   /* CONFIG_PM */
2793
/* Without CONFIG_PM there is no suspend/resume state to arm. */
static void apic_pm_activate(void)
{
}
2795
2796#endif  /* CONFIG_PM */
2797
2798#ifdef CONFIG_X86_64
2799
2800static int multi_checked;
2801static int multi;
2802
2803static int set_multi(const struct dmi_system_id *d)
2804{
2805        if (multi)
2806                return 0;
2807        pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
2808        multi = 1;
2809        return 0;
2810}
2811
2812static const struct dmi_system_id multi_dmi_table[] = {
2813        {
2814                .callback = set_multi,
2815                .ident = "IBM System Summit2",
2816                .matches = {
2817                        DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
2818                        DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
2819                },
2820        },
2821        {}
2822};
2823
2824static void dmi_check_multi(void)
2825{
2826        if (multi_checked)
2827                return;
2828
2829        dmi_check_system(multi_dmi_table);
2830        multi_checked = 1;
2831}
2832
2833/*
2834 * apic_is_clustered_box() -- Check if we can expect good TSC
2835 *
2836 * Thus far, the major user of this is IBM's Summit2 series:
2837 * Clustered boxes may have unsynced TSC problems if they are
2838 * multi-chassis.
2839 * Use DMI to check them
2840 */
2841int apic_is_clustered_box(void)
2842{
2843        dmi_check_multi();
2844        return multi;
2845}
2846#endif
2847
2848/*
2849 * APIC command line parameters
2850 */
2851static int __init setup_disableapic(char *arg)
2852{
2853        disable_apic = 1;
2854        setup_clear_cpu_cap(X86_FEATURE_APIC);
2855        return 0;
2856}
2857early_param("disableapic", setup_disableapic);
2858
2859/* same as disableapic, for compatibility */
2860static int __init setup_nolapic(char *arg)
2861{
2862        return setup_disableapic(arg);
2863}
2864early_param("nolapic", setup_nolapic);
2865
2866static int __init parse_lapic_timer_c2_ok(char *arg)
2867{
2868        local_apic_timer_c2_ok = 1;
2869        return 0;
2870}
2871early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
2872
2873static int __init parse_disable_apic_timer(char *arg)
2874{
2875        disable_apic_timer = 1;
2876        return 0;
2877}
2878early_param("noapictimer", parse_disable_apic_timer);
2879
2880static int __init parse_nolapic_timer(char *arg)
2881{
2882        disable_apic_timer = 1;
2883        return 0;
2884}
2885early_param("nolapic_timer", parse_nolapic_timer);
2886
2887static int __init apic_set_verbosity(char *arg)
2888{
2889        if (!arg)  {
2890#ifdef CONFIG_X86_64
2891                skip_ioapic_setup = 0;
2892                return 0;
2893#endif
2894                return -EINVAL;
2895        }
2896
2897        if (strcmp("debug", arg) == 0)
2898                apic_verbosity = APIC_DEBUG;
2899        else if (strcmp("verbose", arg) == 0)
2900                apic_verbosity = APIC_VERBOSE;
2901#ifdef CONFIG_X86_64
2902        else {
2903                pr_warn("APIC Verbosity level %s not recognised"
2904                        " use apic=verbose or apic=debug\n", arg);
2905                return -EINVAL;
2906        }
2907#endif
2908
2909        return 0;
2910}
2911early_param("apic", apic_set_verbosity);
2912
2913static int __init lapic_insert_resource(void)
2914{
2915        if (!apic_phys)
2916                return -1;
2917
2918        /* Put local APIC into the resource map. */
2919        lapic_resource.start = apic_phys;
2920        lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
2921        insert_resource(&iomem_resource, &lapic_resource);
2922
2923        return 0;
2924}
2925
2926/*
2927 * need call insert after e820__reserve_resources()
2928 * that is using request_resource
2929 */
2930late_initcall(lapic_insert_resource);
2931
2932static int __init apic_set_disabled_cpu_apicid(char *arg)
2933{
2934        if (!arg || !get_option(&arg, &disabled_cpu_apicid))
2935                return -EINVAL;
2936
2937        return 0;
2938}
2939early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);
2940
2941static int __init apic_set_extnmi(char *arg)
2942{
2943        if (!arg)
2944                return -EINVAL;
2945
2946        if (!strncmp("all", arg, 3))
2947                apic_extnmi = APIC_EXTNMI_ALL;
2948        else if (!strncmp("none", arg, 4))
2949                apic_extnmi = APIC_EXTNMI_NONE;
2950        else if (!strncmp("bsp", arg, 3))
2951                apic_extnmi = APIC_EXTNMI_BSP;
2952        else {
2953                pr_warn("Unknown external NMI delivery mode `%s' ignored\n", arg);
2954                return -EINVAL;
2955        }
2956
2957        return 0;
2958}
2959early_param("apic_extnmi", apic_set_extnmi);
2960