linux/arch/x86/kernel/smp.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Intel SMP support routines.
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
 *	(c) 1998-99, 2000, 2009 Ingo Molnar <mingo@redhat.com>
 *	(c) 2002,2003 Andi Kleen, SuSE Labs.
 *
 *	i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
 */

#include <linux/init.h>

#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/gfp.h>

#include <asm/mtrr.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apic.h>
#include <asm/idtentry.h>
#include <asm/nmi.h>
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>
#include <asm/kexec.h>
#include <asm/virtext.h>

/*
 *	Some notes on x86 processor bugs affecting SMP operation:
 *
 *	Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
 *	The Linux implications for SMP are handled as follows:
 *
 *	Pentium III / [Xeon]
 *		None of the E1AP-E3AP errata are visible to the user.
 *
 *	E1AP.	see PII A1AP
 *	E2AP.	see PII A2AP
 *	E3AP.	see PII A3AP
 *
 *	Pentium II / [Xeon]
 *		None of the A1AP-A3AP errata are visible to the user.
 *
 *	A1AP.	see PPro 1AP
 *	A2AP.	see PPro 2AP
 *	A3AP.	see PPro 7AP
 *
 *	Pentium Pro
 *		None of the 1AP-9AP errata are visible to the normal user,
 *	except for the occasional delivery of a 'spurious interrupt' as
 *	trap #15. This is very rare and a non-problem.
 *
 *	1AP.	Linux maps the APIC as non-cacheable
 *	2AP.	worked around in hardware
 *	3AP.	fixed in C0 and above steppings by microcode update.
 *		Linux does not use excessive STARTUP_IPIs.
 *	4AP.	worked around in hardware
 *	5AP.	symmetric IO mode (normal Linux operation) not affected.
 *		'noapic' mode has vector 0xf filled out properly.
 *	6AP.	'noapic' mode might be affected - fixed in later steppings
 *	7AP.	We do not assume writes to the LVT deassert IRQs
 *	8AP.	We do not enable low power mode (deep sleep) during MP bootup
 *	9AP.	We do not use mixed mode
 *
 *	Pentium
 *		There is a marginal case where REP MOVS on 100MHz SMP
 *	machines with B stepping processors can fail. XXX should provide
 *	an L1cache=Writethrough or L1cache=off option.
 *
 *		B stepping CPUs may hang. There are hardware workarounds
 *	for this. We warn about it in case your board doesn't have the
 *	workarounds. Basically that's so I can tell anyone with a B
 *	stepping CPU and SMP problems "tough".
 *
 *	Specific items [From Pentium Processor Specification Update]
 *
 *	1AP.	Linux doesn't use remote read
 *	2AP.	Linux doesn't trust APIC errors
 *	3AP.	We work around this
 *	4AP.	Linux never generates 3 interrupts of the same priority
 *		to cause a lost local interrupt.
 *	5AP.	Remote read is never used
 *	6AP.	not affected - worked around in hardware
 *	7AP.	not affected - worked around in hardware
 *	8AP.	worked around in hardware - we get explicit CS errors if not
 *	9AP.	only 'noapic' mode affected. Might generate spurious
 *		interrupts; we log only the first one and count the
 *		rest silently.
 *	10AP.	not affected - worked around in hardware
 *	11AP.	Linux reads the APIC between writes to avoid this, as per
 *		the documentation. Make sure you preserve this as it affects
 *		the C stepping chips too.
 *	12AP.	not affected - worked around in hardware
 *	13AP.	not affected - worked around in hardware
 *	14AP.	we always deassert INIT during bootup
 *	15AP.	not affected - worked around in hardware
 *	16AP.	not affected - worked around in hardware
 *	17AP.	not affected - worked around in hardware
 *	18AP.	not affected - worked around in hardware
 *	19AP.	not affected - worked around in BIOS
 *
 *	If this sounds worrying, believe me these bugs are either
 *	___RARE___ or are signal timing bugs worked around in hardware,
 *	and there's nothing of note from the C stepping upwards.
 */

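/*
 * State shared with the stop machinery below: stopping_cpu latches the
 * CPU that initiated the shutdown so its own NMI handler can ignore
 * the stop NMI, and smp_no_nmi_ipi is set by the "nonmi_ipi" command
 * line option at the bottom of this file.
 */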
static atomic_t stopping_cpu = ATOMIC_INIT(-1);
static bool smp_no_nmi_ipi = false;

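/*
 * NMI fallback handler: used by native_stop_other_cpus() when the
 * REBOOT_VECTOR IPI fails to stop a CPU, e.g. one that is spinning
 * with interrupts disabled.
 */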
static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
{
	/* We are registered on the stopping CPU too; avoid a spurious NMI */
	if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
		return NMI_HANDLED;

	cpu_emergency_vmxoff();
	stop_this_cpu(NULL);

	return NMI_HANDLED;
}

/*
 * Handler for the REBOOT_VECTOR IPI sent by native_stop_other_cpus()
 * below: disable virtualization and halt this CPU.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
{
	ack_APIC_irq();
	cpu_emergency_vmxoff();
	stop_this_cpu(NULL);
}

static int register_stop_handler(void)
{
	return register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback,
				    NMI_FLAG_FIRST, "smp_stop");
}

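/*
 * Stop all other CPUs: first politely via REBOOT_VECTOR, then with an
 * NMI for any stragglers. Used on the reboot and panic paths.
 */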
static void native_stop_other_cpus(int wait)
{
	unsigned long flags;
	unsigned long timeout;

	if (reboot_force)
		return;

	/*
	 * Use a dedicated vector here because smp_call_function
	 * does lots of things not suitable in a panic situation.
	 *
	 * We start with the REBOOT_VECTOR irq. The irq is treated as
	 * a sync point to allow critical regions of code on other cpus
	 * to release their spin locks and re-enable irqs. Jumping
	 * straight to an NMI might accidentally cause deadlocks with
	 * further shutdown/panic code. By syncing, we give the cpus up
	 * to one second to finish their work before we force them off
	 * with the NMI.
	 */
	if (num_online_cpus() > 1) {
		/* did someone beat us here? */
		if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1)
			return;

		/* sync above data before sending IRQ */
		wmb();

		apic_send_IPI_allbutself(REBOOT_VECTOR);

		/*
		 * Don't wait longer than a second for IPI completion. The
		 * wait request is not checked here because that would
		 * prevent an NMI shutdown attempt in case that not all
		 * CPUs reach shutdown state.
		 */
		timeout = USEC_PER_SEC;
		while (num_online_cpus() > 1 && timeout--)
			udelay(1);
	}

	/* if the REBOOT_VECTOR didn't work, try with the NMI */
	if (num_online_cpus() > 1) {
		/*
		 * If NMI IPI is enabled, try to register the stop handler
		 * and send the IPI. In any case try to wait for the other
		 * CPUs to stop.
		 */
		if (!smp_no_nmi_ipi && !register_stop_handler()) {
			/* sync above data before sending IRQ */
			wmb();

			pr_emerg("Shutting down cpus with NMI\n");

			apic_send_IPI_allbutself(NMI_VECTOR);
		}
		/*
		 * Don't wait longer than 10 ms if the caller didn't
		 * request it. If wait is true, the machine hangs here if
		 * one or more CPUs do not reach shutdown state.
		 */
		timeout = USEC_PER_MSEC * 10;
		while (num_online_cpus() > 1 && (wait || timeout--))
			udelay(1);
	}

	local_irq_save(flags);
	disable_local_APIC();
	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
	local_irq_restore(flags);
}
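
/*
 * For context, callers do not invoke native_stop_other_cpus() directly;
 * they go through the smp_ops table defined at the bottom of this file.
 * A minimal sketch of the wrapper (as in <asm/smp.h>, shown here for
 * illustration only):
 *
 *	static inline void stop_other_cpus(void)
 *	{
 *		smp_ops.stop_other_cpus(1);
 *	}
 */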

/*
 * Reschedule callback. KVM uses this interrupt to force a CPU out of
 * guest mode.
 */
DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_reschedule_ipi)
{
	ack_APIC_irq();
	trace_reschedule_entry(RESCHEDULE_VECTOR);
	inc_irq_stat(irq_resched_count);
	scheduler_ipi();
	trace_reschedule_exit(RESCHEDULE_VECTOR);
}

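/*
 * Handlers for the two smp_call_function IPIs: CALL_FUNCTION_VECTOR
 * covers multi-CPU cross calls, CALL_FUNCTION_SINGLE_VECTOR covers
 * single-CPU ones. Both dispatch to the generic SMP helpers.
 */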
DEFINE_IDTENTRY_SYSVEC(sysvec_call_function)
{
	ack_APIC_irq();
	trace_call_function_entry(CALL_FUNCTION_VECTOR);
	inc_irq_stat(irq_call_count);
	generic_smp_call_function_interrupt();
	trace_call_function_exit(CALL_FUNCTION_VECTOR);
}

DEFINE_IDTENTRY_SYSVEC(sysvec_call_function_single)
{
	ack_APIC_irq();
	trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
	inc_irq_stat(irq_call_count);
	generic_smp_call_function_single_interrupt();
	trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
}

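/*
 * "nonmi_ipi" on the kernel command line disables the NMI fallback in
 * native_stop_other_cpus(); only the REBOOT_VECTOR IPI is used then.
 */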
static int __init nonmi_ipi_setup(char *str)
{
	smp_no_nmi_ipi = true;
	return 1;
}

__setup("nonmi_ipi", nonmi_ipi_setup);

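/*
 * The default, bare-metal SMP operations. Paravirtualized platforms
 * (e.g. Xen) override individual callbacks in this table.
 */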
struct smp_ops smp_ops = {
	.smp_prepare_boot_cpu	= native_smp_prepare_boot_cpu,
	.smp_prepare_cpus	= native_smp_prepare_cpus,
	.smp_cpus_done		= native_smp_cpus_done,

	.stop_other_cpus	= native_stop_other_cpus,
#if defined(CONFIG_KEXEC_CORE)
	.crash_stop_other_cpus	= kdump_nmi_shootdown_cpus,
#endif
	.smp_send_reschedule	= native_smp_send_reschedule,

	.cpu_up			= native_cpu_up,
	.cpu_die		= native_cpu_die,
	.cpu_disable		= native_cpu_disable,
	.play_dead		= native_play_dead,

	.send_call_func_ipi	= native_send_call_func_ipi,
	.send_call_func_single_ipi = native_send_call_func_single_ipi,
};
EXPORT_SYMBOL_GPL(smp_ops);