linux/arch/x86/kernel/smp.c
/*
 *      Intel SMP support routines.
 *
 *      (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
 *      (c) 1998-99, 2000, 2009 Ingo Molnar <mingo@redhat.com>
 *      (c) 2002,2003 Andi Kleen, SuSE Labs.
 *
 *      i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
 *
 *      This code is released under the GNU General Public License version 2 or
 *      later.
 */

#include <linux/init.h>

#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/gfp.h>

#include <asm/mtrr.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apic.h>
#include <asm/nmi.h>
/*
 *      Some notes on x86 processor bugs affecting SMP operation:
 *
 *      Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
 *      The Linux implications for SMP are handled as follows:
 *
 *      Pentium III / [Xeon]
 *              None of the E1AP-E3AP errata are visible to the user.
 *
 *      E1AP.   see PII A1AP
 *      E2AP.   see PII A2AP
 *      E3AP.   see PII A3AP
 *
 *      Pentium II / [Xeon]
 *              None of the A1AP-A3AP errata are visible to the user.
 *
 *      A1AP.   see PPro 1AP
 *      A2AP.   see PPro 2AP
 *      A3AP.   see PPro 7AP
 *
 *      Pentium Pro
 *              None of the 1AP-9AP errata are visible to the normal user,
 *      except for the occasional delivery of a 'spurious interrupt' as trap #15.
 *      This is very rare and a non-problem.
 *
 *      1AP.    Linux maps the APIC as non-cacheable
 *      2AP.    worked around in hardware
 *      3AP.    fixed in C0 and above steppings by microcode update.
 *              Linux does not use excessive STARTUP_IPIs.
 *      4AP.    worked around in hardware
 *      5AP.    symmetric IO mode (normal Linux operation) not affected.
 *              'noapic' mode has vector 0xf filled out properly.
 *      6AP.    'noapic' mode might be affected - fixed in later steppings
 *      7AP.    We do not assume writes to the LVT deasserting IRQs
 *      8AP.    We do not enable low power mode (deep sleep) during MP bootup
 *      9AP.    We do not use mixed mode
 *
 *      Pentium
 *              There is a marginal case where REP MOVS on 100MHz SMP
 *      machines with B stepping processors can fail. XXX should provide
 *      an L1cache=Writethrough or L1cache=off option.
 *
 *              B stepping CPUs may hang. There are hardware work arounds
 *      for this. We warn about it in case your board doesn't have the work
 *      arounds. Basically that's so I can tell anyone with a B stepping
 *      CPU and SMP problems "tough".
 *
 *      Specific items [From Pentium Processor Specification Update]
 *
 *      1AP.    Linux doesn't use remote read
 *      2AP.    Linux doesn't trust APIC errors
 *      3AP.    We work around this
 *      4AP.    Linux never generates 3 interrupts of the same priority
 *              to cause a lost local interrupt.
 *      5AP.    Remote read is never used
 *      6AP.    not affected - worked around in hardware
 *      7AP.    not affected - worked around in hardware
 *      8AP.    worked around in hardware - we get explicit CS errors if not
 *      9AP.    only 'noapic' mode affected. Might generate spurious
 *              interrupts, we log only the first one and count the
 *              rest silently.
 *      10AP.   not affected - worked around in hardware
 *      11AP.   Linux reads the APIC between writes to avoid this, as per
 *              the documentation. Make sure you preserve this as it affects
 *              the C stepping chips too.
 *      12AP.   not affected - worked around in hardware
 *      13AP.   not affected - worked around in hardware
 *      14AP.   we always deassert INIT during bootup
 *      15AP.   not affected - worked around in hardware
 *      16AP.   not affected - worked around in hardware
 *      17AP.   not affected - worked around in hardware
 *      18AP.   not affected - worked around in hardware
 *      19AP.   not affected - worked around in BIOS
 *
 *      If this sounds worrying, believe me these bugs are either ___RARE___,
 *      or are signal timing bugs worked around in hardware, and there's
 *      nothing of note with C stepping upwards.
 */

static atomic_t stopping_cpu = ATOMIC_INIT(-1);
static bool smp_no_nmi_ipi = false;

/*
 * this function sends a 'reschedule' IPI to another CPU.
 * it goes straight through and wastes no time serializing
 * anything. Worst case is that we lose a reschedule ...
 */
static void native_smp_send_reschedule(int cpu)
{
        if (unlikely(cpu_is_offline(cpu))) {
                WARN_ON(1);
                return;
        }
        apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
}
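
/*
 * For reference: generic code normally reaches the hook above through the
 * smp_send_reschedule() wrapper (in <asm/smp.h> in this era), which simply
 * dispatches through smp_ops; the target CPU then handles RESCHEDULE_VECTOR
 * in smp_reschedule_interrupt() below. A minimal sketch of that wrapper,
 * assuming the usual smp_ops indirection:
 *
 *      static inline void smp_send_reschedule(int cpu)
 *      {
 *              smp_ops.smp_send_reschedule(cpu);
 *      }
 */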

void native_send_call_func_single_ipi(int cpu)
{
        apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
}

void native_send_call_func_ipi(const struct cpumask *mask)
{
        cpumask_var_t allbutself;

        if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
                apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
                return;
        }

        cpumask_copy(allbutself, cpu_online_mask);
        cpumask_clear_cpu(smp_processor_id(), allbutself);

        if (cpumask_equal(mask, allbutself) &&
            cpumask_equal(cpu_online_mask, cpu_callout_mask))
                apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR);
        else
                apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);

        free_cpumask_var(allbutself);
}
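
/*
 * For reference: kernel/smp.c reaches the two hooks above through the
 * arch_send_call_function_single_ipi() and arch_send_call_function_ipi_mask()
 * wrappers (defined in <asm/smp.h> in this era), which dispatch through
 * smp_ops. A minimal sketch of those wrappers, assuming the usual smp_ops
 * indirection:
 *
 *      static inline void arch_send_call_function_single_ipi(int cpu)
 *      {
 *              smp_ops.send_call_func_single_ipi(cpu);
 *      }
 *
 *      static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 *      {
 *              smp_ops.send_call_func_ipi(mask);
 *      }
 *
 * Note that native_send_call_func_ipi() above prefers the cheaper shorthand
 * broadcast (send_IPI_allbutself) when the target mask is exactly "every
 * online CPU but us" and all online CPUs have been called out.
 */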

static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
{
        /* We are registered on the stopping cpu too, avoid spurious NMI */
        if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
                return NMI_HANDLED;

        stop_this_cpu(NULL);

        return NMI_HANDLED;
}

/*
 * This is the interrupt handler for the REBOOT_VECTOR IPI sent by
 * native_stop_other_cpus() below: each CPU that receives it stops itself.
 */

asmlinkage void smp_reboot_interrupt(void)
{
        ack_APIC_irq();
        irq_enter();
        stop_this_cpu(NULL);
        irq_exit();
}

static void native_stop_other_cpus(int wait)
{
        unsigned long flags;
        unsigned long timeout;

        if (reboot_force)
                return;

        /*
         * Use a dedicated vector here because smp_call_function
         * does lots of things not suitable in a panic situation.
         */

        /*
         * We start by using the REBOOT_VECTOR irq.
         * The irq is treated as a sync point to allow critical
         * regions of code on other cpus to release their spin locks
         * and re-enable irqs.  Jumping straight to an NMI might
         * accidentally cause deadlocks with further shutdown/panic
         * code.  By syncing, we give the cpus up to one second to
         * finish their work before we force them off with the NMI.
         */
        if (num_online_cpus() > 1) {
                /* did someone beat us here? */
                if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1)
                        return;

                /* sync above data before sending IRQ */
                wmb();

                apic->send_IPI_allbutself(REBOOT_VECTOR);

                /*
                 * Don't wait longer than a second if the caller
                 * didn't ask us to wait.
                 */
                timeout = USEC_PER_SEC;
                while (num_online_cpus() > 1 && (wait || timeout--))
                        udelay(1);
        }

        /* if the REBOOT_VECTOR didn't work, try with the NMI */
        if ((num_online_cpus() > 1) && (!smp_no_nmi_ipi)) {
                if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback,
                                         NMI_FLAG_FIRST, "smp_stop"))
                        /* Note: we ignore failures here */
                        /* Hope the REBOOT_IRQ is good enough */
                        goto finish;

                /* sync above data before sending IRQ */
                wmb();

                pr_emerg("Shutting down cpus with NMI\n");

                apic->send_IPI_allbutself(NMI_VECTOR);

                /*
                 * Don't wait longer than 10 ms if the caller
                 * didn't ask us to wait.
                 */
                timeout = USEC_PER_MSEC * 10;
                while (num_online_cpus() > 1 && (wait || timeout--))
                        udelay(1);
        }

finish:
        local_irq_save(flags);
        disable_local_APIC();
        local_irq_restore(flags);
}
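
/*
 * For reference: the shutdown/reboot and panic paths reach the function above
 * through the smp_send_stop() wrapper (in <asm/smp.h> in this era), which
 * dispatches through smp_ops. A minimal sketch of that wrapper, assuming the
 * usual smp_ops indirection:
 *
 *      static inline void smp_send_stop(void)
 *      {
 *              smp_ops.stop_other_cpus(0);
 *      }
 */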

/*
 * Reschedule call back.
 */
void smp_reschedule_interrupt(struct pt_regs *regs)
{
        ack_APIC_irq();
        inc_irq_stat(irq_resched_count);
        scheduler_ipi();
        /*
         * KVM uses this interrupt to force a cpu out of guest mode
         */
}

void smp_call_function_interrupt(struct pt_regs *regs)
{
        ack_APIC_irq();
        irq_enter();
        generic_smp_call_function_interrupt();
        inc_irq_stat(irq_call_count);
        irq_exit();
}

void smp_call_function_single_interrupt(struct pt_regs *regs)
{
        ack_APIC_irq();
        irq_enter();
        generic_smp_call_function_single_interrupt();
        inc_irq_stat(irq_call_count);
        irq_exit();
}
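
/*
 * For reference: smp_reschedule_interrupt(), smp_call_function_interrupt()
 * and smp_call_function_single_interrupt() above are entered through the
 * low-level IDT stubs that the architecture installs for RESCHEDULE_VECTOR,
 * CALL_FUNCTION_VECTOR and CALL_FUNCTION_SINGLE_VECTOR (the apicinterrupt
 * entry-code machinery and the vector setup in irqinit.c in this era); this
 * file only provides the C-level handlers.
 */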

static int __init nonmi_ipi_setup(char *str)
{
        smp_no_nmi_ipi = true;
        return 1;
}

__setup("nonmi_ipi", nonmi_ipi_setup);
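
/*
 * For reference: booting with "nonmi_ipi" on the kernel command line sets
 * smp_no_nmi_ipi above, which makes native_stop_other_cpus() skip the NMI
 * fallback and rely on the REBOOT_VECTOR IPI alone.
 */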

struct smp_ops smp_ops = {
        .smp_prepare_boot_cpu   = native_smp_prepare_boot_cpu,
        .smp_prepare_cpus       = native_smp_prepare_cpus,
        .smp_cpus_done          = native_smp_cpus_done,

        .stop_other_cpus        = native_stop_other_cpus,
        .smp_send_reschedule    = native_smp_send_reschedule,

        .cpu_up                 = native_cpu_up,
        .cpu_die                = native_cpu_die,
        .cpu_disable            = native_cpu_disable,
        .play_dead              = native_play_dead,

        .send_call_func_ipi     = native_send_call_func_ipi,
        .send_call_func_single_ipi = native_send_call_func_single_ipi,
};
EXPORT_SYMBOL_GPL(smp_ops);
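
/*
 * For reference: generic code does not call the native_* functions in this
 * file directly; it goes through the smp_ops structure above. Paravirtualized
 * platforms such as Xen install their own implementations by replacing
 * members of this structure during early boot.
 */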