linux/kernel/cpu.c
   1/* CPU control.
   2 * (C) 2001, 2002, 2003, 2004 Rusty Russell
   3 *
   4 * This code is licenced under the GPL.
   5 */
   6#include <linux/proc_fs.h>
   7#include <linux/smp.h>
   8#include <linux/init.h>
   9#include <linux/notifier.h>
  10#include <linux/sched.h>
  11#include <linux/sched/smt.h>
  12#include <linux/unistd.h>
  13#include <linux/cpu.h>
  14#include <linux/oom.h>
  15#include <linux/rcupdate.h>
  16#include <linux/export.h>
  17#include <linux/bug.h>
  18#include <linux/kthread.h>
  19#include <linux/stop_machine.h>
  20#include <linux/mutex.h>
  21#include <linux/gfp.h>
  22#include <linux/suspend.h>
  23#include <linux/lockdep.h>
  24#include <linux/tick.h>
  25
  26#include "smpboot.h"
  27#include "sched/sched.h"
  28
  29#ifdef CONFIG_SMP
  30/* Serializes the updates to cpu_online_mask, cpu_present_mask */
  31static DEFINE_MUTEX(cpu_add_remove_lock);
  32
  33static DEFINE_PER_CPU(bool, booted_once);
  34
  35/*
  36 * The following two APIs (cpu_maps_update_begin/done) must be used when
  37 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
  38 * The APIs cpu_notifier_register_begin/done() must be used to protect CPU
  39 * hotplug callback (un)registration performed using __register_cpu_notifier()
  40 * or __unregister_cpu_notifier().
  41 */
  42void cpu_maps_update_begin(void)
  43{
  44        mutex_lock(&cpu_add_remove_lock);
  45}
  46EXPORT_SYMBOL(cpu_notifier_register_begin);
  47
  48void cpu_maps_update_done(void)
  49{
  50        mutex_unlock(&cpu_add_remove_lock);
  51}
  52EXPORT_SYMBOL(cpu_notifier_register_done);
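/*
 * Illustrative usage sketch (not part of this file): a subsystem that
 * registers a hotplug callback with the __ variant is expected to bracket
 * the registration, together with any per-CPU setup, in the
 * cpu_notifier_register_begin()/done() pair described above.
 * init_my_percpu_state() and my_cpu_notifier are hypothetical names:
 *
 *	cpu_notifier_register_begin();
 *	for_each_online_cpu(cpu)
 *		init_my_percpu_state(cpu);
 *	__register_cpu_notifier(&my_cpu_notifier);
 *	cpu_notifier_register_done();
 */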
  53
  54static RAW_NOTIFIER_HEAD(cpu_chain);
  55
  56/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  57 * Should always be manipulated under cpu_add_remove_lock
  58 */
  59static int cpu_hotplug_disabled;
  60
  61#ifdef CONFIG_HOTPLUG_CPU
  62
  63static struct {
  64        struct task_struct *active_writer;
  65        struct mutex lock; /* Synchronizes accesses to refcount, */
  66        /*
  67         * Also blocks the new readers during
  68         * an ongoing cpu hotplug operation.
  69         */
  70        int refcount;
  71
  72#ifdef CONFIG_DEBUG_LOCK_ALLOC
  73        struct lockdep_map dep_map;
  74#endif
  75} cpu_hotplug = {
  76        .active_writer = NULL,
  77        .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
  78        .refcount = 0,
  79#ifdef CONFIG_DEBUG_LOCK_ALLOC
  80        .dep_map = {.name = "cpu_hotplug.lock" },
  81#endif
  82};
  83
  84/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
  85#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
  86#define cpuhp_lock_acquire_tryread() \
  87                                  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
  88#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
  89#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
  90
  91void get_online_cpus(void)
  92{
  93        might_sleep();
  94        if (cpu_hotplug.active_writer == current)
  95                return;
  96        cpuhp_lock_acquire_read();
  97        mutex_lock(&cpu_hotplug.lock);
  98        cpu_hotplug.refcount++;
  99        mutex_unlock(&cpu_hotplug.lock);
 100}
 101EXPORT_SYMBOL_GPL(get_online_cpus);
 102
 103bool try_get_online_cpus(void)
 104{
 105        if (cpu_hotplug.active_writer == current)
 106                return true;
 107        if (!mutex_trylock(&cpu_hotplug.lock))
 108                return false;
 109        cpuhp_lock_acquire_tryread();
 110        cpu_hotplug.refcount++;
 111        mutex_unlock(&cpu_hotplug.lock);
 112        return true;
 113}
 114EXPORT_SYMBOL_GPL(try_get_online_cpus);
 115
 116void put_online_cpus(void)
 117{
 118        if (cpu_hotplug.active_writer == current)
 119                return;
 120        mutex_lock(&cpu_hotplug.lock);
 121
 122        if (WARN_ON(!cpu_hotplug.refcount))
 123                cpu_hotplug.refcount++; /* try to fix things up */
 124
 125        if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
 126                wake_up_process(cpu_hotplug.active_writer);
 127        mutex_unlock(&cpu_hotplug.lock);
 128        cpuhp_lock_release();
 129
 130}
 131EXPORT_SYMBOL_GPL(put_online_cpus);
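/*
 * Illustrative reader-side sketch: code that must not race with a CPU going
 * away wraps its per-CPU work in get_online_cpus()/put_online_cpus(), which
 * only bump and drop the refcount above.  do_per_cpu_work() is hypothetical:
 *
 *	get_online_cpus();
 *	for_each_online_cpu(cpu)
 *		do_per_cpu_work(cpu);
 *	put_online_cpus();
 */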
 132
 133#ifdef CONFIG_PROVE_LOCKING
 134/*
 135 * The cpu_hotplug structure actually has 2 separate lockdep allocation
 136 * debug structures. One is the core dep_map in the structure itself and
 137 * the other one is in the cpu_hotplug.lock mutex. The core dep_map is
 138 * tracked as a rwlock with get_online_cpus() treated as a read lock and
 139 * cpu_hotplug_begin() as a write lock.
 140 *
 141 * The try_get_online_cpus() can work around potential recursive lock
 142 * taking issue reported by lockdep. However, the lockdep structure in
 143 * the mutex will still report lockdep error because of the mutex_lock()
 144 * in put_online_cpus(). It is actually not a real problem as the success
 145 * of try_get_online_cpus() means other cpus cannot start a hotplug event
  146 * sequence before put_online_cpus(). Other mutex lock holders can only
 147 * increment and decrement the reference count which is pretty quick.
 148 *
  149 * To avoid false positives of this kind, the lockdep tracking in the mutex
 150 * itself is turned off. Lockdep tracking will still be done in the core
 151 * dep_map structure as a rwlock.
 152 */
 153static int __init disable_cpu_hotplug_mutex_lockdep(void)
 154{
 155        lockdep_set_novalidate_class(&cpu_hotplug.lock);
 156        return 0;
 157}
 158pure_initcall(disable_cpu_hotplug_mutex_lockdep);
 159
 160#endif
 161
 162/*
 163 * This ensures that the hotplug operation can begin only when the
 164 * refcount goes to zero.
 165 *
 166 * Note that during a cpu-hotplug operation, the new readers, if any,
 167 * will be blocked by the cpu_hotplug.lock
 168 *
 169 * Since cpu_hotplug_begin() is always called after invoking
 170 * cpu_maps_update_begin(), we can be sure that only one writer is active.
 171 *
 172 * Note that theoretically, there is a possibility of a livelock:
 173 * - Refcount goes to zero, last reader wakes up the sleeping
 174 *   writer.
 175 * - Last reader unlocks the cpu_hotplug.lock.
 176 * - A new reader arrives at this moment, bumps up the refcount.
 177 * - The writer acquires the cpu_hotplug.lock finds the refcount
 178 *   non zero and goes to sleep again.
 179 *
 180 * However, this is very difficult to achieve in practice since
  181 * get_online_cpus() is not an API which is called all that often.
 182 *
 183 */
 184static void cpu_hotplug_begin(void)
 185{
 186        cpu_hotplug.active_writer = current;
 187
 188        cpuhp_lock_acquire();
 189        for (;;) {
 190                mutex_lock(&cpu_hotplug.lock);
 191                if (likely(!cpu_hotplug.refcount))
 192                        break;
 193                __set_current_state(TASK_UNINTERRUPTIBLE);
 194                mutex_unlock(&cpu_hotplug.lock);
 195                schedule();
 196        }
 197}
 198
 199static void cpu_hotplug_done(void)
 200{
 201        cpu_hotplug.active_writer = NULL;
 202        mutex_unlock(&cpu_hotplug.lock);
 203        cpuhp_lock_release();
 204}
 205
 206/*
 207 * Wait for currently running CPU hotplug operations to complete (if any) and
 208 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 209 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 210 * hotplug path before performing hotplug operations. So acquiring that lock
 211 * guarantees mutual exclusion from any currently running hotplug operations.
 212 */
 213void cpu_hotplug_disable(void)
 214{
 215        cpu_maps_update_begin();
 216        cpu_hotplug_disabled = 1;
 217        cpu_maps_update_done();
 218}
 219
 220void cpu_hotplug_enable(void)
 221{
 222        cpu_maps_update_begin();
 223        cpu_hotplug_disabled = 0;
 224        cpu_maps_update_done();
 225}
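/*
 * Illustrative pairing sketch: callers bracket a region that must not see
 * CPUs coming or going with these two helpers, exactly as the PM notifier
 * later in this file does across suspend/hibernation.
 * do_hotplug_sensitive_work() is hypothetical:
 *
 *	cpu_hotplug_disable();
 *	do_hotplug_sensitive_work();
 *	cpu_hotplug_enable();
 */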
 226
 227#else /* #if CONFIG_HOTPLUG_CPU */
 228static void cpu_hotplug_begin(void) {}
 229static void cpu_hotplug_done(void) {}
 230#endif  /* #else #if CONFIG_HOTPLUG_CPU */
 231
 232/*
 233 * Architectures that need SMT-specific errata handling during SMT hotplug
 234 * should override this.
 235 */
 236void __weak arch_smt_update(void) { }
 237
 238#ifdef CONFIG_HOTPLUG_SMT
 239enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 240
 241void __init cpu_smt_disable(bool force)
 242{
 243        if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
 244                cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
 245                return;
 246
 247        if (force) {
 248                pr_info("SMT: Force disabled\n");
 249                cpu_smt_control = CPU_SMT_FORCE_DISABLED;
 250        } else {
 251                cpu_smt_control = CPU_SMT_DISABLED;
 252        }
 253}
 254
 255/*
 256 * The decision whether SMT is supported can only be done after the full
 257 * CPU identification. Called from architecture code.
 258 */
 259void __init cpu_smt_check_topology(void)
 260{
 261        if (!topology_smt_supported())
 262                cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
 263}
 264
 265static int __init smt_cmdline_disable(char *str)
 266{
 267        cpu_smt_disable(str && !strcmp(str, "force"));
 268        return 0;
 269}
 270early_param("nosmt", smt_cmdline_disable);
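/*
 * Illustrative boot-line usage (sketch): a bare "nosmt" soft-disables SMT
 * (it can be re-enabled later through the sysfs control file below), while
 * "nosmt=force" disables it irrevocably via CPU_SMT_FORCE_DISABLED:
 *
 *	... nosmt
 *	... nosmt=force
 */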
 271
 272bool cpu_smt_allowed(unsigned int cpu)
 273{
 274        if (cpu_smt_control == CPU_SMT_ENABLED)
 275                return true;
 276
 277        if (topology_is_primary_thread(cpu))
 278                return true;
 279
 280        /*
 281         * On x86 it's required to boot all logical CPUs at least once so
 282         * that the init code can get a chance to set CR4.MCE on each
  283         * CPU. Otherwise, a broadcast MCE observing CR4.MCE=0b on any
  284         * core will shut down the machine.
 285         */
 286        return !per_cpu(booted_once, cpu);
 287}
 288#endif
 289
 290/* Need to know about CPUs going up/down? */
 291int __ref register_cpu_notifier(struct notifier_block *nb)
 292{
 293        int ret;
 294        cpu_maps_update_begin();
 295        ret = raw_notifier_chain_register(&cpu_chain, nb);
 296        cpu_maps_update_done();
 297        return ret;
 298}
 299
 300int __ref __register_cpu_notifier(struct notifier_block *nb)
 301{
 302        return raw_notifier_chain_register(&cpu_chain, nb);
 303}
 304
 305static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
 306                        int *nr_calls)
 307{
 308        int ret;
 309
 310        ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call,
 311                                        nr_calls);
 312
 313        return notifier_to_errno(ret);
 314}
 315
 316static int cpu_notify(unsigned long val, void *v)
 317{
 318        return __cpu_notify(val, v, -1, NULL);
 319}
 320
 321#ifdef CONFIG_HOTPLUG_CPU
 322
 323static void cpu_notify_nofail(unsigned long val, void *v)
 324{
 325        BUG_ON(cpu_notify(val, v));
 326}
 327EXPORT_SYMBOL(register_cpu_notifier);
 328EXPORT_SYMBOL(__register_cpu_notifier);
 329
 330void __ref unregister_cpu_notifier(struct notifier_block *nb)
 331{
 332        cpu_maps_update_begin();
 333        raw_notifier_chain_unregister(&cpu_chain, nb);
 334        cpu_maps_update_done();
 335}
 336EXPORT_SYMBOL(unregister_cpu_notifier);
 337
 338void __ref __unregister_cpu_notifier(struct notifier_block *nb)
 339{
 340        raw_notifier_chain_unregister(&cpu_chain, nb);
 341}
 342EXPORT_SYMBOL(__unregister_cpu_notifier);
 343
 344/**
 345 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 346 * @cpu: a CPU id
 347 *
 348 * This function walks all processes, finds a valid mm struct for each one and
 349 * then clears a corresponding bit in mm's cpumask.  While this all sounds
 350 * trivial, there are various non-obvious corner cases, which this function
 351 * tries to solve in a safe manner.
 352 *
 353 * Also note that the function uses a somewhat relaxed locking scheme, so it may
 354 * be called only for an already offlined CPU.
 355 */
 356void clear_tasks_mm_cpumask(int cpu)
 357{
 358        struct task_struct *p;
 359
 360        /*
 361         * This function is called after the cpu is taken down and marked
  362         * offline, so it's not like new tasks will ever get this cpu set in
 363         * their mm mask. -- Peter Zijlstra
 364         * Thus, we may use rcu_read_lock() here, instead of grabbing
 365         * full-fledged tasklist_lock.
 366         */
 367        WARN_ON(cpu_online(cpu));
 368        rcu_read_lock();
 369        for_each_process(p) {
 370                struct task_struct *t;
 371
 372                /*
 373                 * Main thread might exit, but other threads may still have
 374                 * a valid mm. Find one.
 375                 */
 376                t = find_lock_task_mm(p);
 377                if (!t)
 378                        continue;
 379                cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
 380                task_unlock(t);
 381        }
 382        rcu_read_unlock();
 383}
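/*
 * Illustrative arch-side sketch (the function name is hypothetical):
 * architecture hotplug code typically calls clear_tasks_mm_cpumask() from
 * its CPU teardown path, after the CPU has been marked offline:
 *
 *	int hypothetical_arch_cpu_disable(unsigned int cpu)
 *	{
 *		set_cpu_online(cpu, false);
 *		clear_tasks_mm_cpumask(cpu);
 *		return 0;
 *	}
 */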
 384
 385static inline void check_for_tasks(int cpu)
 386{
 387        struct task_struct *p;
 388        cputime_t utime, stime;
 389
  390        write_lock_irq(&tasklist_lock);
 391        for_each_process(p) {
 392                task_cputime(p, &utime, &stime);
 393                if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
 394                    (utime || stime))
 395                        printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
 396                                "(state = %ld, flags = %x)\n",
 397                                p->comm, task_pid_nr(p), cpu,
 398                                p->state, p->flags);
 399        }
  400        write_unlock_irq(&tasklist_lock);
 401}
 402
 403struct take_cpu_down_param {
 404        unsigned long mod;
 405        void *hcpu;
 406};
 407
 408/* Take this CPU down. */
 409static int __ref take_cpu_down(void *_param)
 410{
 411        struct take_cpu_down_param *param = _param;
 412        int err;
 413
 414        /* Ensure this CPU doesn't handle any more interrupts. */
 415        err = __cpu_disable();
 416        if (err < 0)
 417                return err;
 418
 419        cpu_notify(CPU_DYING | param->mod, param->hcpu);
 420        /* Park the stopper thread */
 421        stop_machine_park((long)param->hcpu);
 422        return 0;
 423}
 424
 425/* Requires cpu_add_remove_lock to be held */
 426static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 427{
 428        int err, nr_calls = 0;
 429        void *hcpu = (void *)(long)cpu;
 430        unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 431        struct take_cpu_down_param tcd_param = {
 432                .mod = mod,
 433                .hcpu = hcpu,
 434        };
 435
 436        if (num_online_cpus() == 1)
 437                return -EBUSY;
 438
 439        if (!cpu_online(cpu))
 440                return -EINVAL;
 441
 442        /*
 443         * The sibling_mask will be cleared in take_cpu_down when the
 444         * operation succeeds. So we can't test the sibling mask after
  445         * that. We need to call sched_cpu_activate() if it fails.
 446         */
 447        sched_cpu_deactivate(cpu);
 448
 449        cpu_hotplug_begin();
 450
 451        err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
 452        if (err) {
 453                nr_calls--;
 454                __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
 455                printk("%s: attempt to take down CPU %u failed\n",
 456                                __func__, cpu);
 457                goto out_release;
 458        }
 459
 460        /*
 461         * By now we've cleared cpu_active_mask, wait for all preempt-disabled
 462         * and RCU users of this state to go away such that all new such users
 463         * will observe it.
 464         *
 465         * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
 466         * not imply sync_sched(), so explicitly call both.
 467         *
  468         * Do the sync before parking smpboot threads to handle the RCU boost case.
 469         */
 470#ifdef CONFIG_PREEMPT
 471        synchronize_sched();
 472#endif
 473        synchronize_rcu();
 474
 475        smpboot_park_threads(cpu);
 476
 477        /*
 478         * So now all preempt/rcu users must observe !cpu_active().
 479         */
 480
 481        err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 482        if (err) {
 483                /* CPU didn't die: tell everyone.  Can't complain. */
 484                smpboot_unpark_threads(cpu);
 485                cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
 486                goto out_release;
 487        }
 488        BUG_ON(cpu_online(cpu));
 489
 490        /*
 491         * The migration_call() CPU_DYING callback will have removed all
 492         * runnable tasks from the cpu, there's only the idle task left now
 493         * that the migration thread is done doing the stop_machine thing.
 494         *
 495         * Wait for the stop thread to go away.
 496         */
 497        while (!idle_cpu(cpu))
 498                cpu_relax();
 499
 500        hotplug_cpu__broadcast_tick_pull(cpu);
 501        /* This actually kills the CPU. */
 502        __cpu_die(cpu);
 503
 504        /* CPU is completely dead: tell everyone.  Too late to complain. */
 505        cpu_notify_nofail(CPU_DEAD | mod, hcpu);
 506
 507        check_for_tasks(cpu);
 508
 509out_release:
 510        cpu_hotplug_done();
 511        if (!err)
 512                cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
 513        else
 514                sched_cpu_activate(cpu); /* Revert sched_cpu_deactivate() */
 515        arch_smt_update();
 516        return err;
 517}
 518
 519static int cpu_down_maps_locked(unsigned int cpu)
 520{
 521        if (cpu_hotplug_disabled)
 522                return -EBUSY;
 523        return _cpu_down(cpu, 0);
 524}
 525
 526int __ref cpu_down(unsigned int cpu)
 527{
 528        int err;
 529
 530        cpu_maps_update_begin();
 531        err = cpu_down_maps_locked(cpu);
 532        cpu_maps_update_done();
 533        return err;
 534}
 535EXPORT_SYMBOL(cpu_down);
 536#endif /*CONFIG_HOTPLUG_CPU*/
 537
 538/* Requires cpu_add_remove_lock to be held */
 539static int _cpu_up(unsigned int cpu, int tasks_frozen)
 540{
 541        int ret, nr_calls = 0;
 542        void *hcpu = (void *)(long)cpu;
 543        unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 544        struct task_struct *idle;
 545
 546        cpu_hotplug_begin();
 547
 548        if (cpu_online(cpu) || !cpu_present(cpu)) {
 549                ret = -EINVAL;
 550                goto out;
 551        }
 552
 553        idle = idle_thread_get(cpu);
 554        if (IS_ERR(idle)) {
 555                ret = PTR_ERR(idle);
 556                goto out;
 557        }
 558
 559        ret = smpboot_create_threads(cpu);
 560        if (ret)
 561                goto out;
 562
 563        ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
 564        if (ret) {
 565                nr_calls--;
 566                printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
 567                                __func__, cpu);
 568                goto out_notify;
 569        }
 570
 571        /* Arch-specific enabling code. */
 572        ret = __cpu_up(cpu, idle);
 573        if (ret != 0)
 574                goto out_notify;
 575        BUG_ON(!cpu_online(cpu));
 576
 577        /* Wake the per cpu threads */
 578        smpboot_unpark_threads(cpu);
 579
 580        /* Now call notifier in preparation. */
 581        cpu_notify(CPU_ONLINE | mod, hcpu);
 582
 583out_notify:
 584        if (ret != 0)
 585                __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
 586out:
 587        cpu_hotplug_done();
 588        if (!ret)
 589                sched_cpu_activate(cpu);
 590        arch_smt_update();
 591
 592        return ret;
 593}
 594
 595int cpu_up(unsigned int cpu)
 596{
 597        int err = 0;
 598
 599        if (!cpu_possible(cpu)) {
 600                printk(KERN_ERR "can't online cpu %d because it is not "
 601                        "configured as may-hotadd at boot time\n", cpu);
 602#if defined(CONFIG_IA64)
 603                printk(KERN_ERR "please check additional_cpus= boot "
 604                                "parameter\n");
 605#endif
 606                return -EINVAL;
 607        }
 608
 609        err = try_online_node(cpu_to_node(cpu));
 610        if (err)
 611                return err;
 612
 613        cpu_maps_update_begin();
 614
 615        if (cpu_hotplug_disabled) {
 616                err = -EBUSY;
 617                goto out;
 618        }
 619        if (!cpu_smt_allowed(cpu)) {
 620                err = -EPERM;
 621                goto out;
 622        }
 623
 624        err = _cpu_up(cpu, 0);
 625
 626out:
 627        cpu_maps_update_done();
 628        return err;
 629}
 630EXPORT_SYMBOL_GPL(cpu_up);
 631
 632#ifdef CONFIG_PM_SLEEP_SMP
 633static cpumask_var_t frozen_cpus;
 634
 635int disable_nonboot_cpus(void)
 636{
 637        int cpu, first_cpu, error = 0;
 638
 639        cpu_maps_update_begin();
 640        first_cpu = cpumask_first(cpu_online_mask);
 641        /*
 642         * We take down all of the non-boot CPUs in one shot to avoid races
  643         * with userspace trying to use CPU hotplug at the same time.
 644         */
 645        cpumask_clear(frozen_cpus);
 646
 647        printk("Disabling non-boot CPUs ...\n");
 648        for_each_online_cpu(cpu) {
 649                if (cpu == first_cpu)
 650                        continue;
 651                error = _cpu_down(cpu, 1);
 652                if (!error)
 653                        cpumask_set_cpu(cpu, frozen_cpus);
 654                else {
 655                        printk(KERN_ERR "Error taking CPU%d down: %d\n",
 656                                cpu, error);
 657                        break;
 658                }
 659        }
 660
 661        if (!error) {
 662                BUG_ON(num_online_cpus() > 1);
 663                /* Make sure the CPUs won't be enabled by someone else */
 664                cpu_hotplug_disabled = 1;
 665        } else {
 666                printk(KERN_ERR "Non-boot CPUs are not disabled\n");
 667        }
 668        cpu_maps_update_done();
 669        return error;
 670}
 671
 672void __weak arch_enable_nonboot_cpus_begin(void)
 673{
 674}
 675
 676void __weak arch_enable_nonboot_cpus_end(void)
 677{
 678}
 679
 680void __ref enable_nonboot_cpus(void)
 681{
 682        int cpu, error;
 683
 684        /* Allow everyone to use the CPU hotplug again */
 685        cpu_maps_update_begin();
 686        cpu_hotplug_disabled = 0;
 687        if (cpumask_empty(frozen_cpus))
 688                goto out;
 689
 690        printk(KERN_INFO "Enabling non-boot CPUs ...\n");
 691
 692        arch_enable_nonboot_cpus_begin();
 693
 694        for_each_cpu(cpu, frozen_cpus) {
 695                error = _cpu_up(cpu, 1);
 696                if (!error) {
 697                        printk(KERN_INFO "CPU%d is up\n", cpu);
 698                        continue;
 699                }
 700                printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
 701        }
 702
 703        arch_enable_nonboot_cpus_end();
 704
 705        cpumask_clear(frozen_cpus);
 706out:
 707        cpu_maps_update_done();
 708}
 709
 710static int __init alloc_frozen_cpus(void)
 711{
 712        if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
 713                return -ENOMEM;
 714        return 0;
 715}
 716core_initcall(alloc_frozen_cpus);
 717
 718/*
 719 * When callbacks for CPU hotplug notifications are being executed, we must
 720 * ensure that the state of the system with respect to the tasks being frozen
 721 * or not, as reported by the notification, remains unchanged *throughout the
 722 * duration* of the execution of the callbacks.
 723 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
 724 *
 725 * This synchronization is implemented by mutually excluding regular CPU
 726 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
 727 * Hibernate notifications.
 728 */
 729static int
 730cpu_hotplug_pm_callback(struct notifier_block *nb,
 731                        unsigned long action, void *ptr)
 732{
 733        switch (action) {
 734
 735        case PM_SUSPEND_PREPARE:
 736        case PM_HIBERNATION_PREPARE:
 737                cpu_hotplug_disable();
 738                break;
 739
 740        case PM_POST_SUSPEND:
 741        case PM_POST_HIBERNATION:
 742                cpu_hotplug_enable();
 743                break;
 744
 745        default:
 746                return NOTIFY_DONE;
 747        }
 748
 749        return NOTIFY_OK;
 750}
 751
 752
 753static int __init cpu_hotplug_pm_sync_init(void)
 754{
 755        /*
 756         * cpu_hotplug_pm_callback has higher priority than x86
  757         * bsp_pm_callback, which depends on cpu_hotplug_pm_callback
  758         * having disabled cpu hotplug to avoid a cpu hotplug race.
 759         */
 760        pm_notifier(cpu_hotplug_pm_callback, 0);
 761        return 0;
 762}
 763core_initcall(cpu_hotplug_pm_sync_init);
 764
 765#endif /* CONFIG_PM_SLEEP_SMP */
 766
 767/**
 768 * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
 769 * @cpu: cpu that just started
 770 *
 771 * This function calls the cpu_chain notifiers with CPU_STARTING.
 772 * It must be called by the arch code on the new cpu, before the new cpu
 773 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 774 */
 775void notify_cpu_starting(unsigned int cpu)
 776{
 777        unsigned long val = CPU_STARTING;
 778
 779        per_cpu(booted_once, cpu) = true;
 780#ifdef CONFIG_PM_SLEEP_SMP
 781        if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
 782                val = CPU_STARTING_FROZEN;
 783#endif /* CONFIG_PM_SLEEP_SMP */
 784        cpu_notify(val, (void *)(long)cpu);
 785}
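/*
 * Illustrative arch-side sketch (the function name is hypothetical): the
 * secondary-CPU startup path is expected to invoke this before marking the
 * CPU online and before enabling interrupts, per the comment above:
 *
 *	void hypothetical_arch_secondary_start(unsigned int cpu)
 *	{
 *		notify_cpu_starting(cpu);
 *		set_cpu_online(cpu, true);
 *		local_irq_enable();
 *		cpu_startup_entry(CPUHP_ONLINE);
 *	}
 */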
 786
 787#endif /* CONFIG_SMP */
 788
 789#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
 790
 791#ifdef CONFIG_HOTPLUG_SMT
 792
 793static void cpuhp_offline_cpu_device(unsigned int cpu)
 794{
 795        struct device *dev = get_cpu_device(cpu);
 796
 797        dev->offline = true;
 798        /* Tell user space about the state change */
 799        kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
 800}
 801
 802static void cpuhp_online_cpu_device(unsigned int cpu)
 803{
 804        struct device *dev = get_cpu_device(cpu);
 805
 806        dev->offline = false;
 807        /* Tell user space about the state change */
 808        kobject_uevent(&dev->kobj, KOBJ_ONLINE);
 809}
 810
 811static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 812{
 813        int cpu, ret = 0;
 814
 815        cpu_maps_update_begin();
 816        for_each_online_cpu(cpu) {
 817                if (topology_is_primary_thread(cpu))
 818                        continue;
 819                ret = cpu_down_maps_locked(cpu);
 820                if (ret)
 821                        break;
 822                /*
 823                 * As this needs to hold the cpu maps lock it's impossible
 824                 * to call device_offline() because that ends up calling
 825                 * cpu_down() which takes cpu maps lock. cpu maps lock
 826                 * needs to be held as this might race against in kernel
 827                 * abusers of the hotplug machinery (thermal management).
 828                 *
 829                 * So nothing would update device:offline state. That would
 830                 * leave the sysfs entry stale and prevent onlining after
 831                 * smt control has been changed to 'off' again. This is
 832                 * called under the sysfs hotplug lock, so it is properly
 833                 * serialized against the regular offline usage.
 834                 */
 835                cpuhp_offline_cpu_device(cpu);
 836        }
 837        if (!ret)
 838                cpu_smt_control = ctrlval;
 839        cpu_maps_update_done();
 840        return ret;
 841}
 842
 843static int cpuhp_smt_enable(void)
 844{
 845        int cpu, ret = 0;
 846
 847        cpu_maps_update_begin();
 848        cpu_smt_control = CPU_SMT_ENABLED;
 849        for_each_present_cpu(cpu) {
 850                /* Skip online CPUs and CPUs on offline nodes */
 851                if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
 852                        continue;
 853                ret = _cpu_up(cpu, 0);
 854                if (ret)
 855                        break;
 856                /* See comment in cpuhp_smt_disable() */
 857                cpuhp_online_cpu_device(cpu);
 858        }
 859        cpu_maps_update_done();
 860        return ret;
 861}
 862
 863
 864static ssize_t
 865__store_smt_control(struct device *dev, struct device_attribute *attr,
 866                    const char *buf, size_t count)
 867{
 868        int ctrlval, ret;
 869
 870        if (sysfs_streq(buf, "on"))
 871                ctrlval = CPU_SMT_ENABLED;
 872        else if (sysfs_streq(buf, "off"))
 873                ctrlval = CPU_SMT_DISABLED;
 874        else if (sysfs_streq(buf, "forceoff"))
 875                ctrlval = CPU_SMT_FORCE_DISABLED;
 876        else
 877                return -EINVAL;
 878
 879        if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
 880                return -EPERM;
 881
 882        if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
 883                return -ENODEV;
 884
 885        ret = lock_device_hotplug_sysfs();
 886        if (ret)
 887                return ret;
 888
 889        if (ctrlval != cpu_smt_control) {
 890                switch (ctrlval) {
 891                case CPU_SMT_ENABLED:
 892                        ret = cpuhp_smt_enable();
 893                        break;
 894                case CPU_SMT_DISABLED:
 895                case CPU_SMT_FORCE_DISABLED:
 896                        ret = cpuhp_smt_disable(ctrlval);
 897                        break;
 898                }
 899        }
 900
 901        unlock_device_hotplug();
 902        return ret ? ret : count;
 903}
 904
 905#else /* !CONFIG_HOTPLUG_SMT */
 906static ssize_t
 907__store_smt_control(struct device *dev, struct device_attribute *attr,
 908                    const char *buf, size_t count)
 909{
 910        return -ENODEV;
 911}
 912#endif /* CONFIG_HOTPLUG_SMT */
 913
 914static const char *smt_states[] = {
 915        [CPU_SMT_ENABLED]               = "on",
 916        [CPU_SMT_DISABLED]              = "off",
 917        [CPU_SMT_FORCE_DISABLED]        = "forceoff",
 918        [CPU_SMT_NOT_SUPPORTED]         = "notsupported",
 919        [CPU_SMT_NOT_IMPLEMENTED]       = "notimplemented",
 920};
 921
 922static ssize_t
 923show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
 924{
 925        const char *state = smt_states[cpu_smt_control];
 926
 927        return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
 928}
 929
 930static ssize_t
 931store_smt_control(struct device *dev, struct device_attribute *attr,
 932                  const char *buf, size_t count)
 933{
 934        return __store_smt_control(dev, attr, buf, count);
 935}
 936static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
 937
 938static ssize_t
 939show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
 940{
 941        return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
 942}
 943static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
 944
 945static struct attribute *cpuhp_smt_attrs[] = {
 946        &dev_attr_control.attr,
 947        &dev_attr_active.attr,
 948        NULL
 949};
 950
 951static const struct attribute_group cpuhp_smt_attr_group = {
 952        .attrs = cpuhp_smt_attrs,
 953        .name = "smt",
 954        NULL
 955};
 956
 957static int __init cpu_smt_sysfs_init(void)
 958{
 959        return sysfs_create_group(&cpu_subsys.dev_root->kobj,
 960                                  &cpuhp_smt_attr_group);
 961}
 962
 963static int __init cpuhp_sysfs_init(void)
 964{
 965        return cpu_smt_sysfs_init();
 966}
 967device_initcall(cpuhp_sysfs_init);
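/*
 * Resulting interface (sketch, assuming the standard sysfs layout): the
 * attribute group above appears under the cpu subsystem root, e.g.
 *
 *	/sys/devices/system/cpu/smt/control	("on", "off", "forceoff", ...)
 *	/sys/devices/system/cpu/smt/active	(0/1 from sched_smt_active())
 *
 * Writing "off" walks the online CPUs and offlines every non-primary
 * sibling via cpuhp_smt_disable() above.
 */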
 968#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
 969
 970/*
 971 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 972 * represents all NR_CPUS bits binary values of 1<<nr.
 973 *
 974 * It is used by cpumask_of() to get a constant address to a CPU
 975 * mask value that has a single bit set only.
 976 */
 977
 978/* cpu_bit_bitmap[0] is empty - so we can back into it */
 979#define MASK_DECLARE_1(x)       [x+1][0] = (1UL << (x))
 980#define MASK_DECLARE_2(x)       MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
 981#define MASK_DECLARE_4(x)       MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
 982#define MASK_DECLARE_8(x)       MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
 983
 984const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
 985
 986        MASK_DECLARE_8(0),      MASK_DECLARE_8(8),
 987        MASK_DECLARE_8(16),     MASK_DECLARE_8(24),
 988#if BITS_PER_LONG > 32
 989        MASK_DECLARE_8(32),     MASK_DECLARE_8(40),
 990        MASK_DECLARE_8(48),     MASK_DECLARE_8(56),
 991#endif
 992};
 993EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
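/*
 * Illustrative use: cpumask_of(cpu) (see <linux/cpumask.h>) returns a
 * pointer into this table, so callers get a constant single-bit mask
 * without allocating one, e.g. the stop_machine call in _cpu_down() above:
 *
 *	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 */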
 994
 995const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
 996EXPORT_SYMBOL(cpu_all_bits);
 997
 998#ifdef CONFIG_INIT_ALL_POSSIBLE
 999static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
1000        = CPU_BITS_ALL;
1001#else
1002static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
1003#endif
1004const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
1005EXPORT_SYMBOL(cpu_possible_mask);
1006
1007static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
1008const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
1009EXPORT_SYMBOL(cpu_online_mask);
1010
1011static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
1012const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
1013EXPORT_SYMBOL(cpu_present_mask);
1014
1015static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
1016const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
1017EXPORT_SYMBOL(cpu_active_mask);
1018
1019void set_cpu_possible(unsigned int cpu, bool possible)
1020{
1021        if (possible)
1022                cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
1023        else
1024                cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
1025}
1026
1027void set_cpu_present(unsigned int cpu, bool present)
1028{
1029        if (present)
1030                cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
1031        else
1032                cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
1033}
1034
1035void set_cpu_online(unsigned int cpu, bool online)
1036{
1037        if (online)
1038                cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
1039        else
1040                cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
1041}
1042
1043void set_cpu_active(unsigned int cpu, bool active)
1044{
1045        if (active)
1046                cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
1047        else
1048                cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
1049}
1050
1051void reset_cpu_possible_mask(void)
1052{
1053        bitmap_zero(cpu_possible_bits, NR_CPUS);
1054}
1055
1056void init_cpu_present(const struct cpumask *src)
1057{
1058        cpumask_copy(to_cpumask(cpu_present_bits), src);
1059}
1060
1061void init_cpu_possible(const struct cpumask *src)
1062{
1063        cpumask_copy(to_cpumask(cpu_possible_bits), src);
1064}
1065
1066void init_cpu_online(const struct cpumask *src)
1067{
1068        cpumask_copy(to_cpumask(cpu_online_bits), src);
1069}
1070
1071void __init boot_cpu_state_init(void)
1072{
1073        this_cpu_write(booted_once, true);
1074}
1075
1076/*
1077 * These are used for a global "mitigations=" cmdline option for toggling
1078 * optional CPU mitigations.
1079 */
1080enum cpu_mitigations {
1081        CPU_MITIGATIONS_OFF,
1082        CPU_MITIGATIONS_AUTO,
1083        CPU_MITIGATIONS_AUTO_NOSMT,
1084};
1085
1086static enum cpu_mitigations cpu_mitigations __read_mostly =
1087        CPU_MITIGATIONS_AUTO;
1088
1089static int __init mitigations_parse_cmdline(char *arg)
1090{
1091        if (!strcmp(arg, "off"))
1092                cpu_mitigations = CPU_MITIGATIONS_OFF;
1093        else if (!strcmp(arg, "auto"))
1094                cpu_mitigations = CPU_MITIGATIONS_AUTO;
1095        else if (!strcmp(arg, "auto,nosmt"))
1096                cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
1097
1098        return 0;
1099}
1100early_param("mitigations", mitigations_parse_cmdline);
1101
1102/* mitigations=off */
1103bool cpu_mitigations_off(void)
1104{
1105        return cpu_mitigations == CPU_MITIGATIONS_OFF;
1106}
1107EXPORT_SYMBOL_GPL(cpu_mitigations_off);
1108
1109/* mitigations=auto,nosmt */
1110bool cpu_mitigations_auto_nosmt(void)
1111{
1112        return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
1113}
1114EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
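/*
 * Illustrative usage (sketch): the command line selects one of the modes
 * parsed above, and architecture mitigation code is expected to consult the
 * helpers when choosing its defaults.  select_mitigation() is hypothetical:
 *
 *	... mitigations=off
 *	... mitigations=auto,nosmt
 *
 *	static void select_mitigation(void)
 *	{
 *		if (cpu_mitigations_off())
 *			return;
 *		if (cpu_mitigations_auto_nosmt())
 *			cpu_smt_disable(false);
 *	}
 */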
1115