linux/kernel/cpu.c
   1/* CPU control.
   2 * (C) 2001, 2002, 2003, 2004 Rusty Russell
   3 *
   4 * This code is licensed under the GPL.
   5 */
   6#include <linux/proc_fs.h>
   7#include <linux/smp.h>
   8#include <linux/init.h>
   9#include <linux/notifier.h>
  10#include <linux/sched/signal.h>
  11#include <linux/sched/hotplug.h>
  12#include <linux/sched/task.h>
  13#include <linux/unistd.h>
  14#include <linux/cpu.h>
  15#include <linux/oom.h>
  16#include <linux/rcupdate.h>
  17#include <linux/export.h>
  18#include <linux/bug.h>
  19#include <linux/kthread.h>
  20#include <linux/stop_machine.h>
  21#include <linux/mutex.h>
  22#include <linux/gfp.h>
  23#include <linux/suspend.h>
  24#include <linux/lockdep.h>
  25#include <linux/tick.h>
  26#include <linux/irq.h>
  27#include <linux/smpboot.h>
  28#include <linux/relay.h>
  29#include <linux/slab.h>
  30
  31#include <trace/events/power.h>
  32#define CREATE_TRACE_POINTS
  33#include <trace/events/cpuhp.h>
  34
  35#include "smpboot.h"
  36
  37/**
  38 * cpuhp_cpu_state - Per cpu hotplug state storage
  39 * @state:      The current cpu state
  40 * @target:     The target state
  41 * @thread:     Pointer to the hotplug thread
  42 * @should_run: Thread should execute
  43 * @rollback:   Perform a rollback
  44 * @single:     Single callback invocation
  45 * @bringup:    Single callback bringup or teardown selector
  46 * @cb_state:   The state for a single callback (install/uninstall)
  47 * @result:     Result of the operation
  48 * @done:       Signal completion to the issuer of the task
  49 */
  50struct cpuhp_cpu_state {
  51        enum cpuhp_state        state;
  52        enum cpuhp_state        target;
  53#ifdef CONFIG_SMP
  54        struct task_struct      *thread;
  55        bool                    should_run;
  56        bool                    rollback;
  57        bool                    single;
  58        bool                    bringup;
  59        struct hlist_node       *node;
  60        enum cpuhp_state        cb_state;
  61        int                     result;
  62        struct completion       done;
  63#endif
  64};
  65
  66static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
  67
  68/**
  69 * cpuhp_step - Hotplug state machine step
  70 * @name:       Name of the step
  71 * @startup:    Startup function of the step
  72 * @teardown:   Teardown function of the step
  73 * @skip_onerr: Do not invoke the functions on error rollback
  74 *              Will go away once the notifiers are gone
  75 * @cant_stop:  Bringup/teardown can't be stopped at this step
  76 */
  77struct cpuhp_step {
  78        const char              *name;
  79        union {
  80                int             (*single)(unsigned int cpu);
  81                int             (*multi)(unsigned int cpu,
  82                                         struct hlist_node *node);
  83        } startup;
  84        union {
  85                int             (*single)(unsigned int cpu);
  86                int             (*multi)(unsigned int cpu,
  87                                         struct hlist_node *node);
  88        } teardown;
  89        struct hlist_head       list;
  90        bool                    skip_onerr;
  91        bool                    cant_stop;
  92        bool                    multi_instance;
  93};
  94
  95static DEFINE_MUTEX(cpuhp_state_mutex);
  96static struct cpuhp_step cpuhp_bp_states[];
  97static struct cpuhp_step cpuhp_ap_states[];
  98
  99static bool cpuhp_is_ap_state(enum cpuhp_state state)
 100{
 101        /*
 102         * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
 103         * purposes as that state is handled explicitly in cpu_down.
 104         */
 105        return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
 106}
 107
 108static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
 109{
 110        struct cpuhp_step *sp;
 111
 112        sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
 113        return sp + state;
 114}
 115
 116/**
  117 * cpuhp_invoke_callback - Invoke the callbacks for a given state
  118 * @cpu:        The cpu for which the callback should be invoked
  119 * @state:      The state to invoke the callback for
  120 * @bringup:    True if the bringup callback should be invoked
 121 *
 122 * Called from cpu hotplug and from the state register machinery.
 123 */
 124static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
 125                                 bool bringup, struct hlist_node *node)
 126{
 127        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 128        struct cpuhp_step *step = cpuhp_get_step(state);
 129        int (*cbm)(unsigned int cpu, struct hlist_node *node);
 130        int (*cb)(unsigned int cpu);
 131        int ret, cnt;
 132
 133        if (!step->multi_instance) {
 134                cb = bringup ? step->startup.single : step->teardown.single;
 135                if (!cb)
 136                        return 0;
 137                trace_cpuhp_enter(cpu, st->target, state, cb);
 138                ret = cb(cpu);
 139                trace_cpuhp_exit(cpu, st->state, state, ret);
 140                return ret;
 141        }
 142        cbm = bringup ? step->startup.multi : step->teardown.multi;
 143        if (!cbm)
 144                return 0;
 145
 146        /* Single invocation for instance add/remove */
 147        if (node) {
 148                trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
 149                ret = cbm(cpu, node);
 150                trace_cpuhp_exit(cpu, st->state, state, ret);
 151                return ret;
 152        }
 153
 154        /* State transition. Invoke on all instances */
 155        cnt = 0;
 156        hlist_for_each(node, &step->list) {
 157                trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
 158                ret = cbm(cpu, node);
 159                trace_cpuhp_exit(cpu, st->state, state, ret);
 160                if (ret)
 161                        goto err;
 162                cnt++;
 163        }
 164        return 0;
 165err:
 166        /* Rollback the instances if one failed */
 167        cbm = !bringup ? step->startup.multi : step->teardown.multi;
 168        if (!cbm)
 169                return ret;
 170
 171        hlist_for_each(node, &step->list) {
 172                if (!cnt--)
 173                        break;
 174                cbm(cpu, node);
 175        }
 176        return ret;
 177}
 178
 179#ifdef CONFIG_SMP
 180/* Serializes the updates to cpu_online_mask, cpu_present_mask */
 181static DEFINE_MUTEX(cpu_add_remove_lock);
 182bool cpuhp_tasks_frozen;
 183EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
 184
 185/*
 186 * The following two APIs (cpu_maps_update_begin/done) must be used when
 187 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
 188 */
 189void cpu_maps_update_begin(void)
 190{
 191        mutex_lock(&cpu_add_remove_lock);
 192}
 193
 194void cpu_maps_update_done(void)
 195{
 196        mutex_unlock(&cpu_add_remove_lock);
 197}
 198
 199/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 200 * Should always be manipulated under cpu_add_remove_lock
 201 */
 202static int cpu_hotplug_disabled;
 203
 204#ifdef CONFIG_HOTPLUG_CPU
 205
 206static struct {
 207        struct task_struct *active_writer;
 208        /* wait queue to wake up the active_writer */
 209        wait_queue_head_t wq;
 210        /* verifies that no writer will get active while readers are active */
 211        struct mutex lock;
 212        /*
 213         * Also blocks the new readers during
 214         * an ongoing cpu hotplug operation.
 215         */
 216        atomic_t refcount;
 217
 218#ifdef CONFIG_DEBUG_LOCK_ALLOC
 219        struct lockdep_map dep_map;
 220#endif
 221} cpu_hotplug = {
 222        .active_writer = NULL,
 223        .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
 224        .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
 225#ifdef CONFIG_DEBUG_LOCK_ALLOC
 226        .dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
 227#endif
 228};
 229
 230/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
 231#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
 232#define cpuhp_lock_acquire_tryread() \
 233                                  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
 234#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
 235#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
 236
 237
 238void get_online_cpus(void)
 239{
 240        might_sleep();
 241        if (cpu_hotplug.active_writer == current)
 242                return;
 243        cpuhp_lock_acquire_read();
 244        mutex_lock(&cpu_hotplug.lock);
 245        atomic_inc(&cpu_hotplug.refcount);
 246        mutex_unlock(&cpu_hotplug.lock);
 247}
 248EXPORT_SYMBOL_GPL(get_online_cpus);
 249
 250void put_online_cpus(void)
 251{
 252        int refcount;
 253
 254        if (cpu_hotplug.active_writer == current)
 255                return;
 256
 257        refcount = atomic_dec_return(&cpu_hotplug.refcount);
 258        if (WARN_ON(refcount < 0)) /* try to fix things up */
 259                atomic_inc(&cpu_hotplug.refcount);
 260
 261        if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
 262                wake_up(&cpu_hotplug.wq);
 263
 264        cpuhp_lock_release();
 265
 266}
 267EXPORT_SYMBOL_GPL(put_online_cpus);
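/*
 * Usage sketch (editor's illustration, not part of the original file): a
 * reader-side caller pins the set of online CPUs across an iteration. The
 * helper and its callback are hypothetical; the APIs are the ones above.
 *
 *	static void walk_online_cpus(void (*fn)(unsigned int cpu))
 *	{
 *		unsigned int cpu;
 *
 *		get_online_cpus();
 *		for_each_online_cpu(cpu)
 *			fn(cpu);
 *		put_online_cpus();
 *	}
 */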
 268
 269/*
 270 * This ensures that the hotplug operation can begin only when the
 271 * refcount goes to zero.
 272 *
 273 * Note that during a cpu-hotplug operation, the new readers, if any,
 274 * will be blocked by the cpu_hotplug.lock
 275 *
 276 * Since cpu_hotplug_begin() is always called after invoking
 277 * cpu_maps_update_begin(), we can be sure that only one writer is active.
 278 *
 279 * Note that theoretically, there is a possibility of a livelock:
 280 * - Refcount goes to zero, last reader wakes up the sleeping
 281 *   writer.
 282 * - Last reader unlocks the cpu_hotplug.lock.
 283 * - A new reader arrives at this moment, bumps up the refcount.
  284 * - The writer acquires the cpu_hotplug.lock, finds the refcount
  285 *   non-zero and goes to sleep again.
  286 *
  287 * However, this is very difficult to achieve in practice since
  288 * get_online_cpus() is not an API which is called all that often.
 289 *
 290 */
 291void cpu_hotplug_begin(void)
 292{
 293        DEFINE_WAIT(wait);
 294
 295        cpu_hotplug.active_writer = current;
 296        cpuhp_lock_acquire();
 297
 298        for (;;) {
 299                mutex_lock(&cpu_hotplug.lock);
 300                prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
 301                if (likely(!atomic_read(&cpu_hotplug.refcount)))
 302                                break;
 303                mutex_unlock(&cpu_hotplug.lock);
 304                schedule();
 305        }
 306        finish_wait(&cpu_hotplug.wq, &wait);
 307}
 308
 309void cpu_hotplug_done(void)
 310{
 311        cpu_hotplug.active_writer = NULL;
 312        mutex_unlock(&cpu_hotplug.lock);
 313        cpuhp_lock_release();
 314}
 315
 316/*
 317 * Wait for currently running CPU hotplug operations to complete (if any) and
 318 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 319 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 320 * hotplug path before performing hotplug operations. So acquiring that lock
 321 * guarantees mutual exclusion from any currently running hotplug operations.
 322 */
 323void cpu_hotplug_disable(void)
 324{
 325        cpu_maps_update_begin();
 326        cpu_hotplug_disabled++;
 327        cpu_maps_update_done();
 328}
 329EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
 330
 331static void __cpu_hotplug_enable(void)
 332{
 333        if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
 334                return;
 335        cpu_hotplug_disabled--;
 336}
 337
 338void cpu_hotplug_enable(void)
 339{
 340        cpu_maps_update_begin();
 341        __cpu_hotplug_enable();
 342        cpu_maps_update_done();
 343}
 344EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
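/*
 * Usage sketch (editor's illustration): code that must keep CPUs from being
 * hotplugged across a longer, sleepable section brackets it with the
 * disable/enable pair instead of holding get_online_cpus() the whole time.
 * do_slow_reconfiguration() is a hypothetical placeholder.
 *
 *	cpu_hotplug_disable();
 *	do_slow_reconfiguration();
 *	cpu_hotplug_enable();
 */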
 345#endif  /* CONFIG_HOTPLUG_CPU */
 346
 347/* Notifier wrappers for transitioning to state machine */
 348
 349static int bringup_wait_for_ap(unsigned int cpu)
 350{
 351        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 352
 353        wait_for_completion(&st->done);
 354        return st->result;
 355}
 356
 357static int bringup_cpu(unsigned int cpu)
 358{
 359        struct task_struct *idle = idle_thread_get(cpu);
 360        int ret;
 361
 362        /*
 363         * Some architectures have to walk the irq descriptors to
  364         * set up the vector space for the cpu which comes online.
 365         * Prevent irq alloc/free across the bringup.
 366         */
 367        irq_lock_sparse();
 368
 369        /* Arch-specific enabling code. */
 370        ret = __cpu_up(cpu, idle);
 371        irq_unlock_sparse();
 372        if (ret)
 373                return ret;
 374        ret = bringup_wait_for_ap(cpu);
 375        BUG_ON(!cpu_online(cpu));
 376        return ret;
 377}
 378
 379/*
 380 * Hotplug state machine related functions
 381 */
 382static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
 383{
 384        for (st->state++; st->state < st->target; st->state++) {
 385                struct cpuhp_step *step = cpuhp_get_step(st->state);
 386
 387                if (!step->skip_onerr)
 388                        cpuhp_invoke_callback(cpu, st->state, true, NULL);
 389        }
 390}
 391
 392static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 393                                enum cpuhp_state target)
 394{
 395        enum cpuhp_state prev_state = st->state;
 396        int ret = 0;
 397
 398        for (; st->state > target; st->state--) {
 399                ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
 400                if (ret) {
 401                        st->target = prev_state;
 402                        undo_cpu_down(cpu, st);
 403                        break;
 404                }
 405        }
 406        return ret;
 407}
 408
 409static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
 410{
 411        for (st->state--; st->state > st->target; st->state--) {
 412                struct cpuhp_step *step = cpuhp_get_step(st->state);
 413
 414                if (!step->skip_onerr)
 415                        cpuhp_invoke_callback(cpu, st->state, false, NULL);
 416        }
 417}
 418
 419static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 420                              enum cpuhp_state target)
 421{
 422        enum cpuhp_state prev_state = st->state;
 423        int ret = 0;
 424
 425        while (st->state < target) {
 426                st->state++;
 427                ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
 428                if (ret) {
 429                        st->target = prev_state;
 430                        undo_cpu_up(cpu, st);
 431                        break;
 432                }
 433        }
 434        return ret;
 435}
 436
 437/*
 438 * The cpu hotplug threads manage the bringup and teardown of the cpus
 439 */
 440static void cpuhp_create(unsigned int cpu)
 441{
 442        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 443
 444        init_completion(&st->done);
 445}
 446
 447static int cpuhp_should_run(unsigned int cpu)
 448{
 449        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 450
 451        return st->should_run;
 452}
 453
 454/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
 455static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
 456{
 457        enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
 458
 459        return cpuhp_down_callbacks(cpu, st, target);
 460}
 461
 462/* Execute the online startup callbacks. Used to be CPU_ONLINE */
 463static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
 464{
 465        return cpuhp_up_callbacks(cpu, st, st->target);
 466}
 467
 468/*
 469 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
 470 * callbacks when a state gets [un]installed at runtime.
 471 */
 472static void cpuhp_thread_fun(unsigned int cpu)
 473{
 474        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 475        int ret = 0;
 476
 477        /*
 478         * Paired with the mb() in cpuhp_kick_ap_work and
  479         * cpuhp_invoke_ap_callback, so the work set is consistently visible.
 480         */
 481        smp_mb();
 482        if (!st->should_run)
 483                return;
 484
 485        st->should_run = false;
 486
 487        /* Single callback invocation for [un]install ? */
 488        if (st->single) {
 489                if (st->cb_state < CPUHP_AP_ONLINE) {
 490                        local_irq_disable();
 491                        ret = cpuhp_invoke_callback(cpu, st->cb_state,
 492                                                    st->bringup, st->node);
 493                        local_irq_enable();
 494                } else {
 495                        ret = cpuhp_invoke_callback(cpu, st->cb_state,
 496                                                    st->bringup, st->node);
 497                }
 498        } else if (st->rollback) {
 499                BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
 500
 501                undo_cpu_down(cpu, st);
 502                st->rollback = false;
 503        } else {
 504                /* Cannot happen .... */
 505                BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
 506
 507                /* Regular hotplug work */
 508                if (st->state < st->target)
 509                        ret = cpuhp_ap_online(cpu, st);
 510                else if (st->state > st->target)
 511                        ret = cpuhp_ap_offline(cpu, st);
 512        }
 513        st->result = ret;
 514        complete(&st->done);
 515}
 516
 517/* Invoke a single callback on a remote cpu */
 518static int
 519cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
 520                         struct hlist_node *node)
 521{
 522        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 523
 524        if (!cpu_online(cpu))
 525                return 0;
 526
 527        /*
 528         * If we are up and running, use the hotplug thread. For early calls
 529         * we invoke the thread function directly.
 530         */
 531        if (!st->thread)
 532                return cpuhp_invoke_callback(cpu, state, bringup, node);
 533
 534        st->cb_state = state;
 535        st->single = true;
 536        st->bringup = bringup;
 537        st->node = node;
 538
 539        /*
 540         * Make sure the above stores are visible before should_run becomes
 541         * true. Paired with the mb() above in cpuhp_thread_fun()
 542         */
 543        smp_mb();
 544        st->should_run = true;
 545        wake_up_process(st->thread);
 546        wait_for_completion(&st->done);
 547        return st->result;
 548}
 549
 550/* Regular hotplug invocation of the AP hotplug thread */
 551static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
 552{
 553        st->result = 0;
 554        st->single = false;
 555        /*
 556         * Make sure the above stores are visible before should_run becomes
 557         * true. Paired with the mb() above in cpuhp_thread_fun()
 558         */
 559        smp_mb();
 560        st->should_run = true;
 561        wake_up_process(st->thread);
 562}
 563
 564static int cpuhp_kick_ap_work(unsigned int cpu)
 565{
 566        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 567        enum cpuhp_state state = st->state;
 568
 569        trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
 570        __cpuhp_kick_ap_work(st);
 571        wait_for_completion(&st->done);
 572        trace_cpuhp_exit(cpu, st->state, state, st->result);
 573        return st->result;
 574}
 575
 576static struct smp_hotplug_thread cpuhp_threads = {
 577        .store                  = &cpuhp_state.thread,
 578        .create                 = &cpuhp_create,
 579        .thread_should_run      = cpuhp_should_run,
 580        .thread_fn              = cpuhp_thread_fun,
 581        .thread_comm            = "cpuhp/%u",
 582        .selfparking            = true,
 583};
 584
 585void __init cpuhp_threads_init(void)
 586{
 587        BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
 588        kthread_unpark(this_cpu_read(cpuhp_state.thread));
 589}
 590
 591#ifdef CONFIG_HOTPLUG_CPU
 592/**
 593 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 594 * @cpu: a CPU id
 595 *
 596 * This function walks all processes, finds a valid mm struct for each one and
 597 * then clears a corresponding bit in mm's cpumask.  While this all sounds
 598 * trivial, there are various non-obvious corner cases, which this function
 599 * tries to solve in a safe manner.
 600 *
 601 * Also note that the function uses a somewhat relaxed locking scheme, so it may
 602 * be called only for an already offlined CPU.
 603 */
 604void clear_tasks_mm_cpumask(int cpu)
 605{
 606        struct task_struct *p;
 607
 608        /*
 609         * This function is called after the cpu is taken down and marked
  610         * offline, so it's not like new tasks will ever get this cpu set in
 611         * their mm mask. -- Peter Zijlstra
 612         * Thus, we may use rcu_read_lock() here, instead of grabbing
 613         * full-fledged tasklist_lock.
 614         */
 615        WARN_ON(cpu_online(cpu));
 616        rcu_read_lock();
 617        for_each_process(p) {
 618                struct task_struct *t;
 619
 620                /*
 621                 * Main thread might exit, but other threads may still have
 622                 * a valid mm. Find one.
 623                 */
 624                t = find_lock_task_mm(p);
 625                if (!t)
 626                        continue;
 627                cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
 628                task_unlock(t);
 629        }
 630        rcu_read_unlock();
 631}
 632
 633static inline void check_for_tasks(int dead_cpu)
 634{
 635        struct task_struct *g, *p;
 636
 637        read_lock(&tasklist_lock);
 638        for_each_process_thread(g, p) {
 639                if (!p->on_rq)
 640                        continue;
 641                /*
  642                  * We do the check with task_rq(p)->lock unlocked.
  643                  * Order the reads so that we do not warn about a task
  644                  * which was running on this cpu in the past and has
  645                  * just been woken on another cpu.
 646                 */
 647                rmb();
 648                if (task_cpu(p) != dead_cpu)
 649                        continue;
 650
 651                pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
 652                        p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
 653        }
 654        read_unlock(&tasklist_lock);
 655}
 656
 657/* Take this CPU down. */
 658static int take_cpu_down(void *_param)
 659{
 660        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 661        enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
 662        int err, cpu = smp_processor_id();
 663
 664        /* Ensure this CPU doesn't handle any more interrupts. */
 665        err = __cpu_disable();
 666        if (err < 0)
 667                return err;
 668
 669        /*
 670         * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
 671         * do this step again.
 672         */
 673        WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
 674        st->state--;
 675        /* Invoke the former CPU_DYING callbacks */
 676        for (; st->state > target; st->state--)
 677                cpuhp_invoke_callback(cpu, st->state, false, NULL);
 678
 679        /* Give up timekeeping duties */
 680        tick_handover_do_timer();
 681        /* Park the stopper thread */
 682        stop_machine_park(cpu);
 683        return 0;
 684}
 685
 686static int takedown_cpu(unsigned int cpu)
 687{
 688        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 689        int err;
 690
 691        /* Park the smpboot threads */
 692        kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
 693        smpboot_park_threads(cpu);
 694
 695        /*
 696         * Prevent irq alloc/free while the dying cpu reorganizes the
 697         * interrupt affinities.
 698         */
 699        irq_lock_sparse();
 700
 701        /*
 702         * So now all preempt/rcu users must observe !cpu_active().
 703         */
 704        err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
 705        if (err) {
 706                /* CPU refused to die */
 707                irq_unlock_sparse();
 708                /* Unpark the hotplug thread so we can rollback there */
 709                kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
 710                return err;
 711        }
 712        BUG_ON(cpu_online(cpu));
 713
 714        /*
 715         * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
 716         * runnable tasks from the cpu, there's only the idle task left now
 717         * that the migration thread is done doing the stop_machine thing.
 718         *
 719         * Wait for the stop thread to go away.
 720         */
 721        wait_for_completion(&st->done);
 722        BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
 723
 724        /* Interrupts are moved away from the dying cpu, reenable alloc/free */
 725        irq_unlock_sparse();
 726
 727        hotplug_cpu__broadcast_tick_pull(cpu);
 728        /* This actually kills the CPU. */
 729        __cpu_die(cpu);
 730
 731        tick_cleanup_dead_cpu(cpu);
 732        return 0;
 733}
 734
 735static void cpuhp_complete_idle_dead(void *arg)
 736{
 737        struct cpuhp_cpu_state *st = arg;
 738
 739        complete(&st->done);
 740}
 741
 742void cpuhp_report_idle_dead(void)
 743{
 744        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 745
 746        BUG_ON(st->state != CPUHP_AP_OFFLINE);
 747        rcu_report_dead(smp_processor_id());
 748        st->state = CPUHP_AP_IDLE_DEAD;
 749        /*
 750         * We cannot call complete after rcu_report_dead() so we delegate it
 751         * to an online cpu.
 752         */
 753        smp_call_function_single(cpumask_first(cpu_online_mask),
 754                                 cpuhp_complete_idle_dead, st, 0);
 755}
 756
 757#else
 758#define takedown_cpu            NULL
 759#endif
 760
 761#ifdef CONFIG_HOTPLUG_CPU
 762
 763/* Requires cpu_add_remove_lock to be held */
 764static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 765                           enum cpuhp_state target)
 766{
 767        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 768        int prev_state, ret = 0;
 769
 770        if (num_online_cpus() == 1)
 771                return -EBUSY;
 772
 773        if (!cpu_present(cpu))
 774                return -EINVAL;
 775
 776        cpu_hotplug_begin();
 777
 778        cpuhp_tasks_frozen = tasks_frozen;
 779
 780        prev_state = st->state;
 781        st->target = target;
 782        /*
 783         * If the current CPU state is in the range of the AP hotplug thread,
 784         * then we need to kick the thread.
 785         */
 786        if (st->state > CPUHP_TEARDOWN_CPU) {
 787                ret = cpuhp_kick_ap_work(cpu);
 788                /*
 789                 * The AP side has done the error rollback already. Just
 790                 * return the error code..
 791                 */
 792                if (ret)
 793                        goto out;
 794
 795                /*
 796                 * We might have stopped still in the range of the AP hotplug
 797                 * thread. Nothing to do anymore.
 798                 */
 799                if (st->state > CPUHP_TEARDOWN_CPU)
 800                        goto out;
 801        }
 802        /*
 803         * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
 804         * to do the further cleanups.
 805         */
 806        ret = cpuhp_down_callbacks(cpu, st, target);
 807        if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
 808                st->target = prev_state;
 809                st->rollback = true;
 810                cpuhp_kick_ap_work(cpu);
 811        }
 812
 813out:
 814        cpu_hotplug_done();
 815        return ret;
 816}
 817
 818static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
 819{
 820        int err;
 821
 822        cpu_maps_update_begin();
 823
 824        if (cpu_hotplug_disabled) {
 825                err = -EBUSY;
 826                goto out;
 827        }
 828
 829        err = _cpu_down(cpu, 0, target);
 830
 831out:
 832        cpu_maps_update_done();
 833        return err;
 834}
 835int cpu_down(unsigned int cpu)
 836{
 837        return do_cpu_down(cpu, CPUHP_OFFLINE);
 838}
 839EXPORT_SYMBOL(cpu_down);
 840#endif /*CONFIG_HOTPLUG_CPU*/
 841
 842/**
 843 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
 844 * @cpu: cpu that just started
 845 *
 846 * It must be called by the arch code on the new cpu, before the new cpu
 847 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 848 */
 849void notify_cpu_starting(unsigned int cpu)
 850{
 851        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 852        enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
 853
 854        rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
 855        while (st->state < target) {
 856                st->state++;
 857                cpuhp_invoke_callback(cpu, st->state, true, NULL);
 858        }
 859}
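/*
 * Editor's sketch of the expected call site (architecture code, not this
 * file): the freshly booted CPU calls notify_cpu_starting() with interrupts
 * still disabled, before it marks itself online and enters the idle loop.
 * The surrounding function is hypothetical; details differ per architecture.
 *
 *	void secondary_start_kernel(void)
 *	{
 *		unsigned int cpu = smp_processor_id();
 *
 *		notify_cpu_starting(cpu);
 *		set_cpu_online(cpu, true);
 *		local_irq_enable();
 *		cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 *	}
 */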
 860
 861/*
 862 * Called from the idle task. We need to set active here, so we can kick off
 863 * the stopper thread and unpark the smpboot threads. If the target state is
  864 * beyond CPUHP_AP_ONLINE_IDLE we kick the cpuhp thread and let it bring up the
 865 * cpu further.
 866 */
 867void cpuhp_online_idle(enum cpuhp_state state)
 868{
 869        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 870        unsigned int cpu = smp_processor_id();
 871
 872        /* Happens for the boot cpu */
 873        if (state != CPUHP_AP_ONLINE_IDLE)
 874                return;
 875
 876        st->state = CPUHP_AP_ONLINE_IDLE;
 877
 878        /* Unpark the stopper thread and the hotplug thread of this cpu */
 879        stop_machine_unpark(cpu);
 880        kthread_unpark(st->thread);
 881
 882        /* Should we go further up ? */
 883        if (st->target > CPUHP_AP_ONLINE_IDLE)
 884                __cpuhp_kick_ap_work(st);
 885        else
 886                complete(&st->done);
 887}
 888
 889/* Requires cpu_add_remove_lock to be held */
 890static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 891{
 892        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 893        struct task_struct *idle;
 894        int ret = 0;
 895
 896        cpu_hotplug_begin();
 897
 898        if (!cpu_present(cpu)) {
 899                ret = -EINVAL;
 900                goto out;
 901        }
 902
 903        /*
 904         * The caller of do_cpu_up might have raced with another
 905         * caller. Ignore it for now.
 906         */
 907        if (st->state >= target)
 908                goto out;
 909
 910        if (st->state == CPUHP_OFFLINE) {
 911                /* Let it fail before we try to bring the cpu up */
 912                idle = idle_thread_get(cpu);
 913                if (IS_ERR(idle)) {
 914                        ret = PTR_ERR(idle);
 915                        goto out;
 916                }
 917        }
 918
 919        cpuhp_tasks_frozen = tasks_frozen;
 920
 921        st->target = target;
 922        /*
 923         * If the current CPU state is in the range of the AP hotplug thread,
 924         * then we need to kick the thread once more.
 925         */
 926        if (st->state > CPUHP_BRINGUP_CPU) {
 927                ret = cpuhp_kick_ap_work(cpu);
 928                /*
 929                 * The AP side has done the error rollback already. Just
 930                 * return the error code..
 931                 */
 932                if (ret)
 933                        goto out;
 934        }
 935
 936        /*
 937         * Try to reach the target state. We max out on the BP at
 938         * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
 939         * responsible for bringing it up to the target state.
 940         */
 941        target = min((int)target, CPUHP_BRINGUP_CPU);
 942        ret = cpuhp_up_callbacks(cpu, st, target);
 943out:
 944        cpu_hotplug_done();
 945        return ret;
 946}
 947
 948static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
 949{
 950        int err = 0;
 951
 952        if (!cpu_possible(cpu)) {
 953                pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
 954                       cpu);
 955#if defined(CONFIG_IA64)
 956                pr_err("please check additional_cpus= boot parameter\n");
 957#endif
 958                return -EINVAL;
 959        }
 960
 961        err = try_online_node(cpu_to_node(cpu));
 962        if (err)
 963                return err;
 964
 965        cpu_maps_update_begin();
 966
 967        if (cpu_hotplug_disabled) {
 968                err = -EBUSY;
 969                goto out;
 970        }
 971
 972        err = _cpu_up(cpu, 0, target);
 973out:
 974        cpu_maps_update_done();
 975        return err;
 976}
 977
 978int cpu_up(unsigned int cpu)
 979{
 980        return do_cpu_up(cpu, CPUHP_ONLINE);
 981}
 982EXPORT_SYMBOL_GPL(cpu_up);
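/*
 * Usage sketch (editor's illustration): kernel code brings a present CPU
 * online and later takes it offline again; error handling is elided and the
 * CPU number is arbitrary.
 *
 *	if (!cpu_online(3))
 *		cpu_up(3);
 *	...
 *	cpu_down(3);
 */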
 983
 984#ifdef CONFIG_PM_SLEEP_SMP
 985static cpumask_var_t frozen_cpus;
 986
 987int freeze_secondary_cpus(int primary)
 988{
 989        int cpu, error = 0;
 990
 991        cpu_maps_update_begin();
 992        if (!cpu_online(primary))
 993                primary = cpumask_first(cpu_online_mask);
 994        /*
 995         * We take down all of the non-boot CPUs in one shot to avoid races
  996         * with userspace trying to use CPU hotplug at the same time.
 997         */
 998        cpumask_clear(frozen_cpus);
 999
1000        pr_info("Disabling non-boot CPUs ...\n");
1001        for_each_online_cpu(cpu) {
1002                if (cpu == primary)
1003                        continue;
1004                trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1005                error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1006                trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1007                if (!error)
1008                        cpumask_set_cpu(cpu, frozen_cpus);
1009                else {
1010                        pr_err("Error taking CPU%d down: %d\n", cpu, error);
1011                        break;
1012                }
1013        }
1014
1015        if (!error)
1016                BUG_ON(num_online_cpus() > 1);
1017        else
1018                pr_err("Non-boot CPUs are not disabled\n");
1019
1020        /*
1021         * Make sure the CPUs won't be enabled by someone else. We need to do
1022         * this even in case of failure as all disable_nonboot_cpus() users are
1023         * supposed to do enable_nonboot_cpus() on the failure path.
1024         */
1025        cpu_hotplug_disabled++;
1026
1027        cpu_maps_update_done();
1028        return error;
1029}
1030
1031void __weak arch_enable_nonboot_cpus_begin(void)
1032{
1033}
1034
1035void __weak arch_enable_nonboot_cpus_end(void)
1036{
1037}
1038
1039void enable_nonboot_cpus(void)
1040{
1041        int cpu, error;
1042
1043        /* Allow everyone to use the CPU hotplug again */
1044        cpu_maps_update_begin();
1045        __cpu_hotplug_enable();
1046        if (cpumask_empty(frozen_cpus))
1047                goto out;
1048
1049        pr_info("Enabling non-boot CPUs ...\n");
1050
1051        arch_enable_nonboot_cpus_begin();
1052
1053        for_each_cpu(cpu, frozen_cpus) {
1054                trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1055                error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1056                trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1057                if (!error) {
1058                        pr_info("CPU%d is up\n", cpu);
1059                        continue;
1060                }
1061                pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1062        }
1063
1064        arch_enable_nonboot_cpus_end();
1065
1066        cpumask_clear(frozen_cpus);
1067out:
1068        cpu_maps_update_done();
1069}
1070
1071static int __init alloc_frozen_cpus(void)
1072{
1073        if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1074                return -ENOMEM;
1075        return 0;
1076}
1077core_initcall(alloc_frozen_cpus);
1078
1079/*
1080 * When callbacks for CPU hotplug notifications are being executed, we must
1081 * ensure that the state of the system with respect to the tasks being frozen
1082 * or not, as reported by the notification, remains unchanged *throughout the
1083 * duration* of the execution of the callbacks.
1084 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1085 *
1086 * This synchronization is implemented by mutually excluding regular CPU
1087 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1088 * Hibernate notifications.
1089 */
1090static int
1091cpu_hotplug_pm_callback(struct notifier_block *nb,
1092                        unsigned long action, void *ptr)
1093{
1094        switch (action) {
1095
1096        case PM_SUSPEND_PREPARE:
1097        case PM_HIBERNATION_PREPARE:
1098                cpu_hotplug_disable();
1099                break;
1100
1101        case PM_POST_SUSPEND:
1102        case PM_POST_HIBERNATION:
1103                cpu_hotplug_enable();
1104                break;
1105
1106        default:
1107                return NOTIFY_DONE;
1108        }
1109
1110        return NOTIFY_OK;
1111}
1112
1113
1114static int __init cpu_hotplug_pm_sync_init(void)
1115{
1116        /*
 1117         * cpu_hotplug_pm_callback has higher priority than x86's
 1118         * bsp_pm_callback, which depends on cpu_hotplug_pm_callback
 1119         * disabling cpu hotplug to avoid a cpu hotplug race.
1120         */
1121        pm_notifier(cpu_hotplug_pm_callback, 0);
1122        return 0;
1123}
1124core_initcall(cpu_hotplug_pm_sync_init);
1125
1126#endif /* CONFIG_PM_SLEEP_SMP */
1127
1128int __boot_cpu_id;
1129
1130#endif /* CONFIG_SMP */
1131
1132/* Boot processor state steps */
1133static struct cpuhp_step cpuhp_bp_states[] = {
1134        [CPUHP_OFFLINE] = {
1135                .name                   = "offline",
1136                .startup.single         = NULL,
1137                .teardown.single        = NULL,
1138        },
1139#ifdef CONFIG_SMP
 1140        [CPUHP_CREATE_THREADS] = {
1141                .name                   = "threads:prepare",
1142                .startup.single         = smpboot_create_threads,
1143                .teardown.single        = NULL,
1144                .cant_stop              = true,
1145        },
1146        [CPUHP_PERF_PREPARE] = {
1147                .name                   = "perf:prepare",
1148                .startup.single         = perf_event_init_cpu,
1149                .teardown.single        = perf_event_exit_cpu,
1150        },
1151        [CPUHP_WORKQUEUE_PREP] = {
1152                .name                   = "workqueue:prepare",
1153                .startup.single         = workqueue_prepare_cpu,
1154                .teardown.single        = NULL,
1155        },
1156        [CPUHP_HRTIMERS_PREPARE] = {
1157                .name                   = "hrtimers:prepare",
1158                .startup.single         = hrtimers_prepare_cpu,
1159                .teardown.single        = hrtimers_dead_cpu,
1160        },
1161        [CPUHP_SMPCFD_PREPARE] = {
1162                .name                   = "smpcfd:prepare",
1163                .startup.single         = smpcfd_prepare_cpu,
1164                .teardown.single        = smpcfd_dead_cpu,
1165        },
1166        [CPUHP_RELAY_PREPARE] = {
1167                .name                   = "relay:prepare",
1168                .startup.single         = relay_prepare_cpu,
1169                .teardown.single        = NULL,
1170        },
1171        [CPUHP_SLAB_PREPARE] = {
1172                .name                   = "slab:prepare",
1173                .startup.single         = slab_prepare_cpu,
1174                .teardown.single        = slab_dead_cpu,
1175        },
1176        [CPUHP_RCUTREE_PREP] = {
1177                .name                   = "RCU/tree:prepare",
1178                .startup.single         = rcutree_prepare_cpu,
1179                .teardown.single        = rcutree_dead_cpu,
1180        },
1181        /*
1182         * On the tear-down path, timers_dead_cpu() must be invoked
1183         * before blk_mq_queue_reinit_notify() from notify_dead(),
 1184         * otherwise an RCU stall occurs.
1185         */
1186        [CPUHP_TIMERS_DEAD] = {
1187                .name                   = "timers:dead",
1188                .startup.single         = NULL,
1189                .teardown.single        = timers_dead_cpu,
1190        },
1191        /* Kicks the plugged cpu into life */
1192        [CPUHP_BRINGUP_CPU] = {
1193                .name                   = "cpu:bringup",
1194                .startup.single         = bringup_cpu,
1195                .teardown.single        = NULL,
1196                .cant_stop              = true,
1197        },
1198        [CPUHP_AP_SMPCFD_DYING] = {
1199                .name                   = "smpcfd:dying",
1200                .startup.single         = NULL,
1201                .teardown.single        = smpcfd_dying_cpu,
1202        },
1203        /*
 1204         * Handled on control processor until the plugged processor manages
1205         * this itself.
1206         */
1207        [CPUHP_TEARDOWN_CPU] = {
1208                .name                   = "cpu:teardown",
1209                .startup.single         = NULL,
1210                .teardown.single        = takedown_cpu,
1211                .cant_stop              = true,
1212        },
1213#else
1214        [CPUHP_BRINGUP_CPU] = { },
1215#endif
1216};
1217
1218/* Application processor state steps */
1219static struct cpuhp_step cpuhp_ap_states[] = {
1220#ifdef CONFIG_SMP
1221        /* Final state before CPU kills itself */
1222        [CPUHP_AP_IDLE_DEAD] = {
1223                .name                   = "idle:dead",
1224        },
1225        /*
1226         * Last state before CPU enters the idle loop to die. Transient state
1227         * for synchronization.
1228         */
1229        [CPUHP_AP_OFFLINE] = {
1230                .name                   = "ap:offline",
1231                .cant_stop              = true,
1232        },
1233        /* First state is scheduler control. Interrupts are disabled */
1234        [CPUHP_AP_SCHED_STARTING] = {
1235                .name                   = "sched:starting",
1236                .startup.single         = sched_cpu_starting,
1237                .teardown.single        = sched_cpu_dying,
1238        },
1239        [CPUHP_AP_RCUTREE_DYING] = {
1240                .name                   = "RCU/tree:dying",
1241                .startup.single         = NULL,
1242                .teardown.single        = rcutree_dying_cpu,
1243        },
1244        /* Entry state on starting. Interrupts enabled from here on. Transient
 1245         * state for synchronization */
1246        [CPUHP_AP_ONLINE] = {
1247                .name                   = "ap:online",
1248        },
1249        /* Handle smpboot threads park/unpark */
1250        [CPUHP_AP_SMPBOOT_THREADS] = {
1251                .name                   = "smpboot/threads:online",
1252                .startup.single         = smpboot_unpark_threads,
1253                .teardown.single        = NULL,
1254        },
1255        [CPUHP_AP_PERF_ONLINE] = {
1256                .name                   = "perf:online",
1257                .startup.single         = perf_event_init_cpu,
1258                .teardown.single        = perf_event_exit_cpu,
1259        },
1260        [CPUHP_AP_WORKQUEUE_ONLINE] = {
1261                .name                   = "workqueue:online",
1262                .startup.single         = workqueue_online_cpu,
1263                .teardown.single        = workqueue_offline_cpu,
1264        },
1265        [CPUHP_AP_RCUTREE_ONLINE] = {
1266                .name                   = "RCU/tree:online",
1267                .startup.single         = rcutree_online_cpu,
1268                .teardown.single        = rcutree_offline_cpu,
1269        },
1270#endif
1271        /*
1272         * The dynamically registered state space is here
1273         */
1274
1275#ifdef CONFIG_SMP
1276        /* Last state is scheduler control setting the cpu active */
1277        [CPUHP_AP_ACTIVE] = {
1278                .name                   = "sched:active",
1279                .startup.single         = sched_cpu_activate,
1280                .teardown.single        = sched_cpu_deactivate,
1281        },
1282#endif
1283
1284        /* CPU is fully up and running. */
1285        [CPUHP_ONLINE] = {
1286                .name                   = "online",
1287                .startup.single         = NULL,
1288                .teardown.single        = NULL,
1289        },
1290};
1291
1292/* Sanity check for callbacks */
1293static int cpuhp_cb_check(enum cpuhp_state state)
1294{
1295        if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
1296                return -EINVAL;
1297        return 0;
1298}
1299
1300/*
 1301 * Returns a free slot for dynamic state assignment in the requested range.
 1302 * The states are protected by the cpuhp_state_mutex and an empty slot is
 1303 * identified by having no name assigned.
1304 */
1305static int cpuhp_reserve_state(enum cpuhp_state state)
1306{
1307        enum cpuhp_state i, end;
1308        struct cpuhp_step *step;
1309
1310        switch (state) {
1311        case CPUHP_AP_ONLINE_DYN:
1312                step = cpuhp_ap_states + CPUHP_AP_ONLINE_DYN;
1313                end = CPUHP_AP_ONLINE_DYN_END;
1314                break;
1315        case CPUHP_BP_PREPARE_DYN:
1316                step = cpuhp_bp_states + CPUHP_BP_PREPARE_DYN;
1317                end = CPUHP_BP_PREPARE_DYN_END;
1318                break;
1319        default:
1320                return -EINVAL;
1321        }
1322
1323        for (i = state; i <= end; i++, step++) {
1324                if (!step->name)
1325                        return i;
1326        }
1327        WARN(1, "No more dynamic states available for CPU hotplug\n");
1328        return -ENOSPC;
1329}
1330
1331static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
1332                                 int (*startup)(unsigned int cpu),
1333                                 int (*teardown)(unsigned int cpu),
1334                                 bool multi_instance)
1335{
1336        /* (Un)Install the callbacks for further cpu hotplug operations */
1337        struct cpuhp_step *sp;
1338        int ret = 0;
1339
1340        if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) {
1341                ret = cpuhp_reserve_state(state);
1342                if (ret < 0)
1343                        return ret;
1344                state = ret;
1345        }
1346        sp = cpuhp_get_step(state);
1347        if (name && sp->name)
1348                return -EBUSY;
1349
1350        sp->startup.single = startup;
1351        sp->teardown.single = teardown;
1352        sp->name = name;
1353        sp->multi_instance = multi_instance;
1354        INIT_HLIST_HEAD(&sp->list);
1355        return ret;
1356}
1357
1358static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1359{
1360        return cpuhp_get_step(state)->teardown.single;
1361}
1362
1363/*
1364 * Call the startup/teardown function for a step either on the AP or
1365 * on the current CPU.
1366 */
1367static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1368                            struct hlist_node *node)
1369{
1370        struct cpuhp_step *sp = cpuhp_get_step(state);
1371        int ret;
1372
1373        if ((bringup && !sp->startup.single) ||
1374            (!bringup && !sp->teardown.single))
1375                return 0;
1376        /*
 1377         * The non AP bound callbacks can fail on bringup. On teardown,
 1378         * e.g. module removal, we crash for now.
1379         */
1380#ifdef CONFIG_SMP
1381        if (cpuhp_is_ap_state(state))
1382                ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1383        else
1384                ret = cpuhp_invoke_callback(cpu, state, bringup, node);
1385#else
1386        ret = cpuhp_invoke_callback(cpu, state, bringup, node);
1387#endif
1388        BUG_ON(ret && !bringup);
1389        return ret;
1390}
1391
1392/*
1393 * Called from __cpuhp_setup_state on a recoverable failure.
1394 *
1395 * Note: The teardown callbacks for rollback are not allowed to fail!
1396 */
1397static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
1398                                   struct hlist_node *node)
1399{
1400        int cpu;
1401
1402        /* Roll back the already executed steps on the other cpus */
1403        for_each_present_cpu(cpu) {
1404                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1405                int cpustate = st->state;
1406
1407                if (cpu >= failedcpu)
1408                        break;
1409
1410                /* Did we invoke the startup call on that cpu ? */
1411                if (cpustate >= state)
1412                        cpuhp_issue_call(cpu, state, false, node);
1413        }
1414}
1415
1416int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
1417                               bool invoke)
1418{
1419        struct cpuhp_step *sp;
1420        int cpu;
1421        int ret;
1422
1423        sp = cpuhp_get_step(state);
 1424        if (!sp->multi_instance)
1425                return -EINVAL;
1426
1427        get_online_cpus();
1428        mutex_lock(&cpuhp_state_mutex);
1429
1430        if (!invoke || !sp->startup.multi)
1431                goto add_node;
1432
1433        /*
1434         * Try to call the startup callback for each present cpu
1435         * depending on the hotplug state of the cpu.
1436         */
1437        for_each_present_cpu(cpu) {
1438                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1439                int cpustate = st->state;
1440
1441                if (cpustate < state)
1442                        continue;
1443
1444                ret = cpuhp_issue_call(cpu, state, true, node);
1445                if (ret) {
1446                        if (sp->teardown.multi)
1447                                cpuhp_rollback_install(cpu, state, node);
1448                        goto unlock;
1449                }
1450        }
1451add_node:
1452        ret = 0;
1453        hlist_add_head(node, &sp->list);
1454unlock:
1455        mutex_unlock(&cpuhp_state_mutex);
1456        put_online_cpus();
1457        return ret;
1458}
1459EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
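/*
 * Usage sketch (editor's illustration): multi-instance states are normally
 * set up through the cpuhp_setup_state_multi()/cpuhp_state_add_instance()
 * wrappers in <linux/cpuhotplug.h>, which end up here. "struct my_ctx" and
 * the my_ctx_* helpers are hypothetical; the hlist_node must be embedded in
 * the per-instance object.
 *
 *	struct my_ctx {
 *		struct hlist_node node;
 *	};
 *
 *	static int my_ctx_online(unsigned int cpu, struct hlist_node *node)
 *	{
 *		struct my_ctx *ctx = hlist_entry(node, struct my_ctx, node);
 *
 *		return my_ctx_prepare(ctx, cpu);
 *	}
 *
 *	state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
 *					"subsys/myctx:online",
 *					my_ctx_online, NULL);
 *	cpuhp_state_add_instance(state, &ctx->node);
 */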
1460
1461/**
 1462 * __cpuhp_setup_state - Set up the callbacks for a hotplug machine state
1463 * @state:              The state to setup
1464 * @invoke:             If true, the startup function is invoked for cpus where
1465 *                      cpu state >= @state
1466 * @startup:            startup callback function
1467 * @teardown:           teardown callback function
1468 * @multi_instance:     State is set up for multiple instances which get
1469 *                      added afterwards.
1470 *
1471 * Returns:
1472 *   On success:
1473 *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
1474 *      0 for all other states
1475 *   On failure: proper (negative) error code
1476 */
1477int __cpuhp_setup_state(enum cpuhp_state state,
1478                        const char *name, bool invoke,
1479                        int (*startup)(unsigned int cpu),
1480                        int (*teardown)(unsigned int cpu),
1481                        bool multi_instance)
1482{
1483        int cpu, ret = 0;
1484        bool dynstate;
1485
1486        if (cpuhp_cb_check(state) || !name)
1487                return -EINVAL;
1488
1489        get_online_cpus();
1490        mutex_lock(&cpuhp_state_mutex);
1491
1492        ret = cpuhp_store_callbacks(state, name, startup, teardown,
1493                                    multi_instance);
1494
1495        dynstate = state == CPUHP_AP_ONLINE_DYN;
1496        if (ret > 0 && dynstate) {
1497                state = ret;
1498                ret = 0;
1499        }
1500
1501        if (ret || !invoke || !startup)
1502                goto out;
1503
1504        /*
1505         * Try to call the startup callback for each present cpu
1506         * depending on the hotplug state of the cpu.
1507         */
1508        for_each_present_cpu(cpu) {
1509                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1510                int cpustate = st->state;
1511
1512                if (cpustate < state)
1513                        continue;
1514
1515                ret = cpuhp_issue_call(cpu, state, true, NULL);
1516                if (ret) {
1517                        if (teardown)
1518                                cpuhp_rollback_install(cpu, state, NULL);
1519                        cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1520                        goto out;
1521                }
1522        }
1523out:
1524        mutex_unlock(&cpuhp_state_mutex);
1525        put_online_cpus();
1526        /*
1527         * If the requested state is CPUHP_AP_ONLINE_DYN, return the
1528         * dynamically allocated state in case of success.
1529         */
1530        if (!ret && dynstate)
1531                return state;
1532        return ret;
1533}
1534EXPORT_SYMBOL(__cpuhp_setup_state);
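/*
 * Usage sketch (editor's illustration): most callers use the
 * cpuhp_setup_state() wrapper from <linux/cpuhotplug.h>, which forwards to
 * __cpuhp_setup_state(). For CPUHP_AP_ONLINE_DYN the allocated state number
 * is returned and must be kept for a later cpuhp_remove_state(). The
 * callbacks and the my_hp_state variable are hypothetical.
 *
 *	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "subsys/my:online",
 *				my_online_cb, my_offline_cb);
 *	if (ret < 0)
 *		return ret;
 *	my_hp_state = ret;
 */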
1535
1536int __cpuhp_state_remove_instance(enum cpuhp_state state,
1537                                  struct hlist_node *node, bool invoke)
1538{
1539        struct cpuhp_step *sp = cpuhp_get_step(state);
1540        int cpu;
1541
1542        BUG_ON(cpuhp_cb_check(state));
1543
1544        if (!sp->multi_instance)
1545                return -EINVAL;
1546
1547        get_online_cpus();
1548        mutex_lock(&cpuhp_state_mutex);
1549
1550        if (!invoke || !cpuhp_get_teardown_cb(state))
1551                goto remove;
1552        /*
1553         * Call the teardown callback for each present cpu depending
1554         * on the hotplug state of the cpu. This function is not
1555         * allowed to fail currently!
1556         */
1557        for_each_present_cpu(cpu) {
1558                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1559                int cpustate = st->state;
1560
1561                if (cpustate >= state)
1562                        cpuhp_issue_call(cpu, state, false, node);
1563        }
1564
1565remove:
1566        hlist_del(node);
1567        mutex_unlock(&cpuhp_state_mutex);
1568        put_online_cpus();
1569
1570        return 0;
1571}
1572EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
1573
1574/**
 1575 * __cpuhp_remove_state - Remove the callbacks for a hotplug machine state
1576 * @state:      The state to remove
1577 * @invoke:     If true, the teardown function is invoked for cpus where
1578 *              cpu state >= @state
1579 *
1580 * The teardown callback is currently not allowed to fail. Think
1581 * about module removal!
1582 */
1583void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
1584{
1585        struct cpuhp_step *sp = cpuhp_get_step(state);
1586        int cpu;
1587
1588        BUG_ON(cpuhp_cb_check(state));
1589
1590        get_online_cpus();
1591
1592        mutex_lock(&cpuhp_state_mutex);
1593        if (sp->multi_instance) {
1594                WARN(!hlist_empty(&sp->list),
1595                     "Error: Removing state %d which has instances left.\n",
1596                     state);
1597                goto remove;
1598        }
1599
1600        if (!invoke || !cpuhp_get_teardown_cb(state))
1601                goto remove;
1602
1603        /*
1604         * Call the teardown callback for each present cpu depending
1605         * on the hotplug state of the cpu. This function is not
1606         * allowed to fail currently!
1607         */
1608        for_each_present_cpu(cpu) {
1609                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1610                int cpustate = st->state;
1611
1612                if (cpustate >= state)
1613                        cpuhp_issue_call(cpu, state, false, NULL);
1614        }
1615remove:
1616        cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1617        mutex_unlock(&cpuhp_state_mutex);
1618        put_online_cpus();
1619}
1620EXPORT_SYMBOL(__cpuhp_remove_state);
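
/*
 * Example (illustrative sketch, continuing the hypothetical "foo" driver
 * above): the matching module exit path uses the cpuhp_remove_state()
 * wrapper, i.e. __cpuhp_remove_state(state, true), so foo_cpu_offline()
 * runs on every present cpu that has reached the state before the state
 * slot is released. As the kernel-doc above stresses, this cannot fail.
 */
static void __exit foo_exit(void)
{
	cpuhp_remove_state(foo_online_state);
}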
1621
1622#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
1623static ssize_t show_cpuhp_state(struct device *dev,
1624                                struct device_attribute *attr, char *buf)
1625{
1626        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1627
1628        return sprintf(buf, "%d\n", st->state);
1629}
1630static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
1631
1632static ssize_t write_cpuhp_target(struct device *dev,
1633                                  struct device_attribute *attr,
1634                                  const char *buf, size_t count)
1635{
1636        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1637        struct cpuhp_step *sp;
1638        int target, ret;
1639
1640        ret = kstrtoint(buf, 10, &target);
1641        if (ret)
1642                return ret;
1643
1644#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
1645        if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
1646                return -EINVAL;
1647#else
1648        if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
1649                return -EINVAL;
1650#endif
1651
1652        ret = lock_device_hotplug_sysfs();
1653        if (ret)
1654                return ret;
1655
1656        mutex_lock(&cpuhp_state_mutex);
1657        sp = cpuhp_get_step(target);
1658        ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
1659        mutex_unlock(&cpuhp_state_mutex);
1660        if (ret)
1661                goto out;
1662
1663        if (st->state < target)
1664                ret = do_cpu_up(dev->id, target);
1665        else
1666                ret = do_cpu_down(dev->id, target);
1667out:
1668        unlock_device_hotplug();
1669        return ret ? ret : count;
1670}
1671
1672static ssize_t show_cpuhp_target(struct device *dev,
1673                                 struct device_attribute *attr, char *buf)
1674{
1675        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1676
1677        return sprintf(buf, "%d\n", st->target);
1678}
1679static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
1680
1681static struct attribute *cpuhp_cpu_attrs[] = {
1682        &dev_attr_state.attr,
1683        &dev_attr_target.attr,
1684        NULL
1685};
1686
1687static struct attribute_group cpuhp_cpu_attr_group = {
1688        .attrs = cpuhp_cpu_attrs,
1689        .name = "hotplug",
1691};
1692
1693static ssize_t show_cpuhp_states(struct device *dev,
1694                                 struct device_attribute *attr, char *buf)
1695{
1696        ssize_t cur, res = 0;
1697        int i;
1698
1699        mutex_lock(&cpuhp_state_mutex);
1700        for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
1701                struct cpuhp_step *sp = cpuhp_get_step(i);
1702
1703                if (sp->name) {
1704                        cur = sprintf(buf, "%3d: %s\n", i, sp->name);
1705                        buf += cur;
1706                        res += cur;
1707                }
1708        }
1709        mutex_unlock(&cpuhp_state_mutex);
1710        return res;
1711}
1712static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
1713
1714static struct attribute *cpuhp_cpu_root_attrs[] = {
1715        &dev_attr_states.attr,
1716        NULL
1717};
1718
1719static struct attribute_group cpuhp_cpu_root_attr_group = {
1720        .attrs = cpuhp_cpu_root_attrs,
1721        .name = "hotplug",
1723};
1724
1725static int __init cpuhp_sysfs_init(void)
1726{
1727        int cpu, ret;
1728
1729        ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
1730                                 &cpuhp_cpu_root_attr_group);
1731        if (ret)
1732                return ret;
1733
1734        for_each_possible_cpu(cpu) {
1735                struct device *dev = get_cpu_device(cpu);
1736
1737                if (!dev)
1738                        continue;
1739                ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
1740                if (ret)
1741                        return ret;
1742        }
1743        return 0;
1744}
1745device_initcall(cpuhp_sysfs_init);
1746#endif
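
/*
 * The attribute groups above appear as a "hotplug" directory in sysfs:
 * /sys/devices/system/cpu/hotplug/states holds the state table printed by
 * show_cpuhp_states(), and /sys/devices/system/cpu/cpuN/hotplug/{state,target}
 * expose the per-cpu state, with writes to "target" driving do_cpu_up() or
 * do_cpu_down() through write_cpuhp_target(). A minimal userspace sketch
 * (illustrative, not part of this file) that dumps those files:
 */
#include <stdio.h>

static void dump(const char *path)
{
	char line[128];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	printf("==> %s\n", path);
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
}

int main(void)
{
	dump("/sys/devices/system/cpu/hotplug/states");
	dump("/sys/devices/system/cpu/cpu0/hotplug/state");
	dump("/sys/devices/system/cpu/cpu0/hotplug/target");
	return 0;
}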
1747
1748/*
1749 * cpu_bit_bitmap[] is a special, "compressed" data structure: for each
1750 * bit number nr it holds the NR_CPUS-bit value 1 << nr (a single-bit mask).
1751 *
1752 * It is used by cpumask_of() to get a constant address to a CPU
1753 * mask value that has a single bit set only.
1754 */
1755
1756/* cpu_bit_bitmap[0] is empty - so we can back into it */
1757#define MASK_DECLARE_1(x)       [x+1][0] = (1UL << (x))
1758#define MASK_DECLARE_2(x)       MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
1759#define MASK_DECLARE_4(x)       MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
1760#define MASK_DECLARE_8(x)       MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
1761
1762const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
1763
1764        MASK_DECLARE_8(0),      MASK_DECLARE_8(8),
1765        MASK_DECLARE_8(16),     MASK_DECLARE_8(24),
1766#if BITS_PER_LONG > 32
1767        MASK_DECLARE_8(32),     MASK_DECLARE_8(40),
1768        MASK_DECLARE_8(48),     MASK_DECLARE_8(56),
1769#endif
1770};
1771EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
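
/*
 * Illustrative sketch of how cpumask_of() uses this table, modelled on
 * get_cpu_mask() in <linux/cpumask.h> (shown only to explain the indexing
 * trick, not as a second definition):
 */
static inline const struct cpumask *example_cpu_mask(unsigned int cpu)
{
	/* Row 1 + cpu % BITS_PER_LONG has bit (cpu % BITS_PER_LONG) set in
	 * its first word and zeroes everywhere else; row 0 is all zeroes. */
	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];

	/* Backing up cpu / BITS_PER_LONG words makes that single set word
	 * land at the correct word index for @cpu. The words borrowed from
	 * the tail of the preceding row are always zero, and the empty
	 * cpu_bit_bitmap[0] provides that tail for the first real row. */
	p -= cpu / BITS_PER_LONG;
	return to_cpumask(p);
}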
1772
1773const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
1774EXPORT_SYMBOL(cpu_all_bits);
1775
1776#ifdef CONFIG_INIT_ALL_POSSIBLE
1777struct cpumask __cpu_possible_mask __read_mostly
1778        = {CPU_BITS_ALL};
1779#else
1780struct cpumask __cpu_possible_mask __read_mostly;
1781#endif
1782EXPORT_SYMBOL(__cpu_possible_mask);
1783
1784struct cpumask __cpu_online_mask __read_mostly;
1785EXPORT_SYMBOL(__cpu_online_mask);
1786
1787struct cpumask __cpu_present_mask __read_mostly;
1788EXPORT_SYMBOL(__cpu_present_mask);
1789
1790struct cpumask __cpu_active_mask __read_mostly;
1791EXPORT_SYMBOL(__cpu_active_mask);
1792
1793void init_cpu_present(const struct cpumask *src)
1794{
1795        cpumask_copy(&__cpu_present_mask, src);
1796}
1797
1798void init_cpu_possible(const struct cpumask *src)
1799{
1800        cpumask_copy(&__cpu_possible_mask, src);
1801}
1802
1803void init_cpu_online(const struct cpumask *src)
1804{
1805        cpumask_copy(&__cpu_online_mask, src);
1806}
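
/*
 * Illustrative sketch (hypothetical, not lifted from any architecture):
 * early SMP setup code seeding the masks via the helpers above from a
 * firmware-provided cpu count passed in by the (made-up) caller.
 */
static void __init example_smp_init_cpus(unsigned int fw_ncpus)
{
	unsigned int cpu;

	/* Start from just the boot cpu ... */
	init_cpu_possible(cpumask_of(0));
	init_cpu_present(cpumask_of(0));

	/* ... then mark every enumerated cpu, capped at nr_cpu_ids. */
	for (cpu = 0; cpu < fw_ncpus && cpu < nr_cpu_ids; cpu++) {
		set_cpu_possible(cpu, true);
		set_cpu_present(cpu, true);
	}
}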
1807
1808/*
1809 * Activate the first processor.
1810 */
1811void __init boot_cpu_init(void)
1812{
1813        int cpu = smp_processor_id();
1814
1815        /* Mark the boot cpu "present", "online" etc. for both the SMP and UP case */
1816        set_cpu_online(cpu, true);
1817        set_cpu_active(cpu, true);
1818        set_cpu_present(cpu, true);
1819        set_cpu_possible(cpu, true);
1820
1821#ifdef CONFIG_SMP
1822        __boot_cpu_id = cpu;
1823#endif
1824}
1825
1826/*
1827 * Must be called _AFTER_ setting up the per_cpu areas
1828 */
1829void __init boot_cpu_state_init(void)
1830{
1831        per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
1832}
1833