linux/kernel/cpu.c
   1/* CPU control.
   2 * (C) 2001, 2002, 2003, 2004 Rusty Russell
   3 *
    4 * This code is licensed under the GPL.
   5 */
   6#include <linux/sched/mm.h>
   7#include <linux/proc_fs.h>
   8#include <linux/smp.h>
   9#include <linux/init.h>
  10#include <linux/notifier.h>
  11#include <linux/sched/signal.h>
  12#include <linux/sched/hotplug.h>
  13#include <linux/sched/isolation.h>
  14#include <linux/sched/task.h>
  15#include <linux/sched/smt.h>
  16#include <linux/unistd.h>
  17#include <linux/cpu.h>
  18#include <linux/oom.h>
  19#include <linux/rcupdate.h>
  20#include <linux/export.h>
  21#include <linux/bug.h>
  22#include <linux/kthread.h>
  23#include <linux/stop_machine.h>
  24#include <linux/mutex.h>
  25#include <linux/gfp.h>
  26#include <linux/suspend.h>
  27#include <linux/lockdep.h>
  28#include <linux/tick.h>
  29#include <linux/irq.h>
  30#include <linux/nmi.h>
  31#include <linux/smpboot.h>
  32#include <linux/relay.h>
  33#include <linux/slab.h>
  34#include <linux/percpu-rwsem.h>
  35
  36#include <trace/events/power.h>
  37#define CREATE_TRACE_POINTS
  38#include <trace/events/cpuhp.h>
  39
  40#include "smpboot.h"
  41
  42/**
   43 * struct cpuhp_cpu_state - Per cpu hotplug state storage
  44 * @state:      The current cpu state
  45 * @target:     The target state
  46 * @thread:     Pointer to the hotplug thread
  47 * @should_run: Thread should execute
  48 * @rollback:   Perform a rollback
  49 * @single:     Single callback invocation
  50 * @bringup:    Single callback bringup or teardown selector
  51 * @cb_state:   The state for a single callback (install/uninstall)
  52 * @result:     Result of the operation
  53 * @done_up:    Signal completion to the issuer of the task for cpu-up
  54 * @done_down:  Signal completion to the issuer of the task for cpu-down
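 * @fail:       The state whose callback should fail once (fault injection)
 * @booted_once: Set once this CPU has been brought up at least once
 * @node:       Remote CPU's hlist_node for a single-instance callback
 * @last:       For multi-instance rollback, remember how far we got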
  55 */
  56struct cpuhp_cpu_state {
  57        enum cpuhp_state        state;
  58        enum cpuhp_state        target;
  59        enum cpuhp_state        fail;
  60#ifdef CONFIG_SMP
  61        struct task_struct      *thread;
  62        bool                    should_run;
  63        bool                    rollback;
  64        bool                    single;
  65        bool                    bringup;
  66        bool                    booted_once;
  67        struct hlist_node       *node;
  68        struct hlist_node       *last;
  69        enum cpuhp_state        cb_state;
  70        int                     result;
  71        struct completion       done_up;
  72        struct completion       done_down;
  73#endif
  74};
  75
  76static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
  77        .fail = CPUHP_INVALID,
  78};
  79
  80#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
  81static struct lockdep_map cpuhp_state_up_map =
  82        STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
  83static struct lockdep_map cpuhp_state_down_map =
  84        STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
  85
  86
  87static inline void cpuhp_lock_acquire(bool bringup)
  88{
  89        lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
  90}
  91
  92static inline void cpuhp_lock_release(bool bringup)
  93{
  94        lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
  95}
  96#else
  97
  98static inline void cpuhp_lock_acquire(bool bringup) { }
  99static inline void cpuhp_lock_release(bool bringup) { }
 100
 101#endif
 102
 103/**
  104 * struct cpuhp_step - Hotplug state machine step
 105 * @name:       Name of the step
 106 * @startup:    Startup function of the step
 107 * @teardown:   Teardown function of the step
 108 * @cant_stop:  Bringup/teardown can't be stopped at this step
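 * @list:       List head for the multi-instance nodes of this step
 * @multi_instance: State has multiple instances which get added afterwards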
 109 */
 110struct cpuhp_step {
 111        const char              *name;
 112        union {
 113                int             (*single)(unsigned int cpu);
 114                int             (*multi)(unsigned int cpu,
 115                                         struct hlist_node *node);
 116        } startup;
 117        union {
 118                int             (*single)(unsigned int cpu);
 119                int             (*multi)(unsigned int cpu,
 120                                         struct hlist_node *node);
 121        } teardown;
 122        struct hlist_head       list;
 123        bool                    cant_stop;
 124        bool                    multi_instance;
 125};
 126
 127static DEFINE_MUTEX(cpuhp_state_mutex);
 128static struct cpuhp_step cpuhp_hp_states[];
 129
 130static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
 131{
 132        return cpuhp_hp_states + state;
 133}
 134
 135/**
  136 * cpuhp_invoke_callback - Invoke the callbacks for a given state
 137 * @cpu:        The cpu for which the callback should be invoked
 138 * @state:      The state to do callbacks for
 139 * @bringup:    True if the bringup callback should be invoked
 140 * @node:       For multi-instance, do a single entry callback for install/remove
 141 * @lastp:      For multi-instance rollback, remember how far we got
 142 *
 143 * Called from cpu hotplug and from the state register machinery.
 144 */
 145static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
 146                                 bool bringup, struct hlist_node *node,
 147                                 struct hlist_node **lastp)
 148{
 149        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 150        struct cpuhp_step *step = cpuhp_get_step(state);
 151        int (*cbm)(unsigned int cpu, struct hlist_node *node);
 152        int (*cb)(unsigned int cpu);
 153        int ret, cnt;
 154
 155        if (st->fail == state) {
 156                st->fail = CPUHP_INVALID;
 157
 158                if (!(bringup ? step->startup.single : step->teardown.single))
 159                        return 0;
 160
 161                return -EAGAIN;
 162        }
 163
 164        if (!step->multi_instance) {
 165                WARN_ON_ONCE(lastp && *lastp);
 166                cb = bringup ? step->startup.single : step->teardown.single;
 167                if (!cb)
 168                        return 0;
 169                trace_cpuhp_enter(cpu, st->target, state, cb);
 170                ret = cb(cpu);
 171                trace_cpuhp_exit(cpu, st->state, state, ret);
 172                return ret;
 173        }
 174        cbm = bringup ? step->startup.multi : step->teardown.multi;
 175        if (!cbm)
 176                return 0;
 177
 178        /* Single invocation for instance add/remove */
 179        if (node) {
 180                WARN_ON_ONCE(lastp && *lastp);
 181                trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
 182                ret = cbm(cpu, node);
 183                trace_cpuhp_exit(cpu, st->state, state, ret);
 184                return ret;
 185        }
 186
 187        /* State transition. Invoke on all instances */
 188        cnt = 0;
 189        hlist_for_each(node, &step->list) {
 190                if (lastp && node == *lastp)
 191                        break;
 192
 193                trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
 194                ret = cbm(cpu, node);
 195                trace_cpuhp_exit(cpu, st->state, state, ret);
 196                if (ret) {
 197                        if (!lastp)
 198                                goto err;
 199
 200                        *lastp = node;
 201                        return ret;
 202                }
 203                cnt++;
 204        }
 205        if (lastp)
 206                *lastp = NULL;
 207        return 0;
 208err:
 209        /* Rollback the instances if one failed */
 210        cbm = !bringup ? step->startup.multi : step->teardown.multi;
 211        if (!cbm)
 212                return ret;
 213
 214        hlist_for_each(node, &step->list) {
 215                if (!cnt--)
 216                        break;
 217
 218                trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
 219                ret = cbm(cpu, node);
 220                trace_cpuhp_exit(cpu, st->state, state, ret);
 221                /*
  222                 * Rollback must not fail!
 223                 */
 224                WARN_ON_ONCE(ret);
 225        }
 226        return ret;
 227}
 228
 229#ifdef CONFIG_SMP
 230static bool cpuhp_is_ap_state(enum cpuhp_state state)
 231{
 232        /*
 233         * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
 234         * purposes as that state is handled explicitly in cpu_down.
 235         */
 236        return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
 237}
 238
 239static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
 240{
 241        struct completion *done = bringup ? &st->done_up : &st->done_down;
 242        wait_for_completion(done);
 243}
 244
 245static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
 246{
 247        struct completion *done = bringup ? &st->done_up : &st->done_down;
 248        complete(done);
 249}
 250
 251/*
  252 * The former STARTING/DYING states run with IRQs disabled and must not fail.
 253 */
 254static bool cpuhp_is_atomic_state(enum cpuhp_state state)
 255{
 256        return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
 257}
 258
 259/* Serializes the updates to cpu_online_mask, cpu_present_mask */
 260static DEFINE_MUTEX(cpu_add_remove_lock);
 261bool cpuhp_tasks_frozen;
 262EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
 263
 264/*
 265 * The following two APIs (cpu_maps_update_begin/done) must be used when
 266 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
 267 */
 268void cpu_maps_update_begin(void)
 269{
 270        mutex_lock(&cpu_add_remove_lock);
 271}
 272
 273void cpu_maps_update_done(void)
 274{
 275        mutex_unlock(&cpu_add_remove_lock);
 276}
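
/*
 * Illustrative sketch (not part of the original file): callers that update
 * the cpu masks bracket the update with these helpers, e.g.
 *
 *	cpu_maps_update_begin();
 *	set_cpu_present(cpu, true);
 *	cpu_maps_update_done();
 */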
 277
 278/*
 279 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 280 * Should always be manipulated under cpu_add_remove_lock
 281 */
 282static int cpu_hotplug_disabled;
 283
 284#ifdef CONFIG_HOTPLUG_CPU
 285
 286DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
 287
 288void cpus_read_lock(void)
 289{
 290        percpu_down_read(&cpu_hotplug_lock);
 291}
 292EXPORT_SYMBOL_GPL(cpus_read_lock);
 293
 294int cpus_read_trylock(void)
 295{
 296        return percpu_down_read_trylock(&cpu_hotplug_lock);
 297}
 298EXPORT_SYMBOL_GPL(cpus_read_trylock);
 299
 300void cpus_read_unlock(void)
 301{
 302        percpu_up_read(&cpu_hotplug_lock);
 303}
 304EXPORT_SYMBOL_GPL(cpus_read_unlock);
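
/*
 * Illustrative sketch (not part of the original file): code that must not
 * race with CPU hotplug wraps its critical section in the read side of the
 * hotplug lock; foo_init_cpu() is a hypothetical per-cpu setup helper.
 *
 *	cpus_read_lock();
 *	for_each_online_cpu(cpu)
 *		foo_init_cpu(cpu);
 *	cpus_read_unlock();
 */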
 305
 306void cpus_write_lock(void)
 307{
 308        percpu_down_write(&cpu_hotplug_lock);
 309}
 310
 311void cpus_write_unlock(void)
 312{
 313        percpu_up_write(&cpu_hotplug_lock);
 314}
 315
 316void lockdep_assert_cpus_held(void)
 317{
 318        /*
 319         * We can't have hotplug operations before userspace starts running,
 320         * and some init codepaths will knowingly not take the hotplug lock.
 321         * This is all valid, so mute lockdep until it makes sense to report
 322         * unheld locks.
 323         */
 324        if (system_state < SYSTEM_RUNNING)
 325                return;
 326
 327        percpu_rwsem_assert_held(&cpu_hotplug_lock);
 328}
 329
 330static void lockdep_acquire_cpus_lock(void)
 331{
 332        rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
 333}
 334
 335static void lockdep_release_cpus_lock(void)
 336{
 337        rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
 338}
 339
 340/*
 341 * Wait for currently running CPU hotplug operations to complete (if any) and
 342 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 343 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 344 * hotplug path before performing hotplug operations. So acquiring that lock
 345 * guarantees mutual exclusion from any currently running hotplug operations.
 346 */
 347void cpu_hotplug_disable(void)
 348{
 349        cpu_maps_update_begin();
 350        cpu_hotplug_disabled++;
 351        cpu_maps_update_done();
 352}
 353EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
 354
 355static void __cpu_hotplug_enable(void)
 356{
 357        if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
 358                return;
 359        cpu_hotplug_disabled--;
 360}
 361
 362void cpu_hotplug_enable(void)
 363{
 364        cpu_maps_update_begin();
 365        __cpu_hotplug_enable();
 366        cpu_maps_update_done();
 367}
 368EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
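
/*
 * Illustrative sketch (not part of the original file): a caller that must
 * keep the set of online CPUs stable against sysfs-initiated hotplug pairs
 * the two calls; do_critical_work() is hypothetical.
 *
 *	cpu_hotplug_disable();
 *	do_critical_work();
 *	cpu_hotplug_enable();
 */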
 369
 370#else
 371
 372static void lockdep_acquire_cpus_lock(void)
 373{
 374}
 375
 376static void lockdep_release_cpus_lock(void)
 377{
 378}
 379
 380#endif  /* CONFIG_HOTPLUG_CPU */
 381
 382/*
 383 * Architectures that need SMT-specific errata handling during SMT hotplug
 384 * should override this.
 385 */
 386void __weak arch_smt_update(void) { }
 387
 388#ifdef CONFIG_HOTPLUG_SMT
 389enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 390
 391void __init cpu_smt_disable(bool force)
 392{
 393        if (!cpu_smt_possible())
 394                return;
 395
 396        if (force) {
 397                pr_info("SMT: Force disabled\n");
 398                cpu_smt_control = CPU_SMT_FORCE_DISABLED;
 399        } else {
 400                cpu_smt_control = CPU_SMT_DISABLED;
 401        }
 402}
 403
 404/*
 405 * The decision whether SMT is supported can only be done after the full
 406 * CPU identification. Called from architecture code.
 407 */
 408void __init cpu_smt_check_topology(void)
 409{
 410        if (!topology_smt_supported())
 411                cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
 412}
 413
 414static int __init smt_cmdline_disable(char *str)
 415{
 416        cpu_smt_disable(str && !strcmp(str, "force"));
 417        return 0;
 418}
 419early_param("nosmt", smt_cmdline_disable);
 420
 421static inline bool cpu_smt_allowed(unsigned int cpu)
 422{
 423        if (cpu_smt_control == CPU_SMT_ENABLED)
 424                return true;
 425
 426        if (topology_is_primary_thread(cpu))
 427                return true;
 428
 429        /*
 430         * On x86 it's required to boot all logical CPUs at least once so
 431         * that the init code can get a chance to set CR4.MCE on each
  432         * CPU. Otherwise, a broadcast MCE observing CR4.MCE=0b on any
  433         * core will shut down the machine.
 434         */
 435        return !per_cpu(cpuhp_state, cpu).booted_once;
 436}
 437
  438/* Returns false if SMT is not supported or has been forcefully (irreversibly) disabled */
 439bool cpu_smt_possible(void)
 440{
 441        return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
 442                cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
 443}
 444EXPORT_SYMBOL_GPL(cpu_smt_possible);
 445#else
 446static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
 447#endif
 448
 449static inline enum cpuhp_state
 450cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
 451{
 452        enum cpuhp_state prev_state = st->state;
 453
 454        st->rollback = false;
 455        st->last = NULL;
 456
 457        st->target = target;
 458        st->single = false;
 459        st->bringup = st->state < target;
 460
 461        return prev_state;
 462}
 463
 464static inline void
 465cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
 466{
 467        st->rollback = true;
 468
 469        /*
 470         * If we have st->last we need to undo partial multi_instance of this
 471         * state first. Otherwise start undo at the previous state.
 472         */
 473        if (!st->last) {
 474                if (st->bringup)
 475                        st->state--;
 476                else
 477                        st->state++;
 478        }
 479
 480        st->target = prev_state;
 481        st->bringup = !st->bringup;
 482}
 483
 484/* Regular hotplug invocation of the AP hotplug thread */
 485static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
 486{
 487        if (!st->single && st->state == st->target)
 488                return;
 489
 490        st->result = 0;
 491        /*
 492         * Make sure the above stores are visible before should_run becomes
  493         * true. Paired with the smp_mb() in cpuhp_thread_fun().
 494         */
 495        smp_mb();
 496        st->should_run = true;
 497        wake_up_process(st->thread);
 498        wait_for_ap_thread(st, st->bringup);
 499}
 500
 501static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
 502{
 503        enum cpuhp_state prev_state;
 504        int ret;
 505
 506        prev_state = cpuhp_set_state(st, target);
 507        __cpuhp_kick_ap(st);
 508        if ((ret = st->result)) {
 509                cpuhp_reset_state(st, prev_state);
 510                __cpuhp_kick_ap(st);
 511        }
 512
 513        return ret;
 514}
 515
 516static int bringup_wait_for_ap(unsigned int cpu)
 517{
 518        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 519
 520        /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
 521        wait_for_ap_thread(st, true);
 522        if (WARN_ON_ONCE((!cpu_online(cpu))))
 523                return -ECANCELED;
 524
 525        /* Unpark the stopper thread and the hotplug thread of the target cpu */
 526        stop_machine_unpark(cpu);
 527        kthread_unpark(st->thread);
 528
 529        /*
 530         * SMT soft disabling on X86 requires to bring the CPU out of the
 531         * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
  532         * CPU marked itself as booted_once in notify_cpu_starting() so the
 533         * cpu_smt_allowed() check will now return false if this is not the
 534         * primary sibling.
 535         */
 536        if (!cpu_smt_allowed(cpu))
 537                return -ECANCELED;
 538
 539        if (st->target <= CPUHP_AP_ONLINE_IDLE)
 540                return 0;
 541
 542        return cpuhp_kick_ap(st, st->target);
 543}
 544
 545static int bringup_cpu(unsigned int cpu)
 546{
 547        struct task_struct *idle = idle_thread_get(cpu);
 548        int ret;
 549
 550        /*
 551         * Some architectures have to walk the irq descriptors to
 552         * setup the vector space for the cpu which comes online.
 553         * Prevent irq alloc/free across the bringup.
 554         */
 555        irq_lock_sparse();
 556
 557        /* Arch-specific enabling code. */
 558        ret = __cpu_up(cpu, idle);
 559        irq_unlock_sparse();
 560        if (ret)
 561                return ret;
 562        return bringup_wait_for_ap(cpu);
 563}
 564
 565static int finish_cpu(unsigned int cpu)
 566{
 567        struct task_struct *idle = idle_thread_get(cpu);
 568        struct mm_struct *mm = idle->active_mm;
 569
 570        /*
 571         * idle_task_exit() will have switched to &init_mm, now
 572         * clean up any remaining active_mm state.
 573         */
 574        if (mm != &init_mm)
 575                idle->active_mm = &init_mm;
 576        mmdrop(mm);
 577        return 0;
 578}
 579
 580/*
 581 * Hotplug state machine related functions
 582 */
 583
 584static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
 585{
 586        for (st->state--; st->state > st->target; st->state--)
 587                cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
 588}
 589
 590static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 591                              enum cpuhp_state target)
 592{
 593        enum cpuhp_state prev_state = st->state;
 594        int ret = 0;
 595
 596        while (st->state < target) {
 597                st->state++;
 598                ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
 599                if (ret) {
 600                        st->target = prev_state;
 601                        undo_cpu_up(cpu, st);
 602                        break;
 603                }
 604        }
 605        return ret;
 606}
 607
 608/*
 609 * The cpu hotplug threads manage the bringup and teardown of the cpus
 610 */
 611static void cpuhp_create(unsigned int cpu)
 612{
 613        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 614
 615        init_completion(&st->done_up);
 616        init_completion(&st->done_down);
 617}
 618
 619static int cpuhp_should_run(unsigned int cpu)
 620{
 621        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 622
 623        return st->should_run;
 624}
 625
 626/*
 627 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
 628 * callbacks when a state gets [un]installed at runtime.
 629 *
 630 * Each invocation of this function by the smpboot thread does a single AP
 631 * state callback.
 632 *
 633 * It has 3 modes of operation:
 634 *  - single: runs st->cb_state
 635 *  - up:     runs ++st->state, while st->state < st->target
 636 *  - down:   runs st->state--, while st->state > st->target
 637 *
 638 * When complete or on error, should_run is cleared and the completion is fired.
 639 */
 640static void cpuhp_thread_fun(unsigned int cpu)
 641{
 642        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 643        bool bringup = st->bringup;
 644        enum cpuhp_state state;
 645
 646        if (WARN_ON_ONCE(!st->should_run))
 647                return;
 648
 649        /*
 650         * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
 651         * that if we see ->should_run we also see the rest of the state.
 652         */
 653        smp_mb();
 654
 655        /*
 656         * The BP holds the hotplug lock, but we're now running on the AP,
 657         * ensure that anybody asserting the lock is held, will actually find
 658         * it so.
 659         */
 660        lockdep_acquire_cpus_lock();
 661        cpuhp_lock_acquire(bringup);
 662
 663        if (st->single) {
 664                state = st->cb_state;
 665                st->should_run = false;
 666        } else {
 667                if (bringup) {
 668                        st->state++;
 669                        state = st->state;
 670                        st->should_run = (st->state < st->target);
 671                        WARN_ON_ONCE(st->state > st->target);
 672                } else {
 673                        state = st->state;
 674                        st->state--;
 675                        st->should_run = (st->state > st->target);
 676                        WARN_ON_ONCE(st->state < st->target);
 677                }
 678        }
 679
 680        WARN_ON_ONCE(!cpuhp_is_ap_state(state));
 681
 682        if (cpuhp_is_atomic_state(state)) {
 683                local_irq_disable();
 684                st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
 685                local_irq_enable();
 686
 687                /*
 688                 * STARTING/DYING must not fail!
 689                 */
 690                WARN_ON_ONCE(st->result);
 691        } else {
 692                st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
 693        }
 694
 695        if (st->result) {
 696                /*
  697                 * If we fail on a rollback, we're up a creek without a
  698                 * paddle, no way forward, no way back. We lose, thanks for
  699                 * playing.
 700                 */
 701                WARN_ON_ONCE(st->rollback);
 702                st->should_run = false;
 703        }
 704
 705        cpuhp_lock_release(bringup);
 706        lockdep_release_cpus_lock();
 707
 708        if (!st->should_run)
 709                complete_ap_thread(st, bringup);
 710}
 711
 712/* Invoke a single callback on a remote cpu */
 713static int
 714cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
 715                         struct hlist_node *node)
 716{
 717        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 718        int ret;
 719
 720        if (!cpu_online(cpu))
 721                return 0;
 722
 723        cpuhp_lock_acquire(false);
 724        cpuhp_lock_release(false);
 725
 726        cpuhp_lock_acquire(true);
 727        cpuhp_lock_release(true);
 728
 729        /*
 730         * If we are up and running, use the hotplug thread. For early calls
 731         * we invoke the thread function directly.
 732         */
 733        if (!st->thread)
 734                return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 735
 736        st->rollback = false;
 737        st->last = NULL;
 738
 739        st->node = node;
 740        st->bringup = bringup;
 741        st->cb_state = state;
 742        st->single = true;
 743
 744        __cpuhp_kick_ap(st);
 745
 746        /*
 747         * If we failed and did a partial, do a rollback.
 748         */
 749        if ((ret = st->result) && st->last) {
 750                st->rollback = true;
 751                st->bringup = !bringup;
 752
 753                __cpuhp_kick_ap(st);
 754        }
 755
 756        /*
  757         * Clean up the leftovers so the next hotplug operation won't use stale
 758         * data.
 759         */
 760        st->node = st->last = NULL;
 761        return ret;
 762}
 763
 764static int cpuhp_kick_ap_work(unsigned int cpu)
 765{
 766        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 767        enum cpuhp_state prev_state = st->state;
 768        int ret;
 769
 770        cpuhp_lock_acquire(false);
 771        cpuhp_lock_release(false);
 772
 773        cpuhp_lock_acquire(true);
 774        cpuhp_lock_release(true);
 775
 776        trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
 777        ret = cpuhp_kick_ap(st, st->target);
 778        trace_cpuhp_exit(cpu, st->state, prev_state, ret);
 779
 780        return ret;
 781}
 782
 783static struct smp_hotplug_thread cpuhp_threads = {
 784        .store                  = &cpuhp_state.thread,
 785        .create                 = &cpuhp_create,
 786        .thread_should_run      = cpuhp_should_run,
 787        .thread_fn              = cpuhp_thread_fun,
 788        .thread_comm            = "cpuhp/%u",
 789        .selfparking            = true,
 790};
 791
 792void __init cpuhp_threads_init(void)
 793{
 794        BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
 795        kthread_unpark(this_cpu_read(cpuhp_state.thread));
 796}
 797
 798#ifdef CONFIG_HOTPLUG_CPU
 799#ifndef arch_clear_mm_cpumask_cpu
 800#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
 801#endif
 802
 803/**
 804 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 805 * @cpu: a CPU id
 806 *
 807 * This function walks all processes, finds a valid mm struct for each one and
 808 * then clears a corresponding bit in mm's cpumask.  While this all sounds
 809 * trivial, there are various non-obvious corner cases, which this function
 810 * tries to solve in a safe manner.
 811 *
 812 * Also note that the function uses a somewhat relaxed locking scheme, so it may
 813 * be called only for an already offlined CPU.
 814 */
 815void clear_tasks_mm_cpumask(int cpu)
 816{
 817        struct task_struct *p;
 818
 819        /*
 820         * This function is called after the cpu is taken down and marked
  821         * offline, so it's not like new tasks will ever get this cpu set in
 822         * their mm mask. -- Peter Zijlstra
 823         * Thus, we may use rcu_read_lock() here, instead of grabbing
 824         * full-fledged tasklist_lock.
 825         */
 826        WARN_ON(cpu_online(cpu));
 827        rcu_read_lock();
 828        for_each_process(p) {
 829                struct task_struct *t;
 830
 831                /*
 832                 * Main thread might exit, but other threads may still have
 833                 * a valid mm. Find one.
 834                 */
 835                t = find_lock_task_mm(p);
 836                if (!t)
 837                        continue;
 838                arch_clear_mm_cpumask_cpu(cpu, t->mm);
 839                task_unlock(t);
 840        }
 841        rcu_read_unlock();
 842}
 843
 844/* Take this CPU down. */
 845static int take_cpu_down(void *_param)
 846{
 847        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 848        enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
 849        int err, cpu = smp_processor_id();
 850        int ret;
 851
 852        /* Ensure this CPU doesn't handle any more interrupts. */
 853        err = __cpu_disable();
 854        if (err < 0)
 855                return err;
 856
 857        /*
 858         * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
 859         * do this step again.
 860         */
 861        WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
 862        st->state--;
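        /*
         * Note: @target was clamped to CPUHP_AP_OFFLINE above; states below
         * that are torn down by the control CPU once this CPU is dead.
         */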
 863        /* Invoke the former CPU_DYING callbacks */
 864        for (; st->state > target; st->state--) {
 865                ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
 866                /*
 867                 * DYING must not fail!
 868                 */
 869                WARN_ON_ONCE(ret);
 870        }
 871
 872        /* Give up timekeeping duties */
 873        tick_handover_do_timer();
 874        /* Remove CPU from timer broadcasting */
 875        tick_offline_cpu(cpu);
 876        /* Park the stopper thread */
 877        stop_machine_park(cpu);
 878        return 0;
 879}
 880
 881static int takedown_cpu(unsigned int cpu)
 882{
 883        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 884        int err;
 885
 886        /* Park the smpboot threads */
 887        kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
 888
 889        /*
 890         * Prevent irq alloc/free while the dying cpu reorganizes the
 891         * interrupt affinities.
 892         */
 893        irq_lock_sparse();
 894
 895        /*
 896         * So now all preempt/rcu users must observe !cpu_active().
 897         */
 898        err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
 899        if (err) {
 900                /* CPU refused to die */
 901                irq_unlock_sparse();
 902                /* Unpark the hotplug thread so we can rollback there */
 903                kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
 904                return err;
 905        }
 906        BUG_ON(cpu_online(cpu));
 907
 908        /*
 909         * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
  910         * all runnable tasks from the CPU; there's only the idle task left now
 911         * that the migration thread is done doing the stop_machine thing.
 912         *
 913         * Wait for the stop thread to go away.
 914         */
 915        wait_for_ap_thread(st, false);
 916        BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
 917
 918        /* Interrupts are moved away from the dying cpu, reenable alloc/free */
 919        irq_unlock_sparse();
 920
 921        hotplug_cpu__broadcast_tick_pull(cpu);
 922        /* This actually kills the CPU. */
 923        __cpu_die(cpu);
 924
 925        tick_cleanup_dead_cpu(cpu);
 926        rcutree_migrate_callbacks(cpu);
 927        return 0;
 928}
 929
 930static void cpuhp_complete_idle_dead(void *arg)
 931{
 932        struct cpuhp_cpu_state *st = arg;
 933
 934        complete_ap_thread(st, false);
 935}
 936
 937void cpuhp_report_idle_dead(void)
 938{
 939        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 940
 941        BUG_ON(st->state != CPUHP_AP_OFFLINE);
 942        rcu_report_dead(smp_processor_id());
 943        st->state = CPUHP_AP_IDLE_DEAD;
 944        /*
 945         * We cannot call complete after rcu_report_dead() so we delegate it
 946         * to an online cpu.
 947         */
 948        smp_call_function_single(cpumask_first(cpu_online_mask),
 949                                 cpuhp_complete_idle_dead, st, 0);
 950}
 951
 952static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
 953{
 954        for (st->state++; st->state < st->target; st->state++)
 955                cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
 956}
 957
 958static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 959                                enum cpuhp_state target)
 960{
 961        enum cpuhp_state prev_state = st->state;
 962        int ret = 0;
 963
 964        for (; st->state > target; st->state--) {
 965                ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
 966                if (ret) {
 967                        st->target = prev_state;
 968                        if (st->state < prev_state)
 969                                undo_cpu_down(cpu, st);
 970                        break;
 971                }
 972        }
 973        return ret;
 974}
 975
 976/* Requires cpu_add_remove_lock to be held */
 977static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 978                           enum cpuhp_state target)
 979{
 980        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 981        int prev_state, ret = 0;
 982
 983        if (num_online_cpus() == 1)
 984                return -EBUSY;
 985
 986        if (!cpu_present(cpu))
 987                return -EINVAL;
 988
 989        cpus_write_lock();
 990
 991        cpuhp_tasks_frozen = tasks_frozen;
 992
 993        prev_state = cpuhp_set_state(st, target);
 994        /*
 995         * If the current CPU state is in the range of the AP hotplug thread,
 996         * then we need to kick the thread.
 997         */
 998        if (st->state > CPUHP_TEARDOWN_CPU) {
 999                st->target = max((int)target, CPUHP_TEARDOWN_CPU);
1000                ret = cpuhp_kick_ap_work(cpu);
1001                /*
1002                 * The AP side has done the error rollback already. Just
 1003                 * return the error code.
1004                 */
1005                if (ret)
1006                        goto out;
1007
1008                /*
1009                 * We might have stopped still in the range of the AP hotplug
1010                 * thread. Nothing to do anymore.
1011                 */
1012                if (st->state > CPUHP_TEARDOWN_CPU)
1013                        goto out;
1014
1015                st->target = target;
1016        }
1017        /*
1018         * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
1019         * to do the further cleanups.
1020         */
1021        ret = cpuhp_down_callbacks(cpu, st, target);
1022        if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
1023                cpuhp_reset_state(st, prev_state);
1024                __cpuhp_kick_ap(st);
1025        }
1026
1027out:
1028        cpus_write_unlock();
1029        /*
1030         * Do post unplug cleanup. This is still protected against
1031         * concurrent CPU hotplug via cpu_add_remove_lock.
1032         */
1033        lockup_detector_cleanup();
1034        arch_smt_update();
1035        return ret;
1036}
1037
1038static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1039{
1040        if (cpu_hotplug_disabled)
1041                return -EBUSY;
1042        return _cpu_down(cpu, 0, target);
1043}
1044
1045static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
1046{
1047        int err;
1048
1049        cpu_maps_update_begin();
1050        err = cpu_down_maps_locked(cpu, target);
1051        cpu_maps_update_done();
1052        return err;
1053}
1054
1055int cpu_down(unsigned int cpu)
1056{
1057        return do_cpu_down(cpu, CPUHP_OFFLINE);
1058}
1059EXPORT_SYMBOL(cpu_down);
1060
1061int remove_cpu(unsigned int cpu)
1062{
1063        int ret;
1064
1065        lock_device_hotplug();
1066        ret = device_offline(get_cpu_device(cpu));
1067        unlock_device_hotplug();
1068
1069        return ret;
1070}
1071EXPORT_SYMBOL_GPL(remove_cpu);
1072
1073#else
1074#define takedown_cpu            NULL
1075#endif /*CONFIG_HOTPLUG_CPU*/
1076
1077/**
1078 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
1079 * @cpu: cpu that just started
1080 *
1081 * It must be called by the arch code on the new cpu, before the new cpu
1082 * enables interrupts and before the "boot" cpu returns from __cpu_up().
1083 */
1084void notify_cpu_starting(unsigned int cpu)
1085{
1086        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1087        enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1088        int ret;
1089
1090        rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
1091        st->booted_once = true;
1092        while (st->state < target) {
1093                st->state++;
1094                ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
1095                /*
1096                 * STARTING must not fail!
1097                 */
1098                WARN_ON_ONCE(ret);
1099        }
1100}
1101
1102/*
1103 * Called from the idle task. Wake up the controlling task which brings the
1104 * stopper and the hotplug thread of the upcoming CPU up and then delegates
1105 * the rest of the online bringup to the hotplug thread.
1106 */
1107void cpuhp_online_idle(enum cpuhp_state state)
1108{
1109        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1110
1111        /* Happens for the boot cpu */
1112        if (state != CPUHP_AP_ONLINE_IDLE)
1113                return;
1114
1115        st->state = CPUHP_AP_ONLINE_IDLE;
1116        complete_ap_thread(st, true);
1117}
1118
1119/* Requires cpu_add_remove_lock to be held */
1120static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1121{
1122        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1123        struct task_struct *idle;
1124        int ret = 0;
1125
1126        cpus_write_lock();
1127
1128        if (!cpu_present(cpu)) {
1129                ret = -EINVAL;
1130                goto out;
1131        }
1132
1133        /*
1134         * The caller of do_cpu_up might have raced with another
1135         * caller. Ignore it for now.
1136         */
1137        if (st->state >= target)
1138                goto out;
1139
1140        if (st->state == CPUHP_OFFLINE) {
1141                /* Let it fail before we try to bring the cpu up */
1142                idle = idle_thread_get(cpu);
1143                if (IS_ERR(idle)) {
1144                        ret = PTR_ERR(idle);
1145                        goto out;
1146                }
1147        }
1148
1149        cpuhp_tasks_frozen = tasks_frozen;
1150
1151        cpuhp_set_state(st, target);
1152        /*
1153         * If the current CPU state is in the range of the AP hotplug thread,
1154         * then we need to kick the thread once more.
1155         */
1156        if (st->state > CPUHP_BRINGUP_CPU) {
1157                ret = cpuhp_kick_ap_work(cpu);
1158                /*
1159                 * The AP side has done the error rollback already. Just
 1160                 * return the error code.
1161                 */
1162                if (ret)
1163                        goto out;
1164        }
1165
1166        /*
1167         * Try to reach the target state. We max out on the BP at
1168         * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1169         * responsible for bringing it up to the target state.
1170         */
1171        target = min((int)target, CPUHP_BRINGUP_CPU);
1172        ret = cpuhp_up_callbacks(cpu, st, target);
1173out:
1174        cpus_write_unlock();
1175        arch_smt_update();
1176        return ret;
1177}
1178
1179static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
1180{
1181        int err = 0;
1182
1183        if (!cpu_possible(cpu)) {
1184                pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
1185                       cpu);
1186#if defined(CONFIG_IA64)
1187                pr_err("please check additional_cpus= boot parameter\n");
1188#endif
1189                return -EINVAL;
1190        }
1191
1192        err = try_online_node(cpu_to_node(cpu));
1193        if (err)
1194                return err;
1195
1196        cpu_maps_update_begin();
1197
1198        if (cpu_hotplug_disabled) {
1199                err = -EBUSY;
1200                goto out;
1201        }
1202        if (!cpu_smt_allowed(cpu)) {
1203                err = -EPERM;
1204                goto out;
1205        }
1206
1207        err = _cpu_up(cpu, 0, target);
1208out:
1209        cpu_maps_update_done();
1210        return err;
1211}
1212
1213int cpu_up(unsigned int cpu)
1214{
1215        return do_cpu_up(cpu, CPUHP_ONLINE);
1216}
1217EXPORT_SYMBOL_GPL(cpu_up);
1218
1219int add_cpu(unsigned int cpu)
1220{
1221        int ret;
1222
1223        lock_device_hotplug();
1224        ret = device_online(get_cpu_device(cpu));
1225        unlock_device_hotplug();
1226
1227        return ret;
1228}
1229EXPORT_SYMBOL_GPL(add_cpu);
1230
1231#ifdef CONFIG_PM_SLEEP_SMP
1232static cpumask_var_t frozen_cpus;
1233
1234int freeze_secondary_cpus(int primary)
1235{
1236        int cpu, error = 0;
1237
1238        cpu_maps_update_begin();
1239        if (primary == -1) {
1240                primary = cpumask_first(cpu_online_mask);
1241                if (!housekeeping_cpu(primary, HK_FLAG_TIMER))
1242                        primary = housekeeping_any_cpu(HK_FLAG_TIMER);
1243        } else {
1244                if (!cpu_online(primary))
1245                        primary = cpumask_first(cpu_online_mask);
1246        }
1247
1248        /*
1249         * We take down all of the non-boot CPUs in one shot to avoid races
 1250         * with userspace trying to use CPU hotplug at the same time.
1251         */
1252        cpumask_clear(frozen_cpus);
1253
1254        pr_info("Disabling non-boot CPUs ...\n");
1255        for_each_online_cpu(cpu) {
1256                if (cpu == primary)
1257                        continue;
1258                trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1259                error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1260                trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1261                if (!error)
1262                        cpumask_set_cpu(cpu, frozen_cpus);
1263                else {
1264                        pr_err("Error taking CPU%d down: %d\n", cpu, error);
1265                        break;
1266                }
1267        }
1268
1269        if (!error)
1270                BUG_ON(num_online_cpus() > 1);
1271        else
1272                pr_err("Non-boot CPUs are not disabled\n");
1273
1274        /*
1275         * Make sure the CPUs won't be enabled by someone else. We need to do
1276         * this even in case of failure as all disable_nonboot_cpus() users are
1277         * supposed to do enable_nonboot_cpus() on the failure path.
1278         */
1279        cpu_hotplug_disabled++;
1280
1281        cpu_maps_update_done();
1282        return error;
1283}
1284
1285void __weak arch_enable_nonboot_cpus_begin(void)
1286{
1287}
1288
1289void __weak arch_enable_nonboot_cpus_end(void)
1290{
1291}
1292
1293void enable_nonboot_cpus(void)
1294{
1295        int cpu, error;
1296
1297        /* Allow everyone to use the CPU hotplug again */
1298        cpu_maps_update_begin();
1299        __cpu_hotplug_enable();
1300        if (cpumask_empty(frozen_cpus))
1301                goto out;
1302
1303        pr_info("Enabling non-boot CPUs ...\n");
1304
1305        arch_enable_nonboot_cpus_begin();
1306
1307        for_each_cpu(cpu, frozen_cpus) {
1308                trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1309                error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1310                trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1311                if (!error) {
1312                        pr_info("CPU%d is up\n", cpu);
1313                        continue;
1314                }
1315                pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1316        }
1317
1318        arch_enable_nonboot_cpus_end();
1319
1320        cpumask_clear(frozen_cpus);
1321out:
1322        cpu_maps_update_done();
1323}
1324
1325static int __init alloc_frozen_cpus(void)
1326{
1327        if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1328                return -ENOMEM;
1329        return 0;
1330}
1331core_initcall(alloc_frozen_cpus);
1332
1333/*
1334 * When callbacks for CPU hotplug notifications are being executed, we must
1335 * ensure that the state of the system with respect to the tasks being frozen
1336 * or not, as reported by the notification, remains unchanged *throughout the
1337 * duration* of the execution of the callbacks.
1338 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1339 *
1340 * This synchronization is implemented by mutually excluding regular CPU
1341 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1342 * Hibernate notifications.
1343 */
1344static int
1345cpu_hotplug_pm_callback(struct notifier_block *nb,
1346                        unsigned long action, void *ptr)
1347{
1348        switch (action) {
1349
1350        case PM_SUSPEND_PREPARE:
1351        case PM_HIBERNATION_PREPARE:
1352                cpu_hotplug_disable();
1353                break;
1354
1355        case PM_POST_SUSPEND:
1356        case PM_POST_HIBERNATION:
1357                cpu_hotplug_enable();
1358                break;
1359
1360        default:
1361                return NOTIFY_DONE;
1362        }
1363
1364        return NOTIFY_OK;
1365}
1366
1367
1368static int __init cpu_hotplug_pm_sync_init(void)
1369{
1370        /*
1371         * cpu_hotplug_pm_callback has higher priority than x86
1372         * bsp_pm_callback which depends on cpu_hotplug_pm_callback
1373         * to disable cpu hotplug to avoid cpu hotplug race.
1374         */
1375        pm_notifier(cpu_hotplug_pm_callback, 0);
1376        return 0;
1377}
1378core_initcall(cpu_hotplug_pm_sync_init);
1379
1380#endif /* CONFIG_PM_SLEEP_SMP */
1381
1382int __boot_cpu_id;
1383
1384#endif /* CONFIG_SMP */
1385
1386/* Boot processor state steps */
1387static struct cpuhp_step cpuhp_hp_states[] = {
1388        [CPUHP_OFFLINE] = {
1389                .name                   = "offline",
1390                .startup.single         = NULL,
1391                .teardown.single        = NULL,
1392        },
1393#ifdef CONFIG_SMP
 1394        [CPUHP_CREATE_THREADS] = {
1395                .name                   = "threads:prepare",
1396                .startup.single         = smpboot_create_threads,
1397                .teardown.single        = NULL,
1398                .cant_stop              = true,
1399        },
1400        [CPUHP_PERF_PREPARE] = {
1401                .name                   = "perf:prepare",
1402                .startup.single         = perf_event_init_cpu,
1403                .teardown.single        = perf_event_exit_cpu,
1404        },
1405        [CPUHP_WORKQUEUE_PREP] = {
1406                .name                   = "workqueue:prepare",
1407                .startup.single         = workqueue_prepare_cpu,
1408                .teardown.single        = NULL,
1409        },
1410        [CPUHP_HRTIMERS_PREPARE] = {
1411                .name                   = "hrtimers:prepare",
1412                .startup.single         = hrtimers_prepare_cpu,
1413                .teardown.single        = hrtimers_dead_cpu,
1414        },
1415        [CPUHP_SMPCFD_PREPARE] = {
1416                .name                   = "smpcfd:prepare",
1417                .startup.single         = smpcfd_prepare_cpu,
1418                .teardown.single        = smpcfd_dead_cpu,
1419        },
1420        [CPUHP_RELAY_PREPARE] = {
1421                .name                   = "relay:prepare",
1422                .startup.single         = relay_prepare_cpu,
1423                .teardown.single        = NULL,
1424        },
1425        [CPUHP_SLAB_PREPARE] = {
1426                .name                   = "slab:prepare",
1427                .startup.single         = slab_prepare_cpu,
1428                .teardown.single        = slab_dead_cpu,
1429        },
1430        [CPUHP_RCUTREE_PREP] = {
1431                .name                   = "RCU/tree:prepare",
1432                .startup.single         = rcutree_prepare_cpu,
1433                .teardown.single        = rcutree_dead_cpu,
1434        },
1435        /*
1436         * On the tear-down path, timers_dead_cpu() must be invoked
1437         * before blk_mq_queue_reinit_notify() from notify_dead(),
 1438         * otherwise an RCU stall occurs.
1439         */
1440        [CPUHP_TIMERS_PREPARE] = {
1441                .name                   = "timers:dead",
1442                .startup.single         = timers_prepare_cpu,
1443                .teardown.single        = timers_dead_cpu,
1444        },
1445        /* Kicks the plugged cpu into life */
1446        [CPUHP_BRINGUP_CPU] = {
1447                .name                   = "cpu:bringup",
1448                .startup.single         = bringup_cpu,
1449                .teardown.single        = finish_cpu,
1450                .cant_stop              = true,
1451        },
1452        /* Final state before CPU kills itself */
1453        [CPUHP_AP_IDLE_DEAD] = {
1454                .name                   = "idle:dead",
1455        },
1456        /*
1457         * Last state before CPU enters the idle loop to die. Transient state
1458         * for synchronization.
1459         */
1460        [CPUHP_AP_OFFLINE] = {
1461                .name                   = "ap:offline",
1462                .cant_stop              = true,
1463        },
1464        /* First state is scheduler control. Interrupts are disabled */
1465        [CPUHP_AP_SCHED_STARTING] = {
1466                .name                   = "sched:starting",
1467                .startup.single         = sched_cpu_starting,
1468                .teardown.single        = sched_cpu_dying,
1469        },
1470        [CPUHP_AP_RCUTREE_DYING] = {
1471                .name                   = "RCU/tree:dying",
1472                .startup.single         = NULL,
1473                .teardown.single        = rcutree_dying_cpu,
1474        },
1475        [CPUHP_AP_SMPCFD_DYING] = {
1476                .name                   = "smpcfd:dying",
1477                .startup.single         = NULL,
1478                .teardown.single        = smpcfd_dying_cpu,
1479        },
1480        /* Entry state on starting. Interrupts enabled from here on. Transient
 1481         * state for synchronization */
1482        [CPUHP_AP_ONLINE] = {
1483                .name                   = "ap:online",
1484        },
1485        /*
 1486         * Handled on the control processor until the plugged processor manages
1487         * this itself.
1488         */
1489        [CPUHP_TEARDOWN_CPU] = {
1490                .name                   = "cpu:teardown",
1491                .startup.single         = NULL,
1492                .teardown.single        = takedown_cpu,
1493                .cant_stop              = true,
1494        },
1495        /* Handle smpboot threads park/unpark */
1496        [CPUHP_AP_SMPBOOT_THREADS] = {
1497                .name                   = "smpboot/threads:online",
1498                .startup.single         = smpboot_unpark_threads,
1499                .teardown.single        = smpboot_park_threads,
1500        },
1501        [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
1502                .name                   = "irq/affinity:online",
1503                .startup.single         = irq_affinity_online_cpu,
1504                .teardown.single        = NULL,
1505        },
1506        [CPUHP_AP_PERF_ONLINE] = {
1507                .name                   = "perf:online",
1508                .startup.single         = perf_event_init_cpu,
1509                .teardown.single        = perf_event_exit_cpu,
1510        },
1511        [CPUHP_AP_WORKQUEUE_ONLINE] = {
1512                .name                   = "workqueue:online",
1513                .startup.single         = workqueue_online_cpu,
1514                .teardown.single        = workqueue_offline_cpu,
1515        },
1516        [CPUHP_AP_RCUTREE_ONLINE] = {
1517                .name                   = "RCU/tree:online",
1518                .startup.single         = rcutree_online_cpu,
1519                .teardown.single        = rcutree_offline_cpu,
1520        },
1521#endif
1522        /*
1523         * The dynamically registered state space is here
1524         */
1525
1526#ifdef CONFIG_SMP
1527        /* Last state is scheduler control setting the cpu active */
1528        [CPUHP_AP_ACTIVE] = {
1529                .name                   = "sched:active",
1530                .startup.single         = sched_cpu_activate,
1531                .teardown.single        = sched_cpu_deactivate,
1532        },
1533#endif
1534
1535        /* CPU is fully up and running. */
1536        [CPUHP_ONLINE] = {
1537                .name                   = "online",
1538                .startup.single         = NULL,
1539                .teardown.single        = NULL,
1540        },
1541};
1542
1543/* Sanity check for callbacks */
1544static int cpuhp_cb_check(enum cpuhp_state state)
1545{
1546        if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
1547                return -EINVAL;
1548        return 0;
1549}
1550
1551/*
 1552 * Returns a free slot in the requested dynamic state range. The states
 1553 * are protected by the cpuhp_state_mutex and an empty slot is identified
1554 * by having no name assigned.
1555 */
1556static int cpuhp_reserve_state(enum cpuhp_state state)
1557{
1558        enum cpuhp_state i, end;
1559        struct cpuhp_step *step;
1560
1561        switch (state) {
1562        case CPUHP_AP_ONLINE_DYN:
1563                step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
1564                end = CPUHP_AP_ONLINE_DYN_END;
1565                break;
1566        case CPUHP_BP_PREPARE_DYN:
1567                step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
1568                end = CPUHP_BP_PREPARE_DYN_END;
1569                break;
1570        default:
1571                return -EINVAL;
1572        }
1573
1574        for (i = state; i <= end; i++, step++) {
1575                if (!step->name)
1576                        return i;
1577        }
1578        WARN(1, "No more dynamic states available for CPU hotplug\n");
1579        return -ENOSPC;
1580}
1581
1582static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
1583                                 int (*startup)(unsigned int cpu),
1584                                 int (*teardown)(unsigned int cpu),
1585                                 bool multi_instance)
1586{
1587        /* (Un)Install the callbacks for further cpu hotplug operations */
1588        struct cpuhp_step *sp;
1589        int ret = 0;
1590
1591        /*
1592         * If name is NULL, then the state gets removed.
1593         *
1594         * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
1595         * the first allocation from these dynamic ranges, so the removal
1596         * would trigger a new allocation and clear the wrong (already
1597         * empty) state, leaving the callbacks of the to be cleared state
1598         * dangling, which causes wreckage on the next hotplug operation.
1599         */
1600        if (name && (state == CPUHP_AP_ONLINE_DYN ||
1601                     state == CPUHP_BP_PREPARE_DYN)) {
1602                ret = cpuhp_reserve_state(state);
1603                if (ret < 0)
1604                        return ret;
1605                state = ret;
1606        }
1607        sp = cpuhp_get_step(state);
1608        if (name && sp->name)
1609                return -EBUSY;
1610
1611        sp->startup.single = startup;
1612        sp->teardown.single = teardown;
1613        sp->name = name;
1614        sp->multi_instance = multi_instance;
1615        INIT_HLIST_HEAD(&sp->list);
1616        return ret;
1617}
1618
1619static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1620{
1621        return cpuhp_get_step(state)->teardown.single;
1622}
1623
1624/*
1625 * Call the startup/teardown function for a step either on the AP or
1626 * on the current CPU.
1627 */
1628static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1629                            struct hlist_node *node)
1630{
1631        struct cpuhp_step *sp = cpuhp_get_step(state);
1632        int ret;
1633
1634        /*
1635         * If there's nothing to do, we're done.
1636         * Relies on the union for multi_instance.
1637         */
1638        if ((bringup && !sp->startup.single) ||
1639            (!bringup && !sp->teardown.single))
1640                return 0;
1641        /*
1642         * The non-AP-bound callbacks can fail on bringup. On teardown,
1643         * e.g. module removal, we crash for now.
1644         */
1645#ifdef CONFIG_SMP
1646        if (cpuhp_is_ap_state(state))
1647                ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1648        else
1649                ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1650#else
1651        ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1652#endif
1653        BUG_ON(ret && !bringup);
1654        return ret;
1655}
1656
1657/*
1658 * Called from __cpuhp_setup_state on a recoverable failure.
1659 *
1660 * Note: The teardown callbacks for rollback are not allowed to fail!
1661 */
1662static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
1663                                   struct hlist_node *node)
1664{
1665        int cpu;
1666
1667        /* Roll back the already executed steps on the other cpus */
1668        for_each_present_cpu(cpu) {
1669                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1670                int cpustate = st->state;
1671
1672                if (cpu >= failedcpu)
1673                        break;
1674
1675                /* Did we invoke the startup call on that cpu? */
1676                if (cpustate >= state)
1677                        cpuhp_issue_call(cpu, state, false, node);
1678        }
1679}
1680
1681int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
1682                                          struct hlist_node *node,
1683                                          bool invoke)
1684{
1685        struct cpuhp_step *sp;
1686        int cpu;
1687        int ret;
1688
1689        lockdep_assert_cpus_held();
1690
1691        sp = cpuhp_get_step(state);
1692        if (sp->multi_instance == false)
1693                return -EINVAL;
1694
1695        mutex_lock(&cpuhp_state_mutex);
1696
1697        if (!invoke || !sp->startup.multi)
1698                goto add_node;
1699
1700        /*
1701         * Try to call the startup callback for each present cpu
1702         * depending on the hotplug state of the cpu.
1703         */
1704        for_each_present_cpu(cpu) {
1705                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1706                int cpustate = st->state;
1707
1708                if (cpustate < state)
1709                        continue;
1710
1711                ret = cpuhp_issue_call(cpu, state, true, node);
1712                if (ret) {
1713                        if (sp->teardown.multi)
1714                                cpuhp_rollback_install(cpu, state, node);
1715                        goto unlock;
1716                }
1717        }
1718add_node:
1719        ret = 0;
1720        hlist_add_head(node, &sp->list);
1721unlock:
1722        mutex_unlock(&cpuhp_state_mutex);
1723        return ret;
1724}
1725
1726int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
1727                               bool invoke)
1728{
1729        int ret;
1730
1731        cpus_read_lock();
1732        ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
1733        cpus_read_unlock();
1734        return ret;
1735}
1736EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
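/*
 * Editorial example (not part of the original file): a typical consumer of
 * the multi-instance machinery registers one state with
 * cpuhp_setup_state_multi() and then adds one hlist_node per device
 * instance via cpuhp_state_add_instance(). The mydrv_* names below are
 * hypothetical; only the cpuhp_*() calls are existing API.
 *
 *	struct mydrv_instance {
 *		struct hlist_node node;	// owned by the cpuhp core
 *		void *priv;
 *	};
 *
 *	static enum cpuhp_state mydrv_state;
 *
 *	static int mydrv_cpu_online(unsigned int cpu, struct hlist_node *node)
 *	{
 *		struct mydrv_instance *inst =
 *			hlist_entry(node, struct mydrv_instance, node);
 *
 *		// per-cpu setup for this instance; may fail on bringup
 *		return 0;
 *	}
 *
 *	// once, e.g. at module init:
 *	int ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "mydrv:online",
 *					  mydrv_cpu_online, NULL);
 *	if (ret < 0)
 *		return ret;
 *	mydrv_state = ret;
 *
 *	// later, per device instance:
 *	ret = cpuhp_state_add_instance(mydrv_state, &inst->node);
 */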
1737
1738/**
1739 * __cpuhp_setup_state_cpuslocked - Set up the callbacks for a hotplug machine state
1740 * @state:              The state to set up
1741 * @invoke:             If true, the startup function is invoked for cpus where
1742 *                      cpu state >= @state
1743 * @startup:            startup callback function
1744 * @teardown:           teardown callback function
1745 * @multi_instance:     State is set up for multiple instances which get
1746 *                      added afterwards.
1747 *
1748 * The caller needs to hold cpus read locked while calling this function.
1749 * Returns:
1750 *   On success:
1751 *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
1752 *      0 for all other states
1753 *   On failure: proper (negative) error code
1754 */
1755int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
1756                                   const char *name, bool invoke,
1757                                   int (*startup)(unsigned int cpu),
1758                                   int (*teardown)(unsigned int cpu),
1759                                   bool multi_instance)
1760{
1761        int cpu, ret = 0;
1762        bool dynstate;
1763
1764        lockdep_assert_cpus_held();
1765
1766        if (cpuhp_cb_check(state) || !name)
1767                return -EINVAL;
1768
1769        mutex_lock(&cpuhp_state_mutex);
1770
1771        ret = cpuhp_store_callbacks(state, name, startup, teardown,
1772                                    multi_instance);
1773
1774        dynstate = state == CPUHP_AP_ONLINE_DYN;
1775        if (ret > 0 && dynstate) {
1776                state = ret;
1777                ret = 0;
1778        }
1779
1780        if (ret || !invoke || !startup)
1781                goto out;
1782
1783        /*
1784         * Try to call the startup callback for each present cpu
1785         * depending on the hotplug state of the cpu.
1786         */
1787        for_each_present_cpu(cpu) {
1788                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1789                int cpustate = st->state;
1790
1791                if (cpustate < state)
1792                        continue;
1793
1794                ret = cpuhp_issue_call(cpu, state, true, NULL);
1795                if (ret) {
1796                        if (teardown)
1797                                cpuhp_rollback_install(cpu, state, NULL);
1798                        cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1799                        goto out;
1800                }
1801        }
1802out:
1803        mutex_unlock(&cpuhp_state_mutex);
1804        /*
1805         * If the requested state is CPUHP_AP_ONLINE_DYN, return the
1806         * dynamically allocated state in case of success.
1807         */
1808        if (!ret && dynstate)
1809                return state;
1810        return ret;
1811}
1812EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
1813
1814int __cpuhp_setup_state(enum cpuhp_state state,
1815                        const char *name, bool invoke,
1816                        int (*startup)(unsigned int cpu),
1817                        int (*teardown)(unsigned int cpu),
1818                        bool multi_instance)
1819{
1820        int ret;
1821
1822        cpus_read_lock();
1823        ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
1824                                             teardown, multi_instance);
1825        cpus_read_unlock();
1826        return ret;
1827}
1828EXPORT_SYMBOL(__cpuhp_setup_state);
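/*
 * Editorial example (not part of the original file): most callers go
 * through the cpuhp_setup_state() wrapper from <linux/cpuhotplug.h>, which
 * calls the function above with invoke == true. For CPUHP_AP_ONLINE_DYN
 * the positive return value is the dynamically allocated state and must be
 * saved for later removal. The mydrv_* identifiers are hypothetical.
 *
 *	static enum cpuhp_state mydrv_online_state;
 *
 *	static int mydrv_cpu_online(unsigned int cpu)
 *	{
 *		// bring up per-cpu resources; may fail
 *		return 0;
 *	}
 *
 *	static int mydrv_cpu_offline(unsigned int cpu)
 *	{
 *		// tear down per-cpu resources; must not fail
 *		return 0;
 *	}
 *
 *	static int __init mydrv_init(void)
 *	{
 *		int ret;
 *
 *		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mydrv:online",
 *					mydrv_cpu_online, mydrv_cpu_offline);
 *		if (ret < 0)
 *			return ret;
 *		mydrv_online_state = ret;
 *		return 0;
 *	}
 */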
1829
1830int __cpuhp_state_remove_instance(enum cpuhp_state state,
1831                                  struct hlist_node *node, bool invoke)
1832{
1833        struct cpuhp_step *sp = cpuhp_get_step(state);
1834        int cpu;
1835
1836        BUG_ON(cpuhp_cb_check(state));
1837
1838        if (!sp->multi_instance)
1839                return -EINVAL;
1840
1841        cpus_read_lock();
1842        mutex_lock(&cpuhp_state_mutex);
1843
1844        if (!invoke || !cpuhp_get_teardown_cb(state))
1845                goto remove;
1846        /*
1847         * Call the teardown callback for each present cpu depending
1848         * on the hotplug state of the cpu. This function is not
1849         * allowed to fail currently!
1850         */
1851        for_each_present_cpu(cpu) {
1852                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1853                int cpustate = st->state;
1854
1855                if (cpustate >= state)
1856                        cpuhp_issue_call(cpu, state, false, node);
1857        }
1858
1859remove:
1860        hlist_del(node);
1861        mutex_unlock(&cpuhp_state_mutex);
1862        cpus_read_unlock();
1863
1864        return 0;
1865}
1866EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
1867
1868/**
1869 * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
1870 * @state:      The state to remove
1871 * @invoke:     If true, the teardown function is invoked for cpus where
1872 *              cpu state >= @state
1873 *
1874 * The caller needs to hold cpus read locked while calling this function.
1875 * The teardown callback is currently not allowed to fail. Think
1876 * about module removal!
1877 */
1878void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
1879{
1880        struct cpuhp_step *sp = cpuhp_get_step(state);
1881        int cpu;
1882
1883        BUG_ON(cpuhp_cb_check(state));
1884
1885        lockdep_assert_cpus_held();
1886
1887        mutex_lock(&cpuhp_state_mutex);
1888        if (sp->multi_instance) {
1889                WARN(!hlist_empty(&sp->list),
1890                     "Error: Removing state %d which has instances left.\n",
1891                     state);
1892                goto remove;
1893        }
1894
1895        if (!invoke || !cpuhp_get_teardown_cb(state))
1896                goto remove;
1897
1898        /*
1899         * Call the teardown callback for each present cpu depending
1900         * on the hotplug state of the cpu. This function is not
1901         * allowed to fail currently!
1902         */
1903        for_each_present_cpu(cpu) {
1904                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1905                int cpustate = st->state;
1906
1907                if (cpustate >= state)
1908                        cpuhp_issue_call(cpu, state, false, NULL);
1909        }
1910remove:
1911        cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1912        mutex_unlock(&cpuhp_state_mutex);
1913}
1914EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
1915
1916void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
1917{
1918        cpus_read_lock();
1919        __cpuhp_remove_state_cpuslocked(state, invoke);
1920        cpus_read_unlock();
1921}
1922EXPORT_SYMBOL(__cpuhp_remove_state);
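/*
 * Editorial example (not part of the original file): the matching
 * cpuhp_remove_state() wrapper maps to __cpuhp_remove_state(state, true),
 * so the teardown callback runs on every CPU whose state is at or above
 * the removed one. mydrv_online_state is the hypothetical value saved at
 * setup time in the sketch above.
 *
 *	static void __exit mydrv_exit(void)
 *	{
 *		cpuhp_remove_state(mydrv_online_state);
 *	}
 */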
1923
1924#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
1925static ssize_t show_cpuhp_state(struct device *dev,
1926                                struct device_attribute *attr, char *buf)
1927{
1928        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1929
1930        return sprintf(buf, "%d\n", st->state);
1931}
1932static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
1933
1934static ssize_t write_cpuhp_target(struct device *dev,
1935                                  struct device_attribute *attr,
1936                                  const char *buf, size_t count)
1937{
1938        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1939        struct cpuhp_step *sp;
1940        int target, ret;
1941
1942        ret = kstrtoint(buf, 10, &target);
1943        if (ret)
1944                return ret;
1945
1946#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
1947        if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
1948                return -EINVAL;
1949#else
1950        if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
1951                return -EINVAL;
1952#endif
1953
1954        ret = lock_device_hotplug_sysfs();
1955        if (ret)
1956                return ret;
1957
1958        mutex_lock(&cpuhp_state_mutex);
1959        sp = cpuhp_get_step(target);
1960        ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
1961        mutex_unlock(&cpuhp_state_mutex);
1962        if (ret)
1963                goto out;
1964
1965        if (st->state < target)
1966                ret = do_cpu_up(dev->id, target);
1967        else
1968                ret = do_cpu_down(dev->id, target);
1969out:
1970        unlock_device_hotplug();
1971        return ret ? ret : count;
1972}
1973
1974static ssize_t show_cpuhp_target(struct device *dev,
1975                                 struct device_attribute *attr, char *buf)
1976{
1977        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1978
1979        return sprintf(buf, "%d\n", st->target);
1980}
1981static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
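/*
 * Editorial note: with CONFIG_HOTPLUG_CPU these per-cpu attributes show up
 * under /sys/devices/system/cpu/cpuN/hotplug/. "state" is read-only and
 * "target" accepts a state number; without CONFIG_CPU_HOTPLUG_STATE_CONTROL
 * only CPUHP_OFFLINE (0) and CPUHP_ONLINE are accepted. For example:
 *
 *	cat /sys/devices/system/cpu/cpu1/hotplug/state
 *	echo 0 > /sys/devices/system/cpu/cpu1/hotplug/target	# take cpu1 down
 */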
1982
1983
1984static ssize_t write_cpuhp_fail(struct device *dev,
1985                                struct device_attribute *attr,
1986                                const char *buf, size_t count)
1987{
1988        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1989        struct cpuhp_step *sp;
1990        int fail, ret;
1991
1992        ret = kstrtoint(buf, 10, &fail);
1993        if (ret)
1994                return ret;
1995
1996        /*
1997         * Cannot fail STARTING/DYING callbacks.
1998         */
1999        if (cpuhp_is_atomic_state(fail))
2000                return -EINVAL;
2001
2002        /*
2003         * Cannot fail anything that doesn't have callbacks.
2004         */
2005        mutex_lock(&cpuhp_state_mutex);
2006        sp = cpuhp_get_step(fail);
2007        if (!sp->startup.single && !sp->teardown.single)
2008                ret = -EINVAL;
2009        mutex_unlock(&cpuhp_state_mutex);
2010        if (ret)
2011                return ret;
2012
2013        st->fail = fail;
2014
2015        return count;
2016}
2017
2018static ssize_t show_cpuhp_fail(struct device *dev,
2019                               struct device_attribute *attr, char *buf)
2020{
2021        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2022
2023        return sprintf(buf, "%d\n", st->fail);
2024}
2025
2026static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
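/*
 * Editorial note: "fail" is a test hook. Writing a (non-atomic) state
 * number arms st->fail so that the next hotplug transition through that
 * state fails in cpuhp_invoke_callback(), which exercises the rollback
 * paths. A typical test sequence, with N taken from the global "states"
 * file, looks like:
 *
 *	echo N > /sys/devices/system/cpu/cpu1/hotplug/fail
 *	echo 0 > /sys/devices/system/cpu/cpu1/hotplug/target
 */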
2027
2028static struct attribute *cpuhp_cpu_attrs[] = {
2029        &dev_attr_state.attr,
2030        &dev_attr_target.attr,
2031        &dev_attr_fail.attr,
2032        NULL
2033};
2034
2035static const struct attribute_group cpuhp_cpu_attr_group = {
2036        .attrs = cpuhp_cpu_attrs,
2037        .name = "hotplug",
2039};
2040
2041static ssize_t show_cpuhp_states(struct device *dev,
2042                                 struct device_attribute *attr, char *buf)
2043{
2044        ssize_t cur, res = 0;
2045        int i;
2046
2047        mutex_lock(&cpuhp_state_mutex);
2048        for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
2049                struct cpuhp_step *sp = cpuhp_get_step(i);
2050
2051                if (sp->name) {
2052                        cur = sprintf(buf, "%3d: %s\n", i, sp->name);
2053                        buf += cur;
2054                        res += cur;
2055                }
2056        }
2057        mutex_unlock(&cpuhp_state_mutex);
2058        return res;
2059}
2060static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
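/*
 * Editorial note: the attribute above is registered in the cpu subsystem
 * root group, i.e. /sys/devices/system/cpu/hotplug/states. It lists every
 * named state in "%3d: %s\n" format, from "  0: offline" up to the
 * numerically last "online" entry:
 *
 *	cat /sys/devices/system/cpu/hotplug/states
 */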
2061
2062static struct attribute *cpuhp_cpu_root_attrs[] = {
2063        &dev_attr_states.attr,
2064        NULL
2065};
2066
2067static const struct attribute_group cpuhp_cpu_root_attr_group = {
2068        .attrs = cpuhp_cpu_root_attrs,
2069        .name = "hotplug",
2071};
2072
2073#ifdef CONFIG_HOTPLUG_SMT
2074
2075static void cpuhp_offline_cpu_device(unsigned int cpu)
2076{
2077        struct device *dev = get_cpu_device(cpu);
2078
2079        dev->offline = true;
2080        /* Tell user space about the state change */
2081        kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2082}
2083
2084static void cpuhp_online_cpu_device(unsigned int cpu)
2085{
2086        struct device *dev = get_cpu_device(cpu);
2087
2088        dev->offline = false;
2089        /* Tell user space about the state change */
2090        kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2091}
2092
2093static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2094{
2095        int cpu, ret = 0;
2096
2097        cpu_maps_update_begin();
2098        for_each_online_cpu(cpu) {
2099                if (topology_is_primary_thread(cpu))
2100                        continue;
2101                ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2102                if (ret)
2103                        break;
2104                /*
2105                 * As this needs to hold the cpu maps lock it's impossible
2106                 * to call device_offline() because that ends up calling
2107                 * cpu_down(), which takes the cpu maps lock. The cpu maps
2108                 * lock needs to be held as this might race against in-kernel
2109                 * abusers of the hotplug machinery (thermal management).
2110                 *
2111                 * So nothing would update the device's offline state. That
2112                 * would leave the sysfs entry stale and prevent onlining
2113                 * after smt control has been changed to 'off' again. This
2114                 * is called under the sysfs hotplug lock, so it is properly
2115                 * serialized against the regular offline usage.
2116                 */
2117                cpuhp_offline_cpu_device(cpu);
2118        }
2119        if (!ret) {
2120                cpu_smt_control = ctrlval;
2121                arch_smt_update();
2122        }
2123        cpu_maps_update_done();
2124        return ret;
2125}
2126
2127static int cpuhp_smt_enable(void)
2128{
2129        int cpu, ret = 0;
2130
2131        cpu_maps_update_begin();
2132        cpu_smt_control = CPU_SMT_ENABLED;
2133        arch_smt_update();
2134        for_each_present_cpu(cpu) {
2135                /* Skip online CPUs and CPUs on offline nodes */
2136                if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
2137                        continue;
2138                ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2139                if (ret)
2140                        break;
2141                /* See comment in cpuhp_smt_disable() */
2142                cpuhp_online_cpu_device(cpu);
2143        }
2144        cpu_maps_update_done();
2145        return ret;
2146}
2147
2148
2149static ssize_t
2150__store_smt_control(struct device *dev, struct device_attribute *attr,
2151                    const char *buf, size_t count)
2152{
2153        int ctrlval, ret;
2154
2155        if (sysfs_streq(buf, "on"))
2156                ctrlval = CPU_SMT_ENABLED;
2157        else if (sysfs_streq(buf, "off"))
2158                ctrlval = CPU_SMT_DISABLED;
2159        else if (sysfs_streq(buf, "forceoff"))
2160                ctrlval = CPU_SMT_FORCE_DISABLED;
2161        else
2162                return -EINVAL;
2163
2164        if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
2165                return -EPERM;
2166
2167        ret = lock_device_hotplug_sysfs();
2168        if (ret)
2169                return ret;
2170
2171        if (ctrlval != cpu_smt_control) {
2172                switch (ctrlval) {
2173                case CPU_SMT_ENABLED:
2174                        ret = cpuhp_smt_enable();
2175                        break;
2176                case CPU_SMT_DISABLED:
2177                case CPU_SMT_FORCE_DISABLED:
2178                        ret = cpuhp_smt_disable(ctrlval);
2179                        break;
2180                }
2181        }
2182
2183        unlock_device_hotplug();
2184        return ret ? ret : count;
2185}
2186
2187#else /* !CONFIG_HOTPLUG_SMT */
2188static ssize_t
2189__store_smt_control(struct device *dev, struct device_attribute *attr,
2190                    const char *buf, size_t count)
2191{
2192        return -ENODEV;
2193}
2194#endif /* CONFIG_HOTPLUG_SMT */
2195
2196static const char *smt_states[] = {
2197        [CPU_SMT_ENABLED]               = "on",
2198        [CPU_SMT_DISABLED]              = "off",
2199        [CPU_SMT_FORCE_DISABLED]        = "forceoff",
2200        [CPU_SMT_NOT_SUPPORTED]         = "notsupported",
2201};
2202
2203static ssize_t
2204show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
2205{
2206        return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]);
2207}
2208
2209static ssize_t
2210store_smt_control(struct device *dev, struct device_attribute *attr,
2211                  const char *buf, size_t count)
2212{
2213
2214        if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
2215                return -ENODEV;
2216
2217        return __store_smt_control(dev, attr, buf, count);
2218}
2219static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
2220
2221static ssize_t
2222show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
2223{
2224        return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
2225}
2226static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
2227
2228static struct attribute *cpuhp_smt_attrs[] = {
2229        &dev_attr_control.attr,
2230        &dev_attr_active.attr,
2231        NULL
2232};
2233
2234static const struct attribute_group cpuhp_smt_attr_group = {
2235        .attrs = cpuhp_smt_attrs,
2236        .name = "smt",
2238};
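/*
 * Editorial note: this group is created at the cpu subsystem root, so the
 * SMT knobs end up as /sys/devices/system/cpu/smt/{control,active}.
 * Typical usage from a shell:
 *
 *	cat /sys/devices/system/cpu/smt/active		# 1 while SMT siblings are scheduled
 *	echo off > /sys/devices/system/cpu/smt/control	# offline all secondary threads
 *	echo forceoff > /sys/devices/system/cpu/smt/control	# as above, and refuse "on" later
 */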
2239
2240static int __init cpu_smt_sysfs_init(void)
2241{
2242        return sysfs_create_group(&cpu_subsys.dev_root->kobj,
2243                                  &cpuhp_smt_attr_group);
2244}
2245
2246static int __init cpuhp_sysfs_init(void)
2247{
2248        int cpu, ret;
2249
2250        ret = cpu_smt_sysfs_init();
2251        if (ret)
2252                return ret;
2253
2254        ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
2255                                 &cpuhp_cpu_root_attr_group);
2256        if (ret)
2257                return ret;
2258
2259        for_each_possible_cpu(cpu) {
2260                struct device *dev = get_cpu_device(cpu);
2261
2262                if (!dev)
2263                        continue;
2264                ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
2265                if (ret)
2266                        return ret;
2267        }
2268        return 0;
2269}
2270device_initcall(cpuhp_sysfs_init);
2271#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2272
2273/*
2274 * cpu_bit_bitmap[] is a special, "compressed" data structure that
2275 * represents, for each bit number nr, the NR_CPUS-bit value 1 << nr.
2276 *
2277 * It is used by cpumask_of() to get a constant address for a CPU
2278 * mask value that has only a single bit set.
2279 */
2280
2281/* cpu_bit_bitmap[0] is empty - so we can back into it */
2282#define MASK_DECLARE_1(x)       [x+1][0] = (1UL << (x))
2283#define MASK_DECLARE_2(x)       MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
2284#define MASK_DECLARE_4(x)       MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
2285#define MASK_DECLARE_8(x)       MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
2286
2287const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
2288
2289        MASK_DECLARE_8(0),      MASK_DECLARE_8(8),
2290        MASK_DECLARE_8(16),     MASK_DECLARE_8(24),
2291#if BITS_PER_LONG > 32
2292        MASK_DECLARE_8(32),     MASK_DECLARE_8(40),
2293        MASK_DECLARE_8(48),     MASK_DECLARE_8(56),
2294#endif
2295};
2296EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
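/*
 * Editorial sketch: cpumask_of(cpu) resolves to get_cpu_mask() in
 * <linux/cpumask.h>, which (in current kernels) does essentially:
 *
 *	static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
 *	{
 *		const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
 *		p -= cpu / BITS_PER_LONG;
 *		return to_cpumask(p);
 *	}
 *
 * Backing the pointer up by cpu / BITS_PER_LONG words is what makes use of
 * the empty row 0 (and the all-zero tail words of the other rows): the one
 * set bit then lands in the correct word of the returned mask while every
 * other word reads as zero.
 */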
2297
2298const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
2299EXPORT_SYMBOL(cpu_all_bits);
2300
2301#ifdef CONFIG_INIT_ALL_POSSIBLE
2302struct cpumask __cpu_possible_mask __read_mostly
2303        = {CPU_BITS_ALL};
2304#else
2305struct cpumask __cpu_possible_mask __read_mostly;
2306#endif
2307EXPORT_SYMBOL(__cpu_possible_mask);
2308
2309struct cpumask __cpu_online_mask __read_mostly;
2310EXPORT_SYMBOL(__cpu_online_mask);
2311
2312struct cpumask __cpu_present_mask __read_mostly;
2313EXPORT_SYMBOL(__cpu_present_mask);
2314
2315struct cpumask __cpu_active_mask __read_mostly;
2316EXPORT_SYMBOL(__cpu_active_mask);
2317
2318void init_cpu_present(const struct cpumask *src)
2319{
2320        cpumask_copy(&__cpu_present_mask, src);
2321}
2322
2323void init_cpu_possible(const struct cpumask *src)
2324{
2325        cpumask_copy(&__cpu_possible_mask, src);
2326}
2327
2328void init_cpu_online(const struct cpumask *src)
2329{
2330        cpumask_copy(&__cpu_online_mask, src);
2331}
2332
2333/*
2334 * Activate the first processor.
2335 */
2336void __init boot_cpu_init(void)
2337{
2338        int cpu = smp_processor_id();
2339
2340        /* Mark the boot cpu "present", "online" etc for SMP and UP case */
2341        set_cpu_online(cpu, true);
2342        set_cpu_active(cpu, true);
2343        set_cpu_present(cpu, true);
2344        set_cpu_possible(cpu, true);
2345
2346#ifdef CONFIG_SMP
2347        __boot_cpu_id = cpu;
2348#endif
2349}
2350
2351/*
2352 * Must be called _AFTER_ setting up the per_cpu areas
2353 */
2354void __init boot_cpu_hotplug_init(void)
2355{
2356        this_cpu_write(cpuhp_state.booted_once, true);
2357        this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2358}
2359
2360/*
2361 * These are used for a global "mitigations=" cmdline option for toggling
2362 * optional CPU mitigations.
2363 */
2364enum cpu_mitigations {
2365        CPU_MITIGATIONS_OFF,
2366        CPU_MITIGATIONS_AUTO,
2367        CPU_MITIGATIONS_AUTO_NOSMT,
2368};
2369
2370static enum cpu_mitigations cpu_mitigations __ro_after_init =
2371        CPU_MITIGATIONS_AUTO;
2372
2373static int __init mitigations_parse_cmdline(char *arg)
2374{
2375        if (!strcmp(arg, "off"))
2376                cpu_mitigations = CPU_MITIGATIONS_OFF;
2377        else if (!strcmp(arg, "auto"))
2378                cpu_mitigations = CPU_MITIGATIONS_AUTO;
2379        else if (!strcmp(arg, "auto,nosmt"))
2380                cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
2381
2382        return 0;
2383}
2384early_param("mitigations", mitigations_parse_cmdline);
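/*
 * Editorial note: the parser above accepts exactly the three documented
 * forms on the kernel command line; anything else is silently ignored and
 * the CPU_MITIGATIONS_AUTO default stays in effect:
 *
 *	mitigations=off
 *	mitigations=auto
 *	mitigations=auto,nosmt
 */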
2385
2386/* mitigations=off */
2387bool cpu_mitigations_off(void)
2388{
2389        return cpu_mitigations == CPU_MITIGATIONS_OFF;
2390}
2391EXPORT_SYMBOL_GPL(cpu_mitigations_off);
2392
2393/* mitigations=auto,nosmt */
2394bool cpu_mitigations_auto_nosmt(void)
2395{
2396        return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
2397}
2398EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
2399