linux/kernel/stop_machine.c
/*
 * kernel/stop_machine.c
 *
 * Copyright (C) 2008, 2005	IBM Corporation.
 * Copyright (C) 2008, 2005	Rusty Russell rusty@rustcorp.com.au
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2 and any later version.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/smpboot.h>
#include <linux/atomic.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
	atomic_t		nr_todo;	/* nr left to execute */
	bool			executed;	/* actually executed? */
	int			ret;		/* collected return value */
	struct completion	completion;	/* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
	spinlock_t		lock;
	bool			enabled;	/* is this stopper enabled? */
	struct list_head	works;		/* list of pending works */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task);
static bool stop_machine_initialized = false;

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
	memset(done, 0, sizeof(*done));
	atomic_set(&done->nr_todo, nr_todo);
	init_completion(&done->completion);
}

/* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
{
	if (done) {
		if (executed)
			done->executed = true;
		if (atomic_dec_and_test(&done->nr_todo))
			complete(&done->completion);
	}
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct task_struct *p = per_cpu(cpu_stopper_task, cpu);

	unsigned long flags;

	spin_lock_irqsave(&stopper->lock, flags);

	if (stopper->enabled) {
		list_add_tail(&work->list, &stopper->works);
		wake_up_process(p);
	} else
		cpu_stop_signal_done(work->done, false);

	spin_unlock_irqrestore(&stopper->lock, flags);
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

	cpu_stop_init_done(&done, 1);
	cpu_stop_queue_work(cpu, &work);
	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}

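/*
 * Usage sketch (hypothetical caller, not taken from any in-tree user):
 * run a short, non-sleeping callback on a chosen CPU and collect its
 * return value.  The callback runs on the target CPU with preemption
 * disabled, so it must not sleep or take sleeping locks.
 */
#if 0	/* illustration only */
static int example_bump(void *arg)
{
	/* executes on the target cpu, monopolizing it */
	(*(int *)arg)++;
	return 0;
}

static int example_stop_one_cpu_usage(unsigned int cpu)
{
	int counter = 0;

	/* -ENOENT if @cpu was offline, otherwise example_bump()'s return */
	return stop_one_cpu(cpu, example_bump, &counter);
}
#endif
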
/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 */
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
			struct cpu_stop_work *work_buf)
{
	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
	cpu_stop_queue_work(cpu, work_buf);
}

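/*
 * Usage sketch (hypothetical caller): because stop_one_cpu_nowait()
 * returns before the callback runs, @work_buf must stay valid and
 * untouched until the stopper thread has picked it up.  A caller-owned
 * buffer, such as the static per-cpu one below (the scheduler, for
 * example, embeds one in its per-cpu runqueue for active load
 * balancing), is one way to guarantee that.
 */
#if 0	/* illustration only */
static DEFINE_PER_CPU(struct cpu_stop_work, example_nowait_work);

static int example_poke(void *arg)
{
	return 0;
}

static void example_stop_one_cpu_nowait_usage(unsigned int cpu)
{
	/* fire and forget; nothing waits for completion */
	stop_one_cpu_nowait(cpu, example_poke, NULL,
			    &per_cpu(example_nowait_work, cpu));
}
#endif
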
/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);

static void queue_stop_cpus_work(const struct cpumask *cpumask,
				 cpu_stop_fn_t fn, void *arg,
				 struct cpu_stop_done *done)
{
	struct cpu_stop_work *work;
	unsigned int cpu;

	/* initialize works and done */
	for_each_cpu(cpu, cpumask) {
		work = &per_cpu(stop_cpus_work, cpu);
		work->fn = fn;
		work->arg = arg;
		work->done = done;
	}

	/*
	 * Disable preemption while queueing to avoid getting
	 * preempted by a stopper which might wait for other stoppers
	 * to enter @fn which can lead to deadlock.
	 */
	preempt_disable();
	for_each_cpu(cpu, cpumask)
		cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
	preempt_enable();
}

static int __stop_cpus(const struct cpumask *cpumask,
		       cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;

	cpu_stop_init_done(&done, cpumask_weight(cpumask));
	queue_stop_cpus_work(cpumask, fn, arg, &done);
	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized making it safe for @fn to wait
 * for all cpus to start executing it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	mutex_lock(&stop_cpus_mutex);
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}

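/*
 * Usage sketch (hypothetical caller): run a callback on every online
 * CPU.  Because stop_cpus() calls are serialized on stop_cpus_mutex,
 * the callback may safely spin waiting for its peers on other CPUs to
 * reach the same point.
 */
#if 0	/* illustration only */
static int example_count_cpu(void *arg)
{
	/* each cpu in the mask runs this at the highest priority */
	atomic_inc((atomic_t *)arg);
	return 0;
}

static int example_stop_cpus_usage(void)
{
	atomic_t visited = ATOMIC_INIT(0);

	/* -ENOENT only if every cpu in the mask was offline */
	return stop_cpus(cpu_online_mask, example_count_cpu, &visited);
}
#endif
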
/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any non
 * zero return value if any returned non zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	if (!mutex_trylock(&stop_cpus_mutex))
		return -EAGAIN;
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}

static int cpu_stop_should_run(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	unsigned long flags;
	int run;

	spin_lock_irqsave(&stopper->lock, flags);
	run = !list_empty(&stopper->works);
	spin_unlock_irqrestore(&stopper->lock, flags);
	return run;
}

static void cpu_stopper_thread(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct cpu_stop_work *work;
	int ret;

repeat:
	work = NULL;
	spin_lock_irq(&stopper->lock);
	if (!list_empty(&stopper->works)) {
		work = list_first_entry(&stopper->works,
					struct cpu_stop_work, list);
		list_del_init(&work->list);
	}
	spin_unlock_irq(&stopper->lock);

	if (work) {
		cpu_stop_fn_t fn = work->fn;
		void *arg = work->arg;
		struct cpu_stop_done *done = work->done;
		char ksym_buf[KSYM_NAME_LEN] __maybe_unused;

		/* cpu stop callbacks are not allowed to sleep */
		preempt_disable();

		ret = fn(arg);
		if (ret)
			done->ret = ret;

		/* restore preemption and check it's still balanced */
		preempt_enable();
		WARN_ONCE(preempt_count(),
			  "cpu_stop: %s(%p) leaked preempt count\n",
			  kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
					  ksym_buf), arg);

		cpu_stop_signal_done(done, true);
		goto repeat;
	}
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

static void cpu_stop_create(unsigned int cpu)
{
	sched_set_stop_task(cpu, per_cpu(cpu_stopper_task, cpu));
}

static void cpu_stop_park(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct cpu_stop_work *work;
	unsigned long flags;

	/* drain remaining works */
	spin_lock_irqsave(&stopper->lock, flags);
	list_for_each_entry(work, &stopper->works, list)
		cpu_stop_signal_done(work->done, false);
	stopper->enabled = false;
	spin_unlock_irqrestore(&stopper->lock, flags);
}

static void cpu_stop_unpark(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

	spin_lock_irq(&stopper->lock);
	stopper->enabled = true;
	spin_unlock_irq(&stopper->lock);
}

static struct smp_hotplug_thread cpu_stop_threads = {
	.store			= &cpu_stopper_task,
	.thread_should_run	= cpu_stop_should_run,
	.thread_fn		= cpu_stopper_thread,
	.thread_comm		= "migration/%u",
	.create			= cpu_stop_create,
	.setup			= cpu_stop_unpark,
	.park			= cpu_stop_park,
	.pre_unpark		= cpu_stop_unpark,
	.selfparking		= true,
};

static int __init cpu_stop_init(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

		spin_lock_init(&stopper->lock);
		INIT_LIST_HEAD(&stopper->works);
	}

	BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
	stop_machine_initialized = true;
	return 0;
}
early_initcall(cpu_stop_init);

#ifdef CONFIG_STOP_MACHINE

/* This controls the threads on each CPU. */
enum stopmachine_state {
	/* Dummy starting state for thread. */
	STOPMACHINE_NONE,
	/* Awaiting everyone to be scheduled. */
	STOPMACHINE_PREPARE,
	/* Disable interrupts. */
	STOPMACHINE_DISABLE_IRQ,
	/* Run the function */
	STOPMACHINE_RUN,
	/* Exit */
	STOPMACHINE_EXIT,
};

struct stop_machine_data {
	int			(*fn)(void *);
	void			*data;
	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
	unsigned int		num_threads;
	const struct cpumask	*active_cpus;

	enum stopmachine_state	state;
	atomic_t		thread_ack;
};

static void set_state(struct stop_machine_data *smdata,
		      enum stopmachine_state newstate)
{
	/* Reset ack counter. */
	atomic_set(&smdata->thread_ack, smdata->num_threads);
	smp_wmb();
	smdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct stop_machine_data *smdata)
{
	if (atomic_dec_and_test(&smdata->thread_ack))
		set_state(smdata, smdata->state + 1);
}

/* This is the cpu_stop function which stops the CPU. */
static int stop_machine_cpu_stop(void *data)
{
	struct stop_machine_data *smdata = data;
	enum stopmachine_state curstate = STOPMACHINE_NONE;
	int cpu = smp_processor_id(), err = 0;
	unsigned long flags;
	bool is_active;

	/*
	 * When called from stop_machine_from_inactive_cpu(), irq might
	 * already be disabled.  Save the state and restore it on exit.
	 */
	local_save_flags(flags);

	if (!smdata->active_cpus)
		is_active = cpu == cpumask_first(cpu_online_mask);
	else
		is_active = cpumask_test_cpu(cpu, smdata->active_cpus);

	/* Simple state machine */
	do {
		/* Chill out and ensure we re-read stopmachine_state. */
		cpu_relax();
		if (smdata->state != curstate) {
			curstate = smdata->state;
			switch (curstate) {
			case STOPMACHINE_DISABLE_IRQ:
				local_irq_disable();
				hard_irq_disable();
				break;
			case STOPMACHINE_RUN:
				if (is_active)
					err = smdata->fn(smdata->data);
				break;
			default:
				break;
			}
			ack_state(smdata);
		}
	} while (curstate != STOPMACHINE_EXIT);

	local_irq_restore(flags);
	return err;
}

int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	struct stop_machine_data smdata = { .fn = fn, .data = data,
					    .num_threads = num_online_cpus(),
					    .active_cpus = cpus };

	if (!stop_machine_initialized) {
		/*
		 * Handle the case where stop_machine() is called
		 * early in boot before stop_machine() has been
		 * initialized.
		 */
		unsigned long flags;
		int ret;

		WARN_ON_ONCE(smdata.num_threads != 1);

		local_irq_save(flags);
		hard_irq_disable();
		ret = (*fn)(data);
		local_irq_restore(flags);

		return ret;
	}

	/* Set the initial state and stop all online cpus. */
	set_state(&smdata, STOPMACHINE_PREPARE);
	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
}

int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	int ret;

	/* No CPUs can come up or down during this. */
	get_online_cpus();
	ret = __stop_machine(fn, data, cpus);
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);

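/*
 * Usage sketch (hypothetical caller): stop_machine() is the heavy
 * hammer for updates that must appear atomic to every CPU, e.g. cross
 * modifying kernel text or rewriting a structure that other CPUs read
 * locklessly.  While @fn runs, every other online CPU spins in
 * stop_machine_cpu_stop() with interrupts disabled.
 */
#if 0	/* illustration only */
static unsigned long example_shared_value;

static int example_global_update(void *arg)
{
	/* all other cpus are spinning with irqs off while this runs */
	example_shared_value = *(unsigned long *)arg;
	return 0;
}

static int example_stop_machine_usage(void)
{
	unsigned long new_val = 42;

	/* NULL @cpus: @fn runs on the first online cpu only */
	return stop_machine(example_global_update, &new_val, NULL);
}
#endif
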
/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start) and not marked active and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly for local
 * CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return value if any
 * returned non zero.
 */
int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
				  const struct cpumask *cpus)
{
	struct stop_machine_data smdata = { .fn = fn, .data = data,
					    .active_cpus = cpus };
	struct cpu_stop_done done;
	int ret;

	/* Local CPU must be inactive and CPU hotplug in progress. */
	BUG_ON(cpu_active(raw_smp_processor_id()));
	smdata.num_threads = num_active_cpus() + 1;	/* +1 for local */

	/* No proper task established and can't sleep - busy wait for lock. */
	while (!mutex_trylock(&stop_cpus_mutex))
		cpu_relax();

	/* Schedule work on other CPUs and execute directly for local CPU */
	set_state(&smdata, STOPMACHINE_PREPARE);
	cpu_stop_init_done(&done, num_active_cpus());
	queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
			     &done);
	ret = stop_machine_cpu_stop(&smdata);

	/* Busy wait for completion. */
	while (!completion_done(&done.completion))
		cpu_relax();

	mutex_unlock(&stop_cpus_mutex);
	return ret ?: done.ret;
}

#endif	/* CONFIG_STOP_MACHINE */