linux/kernel/stop_machine.c
/*
 * kernel/stop_machine.c
 *
 * Copyright (C) 2008, 2005     IBM Corporation.
 * Copyright (C) 2008, 2005     Rusty Russell rusty@rustcorp.com.au
 * Copyright (C) 2010           SUSE Linux Products GmbH
 * Copyright (C) 2010           Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2 and any later version.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>

#include <linux/atomic.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
        atomic_t                nr_todo;        /* nr left to execute */
        bool                    executed;       /* actually executed? */
        int                     ret;            /* collected return value */
        struct completion       completion;     /* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
        spinlock_t              lock;
        bool                    enabled;        /* is this stopper enabled? */
        struct list_head        works;          /* list of pending works */
        struct task_struct      *thread;        /* stopper thread */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
        memset(done, 0, sizeof(*done));
        atomic_set(&done->nr_todo, nr_todo);
        init_completion(&done->completion);
}

/* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
{
        if (done) {
                if (executed)
                        done->executed = true;
                if (atomic_dec_and_test(&done->nr_todo))
                        complete(&done->completion);
        }
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static void cpu_stop_queue_work(struct cpu_stopper *stopper,
                                struct cpu_stop_work *work)
{
        unsigned long flags;

        spin_lock_irqsave(&stopper->lock, flags);

        if (stopper->enabled) {
                list_add_tail(&work->list, &stopper->works);
                wake_up_process(stopper->thread);
        } else
                cpu_stop_signal_done(work->done, false);

        spin_unlock_irqrestore(&stopper->lock, flags);
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;
        struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

        cpu_stop_init_done(&done, 1);
        cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
        wait_for_completion(&done.completion);
        return done.executed ? done.ret : -ENOENT;
}
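
/*
 * Usage sketch (illustrative only, not part of the original file): run a
 * short, non-sleeping callback on one particular CPU and collect its return
 * value.  The per-cpu variable and helpers below are hypothetical; only the
 * stop_one_cpu() call itself comes from this file.
 *
 *      static DEFINE_PER_CPU(long, example_flag);
 *
 *      static int example_set_flag(void *arg)
 *      {
 *              __this_cpu_write(example_flag, (long)arg);
 *              return 0;
 *      }
 *
 *      static int example_set_flag_on(unsigned int cpu, long val)
 *      {
 *              return stop_one_cpu(cpu, example_set_flag, (void *)val);
 *      }
 */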

/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 */
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
                        struct cpu_stop_work *work_buf)
{
        *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
        cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
}
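
/*
 * Usage sketch (illustrative only, not part of the original file): the
 * fire-and-forget variant.  Because @work_buf must stay untouched until the
 * stopper has picked up @fn, a static per-cpu buffer is used here.  All
 * names below are hypothetical.
 *
 *      static DEFINE_PER_CPU(struct cpu_stop_work, example_work);
 *
 *      static int example_poke(void *arg)
 *      {
 *              return 0;
 *      }
 *
 *      static void example_poke_cpu(unsigned int cpu)
 *      {
 *              stop_one_cpu_nowait(cpu, example_poke, NULL,
 *                                  &per_cpu(example_work, cpu));
 *      }
 */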

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);

static void queue_stop_cpus_work(const struct cpumask *cpumask,
                                 cpu_stop_fn_t fn, void *arg,
                                 struct cpu_stop_done *done)
{
        struct cpu_stop_work *work;
        unsigned int cpu;

        /* initialize works and done */
        for_each_cpu(cpu, cpumask) {
                work = &per_cpu(stop_cpus_work, cpu);
                work->fn = fn;
                work->arg = arg;
                work->done = done;
        }

        /*
         * Disable preemption while queueing to avoid getting
         * preempted by a stopper which might wait for other stoppers
         * to enter @fn, which can lead to deadlock: if the stopper on
         * this CPU preempted us after only part of the works were
         * queued, an @fn which waits for every stopper to arrive
         * (e.g. stop_machine()) would wait forever.
         */
        preempt_disable();
        for_each_cpu(cpu, cpumask)
                cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
                                    &per_cpu(stop_cpus_work, cpu));
        preempt_enable();
}

static int __stop_cpus(const struct cpumask *cpumask,
                       cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;

        cpu_stop_init_done(&done, cpumask_weight(cpumask));
        queue_stop_cpus_work(cpumask, fn, arg, &done);
        wait_for_completion(&done.completion);
        return done.executed ? done.ret : -ENOENT;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized making it safe for @fn to wait
 * for all cpus to start executing it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
        int ret;

        /* static works are used, process one request at a time */
        mutex_lock(&stop_cpus_mutex);
        ret = __stop_cpus(cpumask, fn, arg);
        mutex_unlock(&stop_cpus_mutex);
        return ret;
}
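
/*
 * Usage sketch (illustrative only, not part of the original file): run a
 * callback on every online CPU in a caller-supplied mask from ordinary
 * process context (stop_cpus() may sleep).  Names are hypothetical.
 *
 *      static int example_resync(void *arg)
 *      {
 *              return 0;
 *      }
 *
 *      static int example_resync_cpus(const struct cpumask *mask)
 *      {
 *              return stop_cpus(mask, example_resync, NULL);
 *      }
 */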

/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any non
 * zero return value if any returned non zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
        int ret;

        /* static works are used, process one request at a time */
        if (!mutex_trylock(&stop_cpus_mutex))
                return -EAGAIN;
        ret = __stop_cpus(cpumask, fn, arg);
        mutex_unlock(&stop_cpus_mutex);
        return ret;
}
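
/*
 * Usage sketch (illustrative only, not part of the original file): a caller
 * that cannot block on stop_cpus_mutex retries until the facility is free.
 * The helper is hypothetical.
 *
 *      static int example_try_stop(const struct cpumask *mask,
 *                                  cpu_stop_fn_t fn, void *arg)
 *      {
 *              int ret;
 *
 *              do {
 *                      ret = try_stop_cpus(mask, fn, arg);
 *              } while (ret == -EAGAIN);
 *
 *              return ret;
 *      }
 */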

static int cpu_stopper_thread(void *data)
{
        struct cpu_stopper *stopper = data;
        struct cpu_stop_work *work;
        int ret;

repeat:
        set_current_state(TASK_INTERRUPTIBLE);  /* mb paired w/ kthread_stop */

        if (kthread_should_stop()) {
                __set_current_state(TASK_RUNNING);
                return 0;
        }

        work = NULL;
        spin_lock_irq(&stopper->lock);
        if (!list_empty(&stopper->works)) {
                work = list_first_entry(&stopper->works,
                                        struct cpu_stop_work, list);
                list_del_init(&work->list);
        }
        spin_unlock_irq(&stopper->lock);

        if (work) {
                cpu_stop_fn_t fn = work->fn;
                void *arg = work->arg;
                struct cpu_stop_done *done = work->done;
                char ksym_buf[KSYM_NAME_LEN] __maybe_unused;

                __set_current_state(TASK_RUNNING);

                /* cpu stop callbacks are not allowed to sleep */
                preempt_disable();

                ret = fn(arg);
                if (ret)
                        done->ret = ret;

                /* restore preemption and check it's still balanced */
                preempt_enable();
                WARN_ONCE(preempt_count(),
                          "cpu_stop: %s(%p) leaked preempt count\n",
                          kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
                                          ksym_buf), arg);

                cpu_stop_signal_done(done, true);
        } else
                schedule();

        goto repeat;
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
                                           unsigned long action, void *hcpu)
{
        unsigned int cpu = (unsigned long)hcpu;
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        struct task_struct *p;

        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
                BUG_ON(stopper->thread || stopper->enabled ||
                       !list_empty(&stopper->works));
                p = kthread_create_on_node(cpu_stopper_thread,
                                           stopper,
                                           cpu_to_node(cpu),
                                           "migration/%d", cpu);
                if (IS_ERR(p))
                        return notifier_from_errno(PTR_ERR(p));
                get_task_struct(p);
                kthread_bind(p, cpu);
                sched_set_stop_task(cpu, p);
                stopper->thread = p;
                break;

        case CPU_ONLINE:
                /* strictly unnecessary, as first user will wake it */
                wake_up_process(stopper->thread);
                /* mark enabled */
                spin_lock_irq(&stopper->lock);
                stopper->enabled = true;
                spin_unlock_irq(&stopper->lock);
                break;

#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
        case CPU_POST_DEAD:
        {
                struct cpu_stop_work *work;

                sched_set_stop_task(cpu, NULL);
                /* kill the stopper */
                kthread_stop(stopper->thread);
                /* drain remaining works */
                spin_lock_irq(&stopper->lock);
                list_for_each_entry(work, &stopper->works, list)
                        cpu_stop_signal_done(work->done, false);
                stopper->enabled = false;
                spin_unlock_irq(&stopper->lock);
                /* release the stopper */
                put_task_struct(stopper->thread);
                stopper->thread = NULL;
                break;
        }
#endif
        }

        return NOTIFY_OK;
}

/*
 * Give it a higher priority so that cpu stopper is available to other
 * cpu notifiers.  It currently shares the same priority as sched
 * migration_notifier.
 */
static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = {
        .notifier_call  = cpu_stop_cpu_callback,
        .priority       = 10,
};

static int __init cpu_stop_init(void)
{
        void *bcpu = (void *)(long)smp_processor_id();
        unsigned int cpu;
        int err;

        for_each_possible_cpu(cpu) {
                struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

                spin_lock_init(&stopper->lock);
                INIT_LIST_HEAD(&stopper->works);
        }

        /* start one for the boot cpu */
        err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
                                    bcpu);
        BUG_ON(err != NOTIFY_OK);
        cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
        register_cpu_notifier(&cpu_stop_cpu_notifier);

        stop_machine_initialized = true;

        return 0;
}
early_initcall(cpu_stop_init);

#ifdef CONFIG_STOP_MACHINE

/* This controls the threads on each CPU. */
enum stopmachine_state {
        /* Dummy starting state for thread. */
        STOPMACHINE_NONE,
        /* Awaiting everyone to be scheduled. */
        STOPMACHINE_PREPARE,
        /* Disable interrupts. */
        STOPMACHINE_DISABLE_IRQ,
        /* Run the function */
        STOPMACHINE_RUN,
        /* Exit */
        STOPMACHINE_EXIT,
};

struct stop_machine_data {
        int                     (*fn)(void *);
        void                    *data;
        /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
        unsigned int            num_threads;
        const struct cpumask    *active_cpus;

        enum stopmachine_state  state;
        atomic_t                thread_ack;
};

static void set_state(struct stop_machine_data *smdata,
                      enum stopmachine_state newstate)
{
        /* Reset ack counter. */
        atomic_set(&smdata->thread_ack, smdata->num_threads);
        smp_wmb();
        smdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct stop_machine_data *smdata)
{
        if (atomic_dec_and_test(&smdata->thread_ack))
                set_state(smdata, smdata->state + 1);
}
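
/*
 * Note on the handshake above: set_state() re-arms thread_ack to
 * num_threads before publishing the new state, and each CPU executing
 * stop_machine_cpu_stop() acks each state it observes exactly once.  The
 * last CPU to ack therefore advances the machine, so all participants step
 * through PREPARE -> DISABLE_IRQ -> RUN -> EXIT in lockstep; in particular,
 * every CPU has interrupts disabled before any of them runs the callback.
 */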

/* This is the cpu_stop function which stops the CPU. */
static int stop_machine_cpu_stop(void *data)
{
        struct stop_machine_data *smdata = data;
        enum stopmachine_state curstate = STOPMACHINE_NONE;
        int cpu = smp_processor_id(), err = 0;
        unsigned long flags;
        bool is_active;

        /*
         * When called from stop_machine_from_inactive_cpu(), irq might
         * already be disabled.  Save the state and restore it on exit.
         */
        local_save_flags(flags);

        if (!smdata->active_cpus)
                is_active = cpu == cpumask_first(cpu_online_mask);
        else
                is_active = cpumask_test_cpu(cpu, smdata->active_cpus);

        /* Simple state machine */
        do {
                /* Chill out and ensure we re-read stopmachine_state. */
                cpu_relax();
                if (smdata->state != curstate) {
                        curstate = smdata->state;
                        switch (curstate) {
                        case STOPMACHINE_DISABLE_IRQ:
                                local_irq_disable();
                                hard_irq_disable();
                                break;
                        case STOPMACHINE_RUN:
                                if (is_active)
                                        err = smdata->fn(smdata->data);
                                break;
                        default:
                                break;
                        }
                        ack_state(smdata);
                }
        } while (curstate != STOPMACHINE_EXIT);

        local_irq_restore(flags);
        return err;
}

int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
        struct stop_machine_data smdata = { .fn = fn, .data = data,
                                            .num_threads = num_online_cpus(),
                                            .active_cpus = cpus };

        if (!stop_machine_initialized) {
                /*
                 * Handle the case where stop_machine() is called
                 * early in boot before stop_machine() has been
                 * initialized.
                 */
                unsigned long flags;
                int ret;

                WARN_ON_ONCE(smdata.num_threads != 1);

                local_irq_save(flags);
                hard_irq_disable();
                ret = (*fn)(data);
                local_irq_restore(flags);

                return ret;
        }

        /* Set the initial state and stop all online cpus. */
        set_state(&smdata, STOPMACHINE_PREPARE);
        return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
}

int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
        int ret;

        /* No CPUs can come up or down during this. */
        get_online_cpus();
        ret = __stop_machine(fn, data, cpus);
        put_online_cpus();
        return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
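
/*
 * Usage sketch (illustrative only, not part of the original file): update a
 * global that must never be seen half-written while any other CPU is
 * executing.  Every online CPU spins with interrupts disabled for the
 * duration; with @cpus == NULL only the first online CPU actually calls the
 * callback.  The variable and helpers below are hypothetical.
 *
 *      static long example_mode;
 *
 *      static int example_switch_mode(void *arg)
 *      {
 *              example_mode = (long)arg;
 *              return 0;
 *      }
 *
 *      static int example_set_mode(long mode)
 *      {
 *              return stop_machine(example_switch_mode, (void *)mode, NULL);
 *      }
 */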

/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start) and not marked active and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly for local
 * CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return value if any
 * returned non zero.
 */
int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
                                  const struct cpumask *cpus)
{
        struct stop_machine_data smdata = { .fn = fn, .data = data,
                                            .active_cpus = cpus };
        struct cpu_stop_done done;
        int ret;

        /* Local CPU must be inactive and CPU hotplug in progress. */
        BUG_ON(cpu_active(raw_smp_processor_id()));
        smdata.num_threads = num_active_cpus() + 1;     /* +1 for local */

        /* No proper task established and can't sleep - busy wait for lock. */
        while (!mutex_trylock(&stop_cpus_mutex))
                cpu_relax();

        /* Schedule work on other CPUs and execute directly for local CPU */
        set_state(&smdata, STOPMACHINE_PREPARE);
        cpu_stop_init_done(&done, num_active_cpus());
        queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
                             &done);
        ret = stop_machine_cpu_stop(&smdata);

        /* Busy wait for completion. */
        while (!completion_done(&done.completion))
                cpu_relax();

        mutex_unlock(&stop_cpus_mutex);
        return ret ?: done.ret;
}
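
/*
 * Usage sketch (illustrative only, not part of the original file): a CPU
 * that is in the middle of being brought up and is not yet active
 * synchronizes some piece of global hardware state with the already-active
 * CPUs.  The helpers below are hypothetical.
 *
 *      static int example_sync_hw_state(void *arg)
 *      {
 *              return 0;
 *      }
 *
 *      static int example_bringup_sync(void)
 *      {
 *              return stop_machine_from_inactive_cpu(example_sync_hw_state,
 *                                                    NULL, NULL);
 *      }
 */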

#endif  /* CONFIG_STOP_MACHINE */