/*
 * kernel/stop_machine.c
 *
 * Copyright (C) 2008, 2005     IBM Corporation.
 * Copyright (C) 2008, 2005     Rusty Russell rusty@rustcorp.com.au
 * Copyright (C) 2010           SUSE Linux Products GmbH
 * Copyright (C) 2010           Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2 and any later version.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/smpboot.h>
#include <linux/atomic.h>
#include <linux/lglock.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
        atomic_t                nr_todo;        /* nr left to execute */
        int                     ret;            /* collected return value */
        struct completion       completion;     /* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per possible cpu, enabled on online cpus */
struct cpu_stopper {
        struct task_struct      *thread;

        spinlock_t              lock;
        bool                    enabled;        /* is this stopper enabled? */
        struct list_head        works;          /* list of pending works */

        struct cpu_stop_work    stop_work;      /* for stop_cpus */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;

/*
 * Avoids a race between stop_two_cpus and global stop_cpus, where
 * the stoppers could get queued up in reverse order, leading to
 * system deadlock. Using an lglock means stop_two_cpus remains
 * relatively cheap.
 */
DEFINE_STATIC_LGLOCK(stop_cpus_lock);
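
/*
 * Illustrative sketch of the race this guards against (cpu numbers are
 * arbitrary).  Suppose stop_cpus() queues its works on CPU0 then CPU1
 * while stop_two_cpus() concurrently queues on CPU1 then CPU0:
 *
 *      stop_cpus()                        stop_two_cpus()
 *        queue multi_cpu_stop on CPU0       queue multi_cpu_stop on CPU1
 *        queue multi_cpu_stop on CPU1       queue multi_cpu_stop on CPU0
 *
 * CPU0's stopper then spins in stop_cpus' multi_cpu_stop() waiting for
 * CPU1 to join, while CPU1's stopper spins in stop_two_cpus'
 * multi_cpu_stop() waiting for CPU0; the works queued behind them never
 * run.  stop_two_cpus() takes the two per-cpu locks via lg_double_lock()
 * and stop_cpus() takes them all via lg_global_lock(), so the two
 * queueing sequences cannot interleave this way.
 */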

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
        memset(done, 0, sizeof(*done));
        atomic_set(&done->nr_todo, nr_todo);
        init_completion(&done->completion);
}

/* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done)
{
        if (atomic_dec_and_test(&done->nr_todo))
                complete(&done->completion);
}

static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
                                        struct cpu_stop_work *work)
{
        list_add_tail(&work->list, &stopper->works);
        wake_up_process(stopper->thread);
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        unsigned long flags;
        bool enabled;

        spin_lock_irqsave(&stopper->lock, flags);
        enabled = stopper->enabled;
        if (enabled)
                __cpu_stop_queue_work(stopper, work);
        else if (work->done)
                cpu_stop_signal_done(work->done);
        spin_unlock_irqrestore(&stopper->lock, flags);

        return enabled;
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;
        struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

        cpu_stop_init_done(&done, 1);
        if (!cpu_stop_queue_work(cpu, &work))
                return -ENOENT;
        wait_for_completion(&done.completion);
        return done.ret;
}
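
/*
 * Example usage (an illustrative sketch; the callback and variable names
 * are hypothetical, not part of this file).  The callback runs on @cpu
 * with that cpu monopolized, so it must be short and must not sleep:
 *
 *      static int read_remote_tick(void *arg)
 *      {
 *              *(u64 *)arg = jiffies;
 *              return 0;
 *      }
 *
 *      u64 tick;
 *      int err = stop_one_cpu(cpu, read_remote_tick, &tick);
 */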

/* This controls the threads on each CPU. */
enum multi_stop_state {
        /* Dummy starting state for thread. */
        MULTI_STOP_NONE,
        /* Awaiting everyone to be scheduled. */
        MULTI_STOP_PREPARE,
        /* Disable interrupts. */
        MULTI_STOP_DISABLE_IRQ,
        /* Run the function */
        MULTI_STOP_RUN,
        /* Exit */
        MULTI_STOP_EXIT,
};

struct multi_stop_data {
        cpu_stop_fn_t           fn;
        void                    *data;
        /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
        unsigned int            num_threads;
        const struct cpumask    *active_cpus;

        enum multi_stop_state   state;
        atomic_t                thread_ack;
};

static void set_state(struct multi_stop_data *msdata,
                      enum multi_stop_state newstate)
{
        /* Reset ack counter. */
        atomic_set(&msdata->thread_ack, msdata->num_threads);
        smp_wmb();
        msdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct multi_stop_data *msdata)
{
        if (atomic_dec_and_test(&msdata->thread_ack))
                set_state(msdata, msdata->state + 1);
}

/* This is the cpu_stop function which stops the CPU. */
static int multi_cpu_stop(void *data)
{
        struct multi_stop_data *msdata = data;
        enum multi_stop_state curstate = MULTI_STOP_NONE;
        int cpu = smp_processor_id(), err = 0;
        unsigned long flags;
        bool is_active;

        /*
         * When called from stop_machine_from_inactive_cpu(), irq might
         * already be disabled.  Save the state and restore it on exit.
         */
        local_save_flags(flags);

        if (!msdata->active_cpus)
                is_active = cpu == cpumask_first(cpu_online_mask);
        else
                is_active = cpumask_test_cpu(cpu, msdata->active_cpus);

        /* Simple state machine */
        do {
                /* Chill out and ensure we re-read multi_stop_state. */
                cpu_relax();
                if (msdata->state != curstate) {
                        curstate = msdata->state;
                        switch (curstate) {
                        case MULTI_STOP_DISABLE_IRQ:
                                local_irq_disable();
                                hard_irq_disable();
                                break;
                        case MULTI_STOP_RUN:
                                if (is_active)
                                        err = msdata->fn(msdata->data);
                                break;
                        default:
                                break;
                        }
                        ack_state(msdata);
                }
        } while (curstate != MULTI_STOP_EXIT);

        local_irq_restore(flags);
        return err;
}
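
/*
 * Illustrative walk-through (a sketch, with two participating threads):
 * the state machine advances in lockstep, the last thread to ack each
 * state moving msdata->state forward via ack_state():
 *
 *      CPU0                            CPU1
 *      ack PREPARE                     ack PREPARE        -> DISABLE_IRQ
 *      irqs off, ack                   irqs off, ack      -> RUN
 *      run @fn if active, ack          run @fn if active, ack -> EXIT
 *      restore flags, return err       restore flags, return err
 */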

static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
                                    int cpu2, struct cpu_stop_work *work2)
{
        struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
        struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
        int err;

        lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
        spin_lock_irq(&stopper1->lock);
        spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);

        err = -ENOENT;
        if (!stopper1->enabled || !stopper2->enabled)
                goto unlock;

        err = 0;
        __cpu_stop_queue_work(stopper1, work1);
        __cpu_stop_queue_work(stopper2, work2);
unlock:
        spin_unlock(&stopper2->lock);
        spin_unlock_irq(&stopper1->lock);
        lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);

        return err;
}
/**
 * stop_two_cpus - stops two cpus
 * @cpu1: the cpu to stop
 * @cpu2: the other cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Stops both the current and specified CPU and runs @fn on one of them.
 *
 * returns when both are completed.
 */
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;
        struct cpu_stop_work work1, work2;
        struct multi_stop_data msdata;

        msdata = (struct multi_stop_data){
                .fn = fn,
                .data = arg,
                .num_threads = 2,
                .active_cpus = cpumask_of(cpu1),
        };

        work1 = work2 = (struct cpu_stop_work){
                .fn = multi_cpu_stop,
                .arg = &msdata,
                .done = &done
        };

        cpu_stop_init_done(&done, 2);
        set_state(&msdata, MULTI_STOP_PREPARE);

        if (cpu1 > cpu2)
                swap(cpu1, cpu2);
        if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
                return -ENOENT;

        wait_for_completion(&done.completion);
        return done.ret;
}
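
/*
 * Example usage (an illustrative sketch, loosely modelled on the
 * scheduler's task-swap path; the structure and callback names here are
 * hypothetical).  Note that @fn executes on @cpu1, the cpu passed in
 * .active_cpus above, while both cpus are held in multi_cpu_stop():
 *
 *      struct swap_arg arg = { .src_cpu = src_cpu, .dst_cpu = dst_cpu };
 *
 *      ret = stop_two_cpus(dst_cpu, src_cpu, do_swap, &arg);
 */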

/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 * @work_buf: pointer to cpu_stop_work structure
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 *
 * RETURNS:
 * true if cpu_stop_work was queued successfully and @fn will be called,
 * false otherwise.
 */
bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
                        struct cpu_stop_work *work_buf)
{
        *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
        return cpu_stop_queue_work(cpu, work_buf);
}
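
/*
 * Example usage (an illustrative sketch; the per-cpu work and callback
 * names are hypothetical).  Since nothing waits on the work, @work_buf
 * must outlive the stopper's execution, e.g. by living in per-cpu data:
 *
 *      static DEFINE_PER_CPU(struct cpu_stop_work, kick_work);
 *
 *      stop_one_cpu_nowait(cpu, kick_remote_cpu, NULL,
 *                          &per_cpu(kick_work, cpu));
 */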

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);

static bool queue_stop_cpus_work(const struct cpumask *cpumask,
                                 cpu_stop_fn_t fn, void *arg,
                                 struct cpu_stop_done *done)
{
        struct cpu_stop_work *work;
        unsigned int cpu;
        bool queued = false;

        /*
         * Disable preemption while queueing to avoid getting
         * preempted by a stopper which might wait for other stoppers
         * to enter @fn which can lead to deadlock.
         */
        lg_global_lock(&stop_cpus_lock);
        for_each_cpu(cpu, cpumask) {
                work = &per_cpu(cpu_stopper.stop_work, cpu);
                work->fn = fn;
                work->arg = arg;
                work->done = done;
                if (cpu_stop_queue_work(cpu, work))
                        queued = true;
        }
        lg_global_unlock(&stop_cpus_lock);

        return queued;
}

static int __stop_cpus(const struct cpumask *cpumask,
                       cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;

        cpu_stop_init_done(&done, cpumask_weight(cpumask));
        if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
                return -ENOENT;
        wait_for_completion(&done.completion);
        return done.ret;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized making it safe for @fn to wait
 * for all cpus to start executing it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
        int ret;

        /* static works are used, process one request at a time */
        mutex_lock(&stop_cpus_mutex);
        ret = __stop_cpus(cpumask, fn, arg);
        mutex_unlock(&stop_cpus_mutex);
        return ret;
}
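
/*
 * Example usage (an illustrative sketch; the callback name is
 * hypothetical).  @fn runs concurrently on every online cpu in the mask,
 * so it must be safe to execute in parallel and must not sleep:
 *
 *      static int poke_cpu(void *unused)
 *      {
 *              return 0;       // per-cpu work goes here
 *      }
 *
 *      err = stop_cpus(cpu_online_mask, poke_cpu, NULL);
 */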

/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any non
 * zero return value if any returned non zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
        int ret;

        /* static works are used, process one request at a time */
        if (!mutex_trylock(&stop_cpus_mutex))
                return -EAGAIN;
        ret = __stop_cpus(cpumask, fn, arg);
        mutex_unlock(&stop_cpus_mutex);
        return ret;
}
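
/*
 * Example usage (an illustrative sketch): a caller that would rather back
 * off than queue behind another stop_cpus() user checks for -EAGAIN:
 *
 *      ret = try_stop_cpus(cpu_online_mask, fn, arg);
 *      if (ret == -EAGAIN)
 *              return ret;     // somebody else is stopping cpus right now
 */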

static int cpu_stop_should_run(unsigned int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        unsigned long flags;
        int run;

        spin_lock_irqsave(&stopper->lock, flags);
        run = !list_empty(&stopper->works);
        spin_unlock_irqrestore(&stopper->lock, flags);
        return run;
}

static void cpu_stopper_thread(unsigned int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        struct cpu_stop_work *work;

repeat:
        work = NULL;
        spin_lock_irq(&stopper->lock);
        if (!list_empty(&stopper->works)) {
                work = list_first_entry(&stopper->works,
                                        struct cpu_stop_work, list);
                list_del_init(&work->list);
        }
        spin_unlock_irq(&stopper->lock);

        if (work) {
                cpu_stop_fn_t fn = work->fn;
                void *arg = work->arg;
                struct cpu_stop_done *done = work->done;
                int ret;

                /* cpu stop callbacks must not sleep, make in_atomic() == T */
                preempt_count_inc();
                ret = fn(arg);
                if (done) {
                        if (ret)
                                done->ret = ret;
                        cpu_stop_signal_done(done);
                }
                preempt_count_dec();
                WARN_ONCE(preempt_count(),
                          "cpu_stop: %pf(%p) leaked preempt count\n", fn, arg);
                goto repeat;
        }
}

void stop_machine_park(int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        /*
         * Lockless. cpu_stopper_thread() will take stopper->lock and flush
         * the pending works before it parks, until then it is fine to queue
         * the new works.
         */
        stopper->enabled = false;
        kthread_park(stopper->thread);
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

static void cpu_stop_create(unsigned int cpu)
{
        sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
}

static void cpu_stop_park(unsigned int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

        WARN_ON(!list_empty(&stopper->works));
}

void stop_machine_unpark(int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

        stopper->enabled = true;
        kthread_unpark(stopper->thread);
}

static struct smp_hotplug_thread cpu_stop_threads = {
        .store                  = &cpu_stopper.thread,
        .thread_should_run      = cpu_stop_should_run,
        .thread_fn              = cpu_stopper_thread,
        .thread_comm            = "migration/%u",
        .create                 = cpu_stop_create,
        .park                   = cpu_stop_park,
        .selfparking            = true,
};

static int __init cpu_stop_init(void)
{
        unsigned int cpu;

        for_each_possible_cpu(cpu) {
                struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

                spin_lock_init(&stopper->lock);
                INIT_LIST_HEAD(&stopper->works);
        }

        BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
        stop_machine_unpark(raw_smp_processor_id());
        stop_machine_initialized = true;
        return 0;
}
early_initcall(cpu_stop_init);

static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
        struct multi_stop_data msdata = {
                .fn = fn,
                .data = data,
                .num_threads = num_online_cpus(),
                .active_cpus = cpus,
        };

        if (!stop_machine_initialized) {
                /*
                 * Handle the case where stop_machine() is called
                 * early in boot before stop_machine() has been
                 * initialized.
                 */
                unsigned long flags;
                int ret;

                WARN_ON_ONCE(msdata.num_threads != 1);

                local_irq_save(flags);
                hard_irq_disable();
                ret = (*fn)(data);
                local_irq_restore(flags);

                return ret;
        }

        /* Set the initial state and stop all online cpus. */
        set_state(&msdata, MULTI_STOP_PREPARE);
        return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
}

int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
        int ret;

        /* No CPUs can come up or down during this. */
        get_online_cpus();
        ret = __stop_machine(fn, data, cpus);
        put_online_cpus();
        return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
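
/*
 * Example usage (an illustrative sketch; the callback and argument names
 * are hypothetical).  While @fn runs, every other online cpu spins in
 * multi_cpu_stop() with interrupts disabled, so this is only suitable
 * for rare, short operations on state that must never be observed
 * half-updated:
 *
 *      static int apply_update(void *arg)
 *      {
 *              struct my_update *u = arg;      // hypothetical type
 *
 *              u->applied = true;              // no other cpu can race with this
 *              return 0;
 *      }
 *
 *      err = stop_machine(apply_update, &update, NULL);  // NULL: @fn runs on the first online cpu
 */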

/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start) and not marked active and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly for local
 * CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return value if any
 * returned non zero.
 */
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
                                  const struct cpumask *cpus)
{
        struct multi_stop_data msdata = { .fn = fn, .data = data,
                                            .active_cpus = cpus };
        struct cpu_stop_done done;
        int ret;

        /* Local CPU must be inactive and CPU hotplug in progress. */
        BUG_ON(cpu_active(raw_smp_processor_id()));
        msdata.num_threads = num_active_cpus() + 1;     /* +1 for local */

        /* No proper task established and can't sleep - busy wait for lock. */
        while (!mutex_trylock(&stop_cpus_mutex))
                cpu_relax();

        /* Schedule work on other CPUs and execute directly for local CPU */
        set_state(&msdata, MULTI_STOP_PREPARE);
        cpu_stop_init_done(&done, num_active_cpus());
        queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
                             &done);
        ret = multi_cpu_stop(&msdata);

        /* Busy wait for completion. */
        while (!completion_done(&done.completion))
                cpu_relax();

        mutex_unlock(&stop_cpus_mutex);
        return ret ?: done.ret;
}
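
/*
 * Usage note (an illustrative sketch; the callback and argument names are
 * hypothetical).  The intended caller is a cpu that is coming online and
 * must bring some piece of global hardware state into sync with the
 * already-active cpus before it can be marked active, e.g.:
 *
 *      // running on the incoming cpu, which is not yet active
 *      err = stop_machine_from_inactive_cpu(sync_hw_state, &state,
 *                                           cpumask_of(raw_smp_processor_id()));
 */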