/*
 * kernel/stop_machine.c
 *
 * Per-CPU "stopper" kthreads: maximum-priority kthreads that can
 * preempt anything on a CPU and monopolize it, backing stop_one_cpu(),
 * stop_two_cpus(), stop_cpus() and stop_machine().
 *
 * This file is released under the GPLv2 and any later version.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/smpboot.h>
#include <linux/atomic.h>
#include <linux/nmi.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different CPUs.
 */
struct cpu_stop_done {
        atomic_t                nr_todo;        /* nr left to execute */
        int                     ret;            /* collected return value */
        struct completion       completion;     /* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
        struct task_struct      *thread;

        spinlock_t              lock;
        bool                    enabled;        /* is this stopper enabled? */
        struct list_head        works;          /* list of pending works */

        struct cpu_stop_work    stop_work;      /* for stop_cpus */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static bool stop_cpus_in_progress;
static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
        memset(done, 0, sizeof(*done));
        atomic_set(&done->nr_todo, nr_todo);
        init_completion(&done->completion);
}

/* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done)
{
        if (atomic_dec_and_test(&done->nr_todo))
                complete(&done->completion);
}

static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
                                  struct cpu_stop_work *work)
{
        list_add_tail(&work->list, &stopper->works);
        wake_up_process(stopper->thread);
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        unsigned long flags;
        bool enabled;

        spin_lock_irqsave(&stopper->lock, flags);
        enabled = stopper->enabled;
        if (enabled)
                __cpu_stop_queue_work(stopper, work);
        else if (work->done)
                cpu_stop_signal_done(work->done);
        spin_unlock_irqrestore(&stopper->lock, flags);

        return enabled;
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;
        struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

        cpu_stop_init_done(&done, 1);
        if (!cpu_stop_queue_work(cpu, &work))
                return -ENOENT;

        /*
         * In case @cpu == smp_processor_id() we can avoid a sleep+wakeup
         * cycle by doing a preemption:
         */
        cond_resched();
        wait_for_completion(&done.completion);
        return done.ret;
}
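
/*
 * Example usage (illustrative sketch): run a callback on CPU 1 and
 * collect its return value.  example_fn is a hypothetical name
 * invented for this sketch.
 *
 *      static int example_fn(void *arg)
 *      {
 *              pr_info("stopper running on cpu %d\n", smp_processor_id());
 *              return 0;
 *      }
 *
 *      int err = stop_one_cpu(1, example_fn, NULL);
 *      if (err == -ENOENT)
 *              pr_warn("cpu 1 was offline\n");
 */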

/* This controls the threads on each CPU. */
enum multi_stop_state {
        /* Dummy starting state for thread. */
        MULTI_STOP_NONE,
        /* Awaiting everyone to be scheduled. */
        MULTI_STOP_PREPARE,
        /* Disable interrupts. */
        MULTI_STOP_DISABLE_IRQ,
        /* Run the function */
        MULTI_STOP_RUN,
        /* Exit */
        MULTI_STOP_EXIT,
};

struct multi_stop_data {
        cpu_stop_fn_t           fn;
        void                    *data;
        /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
        unsigned int            num_threads;
        const struct cpumask    *active_cpus;

        enum multi_stop_state   state;
        atomic_t                thread_ack;
};

static void set_state(struct multi_stop_data *msdata,
                      enum multi_stop_state newstate)
{
        /* Reset ack counter. */
        atomic_set(&msdata->thread_ack, msdata->num_threads);
        smp_wmb();
        msdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct multi_stop_data *msdata)
{
        if (atomic_dec_and_test(&msdata->thread_ack))
                set_state(msdata, msdata->state + 1);
}

/* This is the cpu_stop function which stops the CPU. */
static int multi_cpu_stop(void *data)
{
        struct multi_stop_data *msdata = data;
        enum multi_stop_state curstate = MULTI_STOP_NONE;
        int cpu = smp_processor_id(), err = 0;
        unsigned long flags;
        bool is_active;

        /*
         * When called from stop_machine_from_inactive_cpu(), irq might
         * already be disabled.  Save the state and restore it on exit.
         */
        local_save_flags(flags);

        if (!msdata->active_cpus)
                is_active = cpu == cpumask_first(cpu_online_mask);
        else
                is_active = cpumask_test_cpu(cpu, msdata->active_cpus);

        /* Simple state machine */
        do {
                /* Chill out and ensure we re-read multi_stop_state. */
                cpu_relax_yield();
                if (msdata->state != curstate) {
                        curstate = msdata->state;
                        switch (curstate) {
                        case MULTI_STOP_DISABLE_IRQ:
                                local_irq_disable();
                                hard_irq_disable();
                                break;
                        case MULTI_STOP_RUN:
                                if (is_active)
                                        err = msdata->fn(msdata->data);
                                break;
                        default:
                                break;
                        }
                        ack_state(msdata);
                } else if (curstate > MULTI_STOP_PREPARE) {
                        /*
                         * At this stage all other CPUs we depend on must spin
                         * in the same loop. Any reason for hard-lockup should
                         * be detected and reported on their side.
                         */
                        touch_nmi_watchdog();
                }
        } while (curstate != MULTI_STOP_EXIT);

        local_irq_restore(flags);
        return err;
}
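
/*
 * Worked example of the lockstep protocol above, for num_threads == 2:
 * set_state(PREPARE) arms thread_ack = 2; each stopper notices the new
 * state, performs the per-state action and calls ack_state(); the
 * second (last) ack advances state to DISABLE_IRQ and re-arms
 * thread_ack, and so on through RUN to EXIT.  No thread can run ahead,
 * because it only acts when msdata->state changes.
 */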

static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
                                    int cpu2, struct cpu_stop_work *work2)
{
        struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
        struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
        int err;
retry:
        spin_lock_irq(&stopper1->lock);
        spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);

        err = -ENOENT;
        if (!stopper1->enabled || !stopper2->enabled)
                goto unlock;
        /*
         * Ensure that if we race with __stop_cpus() the stoppers won't get
         * queued up in reverse order leading to system deadlock.
         *
         * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has
         * queued a work on cpu1 but not on cpu2, we hold both locks.
         *
         * It can be falsely true but it is safe to spin until it is cleared,
         * queue_stop_cpus_work() does everything under preempt_disable().
         */
        err = -EDEADLK;
        if (unlikely(stop_cpus_in_progress))
                goto unlock;

        err = 0;
        __cpu_stop_queue_work(stopper1, work1);
        __cpu_stop_queue_work(stopper2, work2);
unlock:
        spin_unlock(&stopper2->lock);
        spin_unlock_irq(&stopper1->lock);

        if (unlikely(err == -EDEADLK)) {
                while (stop_cpus_in_progress)
                        cpu_relax();
                goto retry;
        }
        return err;
}

/**
 * stop_two_cpus - stops two cpus
 * @cpu1: the cpu to stop
 * @cpu2: the other cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Stops both the current and specified CPU and runs @fn on one of them.
 *
 * returns when both are completed.
 */
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;
        struct cpu_stop_work work1, work2;
        struct multi_stop_data msdata;

        msdata = (struct multi_stop_data){
                .fn = fn,
                .data = arg,
                .num_threads = 2,
                .active_cpus = cpumask_of(cpu1),
        };

        work1 = work2 = (struct cpu_stop_work){
                .fn = multi_cpu_stop,
                .arg = &msdata,
                .done = &done
        };

        cpu_stop_init_done(&done, 2);
        set_state(&msdata, MULTI_STOP_PREPARE);

        if (cpu1 > cpu2)
                swap(cpu1, cpu2);
        if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
                return -ENOENT;

        wait_for_completion(&done.completion);
        return done.ret;
}
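
/*
 * Example usage (illustrative sketch): run swap_fn while both CPU 0
 * and CPU 2 are stopped, e.g. to exchange per-cpu state atomically.
 * swap_fn and swap_args are hypothetical names.  In-tree, the
 * scheduler's migrate_swap() is built on stop_two_cpus() to trade two
 * tasks between CPUs.
 *
 *      int err = stop_two_cpus(0, 2, swap_fn, &swap_args);
 */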

/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 * @work_buf: pointer to cpu_stop_work structure
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 *
 * RETURNS:
 * true if cpu_stop_work was queued successfully and @fn will be called,
 * false otherwise.
 */
bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
                         struct cpu_stop_work *work_buf)
{
        *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
        return cpu_stop_queue_work(cpu, work_buf);
}
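
/*
 * Example usage (illustrative sketch): fire-and-forget work on another
 * CPU.  Since there is no completion to wait on, @work_buf must stay
 * untouched until the stopper runs, so long-lived (e.g. per-cpu)
 * storage is typical; the scheduler's active load balancing keeps one
 * cpu_stop_work per runqueue for exactly this reason.  example_fn and
 * example_work are hypothetical names.
 *
 *      static DEFINE_PER_CPU(struct cpu_stop_work, example_work);
 *
 *      stop_one_cpu_nowait(3, example_fn, NULL, &per_cpu(example_work, 3));
 */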

static bool queue_stop_cpus_work(const struct cpumask *cpumask,
                                 cpu_stop_fn_t fn, void *arg,
                                 struct cpu_stop_done *done)
{
        struct cpu_stop_work *work;
        unsigned int cpu;
        bool queued = false;

        /*
         * Disable preemption while queueing to avoid getting
         * preempted by a stopper which might wait for other stoppers
         * to enter @fn which can lead to deadlock.
         */
        preempt_disable();
        stop_cpus_in_progress = true;
        for_each_cpu(cpu, cpumask) {
                work = &per_cpu(cpu_stopper.stop_work, cpu);
                work->fn = fn;
                work->arg = arg;
                work->done = done;
                if (cpu_stop_queue_work(cpu, work))
                        queued = true;
        }
        stop_cpus_in_progress = false;
        preempt_enable();

        return queued;
}

static int __stop_cpus(const struct cpumask *cpumask,
                       cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;

        cpu_stop_init_done(&done, cpumask_weight(cpumask));
        if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
                return -ENOENT;
        wait_for_completion(&done.completion);
        return done.ret;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized making it safe for @fn to wait
 * for all cpus to start executing it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
        int ret;

        /* static works are used, process one request at a time */
        mutex_lock(&stop_cpus_mutex);
        ret = __stop_cpus(cpumask, fn, arg);
        mutex_unlock(&stop_cpus_mutex);
        return ret;
}

/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any non
 * zero return value if any returned non zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
        int ret;

        /* static works are used, process one request at a time */
        if (!mutex_trylock(&stop_cpus_mutex))
                return -EAGAIN;
        ret = __stop_cpus(cpumask, fn, arg);
        mutex_unlock(&stop_cpus_mutex);
        return ret;
}
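
/*
 * Example usage (illustrative sketch): a caller that must not block on
 * stop_cpus_mutex can poll instead; historically RCU's expedited grace
 * periods used a retry loop of this shape:
 *
 *      while ((ret = try_stop_cpus(cpu_online_mask, fn, arg)) == -EAGAIN)
 *              cpu_relax();
 */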

static int cpu_stop_should_run(unsigned int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        unsigned long flags;
        int run;

        spin_lock_irqsave(&stopper->lock, flags);
        run = !list_empty(&stopper->works);
        spin_unlock_irqrestore(&stopper->lock, flags);
        return run;
}

static void cpu_stopper_thread(unsigned int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        struct cpu_stop_work *work;

repeat:
        work = NULL;
        spin_lock_irq(&stopper->lock);
        if (!list_empty(&stopper->works)) {
                work = list_first_entry(&stopper->works,
                                        struct cpu_stop_work, list);
                list_del_init(&work->list);
        }
        spin_unlock_irq(&stopper->lock);

        if (work) {
                cpu_stop_fn_t fn = work->fn;
                void *arg = work->arg;
                struct cpu_stop_done *done = work->done;
                int ret;

                /* cpu stop callbacks must not sleep, make in_atomic() == T */
                preempt_count_inc();
                ret = fn(arg);
                if (done) {
                        if (ret)
                                done->ret = ret;
                        cpu_stop_signal_done(done);
                }
                preempt_count_dec();
                WARN_ONCE(preempt_count(),
                          "cpu_stop: %pf(%p) leaked preempt count\n", fn, arg);
                goto repeat;
        }
}

void stop_machine_park(int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

        /*
         * Lockless. cpu_stopper_thread() will take stopper->lock and flush
         * the pending works before it parks, until then it is fine to queue
         * the new works.
         */
        stopper->enabled = false;
        kthread_park(stopper->thread);
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

static void cpu_stop_create(unsigned int cpu)
{
        sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
}

static void cpu_stop_park(unsigned int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

        WARN_ON(!list_empty(&stopper->works));
}

void stop_machine_unpark(int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

        stopper->enabled = true;
        kthread_unpark(stopper->thread);
}

static struct smp_hotplug_thread cpu_stop_threads = {
        .store                  = &cpu_stopper.thread,
        .thread_should_run      = cpu_stop_should_run,
        .thread_fn              = cpu_stopper_thread,
        .thread_comm            = "migration/%u",
        .create                 = cpu_stop_create,
        .park                   = cpu_stop_park,
        .selfparking            = true,
};

static int __init cpu_stop_init(void)
{
        unsigned int cpu;

        for_each_possible_cpu(cpu) {
                struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

                spin_lock_init(&stopper->lock);
                INIT_LIST_HEAD(&stopper->works);
        }

        BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
        stop_machine_unpark(raw_smp_processor_id());
        stop_machine_initialized = true;
        return 0;
}
early_initcall(cpu_stop_init);

int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
                            const struct cpumask *cpus)
{
        struct multi_stop_data msdata = {
                .fn = fn,
                .data = data,
                .num_threads = num_online_cpus(),
                .active_cpus = cpus,
        };

        lockdep_assert_cpus_held();

        if (!stop_machine_initialized) {
                /*
                 * Handle the case where stop_machine() is called
                 * early in boot before stop_machine() has been
                 * initialized.
                 */
                unsigned long flags;
                int ret;

                WARN_ON_ONCE(msdata.num_threads != 1);

                local_irq_save(flags);
                hard_irq_disable();
                ret = (*fn)(data);
                local_irq_restore(flags);

                return ret;
        }

        /* Set the initial state and stop all online cpus. */
        set_state(&msdata, MULTI_STOP_PREPARE);
        return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
}

int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
        int ret;

        /* No CPUs can come up or down during this. */
        cpus_read_lock();
        ret = stop_machine_cpuslocked(fn, data, cpus);
        cpus_read_unlock();
        return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
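
/*
 * Example usage (illustrative sketch): run a callback while every
 * online CPU spins with interrupts disabled, so no CPU can observe a
 * half-finished update.  patch_fn and struct patch_data are
 * hypothetical names invented for this sketch.
 *
 *      struct patch_data {
 *              unsigned int *addr;
 *              unsigned int insn;
 *      };
 *
 *      static int patch_fn(void *arg)
 *      {
 *              struct patch_data *p = arg;
 *
 *              *p->addr = p->insn;     (no other CPU runs code here)
 *              return 0;
 *      }
 *
 *      err = stop_machine(patch_fn, &pd, NULL);  (NULL: run on any one CPU)
 */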

/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start) and not marked active and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly for local
 * CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return
 * value if any returned non zero.
 */
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
                                   const struct cpumask *cpus)
{
        struct multi_stop_data msdata = { .fn = fn, .data = data,
                                          .active_cpus = cpus };
        struct cpu_stop_done done;
        int ret;

        /* Local CPU must be inactive and CPU hotplug in progress. */
        BUG_ON(cpu_active(raw_smp_processor_id()));
        msdata.num_threads = num_active_cpus() + 1;     /* +1 for local */

        /* No proper task established and can't sleep - busy wait for lock. */
        while (!mutex_trylock(&stop_cpus_mutex))
                cpu_relax();

        /* Schedule work on other CPUs and execute directly for local CPU */
        set_state(&msdata, MULTI_STOP_PREPARE);
        cpu_stop_init_done(&done, num_active_cpus());
        queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
                             &done);
        ret = multi_cpu_stop(&msdata);

        /* Busy wait for completion. */
        while (!completion_done(&done.completion))
                cpu_relax();

        mutex_unlock(&stop_cpus_mutex);
        return ret ?: done.ret;
}
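
/*
 * One in-tree caller, for orientation: the x86 MTRR code uses
 * stop_machine_from_inactive_cpu() so a CPU that is coming up, and is
 * therefore not yet active, can rendezvous all active CPUs while it
 * reprograms its MTRRs during hotplug.
 */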