/*
 * kernel/stop_machine.c - stop one or more CPUs and run a function on them
 * with maximum exclusivity, using the highest-priority per-CPU stopper
 * threads ("cpu stop" works and the stop_machine() facility).
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/smpboot.h>
#include <linux/atomic.h>
#include <linux/nmi.h>
#include <linux/sched/wake_q.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different CPUs.
 */
struct cpu_stop_done {
	atomic_t		nr_todo;	/* nr left to execute */
	int			ret;		/* collected return value */
	struct completion	completion;	/* fired if nr_todo reaches 0 */
};

/* the stopper, one per every possible CPU, enabled on online CPUs */
struct cpu_stopper {
	struct task_struct	*thread;

	raw_spinlock_t		lock;
	bool			enabled;	/* is this stopper enabled? */
	struct list_head	works;		/* list of pending works */

	struct cpu_stop_work	stop_work;	/* for stop_cpus */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static bool stop_cpus_in_progress;

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
	memset(done, 0, sizeof(*done));
	atomic_set(&done->nr_todo, nr_todo);
	init_completion(&done->completion);
}

/* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done)
{
	if (atomic_dec_and_test(&done->nr_todo))
		complete(&done->completion);
}

static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
				  struct cpu_stop_work *work,
				  struct wake_q_head *wakeq)
{
	list_add_tail(&work->list, &stopper->works);
	wake_q_add(wakeq, stopper->thread);
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	DEFINE_WAKE_Q(wakeq);
	unsigned long flags;
	bool enabled;

	preempt_disable();
	raw_spin_lock_irqsave(&stopper->lock, flags);
	enabled = stopper->enabled;
	if (enabled)
		__cpu_stop_queue_work(stopper, work, &wakeq);
	else if (work->done)
		cpu_stop_signal_done(work->done);
	raw_spin_unlock_irqrestore(&stopper->lock, flags);

	wake_up_q(&wakeq);
	preempt_enable();

	return enabled;
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

	cpu_stop_init_done(&done, 1);
	if (!cpu_stop_queue_work(cpu, &work))
		return -ENOENT;

	/*
	 * In case @cpu == smp_processor_id() we can avoid a sleep+wakeup
	 * cycle by doing a preemption:
	 */
	cond_resched();
	wait_for_completion(&done.completion);
	return done.ret;
}
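
/*
 * Illustrative sketch (not part of this file): how a caller might use
 * stop_one_cpu() from a sleepable context.  The callback runs in the
 * stopper thread on the target CPU; read_cpu_fn() and the variable names
 * are hypothetical.
 *
 *	static int read_cpu_fn(void *arg)
 *	{
 *		unsigned int *where = arg;
 *
 *		*where = smp_processor_id();	// runs on the target CPU
 *		return 0;
 *	}
 *
 *	unsigned int where;
 *	int err = stop_one_cpu(3, read_cpu_fn, &where);
 *	// err is -ENOENT if CPU 3 was offline, otherwise read_cpu_fn()'s
 *	// return value.
 */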

/* This controls the threads on each CPU. */
enum multi_stop_state {
	/* Dummy starting state for thread. */
	MULTI_STOP_NONE,
	/* Awaiting everyone to be scheduled. */
	MULTI_STOP_PREPARE,
	/* Disable interrupts. */
	MULTI_STOP_DISABLE_IRQ,
	/* Run the function. */
	MULTI_STOP_RUN,
	/* Exit. */
	MULTI_STOP_EXIT,
};

struct multi_stop_data {
	cpu_stop_fn_t		fn;
	void			*data;
	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
	unsigned int		num_threads;
	const struct cpumask	*active_cpus;

	enum multi_stop_state	state;
	atomic_t		thread_ack;
};

static void set_state(struct multi_stop_data *msdata,
		      enum multi_stop_state newstate)
{
	/* Reset ack counter. */
	atomic_set(&msdata->thread_ack, msdata->num_threads);
	smp_wmb();
	msdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct multi_stop_data *msdata)
{
	if (atomic_dec_and_test(&msdata->thread_ack))
		set_state(msdata, msdata->state + 1);
}

/* This is the cpu_stop function which stops the CPU. */
static int multi_cpu_stop(void *data)
{
	struct multi_stop_data *msdata = data;
	enum multi_stop_state curstate = MULTI_STOP_NONE;
	int cpu = smp_processor_id(), err = 0;
	unsigned long flags;
	bool is_active;

	/*
	 * When called from stop_machine_from_inactive_cpu(), irq might
	 * already be disabled.  Save the state and restore it on exit.
	 */
	local_save_flags(flags);

	if (!msdata->active_cpus)
		is_active = cpu == cpumask_first(cpu_online_mask);
	else
		is_active = cpumask_test_cpu(cpu, msdata->active_cpus);

	/* Simple state machine */
	do {
		/* Chill out and ensure we re-read multi_stop_state. */
		cpu_relax_yield();
		if (msdata->state != curstate) {
			curstate = msdata->state;
			switch (curstate) {
			case MULTI_STOP_DISABLE_IRQ:
				local_irq_disable();
				hard_irq_disable();
				break;
			case MULTI_STOP_RUN:
				if (is_active)
					err = msdata->fn(msdata->data);
				break;
			default:
				break;
			}
			ack_state(msdata);
		} else if (curstate > MULTI_STOP_PREPARE) {
			/*
			 * At this stage all other CPUs we depend on must spin
			 * in the same loop. Any reason for hard-lockup should
			 * be detected and reported on their side.
			 */
			touch_nmi_watchdog();
		}
	} while (curstate != MULTI_STOP_EXIT);

	local_irq_restore(flags);
	return err;
}

static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
				    int cpu2, struct cpu_stop_work *work2)
{
	struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
	struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
	DEFINE_WAKE_Q(wakeq);
	int err;

retry:
	/*
	 * The waking up of stopper threads has to happen in the same
	 * scheduling context as the queueing.  Otherwise, there is a
	 * possibility of one of the above stoppers being woken up by another
	 * CPU, and preempting us. This will cause us to not wake up the other
	 * stopper forever.
	 */
	preempt_disable();
	raw_spin_lock_irq(&stopper1->lock);
	raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);

	if (!stopper1->enabled || !stopper2->enabled) {
		err = -ENOENT;
		goto unlock;
	}

	/*
	 * Ensure that if we race with __stop_cpus() the stoppers won't get
	 * queued up in reverse order leading to system deadlock.
	 *
	 * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has
	 * queued a work on cpu1 but not on cpu2, we hold both locks.
	 *
	 * It can be falsely true but it is safe to spin until it is cleared,
	 * queue_stop_cpus_work() does everything under preempt_disable().
	 */
	if (unlikely(stop_cpus_in_progress)) {
		err = -EDEADLK;
		goto unlock;
	}

	err = 0;
	__cpu_stop_queue_work(stopper1, work1, &wakeq);
	__cpu_stop_queue_work(stopper2, work2, &wakeq);

unlock:
	raw_spin_unlock(&stopper2->lock);
	raw_spin_unlock_irq(&stopper1->lock);

	if (unlikely(err == -EDEADLK)) {
		preempt_enable();

		while (stop_cpus_in_progress)
			cpu_relax();

		goto retry;
	}

	wake_up_q(&wakeq);
	preempt_enable();

	return err;
}

/**
 * stop_two_cpus - stops two cpus
 * @cpu1: the cpu to stop
 * @cpu2: the other cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Stops both CPUs and runs @fn on one of them (@cpu1).
 *
 * returns when both are completed.
 */
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work1, work2;
	struct multi_stop_data msdata;

	msdata = (struct multi_stop_data){
		.fn = fn,
		.data = arg,
		.num_threads = 2,
		.active_cpus = cpumask_of(cpu1),
	};

	work1 = work2 = (struct cpu_stop_work){
		.fn = multi_cpu_stop,
		.arg = &msdata,
		.done = &done
	};

	cpu_stop_init_done(&done, 2);
	set_state(&msdata, MULTI_STOP_PREPARE);

	if (cpu1 > cpu2)
		swap(cpu1, cpu2);
	if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
		return -ENOENT;

	wait_for_completion(&done.completion);
	return done.ret;
}
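
/*
 * Illustrative sketch (hypothetical caller): stop_two_cpus() pins both CPUs
 * in multi_cpu_stop() and runs the callback on @cpu1 while the other CPU
 * spins with interrupts disabled, so the callback may touch private state of
 * both CPUs.  swap_state_fn() and struct swap_args are invented for the
 * example.
 *
 *	struct swap_args {
 *		unsigned int src_cpu;
 *		unsigned int dst_cpu;
 *	};
 *
 *	static int swap_state_fn(void *arg)
 *	{
 *		struct swap_args *sa = arg;
 *
 *		// Runs on sa->src_cpu; sa->dst_cpu is spinning in
 *		// multi_cpu_stop() with IRQs off, so exchanging per-CPU
 *		// state between the two is safe here.
 *		return 0;
 *	}
 *
 *	struct swap_args sa = { .src_cpu = 0, .dst_cpu = 1 };
 *	int err = stop_two_cpus(sa.src_cpu, sa.dst_cpu, swap_state_fn, &sa);
 */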

/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 * @work_buf: pointer to cpu_stop_work structure
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until the stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 *
 * RETURNS:
 * true if cpu_stop_work was queued successfully and @fn will be called,
 * false otherwise.
 */
bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
			 struct cpu_stop_work *work_buf)
{
	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
	return cpu_stop_queue_work(cpu, work_buf);
}
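
/*
 * Illustrative sketch (hypothetical caller): because stop_one_cpu_nowait()
 * does not wait, @work_buf must stay valid until the stopper starts running
 * @fn, so callers typically embed it in a longer-lived structure.  struct
 * my_request and kick_cpu_fn() are invented for the example.
 *
 *	struct my_request {
 *		struct cpu_stop_work	stop_work;	// must outlive the call
 *		int			payload;
 *	};
 *
 *	static int kick_cpu_fn(void *arg)
 *	{
 *		struct my_request *req = arg;
 *
 *		// Runs on the target CPU; the stopper no longer touches
 *		// req->stop_work once this callback has started.
 *		return req->payload ? 0 : -EINVAL;
 *	}
 *
 *	static void kick_cpu(struct my_request *req, unsigned int cpu)
 *	{
 *		if (!stop_one_cpu_nowait(cpu, kick_cpu_fn, req,
 *					 &req->stop_work))
 *			pr_debug("cpu%u offline, request dropped\n", cpu);
 *	}
 */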

static bool queue_stop_cpus_work(const struct cpumask *cpumask,
				 cpu_stop_fn_t fn, void *arg,
				 struct cpu_stop_done *done)
{
	struct cpu_stop_work *work;
	unsigned int cpu;
	bool queued = false;

	/*
	 * Disable preemption while queueing to avoid getting
	 * preempted by a stopper which might wait for other stoppers
	 * to enter @fn which can lead to deadlock.
	 */
	preempt_disable();
	stop_cpus_in_progress = true;
	for_each_cpu(cpu, cpumask) {
		work = &per_cpu(cpu_stopper.stop_work, cpu);
		work->fn = fn;
		work->arg = arg;
		work->done = done;
		if (cpu_stop_queue_work(cpu, work))
			queued = true;
	}
	stop_cpus_in_progress = false;
	preempt_enable();

	return queued;
}

static int __stop_cpus(const struct cpumask *cpumask,
		       cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;

	cpu_stop_init_done(&done, cpumask_weight(cpumask));
	if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
		return -ENOENT;
	wait_for_completion(&done.completion);
	return done.ret;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized making it safe for @fn to wait
 * for all cpu stoppers to enter @fn before returning.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	mutex_lock(&stop_cpus_mutex);
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}
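
/*
 * Illustrative sketch (hypothetical caller): run a callback once on every
 * online CPU in stopper context, e.g. to flush some per-CPU state.
 * flush_local_fn() is invented for the example.
 *
 *	static int flush_local_fn(void *arg)
 *	{
 *		// Runs on each online CPU in the mask, one stopper at a time
 *		// monopolizing its CPU while it executes.
 *		return 0;
 *	}
 *
 *	int err = stop_cpus(cpu_online_mask, flush_local_fn, NULL);
 *	// -ENOENT if no CPU in the mask was online; otherwise 0 if every
 *	// callback returned 0, or a non-zero return value if any failed.
 */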

/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any non
 * zero return value if any returned non zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	if (!mutex_trylock(&stop_cpus_mutex))
		return -EAGAIN;
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}
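
/*
 * Illustrative sketch (hypothetical caller): try_stop_cpus() lets a caller
 * back off instead of blocking behind another stop_cpus() user.
 * do_work_fn() is invented for the example.
 *
 *	int ret = try_stop_cpus(cpu_online_mask, do_work_fn, NULL);
 *	if (ret == -EAGAIN) {
 *		// Someone else is already stopping CPUs; fall back to a
 *		// slower path or retry later instead of sleeping on the
 *		// stop_cpus_mutex.
 *	}
 */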

static int cpu_stop_should_run(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	unsigned long flags;
	int run;

	raw_spin_lock_irqsave(&stopper->lock, flags);
	run = !list_empty(&stopper->works);
	raw_spin_unlock_irqrestore(&stopper->lock, flags);
	return run;
}

static void cpu_stopper_thread(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct cpu_stop_work *work;

repeat:
	work = NULL;
	raw_spin_lock_irq(&stopper->lock);
	if (!list_empty(&stopper->works)) {
		work = list_first_entry(&stopper->works,
					struct cpu_stop_work, list);
		list_del_init(&work->list);
	}
	raw_spin_unlock_irq(&stopper->lock);

	if (work) {
		cpu_stop_fn_t fn = work->fn;
		void *arg = work->arg;
		struct cpu_stop_done *done = work->done;
		int ret;

		/* cpu stop callbacks must not sleep, make in_atomic() == T */
		preempt_count_inc();
		ret = fn(arg);
		if (done) {
			if (ret)
				done->ret = ret;
			cpu_stop_signal_done(done);
		}
		preempt_count_dec();
		WARN_ONCE(preempt_count(),
			  "cpu_stop: %pf(%p) leaked preempt count\n", fn, arg);
		goto repeat;
	}
}

void stop_machine_park(int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

	/*
	 * Lockless. cpu_stopper_thread() will take stopper->lock and flush
	 * the pending works before it parks; until then it is fine to queue
	 * new works.
	 */
	stopper->enabled = false;
	kthread_park(stopper->thread);
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

static void cpu_stop_create(unsigned int cpu)
{
	sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
}

static void cpu_stop_park(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

	WARN_ON(!list_empty(&stopper->works));
}

void stop_machine_unpark(int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

	stopper->enabled = true;
	kthread_unpark(stopper->thread);
}

static struct smp_hotplug_thread cpu_stop_threads = {
	.store			= &cpu_stopper.thread,
	.thread_should_run	= cpu_stop_should_run,
	.thread_fn		= cpu_stopper_thread,
	.thread_comm		= "migration/%u",
	.create			= cpu_stop_create,
	.park			= cpu_stop_park,
	.selfparking		= true,
};

static int __init cpu_stop_init(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

		raw_spin_lock_init(&stopper->lock);
		INIT_LIST_HEAD(&stopper->works);
	}

	BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
	stop_machine_unpark(raw_smp_processor_id());
	stop_machine_initialized = true;
	return 0;
}
early_initcall(cpu_stop_init);

int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
			    const struct cpumask *cpus)
{
	struct multi_stop_data msdata = {
		.fn = fn,
		.data = data,
		.num_threads = num_online_cpus(),
		.active_cpus = cpus,
	};

	lockdep_assert_cpus_held();

	if (!stop_machine_initialized) {
		/*
		 * Handle the case where stop_machine() is called
		 * early in boot before stop_machine() has been
		 * initialized.
		 */
		unsigned long flags;
		int ret;

		WARN_ON_ONCE(msdata.num_threads != 1);

		local_irq_save(flags);
		hard_irq_disable();
		ret = (*fn)(data);
		local_irq_restore(flags);

		return ret;
	}

	/* Set the initial state and stop all online cpus. */
	set_state(&msdata, MULTI_STOP_PREPARE);
	return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
}

int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
	int ret;

	/* No CPUs can come up or down during this. */
	cpus_read_lock();
	ret = stop_machine_cpuslocked(fn, data, cpus);
	cpus_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
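
/*
 * Illustrative sketch (hypothetical module code): stop_machine() is the
 * heavyweight option for updating state that no other CPU may observe
 * mid-update; every online CPU spins in multi_cpu_stop() with interrupts
 * disabled while the callback runs.  patch_fn() is invented for the example.
 *
 *	static int patch_fn(void *arg)
 *	{
 *		// Runs on one CPU; all other online CPUs are spinning with
 *		// interrupts disabled until this returns.
 *		return 0;
 *	}
 *
 *	// @cpus == NULL: run patch_fn() on any one online CPU.
 *	int err = stop_machine(patch_fn, NULL, NULL);
 */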

/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start) and not marked active and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly on the
 * local CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return value if any
 * returned non zero.
 */
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
				   const struct cpumask *cpus)
{
	struct multi_stop_data msdata = { .fn = fn, .data = data,
					  .active_cpus = cpus };
	struct cpu_stop_done done;
	int ret;

	/* Local CPU must be inactive and CPU hotplug in progress. */
	BUG_ON(cpu_active(raw_smp_processor_id()));
	msdata.num_threads = num_active_cpus() + 1;	/* +1 for local */

	/* No proper task established and can't sleep - busy wait for lock. */
	while (!mutex_trylock(&stop_cpus_mutex))
		cpu_relax();

	/* Schedule work on other CPUs and execute directly for local CPU */
	set_state(&msdata, MULTI_STOP_PREPARE);
	cpu_stop_init_done(&done, num_active_cpus());
	queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
			     &done);
	ret = multi_cpu_stop(&msdata);

	/* Busy wait for completion. */
	while (!completion_done(&done.completion))
		cpu_relax();

	mutex_unlock(&stop_cpus_mutex);
	return ret ?: done.ret;
}