/*
 * kernel/stop_machine.c
 *
 * CPU stop machinery: per-CPU stopper kthreads, the stop_one_cpu() /
 * stop_cpus() work-queueing primitives, and the stop_machine() state
 * machine built on top of them.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/smpboot.h>
#include <linux/atomic.h>
#include <linux/nmi.h>
#include <linux/sched/wake_q.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
        atomic_t                nr_todo;        /* nr left to execute */
        int                     ret;            /* collected return value */
        struct completion       completion;     /* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
        struct task_struct      *thread;

        raw_spinlock_t          lock;
        bool                    enabled;        /* is this stopper enabled? */
        struct list_head        works;          /* list of pending works */

        struct cpu_stop_work    stop_work;      /* for stop_cpus */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static bool stop_cpus_in_progress;

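/* prepare @done: the completion fires once @nr_todo works have signalled it */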
static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
        memset(done, 0, sizeof(*done));
        atomic_set(&done->nr_todo, nr_todo);
        init_completion(&done->completion);
}

/* signal completion: the last work to finish fires @done->completion */
static void cpu_stop_signal_done(struct cpu_stop_done *done)
{
        if (atomic_dec_and_test(&done->nr_todo))
                complete(&done->completion);
}

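/*
 * Queue @work on @stopper and defer waking the stopper thread via @wakeq.
 * Callers hold stopper->lock.
 */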
static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
                                  struct cpu_stop_work *work,
                                  struct wake_q_head *wakeq)
{
        list_add_tail(&work->list, &stopper->works);
        wake_q_add(wakeq, stopper->thread);
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        DEFINE_WAKE_Q(wakeq);
        unsigned long flags;
        bool enabled;

        raw_spin_lock_irqsave(&stopper->lock, flags);
        enabled = stopper->enabled;
        if (enabled)
                __cpu_stop_queue_work(stopper, work, &wakeq);
        else if (work->done)
                cpu_stop_signal_done(work->done);
        raw_spin_unlock_irqrestore(&stopper->lock, flags);

        wake_up_q(&wakeq);

        return enabled;
}

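/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run from the cpu's stopper thread,
 * the highest-priority context on that cpu, and this function returns
 * only after the execution is complete.
 *
 * There is no guarantee that @cpu stays online until @fn completes; if
 * @cpu goes down in the middle, execution may happen partially or fully
 * on a different cpu, so @fn must either tolerate that or the caller
 * must keep @cpu online.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */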
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;
        struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

        cpu_stop_init_done(&done, 1);
        if (!cpu_stop_queue_work(cpu, &work))
                return -ENOENT;
        /*
         * In case @cpu == smp_processor_id() we can avoid a sleep+wakeup
         * cycle by doing a preemption:
         */
        cond_resched();
        wait_for_completion(&done.completion);
        return done.ret;
}

/* This controls the threads on each CPU. */
enum multi_stop_state {
        /* Dummy starting state for thread. */
        MULTI_STOP_NONE,
        /* Awaiting everyone to be scheduled. */
        MULTI_STOP_PREPARE,
        /* Disable interrupts. */
        MULTI_STOP_DISABLE_IRQ,
        /* Run the function. */
        MULTI_STOP_RUN,
        /* Exit. */
        MULTI_STOP_EXIT,
};

struct multi_stop_data {
        cpu_stop_fn_t           fn;
        void                    *data;
        /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
        unsigned int            num_threads;
        const struct cpumask    *active_cpus;

        enum multi_stop_state   state;
        atomic_t                thread_ack;
};

static void set_state(struct multi_stop_data *msdata,
                      enum multi_stop_state newstate)
{
        /* Reset ack counter. */
        atomic_set(&msdata->thread_ack, msdata->num_threads);
        smp_wmb();
        msdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct multi_stop_data *msdata)
{
        if (atomic_dec_and_test(&msdata->thread_ack))
                set_state(msdata, msdata->state + 1);
}

/* This is the cpu_stop function which stops the CPU. */
static int multi_cpu_stop(void *data)
{
        struct multi_stop_data *msdata = data;
        enum multi_stop_state curstate = MULTI_STOP_NONE;
        int cpu = smp_processor_id(), err = 0;
        unsigned long flags;
        bool is_active;

        /*
         * When called from stop_machine_from_inactive_cpu(), irq might
         * already be disabled.  Save the state and restore it on exit.
         */
        local_save_flags(flags);

        if (!msdata->active_cpus)
                is_active = cpu == cpumask_first(cpu_online_mask);
        else
                is_active = cpumask_test_cpu(cpu, msdata->active_cpus);

        /* Simple state machine */
        do {
                /* Chill out and ensure we re-read multi_stop_state. */
                cpu_relax_yield();
                if (msdata->state != curstate) {
                        curstate = msdata->state;
                        switch (curstate) {
                        case MULTI_STOP_DISABLE_IRQ:
                                local_irq_disable();
                                hard_irq_disable();
                                break;
                        case MULTI_STOP_RUN:
                                if (is_active)
                                        err = msdata->fn(msdata->data);
                                break;
                        default:
                                break;
                        }
                        ack_state(msdata);
                } else if (curstate > MULTI_STOP_PREPARE) {
                        /*
                         * At this stage all other CPUs we depend on must spin
                         * in the same loop. Any reason for hard-lockup should
                         * be detected and reported on their side.
                         */
                        touch_nmi_watchdog();
                }
        } while (curstate != MULTI_STOP_EXIT);

        local_irq_restore(flags);
        return err;
}

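/*
 * Queue works on two cpus with both stopper locks held so the pair is
 * ordered against a concurrent stop_cpus(); used by stop_two_cpus().
 */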
static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
                                    int cpu2, struct cpu_stop_work *work2)
{
        struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
        struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
        DEFINE_WAKE_Q(wakeq);
        int err;
retry:
        raw_spin_lock_irq(&stopper1->lock);
        raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);

        err = -ENOENT;
        if (!stopper1->enabled || !stopper2->enabled)
                goto unlock;

        /*
         * Ensure that if we race with __stop_cpus() the stoppers won't get
         * queued up in reverse order, leading to system deadlock.
         *
         * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has
         * queued a work on cpu1 but not on cpu2; we hold both locks.
         *
         * It can be falsely true but it is safe to spin until it is cleared:
         * queue_stop_cpus_work() does everything under preempt_disable().
         */
        err = -EDEADLK;
        if (unlikely(stop_cpus_in_progress))
                goto unlock;

        err = 0;
        __cpu_stop_queue_work(stopper1, work1, &wakeq);
        __cpu_stop_queue_work(stopper2, work2, &wakeq);

        /*
         * The waking up of stopper threads has to happen in the same
         * scheduling context as the queueing.  Otherwise, there is a
         * possibility of one of the above stoppers being woken up by another
         * CPU, and preempting us. This will cause us to not wake up the other
         * stopper forever.
         */
        preempt_disable();
unlock:
        raw_spin_unlock(&stopper2->lock);
        raw_spin_unlock_irq(&stopper1->lock);

        if (unlikely(err == -EDEADLK)) {
                while (stop_cpus_in_progress)
                        cpu_relax();
                goto retry;
        }

        if (!err) {
                wake_up_q(&wakeq);
                preempt_enable();
        }

        return err;
}

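/**
 * stop_two_cpus - stops two cpus
 * @cpu1: the cpu to stop
 * @cpu2: the other cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Stops both cpus and runs @fn on @cpu1, with both cpus spinning in
 * multi_cpu_stop() until the work is complete.
 *
 * RETURNS:
 * -ENOENT if either cpu was offline; otherwise the return value of @fn.
 */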
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;
        struct cpu_stop_work work1, work2;
        struct multi_stop_data msdata;

        msdata = (struct multi_stop_data){
                .fn = fn,
                .data = arg,
                .num_threads = 2,
                .active_cpus = cpumask_of(cpu1),
        };

        work1 = work2 = (struct cpu_stop_work){
                .fn = multi_cpu_stop,
                .arg = &msdata,
                .done = &done
        };

        cpu_stop_init_done(&done, 2);
        set_state(&msdata, MULTI_STOP_PREPARE);

        if (cpu1 > cpu2)
                swap(cpu1, cpu2);
        if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
                return -ENOENT;

        wait_for_completion(&done.completion);
        return done.ret;
}

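/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 * @work_buf: pointer to cpu_stop_work structure
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until the stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 *
 * RETURNS:
 * true if cpu_stop_work was queued successfully and @fn will be called,
 * false otherwise.
 */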
bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
                         struct cpu_stop_work *work_buf)
{
        *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
        return cpu_stop_queue_work(cpu, work_buf);
}

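/* queue stop works on every cpu in @cpumask using the per-cpu static stop_work */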
static bool queue_stop_cpus_work(const struct cpumask *cpumask,
                                 cpu_stop_fn_t fn, void *arg,
                                 struct cpu_stop_done *done)
{
        struct cpu_stop_work *work;
        unsigned int cpu;
        bool queued = false;

        /*
         * Disable preemption while queueing to avoid getting
         * preempted by a stopper which might wait for other stoppers
         * to enter @fn which can lead to deadlock.
         */
        preempt_disable();
        stop_cpus_in_progress = true;
        for_each_cpu(cpu, cpumask) {
                work = &per_cpu(cpu_stopper.stop_work, cpu);
                work->fn = fn;
                work->arg = arg;
                work->done = done;
                if (cpu_stop_queue_work(cpu, work))
                        queued = true;
        }
        stop_cpus_in_progress = false;
        preempt_enable();

        return queued;
}

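/* queue @fn on every cpu in @cpumask and wait for all queued works to finish */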
static int __stop_cpus(const struct cpumask *cpumask,
                       cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;

        cpu_stop_init_done(&done, cpumask_weight(cpumask));
        if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
                return -ENOENT;
        wait_for_completion(&done.completion);
        return done.ret;
}

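/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run from the cpu's stopper thread with the highest priority,
 * monopolizing that cpu.  This function returns after all executions
 * are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized, making it safe for @fn to wait
 * for all cpu stoppers to enter @fn before any of them leaves it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, or any non zero return value if any returned non zero.
 */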
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
        int ret;

        /* static works are used, process one request at a time */
        mutex_lock(&stop_cpus_mutex);
        ret = __stop_cpus(cpumask, fn, arg);
        mutex_unlock(&stop_cpus_mutex);
        return ret;
}

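/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, or any
 * non zero return value if any returned non zero.
 */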
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
        int ret;

        /* static works are used, process one request at a time */
        if (!mutex_trylock(&stop_cpus_mutex))
                return -EAGAIN;
        ret = __stop_cpus(cpumask, fn, arg);
        mutex_unlock(&stop_cpus_mutex);
        return ret;
}

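/* smpboot callback: report whether the stopper thread has pending work */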
static int cpu_stop_should_run(unsigned int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        unsigned long flags;
        int run;

        raw_spin_lock_irqsave(&stopper->lock, flags);
        run = !list_empty(&stopper->works);
        raw_spin_unlock_irqrestore(&stopper->lock, flags);
        return run;
}

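/* smpboot thread function: drain and execute all pending cpu_stop works */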
static void cpu_stopper_thread(unsigned int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        struct cpu_stop_work *work;

repeat:
        work = NULL;
        raw_spin_lock_irq(&stopper->lock);
        if (!list_empty(&stopper->works)) {
                work = list_first_entry(&stopper->works,
                                        struct cpu_stop_work, list);
                list_del_init(&work->list);
        }
        raw_spin_unlock_irq(&stopper->lock);

        if (work) {
                cpu_stop_fn_t fn = work->fn;
                void *arg = work->arg;
                struct cpu_stop_done *done = work->done;
                int ret;

                /* cpu stop callbacks must not sleep, make in_atomic() == T */
                preempt_count_inc();
                ret = fn(arg);
                if (done) {
                        if (ret)
                                done->ret = ret;
                        cpu_stop_signal_done(done);
                }
                preempt_count_dec();
                WARN_ONCE(preempt_count(),
                          "cpu_stop: %pf(%p) leaked preempt count\n", fn, arg);
                goto repeat;
        }
}

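/* disable the stopper for @cpu so no new work is queued, then park its thread */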
void stop_machine_park(int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        /*
         * Lockless. cpu_stopper_thread() will take stopper->lock and flush
         * the pending works before it parks; until then it is fine to queue
         * new works.
         */
        stopper->enabled = false;
        kthread_park(stopper->thread);
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

static void cpu_stop_create(unsigned int cpu)
{
        sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
}

static void cpu_stop_park(unsigned int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

        WARN_ON(!list_empty(&stopper->works));
}

void stop_machine_unpark(int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

        stopper->enabled = true;
        kthread_unpark(stopper->thread);
}

static struct smp_hotplug_thread cpu_stop_threads = {
        .store                  = &cpu_stopper.thread,
        .thread_should_run      = cpu_stop_should_run,
        .thread_fn              = cpu_stopper_thread,
        .thread_comm            = "migration/%u",
        .create                 = cpu_stop_create,
        .park                   = cpu_stop_park,
        .selfparking            = true,
};

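/* register the per-cpu stopper threads and enable the stopper on the boot cpu */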
static int __init cpu_stop_init(void)
{
        unsigned int cpu;

        for_each_possible_cpu(cpu) {
                struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

                raw_spin_lock_init(&stopper->lock);
                INIT_LIST_HEAD(&stopper->works);
        }

        BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
        stop_machine_unpark(raw_smp_processor_id());
        stop_machine_initialized = true;
        return 0;
}
early_initcall(cpu_stop_init);

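/* stop_machine() variant for callers that already hold cpus_read_lock() */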
int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
                            const struct cpumask *cpus)
{
        struct multi_stop_data msdata = {
                .fn = fn,
                .data = data,
                .num_threads = num_online_cpus(),
                .active_cpus = cpus,
        };

        lockdep_assert_cpus_held();

        if (!stop_machine_initialized) {
                /*
                 * Handle the case where stop_machine() is called
                 * early in boot, before the stopper threads have been
                 * initialized.
                 */
                unsigned long flags;
                int ret;

                WARN_ON_ONCE(msdata.num_threads != 1);

                local_irq_save(flags);
                hard_irq_disable();
                ret = (*fn)(data);
                local_irq_restore(flags);

                return ret;
        }

        /* Set the initial state and stop all online cpus. */
        set_state(&msdata, MULTI_STOP_PREPARE);
        return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
}

int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
        int ret;

        /* No CPUs can come up or down during this. */
        cpus_read_lock();
        ret = stop_machine_cpuslocked(fn, data, cpus);
        cpus_read_unlock();
        return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);

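/*
 * Illustrative sketch (not from the original source): a typical caller hands
 * stop_machine() a short, non-sleeping callback and lets it run while every
 * online cpu spins with interrupts disabled, e.g.:
 *
 *      static int apply_patch(void *arg)
 *      {
 *              struct my_patch *p = arg;       // hypothetical payload
 *
 *              return do_apply(p);             // must not sleep
 *      }
 *
 *      err = stop_machine(apply_patch, p, NULL);
 *
 * "apply_patch", "do_apply" and "struct my_patch" are made-up names used
 * only to show the calling convention.
 */
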
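/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start), is not marked active, and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly on the
 * local CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return value if any
 * returned non zero.
 */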
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
                                   const struct cpumask *cpus)
{
        struct multi_stop_data msdata = { .fn = fn, .data = data,
                                          .active_cpus = cpus };
        struct cpu_stop_done done;
        int ret;

        /* Local CPU must be inactive and CPU hotplug in progress. */
        BUG_ON(cpu_active(raw_smp_processor_id()));
        msdata.num_threads = num_active_cpus() + 1;     /* +1 for local */

        /* No proper task established and can't sleep - busy wait for lock. */
        while (!mutex_trylock(&stop_cpus_mutex))
                cpu_relax();

        /* Schedule work on other CPUs and execute directly for local CPU. */
        set_state(&msdata, MULTI_STOP_PREPARE);
        cpu_stop_init_done(&done, num_active_cpus());
        queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
                             &done);
        ret = multi_cpu_stop(&msdata);

        /* Busy wait for completion. */
        while (!completion_done(&done.completion))
                cpu_relax();

        mutex_unlock(&stop_cpus_mutex);
        return ret ?: done.ret;
}