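/*
 * kernel/stop_machine.c
 *
 * CPU stop and stop_machine() infrastructure: run a function from the
 * per-CPU stopper threads ("migration/%u"), which run at the highest
 * scheduling priority, optionally with all other online CPUs spinning
 * with interrupts disabled.
 */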
#include <linux/compiler.h>
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/smpboot.h>
#include <linux/atomic.h>
#include <linux/nmi.h>
#include <linux/sched/wake_q.h>
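
/*
 * Structure used to track completion and collect the return value of a
 * cpu_stop request; may be shared by works queued on several CPUs.
 */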
struct cpu_stop_done {
	atomic_t		nr_todo;	/* works left to execute */
	int			ret;		/* collected return value */
	struct completion	completion;	/* fired when nr_todo hits 0 */
};
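
/* The per-CPU stopper: one for every possible CPU, enabled while it is online. */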
struct cpu_stopper {
	struct task_struct	*thread;

	raw_spinlock_t		lock;
	bool			enabled;	/* is this stopper enabled? */
	struct list_head	works;		/* list of pending works */

	struct cpu_stop_work	stop_work;	/* for stop_cpus() */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;
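
/* static data for stop_cpus() */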
static DEFINE_MUTEX(stop_cpus_mutex);
static bool stop_cpus_in_progress;

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
	memset(done, 0, sizeof(*done));
	atomic_set(&done->nr_todo, nr_todo);
	init_completion(&done->completion);
}
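
/* decrement nr_todo and fire the completion when the last work finishes */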
static void cpu_stop_signal_done(struct cpu_stop_done *done)
{
	if (atomic_dec_and_test(&done->nr_todo))
		complete(&done->completion);
}

static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
					struct cpu_stop_work *work,
					struct wake_q_head *wakeq)
{
	list_add_tail(&work->list, &stopper->works);
	wake_q_add(wakeq, stopper->thread);
}
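
/* queue @work to the stopper for @cpu; if the stopper is disabled (CPU offline), @work is completed immediately */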
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	DEFINE_WAKE_Q(wakeq);
	unsigned long flags;
	bool enabled;

	preempt_disable();
	raw_spin_lock_irqsave(&stopper->lock, flags);
	enabled = stopper->enabled;
	if (enabled)
		__cpu_stop_queue_work(stopper, work, &wakeq);
	else if (work->done)
		cpu_stop_signal_done(work->done);
	raw_spin_unlock_irqrestore(&stopper->lock, flags);

	wake_up_q(&wakeq);
	preempt_enable();

	return enabled;
}
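
/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu from its stopper thread, which runs at the
 * highest scheduling priority and monopolizes the CPU.  Returns once the
 * execution is complete.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @cpu was offline and @fn was therefore not executed;
 * otherwise, the return value of @fn.
 */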
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

	cpu_stop_init_done(&done, 1);
	if (!cpu_stop_queue_work(cpu, &work))
		return -ENOENT;

	/*
	 * In case @cpu == smp_processor_id() we can avoid a sleep+wakeup
	 * cycle by letting the stopper thread preempt us right here.
	 */
	cond_resched();
	wait_for_completion(&done.completion);
	return done.ret;
}
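
/*
 * State machine that the stopper threads participating in multi_cpu_stop()
 * step through in lockstep; ack_state() advances to the next state once
 * every thread has acknowledged the current one.
 */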
enum multi_stop_state {
	/* Dummy starting state for thread. */
	MULTI_STOP_NONE,
	/* Awaiting everyone to be scheduled. */
	MULTI_STOP_PREPARE,
	/* Disable interrupts. */
	MULTI_STOP_DISABLE_IRQ,
	/* Run the function. */
	MULTI_STOP_RUN,
	/* Exit. */
	MULTI_STOP_EXIT,
};

struct multi_stop_data {
	cpu_stop_fn_t		fn;
	void			*data;
	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
	unsigned int		num_threads;
	const struct cpumask	*active_cpus;

	enum multi_stop_state	state;
	atomic_t		thread_ack;
};

static void set_state(struct multi_stop_data *msdata,
		      enum multi_stop_state newstate)
{
	/* Reset ack counter. */
	atomic_set(&msdata->thread_ack, msdata->num_threads);
	smp_wmb();
	WRITE_ONCE(msdata->state, newstate);
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct multi_stop_data *msdata)
{
	if (atomic_dec_and_test(&msdata->thread_ack))
		set_state(msdata, msdata->state + 1);
}

/* Weak hook so architectures can override how a CPU waits between state changes. */
void __weak stop_machine_yield(const struct cpumask *cpumask)
{
	cpu_relax();
}
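
/* This is the cpu_stop function which stops the CPU. */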
static int multi_cpu_stop(void *data)
{
	struct multi_stop_data *msdata = data;
	enum multi_stop_state newstate, curstate = MULTI_STOP_NONE;
	int cpu = smp_processor_id(), err = 0;
	const struct cpumask *cpumask;
	unsigned long flags;
	bool is_active;

	/*
	 * When called from stop_machine_from_inactive_cpu(), irq might
	 * already be disabled.  Save the state and restore it on exit.
	 */
	local_save_flags(flags);

	if (!msdata->active_cpus) {
		cpumask = cpu_online_mask;
		is_active = cpu == cpumask_first(cpumask);
	} else {
		cpumask = msdata->active_cpus;
		is_active = cpumask_test_cpu(cpu, cpumask);
	}

	/* Simple state machine */
	do {
		/* Chill out and ensure we re-read multi_stop_state. */
		stop_machine_yield(cpumask);
		newstate = READ_ONCE(msdata->state);
		if (newstate != curstate) {
			curstate = newstate;
			switch (curstate) {
			case MULTI_STOP_DISABLE_IRQ:
				local_irq_disable();
				hard_irq_disable();
				break;
			case MULTI_STOP_RUN:
				if (is_active)
					err = msdata->fn(msdata->data);
				break;
			default:
				break;
			}
			ack_state(msdata);
		} else if (curstate > MULTI_STOP_PREPARE) {
			/*
			 * At this stage all other CPUs we depend on must spin
			 * in the same loop. Any reason for hard-lockup should
			 * be detected and reported on their side.
			 */
			touch_nmi_watchdog();
		}
		rcu_momentary_dyntick_idle();
	} while (curstate != MULTI_STOP_EXIT);

	local_irq_restore(flags);
	return err;
}

static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
				    int cpu2, struct cpu_stop_work *work2)
{
	struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
	struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
	DEFINE_WAKE_Q(wakeq);
	int err;

retry:
	/*
	 * The waking up of stopper threads has to happen in the same
	 * scheduling context as the queueing.  Otherwise, there is a
	 * possibility of one of the above stoppers being woken up by another
	 * CPU, and preempting us. This will cause us to not wake up the other
	 * stopper forever.
	 */
	preempt_disable();
	raw_spin_lock_irq(&stopper1->lock);
	raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);

	if (!stopper1->enabled || !stopper2->enabled) {
		err = -ENOENT;
		goto unlock;
	}

	/*
	 * Ensure that if we race with __stop_cpus() the stoppers won't get
	 * queued up in reverse order leading to system deadlock.
	 *
	 * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has
	 * queued a work on cpu1 but not on cpu2, we hold both locks.
	 *
	 * It can be falsely true but it is safe to spin until it is cleared,
	 * queue_stop_cpus_work() does everything under preempt_disable().
	 */
	if (unlikely(stop_cpus_in_progress)) {
		err = -EDEADLK;
		goto unlock;
	}

	err = 0;
	__cpu_stop_queue_work(stopper1, work1, &wakeq);
	__cpu_stop_queue_work(stopper2, work2, &wakeq);

unlock:
	raw_spin_unlock(&stopper2->lock);
	raw_spin_unlock_irq(&stopper1->lock);

	if (unlikely(err == -EDEADLK)) {
		preempt_enable();

		/* Wait for the in-flight stop_cpus() to finish, then retry. */
		while (stop_cpus_in_progress)
			cpu_relax();

		goto retry;
	}

	wake_up_q(&wakeq);
	preempt_enable();

	return err;
}
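
/**
 * stop_two_cpus - stops two cpus
 * @cpu1: the cpu to stop
 * @cpu2: the other cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Stops both CPUs via multi_cpu_stop() and runs @fn on @cpu1 while the
 * other CPU spins with interrupts disabled.
 *
 * Returns when both works have completed, or -ENOENT if either stopper
 * was disabled because its CPU is offline.
 */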
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work1, work2;
	struct multi_stop_data msdata;

	msdata = (struct multi_stop_data){
		.fn = fn,
		.data = arg,
		.num_threads = 2,
		.active_cpus = cpumask_of(cpu1),
	};

	work1 = work2 = (struct cpu_stop_work){
		.fn = multi_cpu_stop,
		.arg = &msdata,
		.done = &done
	};

	cpu_stop_init_done(&done, 2);
	set_state(&msdata, MULTI_STOP_PREPARE);

	if (cpu1 > cpu2)
		swap(cpu1, cpu2);
	if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
		return -ENOENT;

	wait_for_completion(&done.completion);
	return done.ret;
}
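
/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 * @work_buf: pointer to cpu_stop_work structure
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The caller is
 * responsible for ensuring @work_buf is currently unused and will remain
 * untouched until the stopper starts executing @fn.
 *
 * RETURNS:
 * true if cpu_stop_work was queued successfully and @fn will be called,
 * false otherwise.
 */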
bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
			 struct cpu_stop_work *work_buf)
{
	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
	return cpu_stop_queue_work(cpu, work_buf);
}

static bool queue_stop_cpus_work(const struct cpumask *cpumask,
				 cpu_stop_fn_t fn, void *arg,
				 struct cpu_stop_done *done)
{
	struct cpu_stop_work *work;
	unsigned int cpu;
	bool queued = false;

	/*
	 * Disable preemption while queueing to avoid getting
	 * preempted by a stopper which might wait for other stoppers
	 * to enter @fn which can lead to deadlock.
	 */
	preempt_disable();
	stop_cpus_in_progress = true;
	barrier();
	for_each_cpu(cpu, cpumask) {
		work = &per_cpu(cpu_stopper.stop_work, cpu);
		work->fn = fn;
		work->arg = arg;
		work->done = done;
		if (cpu_stop_queue_work(cpu, work))
			queued = true;
	}
	barrier();
	stop_cpus_in_progress = false;
	preempt_enable();

	return queued;
}

static int __stop_cpus(const struct cpumask *cpumask,
		       cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;

	cpu_stop_init_done(&done, cpumask_weight(cpumask));
	if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
		return -ENOENT;
	wait_for_completion(&done.completion);
	return done.ret;
}
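
/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on the online cpus in @cpumask from their stopper
 * threads.  The per-CPU static stop_work slots are used, so only one
 * stop_cpus() request is processed at a time, serialized by
 * stop_cpus_mutex.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn returned 0,
 * any non zero return value if any returned non zero.
 */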
static int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	mutex_lock(&stop_cpus_mutex);
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}

static int cpu_stop_should_run(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	unsigned long flags;
	int run;

	raw_spin_lock_irqsave(&stopper->lock, flags);
	run = !list_empty(&stopper->works);
	raw_spin_unlock_irqrestore(&stopper->lock, flags);
	return run;
}

static void cpu_stopper_thread(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct cpu_stop_work *work;

repeat:
	work = NULL;
	raw_spin_lock_irq(&stopper->lock);
	if (!list_empty(&stopper->works)) {
		work = list_first_entry(&stopper->works,
					struct cpu_stop_work, list);
		list_del_init(&work->list);
	}
	raw_spin_unlock_irq(&stopper->lock);

	if (work) {
		cpu_stop_fn_t fn = work->fn;
		void *arg = work->arg;
		struct cpu_stop_done *done = work->done;
		int ret;

		/* cpu stop callbacks must not sleep, make in_atomic() == T */
		preempt_count_inc();
		ret = fn(arg);
		if (done) {
			if (ret)
				done->ret = ret;
			cpu_stop_signal_done(done);
		}
		preempt_count_dec();
		WARN_ONCE(preempt_count(),
			  "cpu_stop: %ps(%p) leaked preempt count\n", fn, arg);
		goto repeat;
	}
}

void stop_machine_park(int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

	/*
	 * Lockless. cpu_stopper_thread() will take stopper->lock and flush
	 * the pending works before it parks; until then it is fine to queue
	 * new works.
	 */
	stopper->enabled = false;
	kthread_park(stopper->thread);
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

static void cpu_stop_create(unsigned int cpu)
{
	sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
}

static void cpu_stop_park(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

	WARN_ON(!list_empty(&stopper->works));
}

void stop_machine_unpark(int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

	stopper->enabled = true;
	kthread_unpark(stopper->thread);
}

static struct smp_hotplug_thread cpu_stop_threads = {
	.store			= &cpu_stopper.thread,
	.thread_should_run	= cpu_stop_should_run,
	.thread_fn		= cpu_stopper_thread,
	.thread_comm		= "migration/%u",
	.create			= cpu_stop_create,
	.park			= cpu_stop_park,
	.selfparking		= true,
};

static int __init cpu_stop_init(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

		raw_spin_lock_init(&stopper->lock);
		INIT_LIST_HEAD(&stopper->works);
	}

	BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
	stop_machine_unpark(raw_smp_processor_id());
	stop_machine_initialized = true;
	return 0;
}
early_initcall(cpu_stop_init);
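
/*
 * stop_machine() variant for callers that already hold a cpus read lock
 * (see lockdep_assert_cpus_held() below).  Runs @fn on the CPUs in @cpus
 * (or on the first online CPU if @cpus is NULL) while all other online
 * CPUs spin with interrupts disabled.
 */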
int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
			    const struct cpumask *cpus)
{
	struct multi_stop_data msdata = {
		.fn = fn,
		.data = data,
		.num_threads = num_online_cpus(),
		.active_cpus = cpus,
	};

	lockdep_assert_cpus_held();

	if (!stop_machine_initialized) {
		/*
		 * Handle the case where stop_machine() is called early in
		 * boot, before the stopper threads have been set up: just
		 * run @fn with irqs disabled, there is only one CPU anyway.
		 */
		unsigned long flags;
		int ret;

		WARN_ON_ONCE(msdata.num_threads != 1);

		local_irq_save(flags);
		hard_irq_disable();
		ret = (*fn)(data);
		local_irq_restore(flags);

		return ret;
	}

	/* Set the initial state and stop all online cpus. */
	set_state(&msdata, MULTI_STOP_PREPARE);
	return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
}
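
/**
 * stop_machine - freeze the machine and run @fn on the chosen CPUs
 * @fn: the function to run (must not sleep)
 * @data: the data ptr for @fn()
 * @cpus: the cpus to run @fn() on (NULL = run on the first online cpu)
 *
 * All other online CPUs busy-wait with interrupts disabled while @fn runs,
 * so @fn sees a quiesced system.
 *
 * A minimal illustrative sketch of a caller (hypothetical names, not part
 * of this file):
 *
 *	static int apply_patch(void *arg)
 *	{
 *		// runs with every other online CPU spinning, IRQs off
 *		return 0;
 *	}
 *
 *	err = stop_machine(apply_patch, NULL, NULL);
 *
 * RETURNS:
 * 0 if all invocations of @fn returned 0, any non zero return value if any
 * returned non zero.
 */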
int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
	int ret;

	/* No CPUs can come up or down during this. */
	cpus_read_lock();
	ret = stop_machine_cpuslocked(fn, data, cpus);
	cpus_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
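
/**
 * stop_machine_from_inactive_cpu - stop_machine() from an inactive CPU
 * @fn: the function to run
 * @data: the data ptr for @fn()
 * @cpus: the cpus to run @fn() on (NULL = first online cpu)
 *
 * Like stop_machine(), but meant for a CPU that is not active (e.g. in the
 * middle of hotplug) and therefore cannot sleep: synchronization uses
 * busy-waiting, and the local CPU takes part by calling multi_cpu_stop()
 * directly rather than through its stopper thread.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return value if any
 * returned non zero.
 */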
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
				   const struct cpumask *cpus)
{
	struct multi_stop_data msdata = { .fn = fn, .data = data,
					  .active_cpus = cpus };
	struct cpu_stop_done done;
	int ret;

	/* Local CPU must be inactive and CPU hotplug in progress. */
	BUG_ON(cpu_active(raw_smp_processor_id()));
	msdata.num_threads = num_active_cpus() + 1;	/* +1 for local */

	/* No proper task created and can't sleep - busy wait for lock. */
	while (!mutex_trylock(&stop_cpus_mutex))
		cpu_relax();

	/* Schedule work on other CPUs and execute directly for local CPU. */
	set_state(&msdata, MULTI_STOP_PREPARE);
	cpu_stop_init_done(&done, num_active_cpus());
	queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
			     &done);
	ret = multi_cpu_stop(&msdata);

	/* Busy wait for completion. */
	while (!completion_done(&done.completion))
		cpu_relax();

	mutex_unlock(&stop_cpus_mutex);
	return ret ?: done.ret;
}