/*
 * kernel/stop_machine.c - CPU stop and stop_machine() infrastructure.
 *
 * Runs a function on one or more CPUs from the highest-priority
 * "migration" threads, monopolizing those CPUs for the duration of the call.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/smpboot.h>
#include <linux/atomic.h>
#include <linux/lglock.h>
#include <linux/nmi.h>

/*
 * Completion tracking for queued cpu_stop works.  May be shared by works
 * queued on several CPUs; the last work to finish fires the completion.
 */
struct cpu_stop_done {
	atomic_t		nr_todo;	/* nr left to execute */
	int			ret;		/* collected return value */
	struct completion	completion;	/* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
	struct task_struct	*thread;

	spinlock_t		lock;
	bool			enabled;	/* is this stopper enabled? */
	struct list_head	works;		/* list of pending works */

	struct cpu_stop_work	stop_work;	/* for stop_cpus */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;

/*
 * Avoids a race between stop_two_cpus() and the global stop_cpus(), where
 * the stoppers could otherwise be queued in opposite order on the two CPUs
 * and deadlock the system.  Using an lglock keeps stop_two_cpus() relatively
 * cheap while still excluding it against the global path.
 */
DEFINE_STATIC_LGLOCK(stop_cpus_lock);

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
	memset(done, 0, sizeof(*done));
	atomic_set(&done->nr_todo, nr_todo);
	init_completion(&done->completion);
}

/* signal completion of one work; the last one fires @done->completion */
static void cpu_stop_signal_done(struct cpu_stop_done *done)
{
	if (atomic_dec_and_test(&done->nr_todo))
		complete(&done->completion);
}

static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
				  struct cpu_stop_work *work)
{
	list_add_tail(&work->list, &stopper->works);
	wake_up_process(stopper->thread);
}

/* queue @work to @cpu's stopper; if it is disabled, signal @work as done */
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	unsigned long flags;
	bool enabled;

	spin_lock_irqsave(&stopper->lock, flags);
	enabled = stopper->enabled;
	if (enabled)
		__cpu_stop_queue_work(stopper, work);
	else if (work->done)
		cpu_stop_signal_done(work->done);
	spin_unlock_irqrestore(&stopper->lock, flags);

	return enabled;
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online at
 * least till this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

	cpu_stop_init_done(&done, 1);
	if (!cpu_stop_queue_work(cpu, &work))
		return -ENOENT;
	wait_for_completion(&done.completion);
	return done.ret;
}
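
/*
 * Illustrative sketch only (not part of the kernel API): a hypothetical
 * caller that runs drain_local_cache() on CPU 3 and propagates its return
 * value.
 *
 *	static int drain_local_cache(void *arg)
 *	{
 *		return 0;	// runs on CPU 3 at stopper priority
 *	}
 *
 *	err = stop_one_cpu(3, drain_local_cache, NULL);
 *	// err == -ENOENT if CPU 3 was offline when the work was queued
 */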

/* the state machine driven by multi_cpu_stop() on each participating CPU */
enum multi_stop_state {
	/* Dummy starting state for thread */
	MULTI_STOP_NONE,
	/* Awaiting everyone to be scheduled */
	MULTI_STOP_PREPARE,
	/* Disable interrupts */
	MULTI_STOP_DISABLE_IRQ,
	/* Run the function */
	MULTI_STOP_RUN,
	/* Exit */
	MULTI_STOP_EXIT,
};

struct multi_stop_data {
	cpu_stop_fn_t		fn;
	void			*data;
	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
	unsigned int		num_threads;
	const struct cpumask	*active_cpus;	/* CPUs that actually run @fn */

	enum multi_stop_state	state;
	atomic_t		thread_ack;
};

static void set_state(struct multi_stop_data *msdata,
		      enum multi_stop_state newstate)
{
	/* Reset ack counter. */
	atomic_set(&msdata->thread_ack, msdata->num_threads);
	smp_wmb();
	msdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct multi_stop_data *msdata)
{
	if (atomic_dec_and_test(&msdata->thread_ack))
		set_state(msdata, msdata->state + 1);
}

/* This is the cpu_stop function which stops the CPU. */
static int multi_cpu_stop(void *data)
{
	struct multi_stop_data *msdata = data;
	enum multi_stop_state curstate = MULTI_STOP_NONE;
	int cpu = smp_processor_id(), err = 0;
	unsigned long flags;
	bool is_active;

	/*
	 * When called from stop_machine_from_inactive_cpu(), irq might
	 * already be disabled.  Save the state and restore it on exit.
	 */
	local_save_flags(flags);

	if (!msdata->active_cpus)
		is_active = cpu == cpumask_first(cpu_online_mask);
	else
		is_active = cpumask_test_cpu(cpu, msdata->active_cpus);

	/* Simple state machine */
	do {
		/* Chill out and ensure we re-read multi_stop_state. */
		cpu_relax();
		if (msdata->state != curstate) {
			curstate = msdata->state;
			switch (curstate) {
			case MULTI_STOP_DISABLE_IRQ:
				local_irq_disable();
				hard_irq_disable();
				break;
			case MULTI_STOP_RUN:
				if (is_active)
					err = msdata->fn(msdata->data);
				break;
			default:
				break;
			}
			ack_state(msdata);
		} else if (curstate > MULTI_STOP_PREPARE) {
			/*
			 * At this stage all other CPUs we depend on must spin
			 * in the same loop.  Any reason for hard-lockup should
			 * be detected and reported on their side.
			 */
			touch_nmi_watchdog();
		}
	} while (curstate != MULTI_STOP_EXIT);

	local_irq_restore(flags);
	return err;
}

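/*
 * Queue @work1 and @work2 on @cpu1 and @cpu2.  Both stoppers' locks are
 * taken under the lglock, which excludes the global stop_cpus() path, so
 * the two works cannot be queued in an order that deadlocks against it.
 * Fails with -ENOENT if either stopper is disabled.
 */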
static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
				    int cpu2, struct cpu_stop_work *work2)
{
	struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
	struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
	int err;

	lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
	spin_lock_irq(&stopper1->lock);
	spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);

	err = -ENOENT;
	if (!stopper1->enabled || !stopper2->enabled)
		goto unlock;

	err = 0;
	__cpu_stop_queue_work(stopper1, work1);
	__cpu_stop_queue_work(stopper2, work2);
unlock:
	spin_unlock(&stopper2->lock);
	spin_unlock_irq(&stopper1->lock);
	lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);

	return err;
}

/**
 * stop_two_cpus - stops two cpus
 * @cpu1: the cpu to stop
 * @cpu2: the other cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Stops both the current and specified CPU and runs @fn on one of them.
 *
 * returns when both are completed.
 */
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work1, work2;
	struct multi_stop_data msdata;

	msdata = (struct multi_stop_data){
		.fn = fn,
		.data = arg,
		.num_threads = 2,
		.active_cpus = cpumask_of(cpu1),
	};

	work1 = work2 = (struct cpu_stop_work){
		.fn = multi_cpu_stop,
		.arg = &msdata,
		.done = &done
	};

	cpu_stop_init_done(&done, 2);
	set_state(&msdata, MULTI_STOP_PREPARE);

	if (cpu1 > cpu2)
		swap(cpu1, cpu2);
	if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
		return -ENOENT;

	wait_for_completion(&done.completion);
	return done.ret;
}

/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 * @work_buf: pointer to cpu_stop_work structure
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until the stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 *
 * RETURNS:
 * true if cpu_stop_work was queued successfully and @fn will be
 * called, false otherwise.
 */
bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
			 struct cpu_stop_work *work_buf)
{
	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
	return cpu_stop_queue_work(cpu, work_buf);
}

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);

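/*
 * Queue the static per-cpu stop_work of every CPU in @cpumask.  Returns
 * true if at least one work was queued, i.e. @fn will actually run on at
 * least one CPU.
 */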
static bool queue_stop_cpus_work(const struct cpumask *cpumask,
				 cpu_stop_fn_t fn, void *arg,
				 struct cpu_stop_done *done)
{
	struct cpu_stop_work *work;
	unsigned int cpu;
	bool queued = false;

	/*
	 * Queueing under the global lglock excludes stop_two_cpus(), so the
	 * two paths cannot queue their works in conflicting order on
	 * different CPUs and deadlock against each other.
	 */
	lg_global_lock(&stop_cpus_lock);
	for_each_cpu(cpu, cpumask) {
		work = &per_cpu(cpu_stopper.stop_work, cpu);
		work->fn = fn;
		work->arg = arg;
		work->done = done;
		if (cpu_stop_queue_work(cpu, work))
			queued = true;
	}
	lg_global_unlock(&stop_cpus_lock);

	return queued;
}

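/* queue works on every CPU in @cpumask and wait for all of them to finish */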
static int __stop_cpus(const struct cpumask *cpumask,
		       cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;

	cpu_stop_init_done(&done, cpumask_weight(cpumask));
	if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
		return -ENOENT;
	wait_for_completion(&done.completion);
	return done.ret;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized, making it safe for @fn to wait
 * for all cpu stoppers to enter @fn and wait for completion.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	mutex_lock(&stop_cpus_mutex);
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}

/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any non
 * zero return value if any returned non zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	if (!mutex_trylock(&stop_cpus_mutex))
		return -EAGAIN;
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}

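/* smpboot callback: does this CPU's stopper have pending work? */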
static int cpu_stop_should_run(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	unsigned long flags;
	int run;

	spin_lock_irqsave(&stopper->lock, flags);
	run = !list_empty(&stopper->works);
	spin_unlock_irqrestore(&stopper->lock, flags);
	return run;
}

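/*
 * smpboot thread function: pop pending cpu_stop works one at a time and run
 * them in atomic context (callbacks must not sleep), recording any non-zero
 * return value in the shared done record before signalling it.
 */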
static void cpu_stopper_thread(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct cpu_stop_work *work;

repeat:
	work = NULL;
	spin_lock_irq(&stopper->lock);
	if (!list_empty(&stopper->works)) {
		work = list_first_entry(&stopper->works,
					struct cpu_stop_work, list);
		list_del_init(&work->list);
	}
	spin_unlock_irq(&stopper->lock);

	if (work) {
		cpu_stop_fn_t fn = work->fn;
		void *arg = work->arg;
		struct cpu_stop_done *done = work->done;
		int ret;

		/* cpu stop callbacks must not sleep, make in_atomic() == T */
		preempt_count_inc();
		ret = fn(arg);
		if (done) {
			if (ret)
				done->ret = ret;
			cpu_stop_signal_done(done);
		}
		preempt_count_dec();
		WARN_ONCE(preempt_count(),
			  "cpu_stop: %pf(%p) leaked preempt count\n", fn, arg);
		goto repeat;
	}
}

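/* disable @cpu's stopper and park its thread; used on the CPU hotplug path */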
void stop_machine_park(int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

	/*
	 * Lockless. cpu_stopper_thread() will take stopper->lock and flush
	 * all pending works before it parks; until then it is fine to queue
	 * new works.
	 */
	stopper->enabled = false;
	kthread_park(stopper->thread);
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

/* smpboot callback: tell the scheduler which task is this CPU's stopper */
static void cpu_stop_create(unsigned int cpu)
{
	sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
}

/* smpboot callback: the stopper must have been flushed before parking */
static void cpu_stop_park(unsigned int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

	WARN_ON(!list_empty(&stopper->works));
}

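/* re-enable @cpu's stopper and let its thread run again */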
void stop_machine_unpark(int cpu)
{
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

	stopper->enabled = true;
	kthread_unpark(stopper->thread);
}

static struct smp_hotplug_thread cpu_stop_threads = {
	.store			= &cpu_stopper.thread,
	.thread_should_run	= cpu_stop_should_run,
	.thread_fn		= cpu_stopper_thread,
	.thread_comm		= "migration/%u",
	.create			= cpu_stop_create,
	.park			= cpu_stop_park,
	.selfparking		= true,
};

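/* initialize per-cpu stopper state and register the "migration/%u" threads */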
static int __init cpu_stop_init(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

		spin_lock_init(&stopper->lock);
		INIT_LIST_HEAD(&stopper->works);
	}

	BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
	stop_machine_unpark(raw_smp_processor_id());
	stop_machine_initialized = true;
	return 0;
}
early_initcall(cpu_stop_init);

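/*
 * Core of stop_machine(): stop every online CPU with multi_cpu_stop() and
 * run @fn on the CPUs in @cpus (or on the first online CPU if @cpus is
 * NULL) while all the others spin with interrupts disabled.
 */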
static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
	struct multi_stop_data msdata = {
		.fn = fn,
		.data = data,
		.num_threads = num_online_cpus(),
		.active_cpus = cpus,
	};

	if (!stop_machine_initialized) {
		/*
		 * Handle the case where stop_machine() is called early in
		 * boot, before the stopper threads have been set up: only
		 * one CPU is running, so disabling interrupts locally is
		 * all the "stopping" that is needed.
		 */
		unsigned long flags;
		int ret;

		WARN_ON_ONCE(msdata.num_threads != 1);

		local_irq_save(flags);
		hard_irq_disable();
		ret = (*fn)(data);
		local_irq_restore(flags);

		return ret;
	}

	/* Set the initial state and stop all online cpus. */
	set_state(&msdata, MULTI_STOP_PREPARE);
	return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
}

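/* stop_machine() proper: pin CPU hotplug, then defer to __stop_machine() */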
int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
	int ret;

	/* No CPUs can come up or down during this. */
	get_online_cpus();
	ret = __stop_machine(fn, data, cpus);
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
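
/*
 * Illustrative sketch only (not part of the kernel API): a hypothetical
 * caller that needs the whole machine quiescent, e.g. to flip a global
 * invariant atomically.
 *
 *	static int apply_update(void *arg)
 *	{
 *		return 0;	// every other online CPU spins with IRQs off
 *	}
 *
 *	err = stop_machine(apply_update, NULL, NULL);
 */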

/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = run on the first online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start), is not marked active, and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly on the
 * local CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return value if any
 * returned non zero.
 */
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
				   const struct cpumask *cpus)
{
	struct multi_stop_data msdata = { .fn = fn, .data = data,
					  .active_cpus = cpus };
	struct cpu_stop_done done;
	int ret;

	/* Local CPU must be inactive and CPU hotplug in progress. */
	BUG_ON(cpu_active(raw_smp_processor_id()));
	msdata.num_threads = num_active_cpus() + 1;	/* +1 for local */

	/* No proper task established and can't sleep - busy wait for lock. */
	while (!mutex_trylock(&stop_cpus_mutex))
		cpu_relax();

	/* Schedule work on other CPUs and execute directly for local CPU */
	set_state(&msdata, MULTI_STOP_PREPARE);
	cpu_stop_init_done(&done, num_active_cpus());
	queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
			     &done);
	ret = multi_cpu_stop(&msdata);

	/* Busy wait for completion. */
	while (!completion_done(&done.completion))
		cpu_relax();

	mutex_unlock(&stop_cpus_mutex);
	return ret ?: done.ret;
}