/*
 * kernel/stop_machine.c
 *
 * Copyright (C) 2008, 2005	IBM Corporation.
 * Copyright (C) 2008, 2005	Rusty Russell rusty@rustcorp.com.au
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2 and any later version.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/smpboot.h>
#include <linux/atomic.h>
#include <linux/lglock.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
        atomic_t                nr_todo;        /* nr left to execute */
        int                     ret;            /* collected return value */
        struct completion       completion;     /* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
        struct task_struct      *thread;

        spinlock_t              lock;
        bool                    enabled;        /* is this stopper enabled? */
        struct list_head        works;          /* list of pending works */

        struct cpu_stop_work    stop_work;      /* for stop_cpus */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;

/*
 * Avoids a race between stop_two_cpus() and the global stop_cpus(), where
 * the stoppers could get queued up in reverse order, leading to system
 * deadlock.  Using an lglock means stop_two_cpus() remains relatively cheap.
 */
DEFINE_STATIC_LGLOCK(stop_cpus_lock);

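/* set up @done to wait for @nr_todo cpu_stop works to complete */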
static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
        memset(done, 0, sizeof(*done));
        atomic_set(&done->nr_todo, nr_todo);
        init_completion(&done->completion);
}

/* account for one finished work; the last one wakes up the waiter */
static void cpu_stop_signal_done(struct cpu_stop_done *done)
{
        if (atomic_dec_and_test(&done->nr_todo))
                complete(&done->completion);
}

static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
                                  struct cpu_stop_work *work)
{
        list_add_tail(&work->list, &stopper->works);
        wake_up_process(stopper->thread);
}

/* queue @work to @cpu's stopper.  If the stopper is disabled, @work is completed immediately */
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        unsigned long flags;
        bool enabled;

        spin_lock_irqsave(&stopper->lock, flags);
        enabled = stopper->enabled;
        if (enabled)
                __cpu_stop_queue_work(stopper, work);
        else if (work->done)
                cpu_stop_signal_done(work->done);
        spin_unlock_irqrestore(&stopper->lock, flags);

        return enabled;
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;
        struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

        cpu_stop_init_done(&done, 1);
        if (!cpu_stop_queue_work(cpu, &work))
                return -ENOENT;
        wait_for_completion(&done.completion);
        return done.ret;
}

/* This controls the threads on each CPU. */
enum multi_stop_state {
        /* Dummy starting state for thread. */
        MULTI_STOP_NONE,
        /* Awaiting everyone to be scheduled. */
        MULTI_STOP_PREPARE,
        /* Disable interrupts. */
        MULTI_STOP_DISABLE_IRQ,
        /* Run the function. */
        MULTI_STOP_RUN,
        /* Exit. */
        MULTI_STOP_EXIT,
};

struct multi_stop_data {
        cpu_stop_fn_t           fn;
        void                    *data;
        /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
        unsigned int            num_threads;
        const struct cpumask    *active_cpus;

        enum multi_stop_state   state;
        atomic_t                thread_ack;
};

static void set_state(struct multi_stop_data *msdata,
                      enum multi_stop_state newstate)
{
        /* Reset ack counter. */
        atomic_set(&msdata->thread_ack, msdata->num_threads);
        smp_wmb();
        msdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct multi_stop_data *msdata)
{
        if (atomic_dec_and_test(&msdata->thread_ack))
                set_state(msdata, msdata->state + 1);
}

/* This is the cpu_stop function which stops the CPU. */
static int multi_cpu_stop(void *data)
{
        struct multi_stop_data *msdata = data;
        enum multi_stop_state curstate = MULTI_STOP_NONE;
        int cpu = smp_processor_id(), err = 0;
        unsigned long flags;
        bool is_active;

        /*
         * When called from stop_machine_from_inactive_cpu(), irq might
         * already be disabled.  Save the state and restore it on exit.
         */
        local_save_flags(flags);

        if (!msdata->active_cpus)
                is_active = cpu == cpumask_first(cpu_online_mask);
        else
                is_active = cpumask_test_cpu(cpu, msdata->active_cpus);

        /* Simple state machine */
        do {
                /* Chill out and ensure we re-read multi_stop_state. */
                cpu_relax();
                if (msdata->state != curstate) {
                        curstate = msdata->state;
                        switch (curstate) {
                        case MULTI_STOP_DISABLE_IRQ:
                                local_irq_disable();
                                hard_irq_disable();
                                break;
                        case MULTI_STOP_RUN:
                                if (is_active)
                                        err = msdata->fn(msdata->data);
                                break;
                        default:
                                break;
                        }
                        ack_state(msdata);
                }
        } while (curstate != MULTI_STOP_EXIT);

        local_irq_restore(flags);
        return err;
}

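/* queue @work1 on @cpu1 and @work2 on @cpu2, either both or neither */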
static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
                                    int cpu2, struct cpu_stop_work *work2)
{
        struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
        struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
        int err;

        lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
        spin_lock_irq(&stopper1->lock);
        spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);

        err = -ENOENT;
        if (!stopper1->enabled || !stopper2->enabled)
                goto unlock;

        err = 0;
        __cpu_stop_queue_work(stopper1, work1);
        __cpu_stop_queue_work(stopper2, work2);
unlock:
        spin_unlock(&stopper2->lock);
        spin_unlock_irq(&stopper1->lock);
        lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);

        return err;
}

/**
 * stop_two_cpus - stops two cpus
 * @cpu1: the cpu to stop
 * @cpu2: the other cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Stops both the current and specified CPU and runs @fn on one of them.
 *
 * returns when both are completed.
 */
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;
        struct cpu_stop_work work1, work2;
        struct multi_stop_data msdata;

        msdata = (struct multi_stop_data){
                .fn = fn,
                .data = arg,
                .num_threads = 2,
                .active_cpus = cpumask_of(cpu1),
        };

        work1 = work2 = (struct cpu_stop_work){
                .fn = multi_cpu_stop,
                .arg = &msdata,
                .done = &done
        };

        cpu_stop_init_done(&done, 2);
        set_state(&msdata, MULTI_STOP_PREPARE);

        if (cpu1 > cpu2)
                swap(cpu1, cpu2);
        if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
                return -ENOENT;

        wait_for_completion(&done.completion);
        return done.ret;
}

/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 * @work_buf: pointer to cpu_stop_work structure
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 *
 * RETURNS:
 * true if cpu_stop_work was queued successfully and @fn will be called,
 * false otherwise.
 */
bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
                         struct cpu_stop_work *work_buf)
{
        *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
        return cpu_stop_queue_work(cpu, work_buf);
}

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);

static bool queue_stop_cpus_work(const struct cpumask *cpumask,
                                 cpu_stop_fn_t fn, void *arg,
                                 struct cpu_stop_done *done)
{
        struct cpu_stop_work *work;
        unsigned int cpu;
        bool queued = false;

        /*
         * Disable preemption while queueing to avoid getting
         * preempted by a stopper which might wait for other stoppers
         * to enter @fn which can lead to deadlock.
         */
        lg_global_lock(&stop_cpus_lock);
        for_each_cpu(cpu, cpumask) {
                work = &per_cpu(cpu_stopper.stop_work, cpu);
                work->fn = fn;
                work->arg = arg;
                work->done = done;
                if (cpu_stop_queue_work(cpu, work))
                        queued = true;
        }
        lg_global_unlock(&stop_cpus_lock);

        return queued;
}

static int __stop_cpus(const struct cpumask *cpumask,
                       cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;

        cpu_stop_init_done(&done, cpumask_weight(cpumask));
        if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
                return -ENOENT;
        wait_for_completion(&done.completion);
        return done.ret;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized making it safe for @fn to wait
 * for all cpus to start executing it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
        int ret;

        /* static works are used, process one request at a time */
        mutex_lock(&stop_cpus_mutex);
        ret = __stop_cpus(cpumask, fn, arg);
        mutex_unlock(&stop_cpus_mutex);
        return ret;
}

/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any non
 * zero return value if any returned non zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
        int ret;

        /* static works are used, process one request at a time */
        if (!mutex_trylock(&stop_cpus_mutex))
                return -EAGAIN;
        ret = __stop_cpus(cpumask, fn, arg);
        mutex_unlock(&stop_cpus_mutex);
        return ret;
}

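/* does this cpu's stopper have work queued? */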
static int cpu_stop_should_run(unsigned int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        unsigned long flags;
        int run;

        spin_lock_irqsave(&stopper->lock, flags);
        run = !list_empty(&stopper->works);
        spin_unlock_irqrestore(&stopper->lock, flags);
        return run;
}

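/* main loop of the per-cpu stopper thread: run queued works one at a time */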
static void cpu_stopper_thread(unsigned int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        struct cpu_stop_work *work;

repeat:
        work = NULL;
        spin_lock_irq(&stopper->lock);
        if (!list_empty(&stopper->works)) {
                work = list_first_entry(&stopper->works,
                                        struct cpu_stop_work, list);
                list_del_init(&work->list);
        }
        spin_unlock_irq(&stopper->lock);

        if (work) {
                cpu_stop_fn_t fn = work->fn;
                void *arg = work->arg;
                struct cpu_stop_done *done = work->done;
                int ret;

                /* cpu stop callbacks must not sleep, make in_atomic() == T */
                preempt_count_inc();
                ret = fn(arg);
                if (done) {
                        if (ret)
                                done->ret = ret;
                        cpu_stop_signal_done(done);
                }
                preempt_count_dec();
                WARN_ONCE(preempt_count(),
                          "cpu_stop: %pf(%p) leaked preempt count\n", fn, arg);
                goto repeat;
        }
}

void stop_machine_park(int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        /*
         * Lockless. cpu_stopper_thread() will take stopper->lock and flush
         * the pending works before it parks, until then it is fine to queue
         * the new works.
         */
        stopper->enabled = false;
        kthread_park(stopper->thread);
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

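/* install this cpu's stopper thread as the scheduler's stop task */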
static void cpu_stop_create(unsigned int cpu)
{
        sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu));
}

static void cpu_stop_park(unsigned int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

        WARN_ON(!list_empty(&stopper->works));
}

void stop_machine_unpark(int cpu)
{
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

        stopper->enabled = true;
        kthread_unpark(stopper->thread);
}

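/* smpboot descriptor for the per-cpu "migration/%u" stopper threads */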
static struct smp_hotplug_thread cpu_stop_threads = {
        .store                  = &cpu_stopper.thread,
        .thread_should_run      = cpu_stop_should_run,
        .thread_fn              = cpu_stopper_thread,
        .thread_comm            = "migration/%u",
        .create                 = cpu_stop_create,
        .park                   = cpu_stop_park,
        .selfparking            = true,
};

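/* initialize per-cpu stopper state and register the smpboot stopper threads */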
static int __init cpu_stop_init(void)
{
        unsigned int cpu;

        for_each_possible_cpu(cpu) {
                struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

                spin_lock_init(&stopper->lock);
                INIT_LIST_HEAD(&stopper->works);
        }

        BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads));
        stop_machine_unpark(raw_smp_processor_id());
        stop_machine_initialized = true;
        return 0;
}
early_initcall(cpu_stop_init);

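/* core of stop_machine(); the caller must keep CPUs from coming and going */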
static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
        struct multi_stop_data msdata = {
                .fn = fn,
                .data = data,
                .num_threads = num_online_cpus(),
                .active_cpus = cpus,
        };

        if (!stop_machine_initialized) {
                /*
                 * Handle the case where stop_machine() is called
                 * early in boot before stop_machine() has been
                 * initialized.
                 */
                unsigned long flags;
                int ret;

                WARN_ON_ONCE(msdata.num_threads != 1);

                local_irq_save(flags);
                hard_irq_disable();
                ret = (*fn)(data);
                local_irq_restore(flags);

                return ret;
        }

        /* Set the initial state and stop all online cpus. */
        set_state(&msdata, MULTI_STOP_PREPARE);
        return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
}

int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
        int ret;

        /* No CPUs can come up or down during this. */
        get_online_cpus();
        ret = __stop_machine(fn, data, cpus);
        put_online_cpus();
        return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);

/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start) and not marked active and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly for local
 * CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return value if any
 * returned non zero.
 */
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
                                   const struct cpumask *cpus)
{
        struct multi_stop_data msdata = { .fn = fn, .data = data,
                                          .active_cpus = cpus };
        struct cpu_stop_done done;
        int ret;

        /* Local CPU must be inactive and CPU hotplug in progress. */
        BUG_ON(cpu_active(raw_smp_processor_id()));
        msdata.num_threads = num_active_cpus() + 1;     /* +1 for local */

        /* No proper task established and can't sleep - busy wait for lock. */
        while (!mutex_trylock(&stop_cpus_mutex))
                cpu_relax();

        /* Schedule work on other CPUs and execute directly for local CPU */
        set_state(&msdata, MULTI_STOP_PREPARE);
        cpu_stop_init_done(&done, num_active_cpus());
        queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata,
                             &done);
        ret = multi_cpu_stop(&msdata);

        /* Busy wait for completion. */
        while (!completion_done(&done.completion))
                cpu_relax();

        mutex_unlock(&stop_cpus_mutex);
        return ret ?: done.ret;
}