/*
 * kernel/stop_machine.c
 *
 * CPU stop infrastructure: per-cpu stopper kthreads that run callbacks
 * at the highest scheduling priority, plus the stop_machine() facility
 * built on top of them.
 */

#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>

#include <asm/atomic.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
	atomic_t		nr_todo;	/* nr left to execute */
	bool			executed;	/* actually executed? */
	int			ret;		/* collected return value */
	struct completion	completion;	/* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
	spinlock_t		lock;
	bool			enabled;	/* is this stopper enabled? */
	struct list_head	works;		/* list of pending works */
	struct task_struct	*thread;	/* stopper thread */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
	memset(done, 0, sizeof(*done));
	atomic_set(&done->nr_todo, nr_todo);
	init_completion(&done->completion);
}

/* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
{
	if (done) {
		if (executed)
			done->executed = true;
		if (atomic_dec_and_test(&done->nr_todo))
			complete(&done->completion);
	}
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static void cpu_stop_queue_work(struct cpu_stopper *stopper,
				struct cpu_stop_work *work)
{
	unsigned long flags;

	spin_lock_irqsave(&stopper->lock, flags);

	if (stopper->enabled) {
		list_add_tail(&work->list, &stopper->works);
		wake_up_process(stopper->thread);
	} else
		cpu_stop_signal_done(work->done, false);

	spin_unlock_irqrestore(&stopper->lock, flags);
}
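/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */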
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

	cpu_stop_init_done(&done, 1);
	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}
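/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 * @work_buf: pointer to cpu_stop_work structure
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until the stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 */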
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
			 struct cpu_stop_work *work_buf)
{
	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
}

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);

int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_work *work;
	struct cpu_stop_done done;
	unsigned int cpu;

	/* initialize works and done */
	for_each_cpu(cpu, cpumask) {
		work = &per_cpu(stop_cpus_work, cpu);
		work->fn = fn;
		work->arg = arg;
		work->done = &done;
	}
	cpu_stop_init_done(&done, cpumask_weight(cpumask));

	/*
	 * Disable preemption while queueing to avoid getting
	 * preempted by a stopper which might wait for other stoppers
	 * to enter @fn which can lead to deadlock.
	 */
	preempt_disable();
	for_each_cpu(cpu, cpumask)
		cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
				    &per_cpu(stop_cpus_work, cpu));
	preempt_enable();

	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}
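/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * may happen partially or fully on different cpus.  @fn should either
 * be ready for that or the caller should ensure that the cpus stay
 * online until this function completes.
 *
 * All stop_cpus() calls are serialized (by stop_cpus_mutex), making
 * it safe for @fn to wait for all cpu stoppers to enter @fn.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */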
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	mutex_lock(&stop_cpus_mutex);
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}
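/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus; otherwise, the
 * same return values as stop_cpus().
 */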
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	if (!mutex_trylock(&stop_cpus_mutex))
		return -EAGAIN;
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}

static int cpu_stopper_thread(void *data)
{
	struct cpu_stopper *stopper = data;
	struct cpu_stop_work *work;
	int ret;

repeat:
	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */

	if (kthread_should_stop()) {
		__set_current_state(TASK_RUNNING);
		return 0;
	}

	work = NULL;
	spin_lock_irq(&stopper->lock);
	if (!list_empty(&stopper->works)) {
		work = list_first_entry(&stopper->works,
					struct cpu_stop_work, list);
		list_del_init(&work->list);
	}
	spin_unlock_irq(&stopper->lock);

	if (work) {
		cpu_stop_fn_t fn = work->fn;
		void *arg = work->arg;
		struct cpu_stop_done *done = work->done;
		char ksym_buf[KSYM_NAME_LEN] __maybe_unused;

		__set_current_state(TASK_RUNNING);

		/* cpu stop callbacks are not allowed to sleep */
		preempt_disable();

		ret = fn(arg);
		if (ret)
			done->ret = ret;

		/* restore preemption and check it's still balanced */
		preempt_enable();
		WARN_ONCE(preempt_count(),
			  "cpu_stop: %s(%p) leaked preempt count\n",
			  kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
					  ksym_buf), arg);

		cpu_stop_signal_done(done, true);
	} else
		schedule();

	goto repeat;
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
					   unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct task_struct *p;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		BUG_ON(stopper->thread || stopper->enabled ||
		       !list_empty(&stopper->works));
		p = kthread_create_on_node(cpu_stopper_thread,
					   stopper,
					   cpu_to_node(cpu),
					   "migration/%d", cpu);
		if (IS_ERR(p))
			return notifier_from_errno(PTR_ERR(p));
		get_task_struct(p);
		kthread_bind(p, cpu);
		sched_set_stop_task(cpu, p);
		stopper->thread = p;
		break;

	case CPU_ONLINE:
		/* strictly unnecessary, as first user will wake it */
		wake_up_process(stopper->thread);
		/* mark enabled */
		spin_lock_irq(&stopper->lock);
		stopper->enabled = true;
		spin_unlock_irq(&stopper->lock);
		break;

#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_POST_DEAD:
	{
		struct cpu_stop_work *work;

		sched_set_stop_task(cpu, NULL);
		/* kill the stopper */
		kthread_stop(stopper->thread);
		/* drain remaining works */
		spin_lock_irq(&stopper->lock);
		list_for_each_entry(work, &stopper->works, list)
			cpu_stop_signal_done(work->done, false);
		stopper->enabled = false;
		spin_unlock_irq(&stopper->lock);
		/* release the stopper */
		put_task_struct(stopper->thread);
		stopper->thread = NULL;
		break;
	}
#endif
	}

	return NOTIFY_OK;
}

/*
 * Give it a higher priority so that the cpu stopper is available to
 * other cpu notifiers.  It currently shares the same priority as the
 * sched migration_notifier.
 */
static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = {
	.notifier_call	= cpu_stop_cpu_callback,
	.priority	= 10,
};

static int __init cpu_stop_init(void)
{
	void *bcpu = (void *)(long)smp_processor_id();
	unsigned int cpu;
	int err;

	for_each_possible_cpu(cpu) {
		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

		spin_lock_init(&stopper->lock);
		INIT_LIST_HEAD(&stopper->works);
	}

	/* start one for the boot cpu */
	err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
				    bcpu);
	BUG_ON(err != NOTIFY_OK);
	cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
	register_cpu_notifier(&cpu_stop_cpu_notifier);

	return 0;
}
early_initcall(cpu_stop_init);

#ifdef CONFIG_STOP_MACHINE

/* This controls the threads on each CPU. */
enum stopmachine_state {
	/* Dummy starting state for thread. */
	STOPMACHINE_NONE,
	/* Awaiting everyone to be scheduled. */
	STOPMACHINE_PREPARE,
	/* Disable interrupts. */
	STOPMACHINE_DISABLE_IRQ,
	/* Run the function */
	STOPMACHINE_RUN,
	/* Exit */
	STOPMACHINE_EXIT,
};

struct stop_machine_data {
	int			(*fn)(void *);
	void			*data;
	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
	unsigned int		num_threads;
	const struct cpumask	*active_cpus;

	enum stopmachine_state	state;
	atomic_t		thread_ack;
};

static void set_state(struct stop_machine_data *smdata,
		      enum stopmachine_state newstate)
{
	/* Reset ack counter. */
	atomic_set(&smdata->thread_ack, smdata->num_threads);
	smp_wmb();
	smdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct stop_machine_data *smdata)
{
	if (atomic_dec_and_test(&smdata->thread_ack))
		set_state(smdata, smdata->state + 1);
}

/* This is the cpu_stop function which stops the CPU. */
static int stop_machine_cpu_stop(void *data)
{
	struct stop_machine_data *smdata = data;
	enum stopmachine_state curstate = STOPMACHINE_NONE;
	int cpu = smp_processor_id(), err = 0;
	bool is_active;

	if (!smdata->active_cpus)
		is_active = cpu == cpumask_first(cpu_online_mask);
	else
		is_active = cpumask_test_cpu(cpu, smdata->active_cpus);

	/* Simple state machine */
	do {
		/* Chill out and ensure we re-read stopmachine_state. */
		cpu_relax();
		if (smdata->state != curstate) {
			curstate = smdata->state;
			switch (curstate) {
			case STOPMACHINE_DISABLE_IRQ:
				local_irq_disable();
				hard_irq_disable();
				break;
			case STOPMACHINE_RUN:
				if (is_active)
					err = smdata->fn(smdata->data);
				break;
			default:
				break;
			}
			ack_state(smdata);
		}
	} while (curstate != STOPMACHINE_EXIT);

	local_irq_enable();
	return err;
}

/*
 * __stop_machine() assumes the caller has already excluded cpu
 * hotplug, e.g. via get_online_cpus(); stop_machine() below wraps it
 * with that protection.
 */
int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	struct stop_machine_data smdata = { .fn = fn, .data = data,
					    .num_threads = num_online_cpus(),
					    .active_cpus = cpus };

	/* Set the initial state and stop all online cpus. */
	set_state(&smdata, STOPMACHINE_PREPARE);
	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
}
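
/**
 * stop_machine - freeze the machine on all CPUs and run this function
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This causes a thread to be scheduled on every cpu, each of which
 * disables interrupts.  The result is that no one is holding a
 * spinlock or inside any other preempt-disabled region when @fn()
 * runs.
 *
 * This can be thought of as a very heavy write lock, equivalent to
 * grabbing every spinlock in the kernel.
 */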
int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	int ret;

	/* No CPUs can come up or down during this. */
	get_online_cpus();
	ret = __stop_machine(fn, data, cpus);
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
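
/*
 * Usage sketch (hypothetical caller, not part of this file;
 * apply_update() and struct my_update are illustrative names only):
 *
 *	static int apply_update(void *arg)
 *	{
 *		struct my_update *u = arg;
 *
 *		...	apply u while every other online CPU spins
 *			with interrupts disabled ...
 *		return 0;
 *	}
 *
 *	err = stop_machine(apply_update, &update, NULL);
 */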

#endif	/* CONFIG_STOP_MACHINE */