/*
 *  linux/kernel/profile.c
 *  Simple profiling. Manages a direct-mapped profile hit count buffer,
 *  with configurable resolution, support for restricting the cpus on
 *  which profiling is done, and switching between cpu time and
 *  schedule() calls via kernel command line parameters passed at boot.
 *
 *  Scheduler profiling support, Arjan van de Ven and Ingo Molnar,
 *	Red Hat, 2003. Consolidation of architecture, cpu profiling code moved
 *	to this file, William Irwin, Oracle, 2004
 *  Amortized hit count accounting via per-cpu open-addressed hashtables
 *	to resolve timer interrupt livelocks, William Irwin, Oracle, 2004.
 */
#include <linux/module.h>
#include <linux/profile.h>
#include <linux/bootmem.h>
#include <linux/notifier.h>
#include <linux/mm.h>
#include <linux/cpumask.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/sections.h>
#include <asm/irq_regs.h>
#include <asm/ptrace.h>

struct profile_hit {
	u32 pc, hits;
};
#define PROFILE_GRPSHIFT 3
#define PROFILE_GRPSZ (1 << PROFILE_GRPSHIFT)
#define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit))
#define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ)

/* Oprofile timer tick hook */
static int (*timer_hook)(struct pt_regs *) __read_mostly;

static atomic_t *prof_buffer;
static unsigned long prof_len, prof_shift;

int prof_on __read_mostly;
EXPORT_SYMBOL_GPL(prof_on);

static cpumask_var_t prof_cpu_mask;
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
static DEFINE_PER_CPU(int, cpu_profile_flip);
static DEFINE_MUTEX(profile_flip_mutex);
#endif /* CONFIG_SMP */

int profile_setup(char *str)
{
	static char schedstr[] = "schedule";
	static char sleepstr[] = "sleep";
	static char kvmstr[] = "kvm";
	int par;

	if (!strncmp(str, sleepstr, strlen(sleepstr))) {
#ifdef CONFIG_SCHEDSTATS
		prof_on = SLEEP_PROFILING;
		if (str[strlen(sleepstr)] == ',')
			str += strlen(sleepstr) + 1;
		if (get_option(&str, &par))
			prof_shift = par;
		printk(KERN_INFO
			"kernel sleep profiling enabled (shift: %ld)\n",
			prof_shift);
#else
		printk(KERN_WARNING
			"kernel sleep profiling requires CONFIG_SCHEDSTATS\n");
#endif /* CONFIG_SCHEDSTATS */
	} else if (!strncmp(str, schedstr, strlen(schedstr))) {
		prof_on = SCHED_PROFILING;
		if (str[strlen(schedstr)] == ',')
			str += strlen(schedstr) + 1;
		if (get_option(&str, &par))
			prof_shift = par;
		printk(KERN_INFO
			"kernel schedule profiling enabled (shift: %ld)\n",
			prof_shift);
	} else if (!strncmp(str, kvmstr, strlen(kvmstr))) {
		prof_on = KVM_PROFILING;
		if (str[strlen(kvmstr)] == ',')
			str += strlen(kvmstr) + 1;
		if (get_option(&str, &par))
			prof_shift = par;
		printk(KERN_INFO
			"kernel KVM profiling enabled (shift: %ld)\n",
			prof_shift);
	} else if (get_option(&str, &par)) {
		prof_shift = par;
		prof_on = CPU_PROFILING;
		printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n",
			prof_shift);
	}
	return 1;
}
__setup("profile=", profile_setup);
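
/*
 * Illustrative boot-parameter usage, derived from the parsing above
 * (the concrete shift values are examples only):
 *
 *	profile=2		CPU-time profiling, one counter per
 *				4 (1 << 2) bytes of kernel text
 *	profile=schedule,5	profile schedule() call sites, shift 5
 *	profile=sleep		sleep profiling (needs CONFIG_SCHEDSTATS)
 *	profile=kvm		KVM exit profiling
 */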

int __ref profile_init(void)
{
	int buffer_bytes;
	if (!prof_on)
		return 0;

	/* only text is profiled */
	prof_len = (_etext - _stext) >> prof_shift;
	buffer_bytes = prof_len*sizeof(atomic_t);

	if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(prof_cpu_mask, cpu_possible_mask);

	/* Try the cheapest allocator first, falling back for big buffers. */
	prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL|__GFP_NOWARN);
	if (prof_buffer)
		return 0;

	prof_buffer = alloc_pages_exact(buffer_bytes,
					GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);
	if (prof_buffer)
		return 0;

	/* vzalloc() zeroes the buffer, matching the two paths above. */
	prof_buffer = vzalloc(buffer_bytes);
	if (prof_buffer)
		return 0;

	free_cpumask_var(prof_cpu_mask);
	return -ENOMEM;
}

/* Profile event notifications */

static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
static BLOCKING_NOTIFIER_HEAD(munmap_notifier);

void profile_task_exit(struct task_struct *task)
{
	blocking_notifier_call_chain(&task_exit_notifier, 0, task);
}

int profile_handoff_task(struct task_struct *task)
{
	int ret;
	ret = atomic_notifier_call_chain(&task_free_notifier, 0, task);
	return (ret == NOTIFY_OK) ? 1 : 0;
}

void profile_munmap(unsigned long addr)
{
	blocking_notifier_call_chain(&munmap_notifier, 0, (void *)addr);
}

int task_handoff_register(struct notifier_block *n)
{
	return atomic_notifier_chain_register(&task_free_notifier, n);
}
EXPORT_SYMBOL_GPL(task_handoff_register);

int task_handoff_unregister(struct notifier_block *n)
{
	return atomic_notifier_chain_unregister(&task_free_notifier, n);
}
EXPORT_SYMBOL_GPL(task_handoff_unregister);

int profile_event_register(enum profile_type type, struct notifier_block *n)
{
	int err = -EINVAL;

	switch (type) {
	case PROFILE_TASK_EXIT:
		err = blocking_notifier_chain_register(
				&task_exit_notifier, n);
		break;
	case PROFILE_MUNMAP:
		err = blocking_notifier_chain_register(
				&munmap_notifier, n);
		break;
	}

	return err;
}
EXPORT_SYMBOL_GPL(profile_event_register);

int profile_event_unregister(enum profile_type type, struct notifier_block *n)
{
	int err = -EINVAL;

	switch (type) {
	case PROFILE_TASK_EXIT:
		err = blocking_notifier_chain_unregister(
				&task_exit_notifier, n);
		break;
	case PROFILE_MUNMAP:
		err = blocking_notifier_chain_unregister(
				&munmap_notifier, n);
		break;
	}

	return err;
}
EXPORT_SYMBOL_GPL(profile_event_unregister);
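
/*
 * Sketch of how a client module might use the event notifier API
 * above (hypothetical code, not part of this file):
 *
 *	static int my_exit_cb(struct notifier_block *nb,
 *			      unsigned long val, void *data)
 *	{
 *		struct task_struct *task = data;
 *
 *		pr_info("profile: task %d exited\n", task->pid);
 *		return NOTIFY_OK;
 *	}
 *	static struct notifier_block my_exit_nb = {
 *		.notifier_call = my_exit_cb,
 *	};
 *
 *	profile_event_register(PROFILE_TASK_EXIT, &my_exit_nb);
 *	...
 *	profile_event_unregister(PROFILE_TASK_EXIT, &my_exit_nb);
 */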

int register_timer_hook(int (*hook)(struct pt_regs *))
{
	if (timer_hook)
		return -EBUSY;
	timer_hook = hook;
	return 0;
}
EXPORT_SYMBOL_GPL(register_timer_hook);

void unregister_timer_hook(int (*hook)(struct pt_regs *))
{
	WARN_ON(hook != timer_hook);
	timer_hook = NULL;
	/* make sure all CPUs see the NULL hook */
	synchronize_sched();  /* Allow ongoing interrupts to complete. */
}
EXPORT_SYMBOL_GPL(unregister_timer_hook);

#ifdef CONFIG_SMP
/*
 * Each cpu has a pair of open-addressed hashtables for pending
 * profile hits. read_profile() IPI's all cpus to request them
 * to flip buffers and flushes their contents to prof_buffer itself.
 * Writers post hits only into their own cpu's active table with
 * interrupts disabled, so the hot path takes no locks and does not
 * bounce prof_buffer's cachelines on every timer tick.
 *
 * Table geometry: each table is one page of struct profile_hit,
 * divided into NR_PROFILE_GRP groups of PROFILE_GRPSZ slots. A hit's
 * primary hash selects a group, which is scanned linearly for a
 * matching or free slot; on overflow, probing advances by a
 * pc-dependent secondary stride until the scan wraps back to the
 * primary group. Only if the entire table is full does profile_hits()
 * fall back to flushing the table synchronously into prof_buffer.
 *
 * The buffer pair is indexed by cpu_profile_flip. Readers flip every
 * cpu's active table via an IPI broadcast, serialized by
 * profile_flip_mutex, and then drain the now-quiescent tables without
 * racing the irq-disabled writers.
 */
static void __profile_flip_buffers(void *unused)
{
	int cpu = smp_processor_id();

	per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu);
}

static void profile_flip_buffers(void)
{
	int i, j, cpu;

	mutex_lock(&profile_flip_mutex);
	j = per_cpu(cpu_profile_flip, get_cpu());
	put_cpu();
	on_each_cpu(__profile_flip_buffers, NULL, 1);
	for_each_online_cpu(cpu) {
		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
		for (i = 0; i < NR_PROFILE_HIT; ++i) {
			if (!hits[i].hits) {
				if (hits[i].pc)
					hits[i].pc = 0;
				continue;
			}
			atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
			hits[i].hits = hits[i].pc = 0;
		}
	}
	mutex_unlock(&profile_flip_mutex);
}

static void profile_discard_flip_buffers(void)
{
	int i, cpu;

	mutex_lock(&profile_flip_mutex);
	i = per_cpu(cpu_profile_flip, get_cpu());
	put_cpu();
	on_each_cpu(__profile_flip_buffers, NULL, 1);
	for_each_online_cpu(cpu) {
		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
		memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
	}
	mutex_unlock(&profile_flip_mutex);
}

void profile_hits(int type, void *__pc, unsigned int nr_hits)
{
	unsigned long primary, secondary, flags, pc = (unsigned long)__pc;
	int i, j, cpu;
	struct profile_hit *hits;

	if (prof_on != type || !prof_buffer)
		return;
	pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1);
	i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
	secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
	cpu = get_cpu();
	hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)];
	if (!hits) {
		put_cpu();
		return;
	}
	/*
	 * We buffer the global profiler buffer into a per-cpu
	 * buffer to avoid cacheline bouncing on prof_buffer.
	 */
	local_irq_save(flags);
	do {
		for (j = 0; j < PROFILE_GRPSZ; ++j) {
			if (hits[i + j].pc == pc) {
				hits[i + j].hits += nr_hits;
				goto out;
			} else if (!hits[i + j].hits) {
				hits[i + j].pc = pc;
				hits[i + j].hits = nr_hits;
				goto out;
			}
		}
		i = (i + secondary) & (NR_PROFILE_HIT - 1);
	} while (i != primary);

	/*
	 * Add the current hit(s) and flush the write-queue out
	 * to the global buffer:
	 */
	atomic_add(nr_hits, &prof_buffer[pc]);
	for (i = 0; i < NR_PROFILE_HIT; ++i) {
		atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
		hits[i].pc = hits[i].hits = 0;
	}
out:
	local_irq_restore(flags);
	put_cpu();
}
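
/*
 * Worked example of the hash geometry above, assuming 4 KiB pages
 * (the numbers scale with PAGE_SIZE): sizeof(struct profile_hit) == 8,
 * so NR_PROFILE_HIT == 512 slots per table, split into
 * NR_PROFILE_GRP == 64 groups of PROFILE_GRPSZ == 8 slots. A sample
 * with pc == 0x1234 hashes to primary group 0x34, i.e. slots
 * 0x1a0-0x1a7; if that group is full, probing advances by the
 * pc-derived secondary stride until it wraps back to slot 0x1a0.
 */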

static int __cpuinit profile_cpu_callback(struct notifier_block *info,
					unsigned long action, void *__cpu)
{
	int node, cpu = (unsigned long)__cpu;
	struct page *page;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		node = cpu_to_node(cpu);
		per_cpu(cpu_profile_flip, cpu) = 0;
		if (!per_cpu(cpu_profile_hits, cpu)[1]) {
			page = alloc_pages_exact_node(node,
					GFP_KERNEL | __GFP_ZERO,
					0);
			if (!page)
				return NOTIFY_BAD;
			per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
		}
		if (!per_cpu(cpu_profile_hits, cpu)[0]) {
			page = alloc_pages_exact_node(node,
					GFP_KERNEL | __GFP_ZERO,
					0);
			if (!page)
				goto out_free;
			per_cpu(cpu_profile_hits, cpu)[0] = page_address(page);
		}
		break;
out_free:
		page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
		per_cpu(cpu_profile_hits, cpu)[1] = NULL;
		__free_page(page);
		return NOTIFY_BAD;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		if (prof_cpu_mask != NULL)
			cpumask_set_cpu(cpu, prof_cpu_mask);
		break;
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		if (prof_cpu_mask != NULL)
			cpumask_clear_cpu(cpu, prof_cpu_mask);
		if (per_cpu(cpu_profile_hits, cpu)[0]) {
			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
			per_cpu(cpu_profile_hits, cpu)[0] = NULL;
			__free_page(page);
		}
		if (per_cpu(cpu_profile_hits, cpu)[1]) {
			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
			per_cpu(cpu_profile_hits, cpu)[1] = NULL;
			__free_page(page);
		}
		break;
	}
	return NOTIFY_OK;
}
#else /* !CONFIG_SMP */
#define profile_flip_buffers()		do { } while (0)
#define profile_discard_flip_buffers()	do { } while (0)
#define profile_cpu_callback		NULL

void profile_hits(int type, void *__pc, unsigned int nr_hits)
{
	unsigned long pc;

	if (prof_on != type || !prof_buffer)
		return;
	pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
	atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]);
}
#endif /* !CONFIG_SMP */
EXPORT_SYMBOL_GPL(profile_hits);

void profile_tick(int type)
{
	struct pt_regs *regs = get_irq_regs();

	if (type == CPU_PROFILING && timer_hook)
		timer_hook(regs);
	if (!user_mode(regs) && prof_cpu_mask != NULL &&
	    cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
		profile_hit(type, (void *)profile_pc(regs));
}
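
/*
 * Note: profile_tick() is called from the architecture timer interrupt
 * path. profile_hit() is the small wrapper from <linux/profile.h> that
 * forwards a single hit to profile_hits() after a cheap prof_on check,
 * keeping the common profiling-disabled case inline.
 */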

#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <asm/uaccess.h>

static int prof_cpu_mask_proc_show(struct seq_file *m, void *v)
{
	seq_cpumask(m, prof_cpu_mask);
	seq_putc(m, '\n');
	return 0;
}

static int prof_cpu_mask_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, prof_cpu_mask_proc_show, NULL);
}

static ssize_t prof_cpu_mask_proc_write(struct file *file,
	const char __user *buffer, size_t count, loff_t *pos)
{
	cpumask_var_t new_value;
	int err;

	if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
		return -ENOMEM;

	err = cpumask_parse_user(buffer, count, new_value);
	if (!err) {
		cpumask_copy(prof_cpu_mask, new_value);
		err = count;
	}
	free_cpumask_var(new_value);
	return err;
}

static const struct file_operations prof_cpu_mask_proc_fops = {
	.open		= prof_cpu_mask_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
	.write		= prof_cpu_mask_proc_write,
};

void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
{
	/* create /proc/irq/prof_cpu_mask */
	proc_create("prof_cpu_mask", 0600, root_irq_dir,
		    &prof_cpu_mask_proc_fops);
}

/*
 * This function accesses profiling information. The returned data is
 * binary: the sampling step and the actual contents of the profile
 * buffer. Use of the program readprofile is recommended in order to
 * get meaningful info out of these data.
 */
static ssize_t
read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t read;
	char *pnt;
	unsigned int sample_step = 1 << prof_shift;

	profile_flip_buffers();
	if (p >= (prof_len+1)*sizeof(unsigned int))
		return 0;
	if (count > (prof_len+1)*sizeof(unsigned int) - p)
		count = (prof_len+1)*sizeof(unsigned int) - p;
	read = 0;

	while (p < sizeof(unsigned int) && count > 0) {
		if (put_user(*((char *)(&sample_step)+p), buf))
			return -EFAULT;
		buf++; p++; count--; read++;
	}
	pnt = (char *)prof_buffer + p - sizeof(atomic_t);
	if (copy_to_user(buf, (void *)pnt, count))
		return -EFAULT;
	read += count;
	*ppos += read;
	return read;
}

/*
 * Writing to /proc/profile resets the counters.
 *
 * Writing a 'profiling multiplier' value into it also re-sets the
 * profiling interval; the default multiplier is 1 and it can be
 * written only once.
 */
static ssize_t write_profile(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
#ifdef CONFIG_SMP
	extern int setup_profiling_timer(unsigned int multiplier);

	if (count == sizeof(int)) {
		unsigned int multiplier;

		if (copy_from_user(&multiplier, buf, sizeof(int)))
			return -EFAULT;

		if (setup_profiling_timer(multiplier))
			return -EINVAL;
	}
#endif
	profile_discard_flip_buffers();
	memset(prof_buffer, 0, prof_len * sizeof(atomic_t));
	return count;
}

static const struct file_operations proc_profile_operations = {
	.read		= read_profile,
	.write		= write_profile,
};
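
/*
 * Layout of the data returned by read_profile(), for reference: one
 * leading unsigned int holding the sample step (1 << prof_shift),
 * followed by prof_len counters, one per sample_step bytes of kernel
 * text. Userspace normally decodes this with readprofile(1), e.g.
 * (illustrative invocation) "readprofile -m /boot/System.map".
 */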

#ifdef CONFIG_SMP
static void profile_nop(void *unused)
{
}

static int create_hash_tables(void)
{
	int cpu;

	for_each_online_cpu(cpu) {
		int node = cpu_to_node(cpu);
		struct page *page;

		page = alloc_pages_exact_node(node,
				GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
				0);
		if (!page)
			goto out_cleanup;
		per_cpu(cpu_profile_hits, cpu)[1]
				= (struct profile_hit *)page_address(page);
		page = alloc_pages_exact_node(node,
				GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
				0);
		if (!page)
			goto out_cleanup;
		per_cpu(cpu_profile_hits, cpu)[0]
				= (struct profile_hit *)page_address(page);
	}
	return 0;
out_cleanup:
	prof_on = 0;
	smp_mb();	/* make sure profile_hits() sees prof_on == 0 */
	on_each_cpu(profile_nop, NULL, 1);
	for_each_online_cpu(cpu) {
		struct page *page;

		if (per_cpu(cpu_profile_hits, cpu)[0]) {
			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
			per_cpu(cpu_profile_hits, cpu)[0] = NULL;
			__free_page(page);
		}
		if (per_cpu(cpu_profile_hits, cpu)[1]) {
			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]);
			per_cpu(cpu_profile_hits, cpu)[1] = NULL;
			__free_page(page);
		}
	}
	return -1;
}
#else
#define create_hash_tables()			({ 0; })
#endif

int __ref create_proc_profile(void)
{
	struct proc_dir_entry *entry;

	if (!prof_on)
		return 0;
	if (create_hash_tables())
		return -ENOMEM;
	entry = proc_create("profile", S_IWUSR | S_IRUGO,
			    NULL, &proc_profile_operations);
	if (!entry)
		return 0;
	entry->size = (1 + prof_len) * sizeof(atomic_t);
	hotcpu_notifier(profile_cpu_callback, 0);
	return 0;
}
module_init(create_proc_profile);
#endif /* CONFIG_PROC_FS */