// SPDX-License-Identifier: GPL-2.0
/*
 * Performance events core code.
 */

11#include <linux/fs.h>
12#include <linux/mm.h>
13#include <linux/cpu.h>
14#include <linux/smp.h>
15#include <linux/idr.h>
16#include <linux/file.h>
17#include <linux/poll.h>
18#include <linux/slab.h>
19#include <linux/hash.h>
20#include <linux/tick.h>
21#include <linux/sysfs.h>
22#include <linux/dcache.h>
23#include <linux/percpu.h>
24#include <linux/ptrace.h>
25#include <linux/reboot.h>
26#include <linux/vmstat.h>
27#include <linux/device.h>
28#include <linux/export.h>
29#include <linux/vmalloc.h>
30#include <linux/hardirq.h>
31#include <linux/hugetlb.h>
32#include <linux/rculist.h>
33#include <linux/uaccess.h>
34#include <linux/syscalls.h>
35#include <linux/anon_inodes.h>
36#include <linux/kernel_stat.h>
37#include <linux/cgroup.h>
38#include <linux/perf_event.h>
39#include <linux/trace_events.h>
40#include <linux/hw_breakpoint.h>
41#include <linux/mm_types.h>
42#include <linux/module.h>
43#include <linux/mman.h>
44#include <linux/compat.h>
45#include <linux/bpf.h>
46#include <linux/filter.h>
47#include <linux/namei.h>
48#include <linux/parser.h>
49#include <linux/sched/clock.h>
50#include <linux/sched/mm.h>
51#include <linux/proc_ns.h>
52#include <linux/mount.h>
53#include <linux/min_heap.h>
54#include <linux/highmem.h>
55#include <linux/pgtable.h>
56#include <linux/buildid.h>
57
58#include "internal.h"
59
60#include <asm/irq_regs.h>
61
62typedef int (*remote_function_f)(void *);
63
64struct remote_function_call {
65 struct task_struct *p;
66 remote_function_f func;
67 void *info;
68 int ret;
69};
70
71static void remote_function(void *data)
72{
73 struct remote_function_call *tfc = data;
74 struct task_struct *p = tfc->p;
75
76 if (p) {
		/* tfc->ret stays -EAGAIN if @p moved to another CPU. */
		if (task_cpu(p) != smp_processor_id())
			return;

		/*
		 * Now that we're on the right CPU with IRQs disabled, we can
		 * check that @p is still the current task without racing
		 * against a context switch.
		 */
86 tfc->ret = -ESRCH;
87 if (p != current)
88 return;
89 }
90
91 tfc->ret = tfc->func(tfc->info);
92}
93
/**
 * task_function_call - call a function on the cpu on which a task runs
 * @p:		the task to evaluate
 * @func:	the function to be called
 * @info:	the function call argument
 *
 * Calls @func on the CPU where @p is currently running; retries as long as
 * the IPI keeps missing the task (-EAGAIN), e.g. because it migrated.
 *
 * Returns @func's return value, -ESRCH when @p is not running, or the
 * smp_call_function_single() error (e.g. -ENXIO for an offline CPU).
 */
107static int
108task_function_call(struct task_struct *p, remote_function_f func, void *info)
109{
110 struct remote_function_call data = {
111 .p = p,
112 .func = func,
113 .info = info,
114 .ret = -EAGAIN,
115 };
116 int ret;
117
118 for (;;) {
119 ret = smp_call_function_single(task_cpu(p), remote_function,
120 &data, 1);
121 if (!ret)
122 ret = data.ret;
123
124 if (ret != -EAGAIN)
125 break;
126
127 cond_resched();
128 }
129
130 return ret;
131}
132
/**
 * cpu_function_call - call a function on a given CPU
 * @cpu:	target cpu to run this function on
 * @func:	the function to be called
 * @info:	the function call argument
 *
 * Calls @func on the remote CPU.
 *
 * Returns @func's return value, or -ENXIO when the CPU is offline.
 */
143static int cpu_function_call(int cpu, remote_function_f func, void *info)
144{
145 struct remote_function_call data = {
146 .p = NULL,
147 .func = func,
148 .info = info,
149 .ret = -ENXIO,
150 };
151
152 smp_call_function_single(cpu, remote_function, &data, 1);
153
154 return data.ret;
155}
156
157static inline struct perf_cpu_context *
158__get_cpu_context(struct perf_event_context *ctx)
159{
160 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
161}
162
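/*
 * Lock the CPU context and, when given, the task context nested inside it;
 * perf_ctx_unlock() releases them in the opposite order.
 */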
163static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
164 struct perf_event_context *ctx)
165{
166 raw_spin_lock(&cpuctx->ctx.lock);
167 if (ctx)
168 raw_spin_lock(&ctx->lock);
169}
170
171static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
172 struct perf_event_context *ctx)
173{
174 if (ctx)
175 raw_spin_unlock(&ctx->lock);
176 raw_spin_unlock(&cpuctx->ctx.lock);
177}
178
179#define TASK_TOMBSTONE ((void *)-1L)
180
181static bool is_kernel_event(struct perf_event *event)
182{
183 return READ_ONCE(event->owner) == TASK_TOMBSTONE;
184}
185
/*
 * Event scheduling and most event modifications are done by cross-calling
 * into the CPU that hosts the event's context.  event_function() is the IPI
 * target: it re-validates under ctx->lock that we landed in the right
 * task/CPU context and then invokes the requested callback.
 * event_function_call() picks between a CPU call, a task call, or a direct
 * call under ctx->lock for inactive task contexts, retrying when it races
 * with a context switch.
 */
205typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
206 struct perf_event_context *, void *);
207
208struct event_function_struct {
209 struct perf_event *event;
210 event_f func;
211 void *data;
212};
213
214static int event_function(void *info)
215{
216 struct event_function_struct *efs = info;
217 struct perf_event *event = efs->event;
218 struct perf_event_context *ctx = event->ctx;
219 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
220 struct perf_event_context *task_ctx = cpuctx->task_ctx;
221 int ret = 0;
222
223 lockdep_assert_irqs_disabled();
224
225 perf_ctx_lock(cpuctx, task_ctx);

	/*
	 * Since we do the IPI without holding ctx->lock, things may have
	 * changed underneath us; double check we hit the task and context we
	 * set out to hit.
	 */
230 if (ctx->task) {
231 if (ctx->task != current) {
232 ret = -ESRCH;
233 goto unlock;
234 }
235
236
237
238
239
240
241
242
243 WARN_ON_ONCE(!ctx->is_active);
244
245
246
247
248 WARN_ON_ONCE(task_ctx != ctx);
249 } else {
250 WARN_ON_ONCE(&cpuctx->ctx != ctx);
251 }
252
253 efs->func(event, cpuctx, ctx, efs->data);
254unlock:
255 perf_ctx_unlock(cpuctx, task_ctx);
256
257 return ret;
258}
259
260static void event_function_call(struct perf_event *event, event_f func, void *data)
261{
262 struct perf_event_context *ctx = event->ctx;
263 struct task_struct *task = READ_ONCE(ctx->task);
264 struct event_function_struct efs = {
265 .event = event,
266 .func = func,
267 .data = data,
268 };
269
270 if (!event->parent) {
		/*
		 * If this is a !child event, we must hold ctx::mutex to
		 * stabilize the event->ctx relation.  See
		 * perf_event_ctx_lock().
		 */
276 lockdep_assert_held(&ctx->mutex);
277 }
278
279 if (!task) {
280 cpu_function_call(event->cpu, event_function, &efs);
281 return;
282 }
283
284 if (task == TASK_TOMBSTONE)
285 return;
286
287again:
288 if (!task_function_call(task, event_function, &efs))
289 return;
290
291 raw_spin_lock_irq(&ctx->lock);
	/*
	 * Reload the task pointer; it might have been changed by a
	 * concurrent context switch scheduling this context out.
	 */
296 task = ctx->task;
297 if (task == TASK_TOMBSTONE) {
298 raw_spin_unlock_irq(&ctx->lock);
299 return;
300 }
301 if (ctx->is_active) {
302 raw_spin_unlock_irq(&ctx->lock);
303 goto again;
304 }
305 func(event, NULL, ctx, data);
306 raw_spin_unlock_irq(&ctx->lock);
307}
308
/*
 * Similar to event_function_call() + event_function(), but hard assumes IRQs
 * are already disabled and we're on the right CPU.
 */
313static void event_function_local(struct perf_event *event, event_f func, void *data)
314{
315 struct perf_event_context *ctx = event->ctx;
316 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
317 struct task_struct *task = READ_ONCE(ctx->task);
318 struct perf_event_context *task_ctx = NULL;
319
320 lockdep_assert_irqs_disabled();
321
322 if (task) {
323 if (task == TASK_TOMBSTONE)
324 return;
325
326 task_ctx = ctx;
327 }
328
329 perf_ctx_lock(cpuctx, task_ctx);
330
331 task = ctx->task;
332 if (task == TASK_TOMBSTONE)
333 goto unlock;
334
335 if (task) {
336
337
338
339
340
341 if (ctx->is_active) {
342 if (WARN_ON_ONCE(task != current))
343 goto unlock;
344
345 if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
346 goto unlock;
347 }
348 } else {
349 WARN_ON_ONCE(&cpuctx->ctx != ctx);
350 }
351
352 func(event, cpuctx, ctx, data);
353unlock:
354 perf_ctx_unlock(cpuctx, task_ctx);
355}
356
357#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
358 PERF_FLAG_FD_OUTPUT |\
359 PERF_FLAG_PID_CGROUP |\
360 PERF_FLAG_FD_CLOEXEC)
361
/*
 * branch priv levels that need permission checks
 */
365#define PERF_SAMPLE_BRANCH_PERM_PLM \
366 (PERF_SAMPLE_BRANCH_KERNEL |\
367 PERF_SAMPLE_BRANCH_HV)
368
369enum event_type_t {
370 EVENT_FLEXIBLE = 0x1,
371 EVENT_PINNED = 0x2,
372 EVENT_TIME = 0x4,
373
374 EVENT_CPU = 0x8,
375 EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
376};
377
/*
 * perf_sched_events is a static key that enables the context-switch hooks
 * while events that need them exist; it is dropped from delayed work
 * (perf_sched_delayed) to avoid flipping the key rapidly when short-lived
 * events are created and destroyed in quick succession.
 */
383static void perf_sched_delayed(struct work_struct *work);
384DEFINE_STATIC_KEY_FALSE(perf_sched_events);
385static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
386static DEFINE_MUTEX(perf_sched_mutex);
387static atomic_t perf_sched_count;
388
389static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
390static DEFINE_PER_CPU(int, perf_sched_cb_usages);
391static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
392
393static atomic_t nr_mmap_events __read_mostly;
394static atomic_t nr_comm_events __read_mostly;
395static atomic_t nr_namespaces_events __read_mostly;
396static atomic_t nr_task_events __read_mostly;
397static atomic_t nr_freq_events __read_mostly;
398static atomic_t nr_switch_events __read_mostly;
399static atomic_t nr_ksymbol_events __read_mostly;
400static atomic_t nr_bpf_events __read_mostly;
401static atomic_t nr_cgroup_events __read_mostly;
402static atomic_t nr_text_poke_events __read_mostly;
403static atomic_t nr_build_id_events __read_mostly;
404
405static LIST_HEAD(pmus);
406static DEFINE_MUTEX(pmus_lock);
407static struct srcu_struct pmus_srcu;
408static cpumask_var_t perf_online_mask;
409static struct kmem_cache *perf_event_cache;
410
/*
 * perf event paranoia level:
 *  -1 - not paranoid at all
 *   0 - disallow raw tracepoint access for unpriv
 *   1 - disallow cpu events for unpriv
 *   2 - disallow kernel profiling for unpriv
 */
418int sysctl_perf_event_paranoid __read_mostly = 2;

/* Minimum for 512 kiB + 1 user control page */
421int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024);
422
/*
 * max perf event sample rate
 */
426#define DEFAULT_MAX_SAMPLE_RATE 100000
427#define DEFAULT_SAMPLE_PERIOD_NS (NSEC_PER_SEC / DEFAULT_MAX_SAMPLE_RATE)
428#define DEFAULT_CPU_TIME_MAX_PERCENT 25
429
430int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
431
432static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
433static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;
434
435static int perf_sample_allowed_ns __read_mostly =
436 DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;
437
438static void update_perf_cpu_limits(void)
439{
440 u64 tmp = perf_sample_period_ns;
441
442 tmp *= sysctl_perf_cpu_time_max_percent;
443 tmp = div_u64(tmp, 100);
444 if (!tmp)
445 tmp = 1;
446
447 WRITE_ONCE(perf_sample_allowed_ns, tmp);
448}
449
450static bool perf_rotate_context(struct perf_cpu_context *cpuctx);
451
452int perf_proc_update_handler(struct ctl_table *table, int write,
453 void *buffer, size_t *lenp, loff_t *ppos)
454{
455 int ret;
456 int perf_cpu = sysctl_perf_cpu_time_max_percent;

	/*
	 * If throttling is disabled don't allow the write:
	 */
460 if (write && (perf_cpu == 100 || perf_cpu == 0))
461 return -EINVAL;
462
463 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
464 if (ret || !write)
465 return ret;
466
467 max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
468 perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
469 update_perf_cpu_limits();
470
471 return 0;
472}
473
474int sysctl_perf_cpu_time_max_percent __read_mostly = DEFAULT_CPU_TIME_MAX_PERCENT;
475
476int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
477 void *buffer, size_t *lenp, loff_t *ppos)
478{
479 int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
480
481 if (ret || !write)
482 return ret;
483
484 if (sysctl_perf_cpu_time_max_percent == 100 ||
485 sysctl_perf_cpu_time_max_percent == 0) {
486 printk(KERN_WARNING
487 "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
488 WRITE_ONCE(perf_sample_allowed_ns, 0);
489 } else {
490 update_perf_cpu_limits();
491 }
492
493 return 0;
494}
495
/*
 * perf samples are processed in some very critical code paths (NMIs).
 * If they take too much CPU time, the system can lock up and not get any
 * real work done.  The code below measures the average sample duration and
 * lowers the sample rate when samples take too long.
 */
502#define NR_ACCUMULATED_SAMPLES 128
503static DEFINE_PER_CPU(u64, running_sample_length);
504
505static u64 __report_avg;
506static u64 __report_allowed;
507
508static void perf_duration_warn(struct irq_work *w)
509{
510 printk_ratelimited(KERN_INFO
511 "perf: interrupt took too long (%lld > %lld), lowering "
512 "kernel.perf_event_max_sample_rate to %d\n",
513 __report_avg, __report_allowed,
514 sysctl_perf_event_sample_rate);
515}
516
517static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn);
518
519void perf_sample_event_took(u64 sample_len_ns)
520{
521 u64 max_len = READ_ONCE(perf_sample_allowed_ns);
522 u64 running_len;
523 u64 avg_len;
524 u32 max;
525
526 if (max_len == 0)
527 return;
528
529
530 running_len = __this_cpu_read(running_sample_length);
531 running_len -= running_len/NR_ACCUMULATED_SAMPLES;
532 running_len += sample_len_ns;
533 __this_cpu_write(running_sample_length, running_len);
534
535
536
537
538
539
540 avg_len = running_len/NR_ACCUMULATED_SAMPLES;
541 if (avg_len <= max_len)
542 return;
543
544 __report_avg = avg_len;
545 __report_allowed = max_len;
546
547
548
549
550 avg_len += avg_len / 4;
551 max = (TICK_NSEC / 100) * sysctl_perf_cpu_time_max_percent;
552 if (avg_len < max)
553 max /= (u32)avg_len;
554 else
555 max = 1;
556
557 WRITE_ONCE(perf_sample_allowed_ns, avg_len);
558 WRITE_ONCE(max_samples_per_tick, max);
559
560 sysctl_perf_event_sample_rate = max * HZ;
561 perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
562
563 if (!irq_work_queue(&perf_duration_work)) {
564 early_printk("perf: interrupt took too long (%lld > %lld), lowering "
565 "kernel.perf_event_max_sample_rate to %d\n",
566 __report_avg, __report_allowed,
567 sysctl_perf_event_sample_rate);
568 }
569}
570
571static atomic64_t perf_event_id;
572
573static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
574 enum event_type_t event_type);
575
576static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
577 enum event_type_t event_type,
578 struct task_struct *task);
579
580static void update_context_time(struct perf_event_context *ctx);
581static u64 perf_event_time(struct perf_event *event);
582
583void __weak perf_event_print_debug(void) { }
584
585static inline u64 perf_clock(void)
586{
587 return local_clock();
588}
589
590static inline u64 perf_event_clock(struct perf_event *event)
591{
592 return event->clock();
593}
594
/*
 * State based event timekeeping.
 *
 * An event is OFF (or worse) while disabled, INACTIVE while enabled but not
 * on the PMU, and ACTIVE while counting.  total_time_enabled accrues while
 * the event is at least INACTIVE, total_time_running only while it is
 * ACTIVE; tstamp records the time of the last state change.
 *
 * A group member's effective state is capped by its leader: when the leader
 * is OFF (or worse) the whole group stops accruing time, which is what
 * __perf_effective_state() implements.
 */
617static __always_inline enum perf_event_state
618__perf_effective_state(struct perf_event *event)
619{
620 struct perf_event *leader = event->group_leader;
621
622 if (leader->state <= PERF_EVENT_STATE_OFF)
623 return leader->state;
624
625 return event->state;
626}
627
628static __always_inline void
629__perf_update_times(struct perf_event *event, u64 now, u64 *enabled, u64 *running)
630{
631 enum perf_event_state state = __perf_effective_state(event);
632 u64 delta = now - event->tstamp;
633
634 *enabled = event->total_time_enabled;
635 if (state >= PERF_EVENT_STATE_INACTIVE)
636 *enabled += delta;
637
638 *running = event->total_time_running;
639 if (state >= PERF_EVENT_STATE_ACTIVE)
640 *running += delta;
641}
642
643static void perf_event_update_time(struct perf_event *event)
644{
645 u64 now = perf_event_time(event);
646
647 __perf_update_times(event, now, &event->total_time_enabled,
648 &event->total_time_running);
649 event->tstamp = now;
650}
651
652static void perf_event_update_sibling_time(struct perf_event *leader)
653{
654 struct perf_event *sibling;
655
656 for_each_sibling_event(sibling, leader)
657 perf_event_update_time(sibling);
658}
659
660static void
661perf_event_set_state(struct perf_event *event, enum perf_event_state state)
662{
663 if (event->state == state)
664 return;
665
666 perf_event_update_time(event);
667
668
669
670
671 if ((event->state < 0) ^ (state < 0))
672 perf_event_update_sibling_time(event);
673
674 WRITE_ONCE(event->state, state);
675}
676
677#ifdef CONFIG_CGROUP_PERF
678
679static inline bool
680perf_cgroup_match(struct perf_event *event)
681{
682 struct perf_event_context *ctx = event->ctx;
683 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
684
685
686 if (!event->cgrp)
687 return true;
688
689
690 if (!cpuctx->cgrp)
691 return false;

	/*
	 * Cgroup scoping is recursive.  An event enabled for a cgroup is
	 * also enabled for all its descendant cgroups.  If @cpuctx's cgroup
	 * is a descendant of @event's (the test covers the identity case),
	 * it's a match.
	 */
699 return cgroup_is_descendant(cpuctx->cgrp->css.cgroup,
700 event->cgrp->css.cgroup);
701}
702
703static inline void perf_detach_cgroup(struct perf_event *event)
704{
705 css_put(&event->cgrp->css);
706 event->cgrp = NULL;
707}
708
709static inline int is_cgroup_event(struct perf_event *event)
710{
711 return event->cgrp != NULL;
712}
713
714static inline u64 perf_cgroup_event_time(struct perf_event *event)
715{
716 struct perf_cgroup_info *t;
717
718 t = per_cpu_ptr(event->cgrp->info, event->cpu);
719 return t->time;
720}
721
722static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
723{
724 struct perf_cgroup_info *info;
725 u64 now;
726
727 now = perf_clock();
728
729 info = this_cpu_ptr(cgrp->info);
730
731 info->time += now - info->timestamp;
732 info->timestamp = now;
733}
734
735static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
736{
737 struct perf_cgroup *cgrp = cpuctx->cgrp;
738 struct cgroup_subsys_state *css;
739
740 if (cgrp) {
741 for (css = &cgrp->css; css; css = css->parent) {
742 cgrp = container_of(css, struct perf_cgroup, css);
743 __update_cgrp_time(cgrp);
744 }
745 }
746}
747
748static inline void update_cgrp_time_from_event(struct perf_event *event)
749{
750 struct perf_cgroup *cgrp;
751
752
753
754
755
756 if (!is_cgroup_event(event))
757 return;
758
759 cgrp = perf_cgroup_from_task(current, event->ctx);
760
761
762
763 if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
764 __update_cgrp_time(event->cgrp);
765}
766
767static inline void
768perf_cgroup_set_timestamp(struct task_struct *task,
769 struct perf_event_context *ctx)
770{
771 struct perf_cgroup *cgrp;
772 struct perf_cgroup_info *info;
773 struct cgroup_subsys_state *css;
774
775
776
777
778
779
780 if (!task || !ctx->nr_cgroups)
781 return;
782
783 cgrp = perf_cgroup_from_task(task, ctx);
784
785 for (css = &cgrp->css; css; css = css->parent) {
786 cgrp = container_of(css, struct perf_cgroup, css);
787 info = this_cpu_ptr(cgrp->info);
788 info->timestamp = ctx->timestamp;
789 }
790}
791
792static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list);
793
794#define PERF_CGROUP_SWOUT 0x1
795#define PERF_CGROUP_SWIN 0x2
796
/*
 * Reschedule events on this CPU based on the cgroup constraints of @task.
 *
 * mode PERF_CGROUP_SWOUT: schedule out all cgroup events
 * mode PERF_CGROUP_SWIN : schedule in cgroup events matching @task's cgroup
 */
803static void perf_cgroup_switch(struct task_struct *task, int mode)
804{
805 struct perf_cpu_context *cpuctx;
806 struct list_head *list;
807 unsigned long flags;
808
809
810
811
812
813 local_irq_save(flags);
814
815 list = this_cpu_ptr(&cgrp_cpuctx_list);
816 list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) {
817 WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
818
819 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
820 perf_pmu_disable(cpuctx->ctx.pmu);
821
822 if (mode & PERF_CGROUP_SWOUT) {
823 cpu_ctx_sched_out(cpuctx, EVENT_ALL);
824
825
826
827
828 cpuctx->cgrp = NULL;
829 }
830
831 if (mode & PERF_CGROUP_SWIN) {
832 WARN_ON_ONCE(cpuctx->cgrp);
833
834
835
836
837
838
839
840 cpuctx->cgrp = perf_cgroup_from_task(task,
841 &cpuctx->ctx);
842 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
843 }
844 perf_pmu_enable(cpuctx->ctx.pmu);
845 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
846 }
847
848 local_irq_restore(flags);
849}
850
851static inline void perf_cgroup_sched_out(struct task_struct *task,
852 struct task_struct *next)
853{
854 struct perf_cgroup *cgrp1;
855 struct perf_cgroup *cgrp2 = NULL;
856
857 rcu_read_lock();
858
859
860
861
862
863 cgrp1 = perf_cgroup_from_task(task, NULL);
864 cgrp2 = perf_cgroup_from_task(next, NULL);
865
866
867
868
869
870
871 if (cgrp1 != cgrp2)
872 perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
873
874 rcu_read_unlock();
875}
876
877static inline void perf_cgroup_sched_in(struct task_struct *prev,
878 struct task_struct *task)
879{
880 struct perf_cgroup *cgrp1;
881 struct perf_cgroup *cgrp2 = NULL;
882
883 rcu_read_lock();
884
885
886
887
888
889 cgrp1 = perf_cgroup_from_task(task, NULL);
890 cgrp2 = perf_cgroup_from_task(prev, NULL);
891
892
893
894
895
896
897 if (cgrp1 != cgrp2)
898 perf_cgroup_switch(task, PERF_CGROUP_SWIN);
899
900 rcu_read_unlock();
901}
902
903static int perf_cgroup_ensure_storage(struct perf_event *event,
904 struct cgroup_subsys_state *css)
905{
906 struct perf_cpu_context *cpuctx;
907 struct perf_event **storage;
908 int cpu, heap_size, ret = 0;
909
910
911
912
913
914 for (heap_size = 1; css; css = css->parent)
915 heap_size++;
916
917 for_each_possible_cpu(cpu) {
918 cpuctx = per_cpu_ptr(event->pmu->pmu_cpu_context, cpu);
919 if (heap_size <= cpuctx->heap_size)
920 continue;
921
922 storage = kmalloc_node(heap_size * sizeof(struct perf_event *),
923 GFP_KERNEL, cpu_to_node(cpu));
924 if (!storage) {
925 ret = -ENOMEM;
926 break;
927 }
928
929 raw_spin_lock_irq(&cpuctx->ctx.lock);
930 if (cpuctx->heap_size < heap_size) {
931 swap(cpuctx->heap, storage);
932 if (storage == cpuctx->heap_default)
933 storage = NULL;
934 cpuctx->heap_size = heap_size;
935 }
936 raw_spin_unlock_irq(&cpuctx->ctx.lock);
937
938 kfree(storage);
939 }
940
941 return ret;
942}
943
944static inline int perf_cgroup_connect(int fd, struct perf_event *event,
945 struct perf_event_attr *attr,
946 struct perf_event *group_leader)
947{
948 struct perf_cgroup *cgrp;
949 struct cgroup_subsys_state *css;
950 struct fd f = fdget(fd);
951 int ret = 0;
952
953 if (!f.file)
954 return -EBADF;
955
956 css = css_tryget_online_from_dir(f.file->f_path.dentry,
957 &perf_event_cgrp_subsys);
958 if (IS_ERR(css)) {
959 ret = PTR_ERR(css);
960 goto out;
961 }
962
963 ret = perf_cgroup_ensure_storage(event, css);
964 if (ret)
965 goto out;
966
967 cgrp = container_of(css, struct perf_cgroup, css);
968 event->cgrp = cgrp;
969
970
971
972
973
974
975 if (group_leader && group_leader->cgrp != cgrp) {
976 perf_detach_cgroup(event);
977 ret = -EINVAL;
978 }
979out:
980 fdput(f);
981 return ret;
982}
983
984static inline void
985perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
986{
987 struct perf_cgroup_info *t;
988 t = per_cpu_ptr(event->cgrp->info, event->cpu);
989 event->shadow_ctx_time = now - t->timestamp;
990}
991
992static inline void
993perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
994{
995 struct perf_cpu_context *cpuctx;
996
997 if (!is_cgroup_event(event))
998 return;
999
1000
1001
1002
1003
1004 cpuctx = container_of(ctx, struct perf_cpu_context, ctx);
1005
1006
1007
1008
1009
1010
1011
1012 if (ctx->is_active && !cpuctx->cgrp) {
1013 struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
1014
1015 if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
1016 cpuctx->cgrp = cgrp;
1017 }
1018
1019 if (ctx->nr_cgroups++)
1020 return;
1021
1022 list_add(&cpuctx->cgrp_cpuctx_entry,
1023 per_cpu_ptr(&cgrp_cpuctx_list, event->cpu));
1024}
1025
1026static inline void
1027perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
1028{
1029 struct perf_cpu_context *cpuctx;
1030
1031 if (!is_cgroup_event(event))
1032 return;
1033
1034
1035
1036
1037
1038 cpuctx = container_of(ctx, struct perf_cpu_context, ctx);
1039
1040 if (--ctx->nr_cgroups)
1041 return;
1042
1043 if (ctx->is_active && cpuctx->cgrp)
1044 cpuctx->cgrp = NULL;
1045
1046 list_del(&cpuctx->cgrp_cpuctx_entry);
1047}
1048
1049#else
1050
1051static inline bool
1052perf_cgroup_match(struct perf_event *event)
1053{
1054 return true;
1055}
1056
1057static inline void perf_detach_cgroup(struct perf_event *event)
1058{}
1059
1060static inline int is_cgroup_event(struct perf_event *event)
1061{
1062 return 0;
1063}
1064
1065static inline void update_cgrp_time_from_event(struct perf_event *event)
1066{
1067}
1068
1069static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
1070{
1071}
1072
1073static inline void perf_cgroup_sched_out(struct task_struct *task,
1074 struct task_struct *next)
1075{
1076}
1077
1078static inline void perf_cgroup_sched_in(struct task_struct *prev,
1079 struct task_struct *task)
1080{
1081}
1082
1083static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
1084 struct perf_event_attr *attr,
1085 struct perf_event *group_leader)
1086{
1087 return -EINVAL;
1088}
1089
1090static inline void
1091perf_cgroup_set_timestamp(struct task_struct *task,
1092 struct perf_event_context *ctx)
1093{
1094}
1095
1096static inline void
1097perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
1098{
1099}
1100
1101static inline void
1102perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
1103{
1104}
1105
1106static inline u64 perf_cgroup_event_time(struct perf_event *event)
1107{
1108 return 0;
1109}
1110
1111static inline void
1112perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
1113{
1114}
1115
1116static inline void
1117perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
1118{
1119}
1120#endif
1121
/*
 * Default group-rotation (multiplexing) hrtimer interval: one scheduler
 * tick, expressed in milliseconds.
 */
1126#define PERF_CPU_HRTIMER (1000 / HZ)
1127
/*
 * Runs from hrtimer hard-IRQ context; interrupts are disabled.
 */
1130static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
1131{
1132 struct perf_cpu_context *cpuctx;
1133 bool rotations;
1134
1135 lockdep_assert_irqs_disabled();
1136
1137 cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
1138 rotations = perf_rotate_context(cpuctx);
1139
1140 raw_spin_lock(&cpuctx->hrtimer_lock);
1141 if (rotations)
1142 hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
1143 else
1144 cpuctx->hrtimer_active = 0;
1145 raw_spin_unlock(&cpuctx->hrtimer_lock);
1146
1147 return rotations ? HRTIMER_RESTART : HRTIMER_NORESTART;
1148}
1149
1150static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
1151{
1152 struct hrtimer *timer = &cpuctx->hrtimer;
1153 struct pmu *pmu = cpuctx->ctx.pmu;
1154 u64 interval;
1155
1156
1157 if (pmu->task_ctx_nr == perf_sw_context)
1158 return;
1159
1160
1161
1162
1163
1164 interval = pmu->hrtimer_interval_ms;
1165 if (interval < 1)
1166 interval = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
1167
1168 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
1169
1170 raw_spin_lock_init(&cpuctx->hrtimer_lock);
1171 hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1172 timer->function = perf_mux_hrtimer_handler;
1173}
1174
1175static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
1176{
1177 struct hrtimer *timer = &cpuctx->hrtimer;
1178 struct pmu *pmu = cpuctx->ctx.pmu;
1179 unsigned long flags;
1180
1181
1182 if (pmu->task_ctx_nr == perf_sw_context)
1183 return 0;
1184
1185 raw_spin_lock_irqsave(&cpuctx->hrtimer_lock, flags);
1186 if (!cpuctx->hrtimer_active) {
1187 cpuctx->hrtimer_active = 1;
1188 hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
1189 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
1190 }
1191 raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
1192
1193 return 0;
1194}
1195
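/*
 * PMU disable/enable nest via a per-CPU counter: the PMU is disabled on the
 * first perf_pmu_disable() and re-enabled only when the matching outermost
 * perf_pmu_enable() brings the count back to zero.
 */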
1196void perf_pmu_disable(struct pmu *pmu)
1197{
1198 int *count = this_cpu_ptr(pmu->pmu_disable_count);
1199 if (!(*count)++)
1200 pmu->pmu_disable(pmu);
1201}
1202
1203void perf_pmu_enable(struct pmu *pmu)
1204{
1205 int *count = this_cpu_ptr(pmu->pmu_disable_count);
1206 if (!--(*count))
1207 pmu->pmu_enable(pmu);
1208}
1209
1210static DEFINE_PER_CPU(struct list_head, active_ctx_list);
1211
/*
 * perf_event_ctx_activate() and perf_event_ctx_deactivate() are strictly
 * CPU affine and called with IRQs disabled, so the per-CPU active context
 * list needs no further locking.
 */
1218static void perf_event_ctx_activate(struct perf_event_context *ctx)
1219{
1220 struct list_head *head = this_cpu_ptr(&active_ctx_list);
1221
1222 lockdep_assert_irqs_disabled();
1223
1224 WARN_ON(!list_empty(&ctx->active_ctx_list));
1225
1226 list_add(&ctx->active_ctx_list, head);
1227}
1228
1229static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
1230{
1231 lockdep_assert_irqs_disabled();
1232
1233 WARN_ON(list_empty(&ctx->active_ctx_list));
1234
1235 list_del_init(&ctx->active_ctx_list);
1236}
1237
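/* Grab a reference on a context; paired with put_ctx(). */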
1238static void get_ctx(struct perf_event_context *ctx)
1239{
1240 refcount_inc(&ctx->refcount);
1241}
1242
1243static void *alloc_task_ctx_data(struct pmu *pmu)
1244{
1245 if (pmu->task_ctx_cache)
1246 return kmem_cache_zalloc(pmu->task_ctx_cache, GFP_KERNEL);
1247
1248 return NULL;
1249}
1250
1251static void free_task_ctx_data(struct pmu *pmu, void *task_ctx_data)
1252{
1253 if (pmu->task_ctx_cache && task_ctx_data)
1254 kmem_cache_free(pmu->task_ctx_cache, task_ctx_data);
1255}
1256
1257static void free_ctx(struct rcu_head *head)
1258{
1259 struct perf_event_context *ctx;
1260
1261 ctx = container_of(head, struct perf_event_context, rcu_head);
1262 free_task_ctx_data(ctx->pmu, ctx->task_ctx_data);
1263 kfree(ctx);
1264}
1265
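/*
 * Drop a context reference; the final put releases the parent context and
 * task references and frees the context after an RCU grace period.
 */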
1266static void put_ctx(struct perf_event_context *ctx)
1267{
1268 if (refcount_dec_and_test(&ctx->refcount)) {
1269 if (ctx->parent_ctx)
1270 put_ctx(ctx->parent_ctx);
1271 if (ctx->task && ctx->task != TASK_TOMBSTONE)
1272 put_task_struct(ctx->task);
1273 call_rcu(&ctx->rcu_head, free_ctx);
1274 }
1275}
1276
/*
 * Event ownership and ctx stability:
 *
 * An event's ctx pointer can change (sys_perf_event_open()'s move_group and
 * perf_pmu_migrate_context() re-home events), but those paths take both the
 * old and the new perf_event_context::mutex.  Holding ctx::mutex therefore
 * pins event->ctx.
 *
 * perf_event_ctx_lock_nested() below exploits this: it takes a reference on
 * the ctx it observes, acquires ctx::mutex, and then re-checks that
 * event->ctx still points to that ctx, retrying the whole dance otherwise.
 *
 * The mutexes nest: perf_event_context::mutex above perf_event::child_mutex
 * above perf_event_context::lock, and lock ordering between two contexts'
 * mutexes is by mutex address.
 */
1343static struct perf_event_context *
1344perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
1345{
1346 struct perf_event_context *ctx;
1347
1348again:
1349 rcu_read_lock();
1350 ctx = READ_ONCE(event->ctx);
1351 if (!refcount_inc_not_zero(&ctx->refcount)) {
1352 rcu_read_unlock();
1353 goto again;
1354 }
1355 rcu_read_unlock();
1356
1357 mutex_lock_nested(&ctx->mutex, nesting);
1358 if (event->ctx != ctx) {
1359 mutex_unlock(&ctx->mutex);
1360 put_ctx(ctx);
1361 goto again;
1362 }
1363
1364 return ctx;
1365}
1366
1367static inline struct perf_event_context *
1368perf_event_ctx_lock(struct perf_event *event)
1369{
1370 return perf_event_ctx_lock_nested(event, 0);
1371}
1372
1373static void perf_event_ctx_unlock(struct perf_event *event,
1374 struct perf_event_context *ctx)
1375{
1376 mutex_unlock(&ctx->mutex);
1377 put_ctx(ctx);
1378}
1379
/*
 * Detach a context from its parent.  Must be called with ctx->lock held;
 * the parent reference is handed back to the caller to drop once ctx->lock
 * has been released.
 */
1385static __must_check struct perf_event_context *
1386unclone_ctx(struct perf_event_context *ctx)
1387{
1388 struct perf_event_context *parent_ctx = ctx->parent_ctx;
1389
1390 lockdep_assert_held(&ctx->lock);
1391
1392 if (parent_ctx)
1393 ctx->parent_ctx = NULL;
1394 ctx->generation++;
1395
1396 return parent_ctx;
1397}
1398
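/*
 * Report a task's pid/tid of the given type as seen from the pid namespace
 * the event (or, for inherited events, its parent) was created in.
 */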
1399static u32 perf_event_pid_type(struct perf_event *event, struct task_struct *p,
1400 enum pid_type type)
1401{
1402 u32 nr;
1403
1404
1405
1406 if (event->parent)
1407 event = event->parent;
1408
1409 nr = __task_pid_nr_ns(p, type, event->ns);
1410
1411 if (!nr && !pid_alive(p))
1412 nr = -1;
1413 return nr;
1414}
1415
1416static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
1417{
1418 return perf_event_pid_type(event, p, PIDTYPE_TGID);
1419}
1420
1421static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
1422{
1423 return perf_event_pid_type(event, p, PIDTYPE_PID);
1424}
1425
/*
 * If we inherit events we want to return the parent event id to userspace.
 */
1430static u64 primary_event_id(struct perf_event *event)
1431{
1432 u64 id = event->id;
1433
1434 if (event->parent)
1435 id = event->parent->id;
1436
1437 return id;
1438}
1439
/*
 * Get the perf_event_context for a task and lock it.
 *
 * This has to cope with the fact that until it is locked, the context could
 * get moved to another task.
 */
1446static struct perf_event_context *
1447perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
1448{
1449 struct perf_event_context *ctx;
1450
1451retry:
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461 local_irq_save(*flags);
1462 rcu_read_lock();
1463 ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
1464 if (ctx) {
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475 raw_spin_lock(&ctx->lock);
1476 if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
1477 raw_spin_unlock(&ctx->lock);
1478 rcu_read_unlock();
1479 local_irq_restore(*flags);
1480 goto retry;
1481 }
1482
1483 if (ctx->task == TASK_TOMBSTONE ||
1484 !refcount_inc_not_zero(&ctx->refcount)) {
1485 raw_spin_unlock(&ctx->lock);
1486 ctx = NULL;
1487 } else {
1488 WARN_ON_ONCE(ctx->task != task);
1489 }
1490 }
1491 rcu_read_unlock();
1492 if (!ctx)
1493 local_irq_restore(*flags);
1494 return ctx;
1495}
1496
/*
 * Get the context for a task and increment its pin_count so it can't get
 * swapped to another task.  This also takes a reference so the context
 * can't get freed.
 */
1502static struct perf_event_context *
1503perf_pin_task_context(struct task_struct *task, int ctxn)
1504{
1505 struct perf_event_context *ctx;
1506 unsigned long flags;
1507
1508 ctx = perf_lock_task_context(task, ctxn, &flags);
1509 if (ctx) {
1510 ++ctx->pin_count;
1511 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1512 }
1513 return ctx;
1514}
1515
1516static void perf_unpin_context(struct perf_event_context *ctx)
1517{
1518 unsigned long flags;
1519
1520 raw_spin_lock_irqsave(&ctx->lock, flags);
1521 --ctx->pin_count;
1522 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1523}
1524
/*
 * Update the record of the current time in a context.
 */
1528static void update_context_time(struct perf_event_context *ctx)
1529{
1530 u64 now = perf_clock();
1531
1532 ctx->time += now - ctx->timestamp;
1533 ctx->timestamp = now;
1534}
1535
1536static u64 perf_event_time(struct perf_event *event)
1537{
1538 struct perf_event_context *ctx = event->ctx;
1539
1540 if (is_cgroup_event(event))
1541 return perf_cgroup_event_time(event);
1542
1543 return ctx ? ctx->time : 0;
1544}
1545
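/*
 * Classify an event (via its group leader) as pinned or flexible, and flag
 * it as a CPU event when it belongs to a CPU context rather than a task.
 */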
1546static enum event_type_t get_event_type(struct perf_event *event)
1547{
1548 struct perf_event_context *ctx = event->ctx;
1549 enum event_type_t event_type;
1550
1551 lockdep_assert_held(&ctx->lock);
1552
1553
1554
1555
1556
1557 if (event->group_leader != event)
1558 event = event->group_leader;
1559
1560 event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
1561 if (!ctx->task)
1562 event_type |= EVENT_CPU;
1563
1564 return event_type;
1565}
1566
1567
1568
1569
1570static void init_event_group(struct perf_event *event)
1571{
1572 RB_CLEAR_NODE(&event->group_node);
1573 event->group_index = 0;
1574}
1575
1576
1577
1578
1579
1580static struct perf_event_groups *
1581get_event_groups(struct perf_event *event, struct perf_event_context *ctx)
1582{
1583 if (event->attr.pinned)
1584 return &ctx->pinned_groups;
1585 else
1586 return &ctx->flexible_groups;
1587}
1588
1589
1590
1591
1592static void perf_event_groups_init(struct perf_event_groups *groups)
1593{
1594 groups->tree = RB_ROOT;
1595 groups->index = 0;
1596}
1597
1598static inline struct cgroup *event_cgroup(const struct perf_event *event)
1599{
1600 struct cgroup *cgroup = NULL;
1601
1602#ifdef CONFIG_CGROUP_PERF
1603 if (event->cgrp)
1604 cgroup = event->cgrp->css.cgroup;
1605#endif
1606
1607 return cgroup;
1608}
1609
/*
 * Compare function for event groups: sorts by CPU, then (when cgroup
 * support is enabled) by cgroup id, and finally by the per-tree insertion
 * index, which provides a stable ordering used when rotating groups.
 */
1616static __always_inline int
1617perf_event_groups_cmp(const int left_cpu, const struct cgroup *left_cgroup,
1618 const u64 left_group_index, const struct perf_event *right)
1619{
1620 if (left_cpu < right->cpu)
1621 return -1;
1622 if (left_cpu > right->cpu)
1623 return 1;
1624
1625#ifdef CONFIG_CGROUP_PERF
1626 {
1627 const struct cgroup *right_cgroup = event_cgroup(right);
1628
1629 if (left_cgroup != right_cgroup) {
1630 if (!left_cgroup) {
1631
1632
1633
1634
1635 return -1;
1636 }
1637 if (!right_cgroup) {
1638
1639
1640
1641
1642 return 1;
1643 }
1644
1645 if (cgroup_id(left_cgroup) < cgroup_id(right_cgroup))
1646 return -1;
1647
1648 return 1;
1649 }
1650 }
1651#endif
1652
1653 if (left_group_index < right->group_index)
1654 return -1;
1655 if (left_group_index > right->group_index)
1656 return 1;
1657
1658 return 0;
1659}
1660
1661#define __node_2_pe(node) \
1662 rb_entry((node), struct perf_event, group_node)
1663
1664static inline bool __group_less(struct rb_node *a, const struct rb_node *b)
1665{
1666 struct perf_event *e = __node_2_pe(a);
1667 return perf_event_groups_cmp(e->cpu, event_cgroup(e), e->group_index,
1668 __node_2_pe(b)) < 0;
1669}
1670
1671struct __group_key {
1672 int cpu;
1673 struct cgroup *cgroup;
1674};
1675
1676static inline int __group_cmp(const void *key, const struct rb_node *node)
1677{
1678 const struct __group_key *a = key;
1679 const struct perf_event *b = __node_2_pe(node);
1680
1681
1682 return perf_event_groups_cmp(a->cpu, a->cgroup, b->group_index, b);
1683}
1684
1685
1686
1687
1688
1689
1690static void
1691perf_event_groups_insert(struct perf_event_groups *groups,
1692 struct perf_event *event)
1693{
1694 event->group_index = ++groups->index;
1695
1696 rb_add(&event->group_node, &groups->tree, __group_less);
1697}
1698
1699
1700
1701
1702static void
1703add_event_to_groups(struct perf_event *event, struct perf_event_context *ctx)
1704{
1705 struct perf_event_groups *groups;
1706
1707 groups = get_event_groups(event, ctx);
1708 perf_event_groups_insert(groups, event);
1709}
1710
1711
1712
1713
1714static void
1715perf_event_groups_delete(struct perf_event_groups *groups,
1716 struct perf_event *event)
1717{
1718 WARN_ON_ONCE(RB_EMPTY_NODE(&event->group_node) ||
1719 RB_EMPTY_ROOT(&groups->tree));
1720
1721 rb_erase(&event->group_node, &groups->tree);
1722 init_event_group(event);
1723}
1724
1725
1726
1727
1728static void
1729del_event_from_groups(struct perf_event *event, struct perf_event_context *ctx)
1730{
1731 struct perf_event_groups *groups;
1732
1733 groups = get_event_groups(event, ctx);
1734 perf_event_groups_delete(groups, event);
1735}
1736
1737
1738
1739
1740static struct perf_event *
1741perf_event_groups_first(struct perf_event_groups *groups, int cpu,
1742 struct cgroup *cgrp)
1743{
1744 struct __group_key key = {
1745 .cpu = cpu,
1746 .cgroup = cgrp,
1747 };
1748 struct rb_node *node;
1749
1750 node = rb_find_first(&key, &groups->tree, __group_cmp);
1751 if (node)
1752 return __node_2_pe(node);
1753
1754 return NULL;
1755}
1756
1757
1758
1759
1760static struct perf_event *
1761perf_event_groups_next(struct perf_event *event)
1762{
1763 struct __group_key key = {
1764 .cpu = event->cpu,
1765 .cgroup = event_cgroup(event),
1766 };
1767 struct rb_node *next;
1768
1769 next = rb_next_match(&key, &event->group_node, __group_cmp);
1770 if (next)
1771 return __node_2_pe(next);
1772
1773 return NULL;
1774}
1775
1776
1777
1778
1779#define perf_event_groups_for_each(event, groups) \
1780 for (event = rb_entry_safe(rb_first(&((groups)->tree)), \
1781 typeof(*event), group_node); event; \
1782 event = rb_entry_safe(rb_next(&event->group_node), \
1783 typeof(*event), group_node))
1784
/*
 * Add an event to the lists for its context.  Must be called with
 * ctx->lock held.
 */
1789static void
1790list_add_event(struct perf_event *event, struct perf_event_context *ctx)
1791{
1792 lockdep_assert_held(&ctx->lock);
1793
1794 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
1795 event->attach_state |= PERF_ATTACH_CONTEXT;
1796
1797 event->tstamp = perf_event_time(event);
1798
1799
1800
1801
1802
1803
1804 if (event->group_leader == event) {
1805 event->group_caps = event->event_caps;
1806 add_event_to_groups(event, ctx);
1807 }
1808
1809 list_add_rcu(&event->event_entry, &ctx->event_list);
1810 ctx->nr_events++;
1811 if (event->attr.inherit_stat)
1812 ctx->nr_stat++;
1813
1814 if (event->state > PERF_EVENT_STATE_OFF)
1815 perf_cgroup_event_enable(event, ctx);
1816
1817 ctx->generation++;
1818}
1819
1820
1821
1822
1823static inline void perf_event__state_init(struct perf_event *event)
1824{
1825 event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF :
1826 PERF_EVENT_STATE_INACTIVE;
1827}
1828
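/*
 * Compute the size of the read() format for this event from its read_format
 * bits and the number of siblings in its group.
 */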
1829static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
1830{
1831 int entry = sizeof(u64);
1832 int size = 0;
1833 int nr = 1;
1834
1835 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1836 size += sizeof(u64);
1837
1838 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1839 size += sizeof(u64);
1840
1841 if (event->attr.read_format & PERF_FORMAT_ID)
1842 entry += sizeof(u64);
1843
1844 if (event->attr.read_format & PERF_FORMAT_GROUP) {
1845 nr += nr_siblings;
1846 size += sizeof(u64);
1847 }
1848
1849 size += entry * nr;
1850 event->read_size = size;
1851}
1852
1853static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
1854{
1855 struct perf_sample_data *data;
1856 u16 size = 0;
1857
1858 if (sample_type & PERF_SAMPLE_IP)
1859 size += sizeof(data->ip);
1860
1861 if (sample_type & PERF_SAMPLE_ADDR)
1862 size += sizeof(data->addr);
1863
1864 if (sample_type & PERF_SAMPLE_PERIOD)
1865 size += sizeof(data->period);
1866
1867 if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
1868 size += sizeof(data->weight.full);
1869
1870 if (sample_type & PERF_SAMPLE_READ)
1871 size += event->read_size;
1872
1873 if (sample_type & PERF_SAMPLE_DATA_SRC)
1874 size += sizeof(data->data_src.val);
1875
1876 if (sample_type & PERF_SAMPLE_TRANSACTION)
1877 size += sizeof(data->txn);
1878
1879 if (sample_type & PERF_SAMPLE_PHYS_ADDR)
1880 size += sizeof(data->phys_addr);
1881
1882 if (sample_type & PERF_SAMPLE_CGROUP)
1883 size += sizeof(data->cgroup);
1884
1885 if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
1886 size += sizeof(data->data_page_size);
1887
1888 if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
1889 size += sizeof(data->code_page_size);
1890
1891 event->header_size = size;
1892}
1893
1894
1895
1896
1897
1898static void perf_event__header_size(struct perf_event *event)
1899{
1900 __perf_event_read_size(event,
1901 event->group_leader->nr_siblings);
1902 __perf_event_header_size(event, event->attr.sample_type);
1903}
1904
1905static void perf_event__id_header_size(struct perf_event *event)
1906{
1907 struct perf_sample_data *data;
1908 u64 sample_type = event->attr.sample_type;
1909 u16 size = 0;
1910
1911 if (sample_type & PERF_SAMPLE_TID)
1912 size += sizeof(data->tid_entry);
1913
1914 if (sample_type & PERF_SAMPLE_TIME)
1915 size += sizeof(data->time);
1916
1917 if (sample_type & PERF_SAMPLE_IDENTIFIER)
1918 size += sizeof(data->id);
1919
1920 if (sample_type & PERF_SAMPLE_ID)
1921 size += sizeof(data->id);
1922
1923 if (sample_type & PERF_SAMPLE_STREAM_ID)
1924 size += sizeof(data->stream_id);
1925
1926 if (sample_type & PERF_SAMPLE_CPU)
1927 size += sizeof(data->cpu_entry);
1928
1929 event->id_header_size = size;
1930}
1931
1932static bool perf_event_validate_size(struct perf_event *event)
1933{
1934
1935
1936
1937
1938 __perf_event_read_size(event, event->group_leader->nr_siblings + 1);
1939 __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
1940 perf_event__id_header_size(event);
1941
1942
1943
1944
1945
1946 if (event->read_size + event->header_size +
1947 event->id_header_size + sizeof(struct perf_event_header) >= 16*1024)
1948 return false;
1949
1950 return true;
1951}
1952
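/*
 * Attach @event to its group leader: link it on the sibling list, narrow
 * the leader's group capabilities, and refresh the group's header sizes.
 */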
1953static void perf_group_attach(struct perf_event *event)
1954{
1955 struct perf_event *group_leader = event->group_leader, *pos;
1956
1957 lockdep_assert_held(&event->ctx->lock);
1958
1959
1960
1961
1962 if (event->attach_state & PERF_ATTACH_GROUP)
1963 return;
1964
1965 event->attach_state |= PERF_ATTACH_GROUP;
1966
1967 if (group_leader == event)
1968 return;
1969
1970 WARN_ON_ONCE(group_leader->ctx != event->ctx);
1971
1972 group_leader->group_caps &= event->event_caps;
1973
1974 list_add_tail(&event->sibling_list, &group_leader->sibling_list);
1975 group_leader->nr_siblings++;
1976
1977 perf_event__header_size(group_leader);
1978
1979 for_each_sibling_event(pos, group_leader)
1980 perf_event__header_size(pos);
1981}
1982
/*
 * Remove an event from the lists for its context.  Must be called with
 * ctx->lock held.
 */
1987static void
1988list_del_event(struct perf_event *event, struct perf_event_context *ctx)
1989{
1990 WARN_ON_ONCE(event->ctx != ctx);
1991 lockdep_assert_held(&ctx->lock);
1992
1993
1994
1995
1996 if (!(event->attach_state & PERF_ATTACH_CONTEXT))
1997 return;
1998
1999 event->attach_state &= ~PERF_ATTACH_CONTEXT;
2000
2001 ctx->nr_events--;
2002 if (event->attr.inherit_stat)
2003 ctx->nr_stat--;
2004
2005 list_del_rcu(&event->event_entry);
2006
2007 if (event->group_leader == event)
2008 del_event_from_groups(event, ctx);
2009
2010
2011
2012
2013
2014
2015
2016
2017 if (event->state > PERF_EVENT_STATE_OFF) {
2018 perf_cgroup_event_disable(event, ctx);
2019 perf_event_set_state(event, PERF_EVENT_STATE_OFF);
2020 }
2021
2022 ctx->generation++;
2023}
2024
2025static int
2026perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event)
2027{
2028 if (!has_aux(aux_event))
2029 return 0;
2030
2031 if (!event->pmu->aux_output_match)
2032 return 0;
2033
2034 return event->pmu->aux_output_match(aux_event);
2035}
2036
2037static void put_event(struct perf_event *event);
2038static void event_sched_out(struct perf_event *event,
2039 struct perf_cpu_context *cpuctx,
2040 struct perf_event_context *ctx);
2041
2042static void perf_put_aux_event(struct perf_event *event)
2043{
2044 struct perf_event_context *ctx = event->ctx;
2045 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2046 struct perf_event *iter;
2047
2048
2049
2050
2051 if (event->aux_event) {
2052 iter = event->aux_event;
2053 event->aux_event = NULL;
2054 put_event(iter);
2055 return;
2056 }
2057
2058
2059
2060
2061
2062 for_each_sibling_event(iter, event->group_leader) {
2063 if (iter->aux_event != event)
2064 continue;
2065
2066 iter->aux_event = NULL;
2067 put_event(event);
2068
2069
2070
2071
2072
2073
2074 event_sched_out(iter, cpuctx, ctx);
2075 perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
2076 }
2077}
2078
2079static bool perf_need_aux_event(struct perf_event *event)
2080{
2081 return !!event->attr.aux_output || !!event->attr.aux_sample_size;
2082}
2083
2084static int perf_get_aux_event(struct perf_event *event,
2085 struct perf_event *group_leader)
2086{
2087
2088
2089
2090
2091
2092
2093 if (!group_leader)
2094 return 0;
2095
2096
2097
2098
2099 if (event->attr.aux_output && event->attr.aux_sample_size)
2100 return 0;
2101
2102 if (event->attr.aux_output &&
2103 !perf_aux_output_match(event, group_leader))
2104 return 0;
2105
2106 if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux)
2107 return 0;
2108
2109 if (!atomic_long_inc_not_zero(&group_leader->refcount))
2110 return 0;
2111
2112
2113
2114
2115
2116
2117
2118 event->aux_event = group_leader;
2119
2120 return 1;
2121}
2122
2123static inline struct list_head *get_event_list(struct perf_event *event)
2124{
2125 struct perf_event_context *ctx = event->ctx;
2126 return event->attr.pinned ? &ctx->pinned_active : &ctx->flexible_active;
2127}
2128
2129
2130
2131
2132
2133
2134
2135static inline void perf_remove_sibling_event(struct perf_event *event)
2136{
2137 struct perf_event_context *ctx = event->ctx;
2138 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2139
2140 event_sched_out(event, cpuctx, ctx);
2141 perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
2142}
2143
2144static void perf_group_detach(struct perf_event *event)
2145{
2146 struct perf_event *leader = event->group_leader;
2147 struct perf_event *sibling, *tmp;
2148 struct perf_event_context *ctx = event->ctx;
2149
2150 lockdep_assert_held(&ctx->lock);
2151
2152
2153
2154
2155 if (!(event->attach_state & PERF_ATTACH_GROUP))
2156 return;
2157
2158 event->attach_state &= ~PERF_ATTACH_GROUP;
2159
2160 perf_put_aux_event(event);
2161
2162
2163
2164
2165 if (leader != event) {
2166 list_del_init(&event->sibling_list);
2167 event->group_leader->nr_siblings--;
2168 goto out;
2169 }
2170
2171
2172
2173
2174
2175
2176 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) {
2177
2178 if (sibling->event_caps & PERF_EV_CAP_SIBLING)
2179 perf_remove_sibling_event(sibling);
2180
2181 sibling->group_leader = sibling;
2182 list_del_init(&sibling->sibling_list);
2183
2184
2185 sibling->group_caps = event->group_caps;
2186
2187 if (!RB_EMPTY_NODE(&event->group_node)) {
2188 add_event_to_groups(sibling, event->ctx);
2189
2190 if (sibling->state == PERF_EVENT_STATE_ACTIVE)
2191 list_add_tail(&sibling->active_list, get_event_list(sibling));
2192 }
2193
2194 WARN_ON_ONCE(sibling->ctx != event->ctx);
2195 }
2196
2197out:
2198 for_each_sibling_event(tmp, leader)
2199 perf_event__header_size(tmp);
2200
2201 perf_event__header_size(leader);
2202}
2203
2204static void sync_child_event(struct perf_event *child_event);
2205
2206static void perf_child_detach(struct perf_event *event)
2207{
2208 struct perf_event *parent_event = event->parent;
2209
2210 if (!(event->attach_state & PERF_ATTACH_CHILD))
2211 return;
2212
2213 event->attach_state &= ~PERF_ATTACH_CHILD;
2214
2215 if (WARN_ON_ONCE(!parent_event))
2216 return;
2217
2218 lockdep_assert_held(&parent_event->child_mutex);
2219
2220 sync_child_event(event);
2221 list_del_init(&event->child_list);
2222}
2223
2224static bool is_orphaned_event(struct perf_event *event)
2225{
2226 return event->state == PERF_EVENT_STATE_DEAD;
2227}
2228
2229static inline int __pmu_filter_match(struct perf_event *event)
2230{
2231 struct pmu *pmu = event->pmu;
2232 return pmu->filter_match ? pmu->filter_match(event) : 1;
2233}
2234
2235
2236
2237
2238
2239
2240
2241static inline int pmu_filter_match(struct perf_event *event)
2242{
2243 struct perf_event *sibling;
2244
2245 if (!__pmu_filter_match(event))
2246 return 0;
2247
2248 for_each_sibling_event(sibling, event) {
2249 if (!__pmu_filter_match(sibling))
2250 return 0;
2251 }
2252
2253 return 1;
2254}
2255
2256static inline int
2257event_filter_match(struct perf_event *event)
2258{
2259 return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
2260 perf_cgroup_match(event) && pmu_filter_match(event);
2261}
2262
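/*
 * Stop an ACTIVE event: remove it from the PMU, fold its runtime into the
 * time totals and update the context's active-event accounting.
 */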
2263static void
2264event_sched_out(struct perf_event *event,
2265 struct perf_cpu_context *cpuctx,
2266 struct perf_event_context *ctx)
2267{
2268 enum perf_event_state state = PERF_EVENT_STATE_INACTIVE;
2269
2270 WARN_ON_ONCE(event->ctx != ctx);
2271 lockdep_assert_held(&ctx->lock);
2272
2273 if (event->state != PERF_EVENT_STATE_ACTIVE)
2274 return;
2275
2276
2277
2278
2279
2280
2281 list_del_init(&event->active_list);
2282
2283 perf_pmu_disable(event->pmu);
2284
2285 event->pmu->del(event, 0);
2286 event->oncpu = -1;
2287
2288 if (READ_ONCE(event->pending_disable) >= 0) {
2289 WRITE_ONCE(event->pending_disable, -1);
2290 perf_cgroup_event_disable(event, ctx);
2291 state = PERF_EVENT_STATE_OFF;
2292 }
2293 perf_event_set_state(event, state);
2294
2295 if (!is_software_event(event))
2296 cpuctx->active_oncpu--;
2297 if (!--ctx->nr_active)
2298 perf_event_ctx_deactivate(ctx);
2299 if (event->attr.freq && event->attr.sample_freq)
2300 ctx->nr_freq--;
2301 if (event->attr.exclusive || !cpuctx->active_oncpu)
2302 cpuctx->exclusive = 0;
2303
2304 perf_pmu_enable(event->pmu);
2305}
2306
2307static void
2308group_sched_out(struct perf_event *group_event,
2309 struct perf_cpu_context *cpuctx,
2310 struct perf_event_context *ctx)
2311{
2312 struct perf_event *event;
2313
2314 if (group_event->state != PERF_EVENT_STATE_ACTIVE)
2315 return;
2316
2317 perf_pmu_disable(ctx->pmu);
2318
2319 event_sched_out(group_event, cpuctx, ctx);
2320
2321
2322
2323
2324 for_each_sibling_event(event, group_event)
2325 event_sched_out(event, cpuctx, ctx);
2326
2327 perf_pmu_enable(ctx->pmu);
2328}
2329
2330#define DETACH_GROUP 0x01UL
2331#define DETACH_CHILD 0x02UL
2332
/*
 * Cross CPU call to remove a performance event: schedule the event out,
 * optionally detach it from its group and parent, and unlink it from the
 * context.
 */
2339static void
2340__perf_remove_from_context(struct perf_event *event,
2341 struct perf_cpu_context *cpuctx,
2342 struct perf_event_context *ctx,
2343 void *info)
2344{
2345 unsigned long flags = (unsigned long)info;
2346
2347 if (ctx->is_active & EVENT_TIME) {
2348 update_context_time(ctx);
2349 update_cgrp_time_from_cpuctx(cpuctx);
2350 }
2351
2352 event_sched_out(event, cpuctx, ctx);
2353 if (flags & DETACH_GROUP)
2354 perf_group_detach(event);
2355 if (flags & DETACH_CHILD)
2356 perf_child_detach(event);
2357 list_del_event(event, ctx);
2358
2359 if (!ctx->nr_events && ctx->is_active) {
2360 ctx->is_active = 0;
2361 ctx->rotate_necessary = 0;
2362 if (ctx->task) {
2363 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
2364 cpuctx->task_ctx = NULL;
2365 }
2366 }
2367}
2368
/*
 * Remove the event from a task's (or a CPU's) list of events.
 *
 * If the context is currently inactive this can be done directly under
 * ctx->lock; otherwise event_function_call() is used to run
 * __perf_remove_from_context() on the CPU that owns the context.
 */
2379static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
2380{
2381 struct perf_event_context *ctx = event->ctx;
2382
2383 lockdep_assert_held(&ctx->mutex);
2384
2385
2386
2387
2388
2389
2390 raw_spin_lock_irq(&ctx->lock);
2391 if (!ctx->is_active) {
2392 __perf_remove_from_context(event, __get_cpu_context(ctx),
2393 ctx, (void *)flags);
2394 raw_spin_unlock_irq(&ctx->lock);
2395 return;
2396 }
2397 raw_spin_unlock_irq(&ctx->lock);
2398
2399 event_function_call(event, __perf_remove_from_context, (void *)flags);
2400}
2401
/*
 * Cross CPU call to disable a performance event.
 */
2405static void __perf_event_disable(struct perf_event *event,
2406 struct perf_cpu_context *cpuctx,
2407 struct perf_event_context *ctx,
2408 void *info)
2409{
2410 if (event->state < PERF_EVENT_STATE_INACTIVE)
2411 return;
2412
2413 if (ctx->is_active & EVENT_TIME) {
2414 update_context_time(ctx);
2415 update_cgrp_time_from_event(event);
2416 }
2417
2418 if (event == event->group_leader)
2419 group_sched_out(event, cpuctx, ctx);
2420 else
2421 event_sched_out(event, cpuctx, ctx);
2422
2423 perf_event_set_state(event, PERF_EVENT_STATE_OFF);
2424 perf_cgroup_event_disable(event, ctx);
2425}
2426
/*
 * Disable an event.
 *
 * If event->ctx is a cloned context, callers must make sure that every task
 * struct that event->ctx->task could possibly point to remains valid.  This
 * condition is satisfied when called through perf_event_for_each_child or
 * perf_event_for_each because they hold the top-level event's child_mutex,
 * so any descendant that goes to exit will block in perf_event_exit_event().
 */
2441static void _perf_event_disable(struct perf_event *event)
2442{
2443 struct perf_event_context *ctx = event->ctx;
2444
2445 raw_spin_lock_irq(&ctx->lock);
2446 if (event->state <= PERF_EVENT_STATE_OFF) {
2447 raw_spin_unlock_irq(&ctx->lock);
2448 return;
2449 }
2450 raw_spin_unlock_irq(&ctx->lock);
2451
2452 event_function_call(event, __perf_event_disable, NULL);
2453}
2454
2455void perf_event_disable_local(struct perf_event *event)
2456{
2457 event_function_local(event, __perf_event_disable, NULL);
2458}
2459
/*
 * Disable an event; takes the ctx mutex via perf_event_ctx_lock() before
 * calling _perf_event_disable(), so it is safe for external callers.
 */
2464void perf_event_disable(struct perf_event *event)
2465{
2466 struct perf_event_context *ctx;
2467
2468 ctx = perf_event_ctx_lock(event);
2469 _perf_event_disable(event);
2470 perf_event_ctx_unlock(event, ctx);
2471}
2472EXPORT_SYMBOL_GPL(perf_event_disable);
2473
2474void perf_event_disable_inatomic(struct perf_event *event)
2475{
2476 WRITE_ONCE(event->pending_disable, smp_processor_id());
2477
2478 irq_work_queue(&event->pending);
2479}
2480
2481static void perf_set_shadow_time(struct perf_event *event,
2482 struct perf_event_context *ctx)
2483{
	/*
	 * Record a per-event "shadow" offset of the context (or cgroup) time
	 * at the moment the event starts, so that at sample time the enabled
	 * and running times can be derived from the current context time
	 * without touching ctx->timestamp from NMI context.  Cgroup events
	 * use the per-CPU cgroup timestamp instead of the context timestamp.
	 */
2509 if (is_cgroup_event(event))
2510 perf_cgroup_set_shadow_time(event, event->tstamp);
2511 else
2512 event->shadow_ctx_time = event->tstamp - ctx->timestamp;
2513}
2514
2515#define MAX_INTERRUPTS (~0ULL)
2516
2517static void perf_log_throttle(struct perf_event *event, int enable);
2518static void perf_log_itrace_start(struct perf_event *event);
2519
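/*
 * Start an INACTIVE event: mark it ACTIVE, program it onto the PMU and
 * update the context's active-event accounting.  Returns -EAGAIN when the
 * PMU rejects the event.
 */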
2520static int
2521event_sched_in(struct perf_event *event,
2522 struct perf_cpu_context *cpuctx,
2523 struct perf_event_context *ctx)
2524{
2525 int ret = 0;
2526
2527 WARN_ON_ONCE(event->ctx != ctx);
2528
2529 lockdep_assert_held(&ctx->lock);
2530
2531 if (event->state <= PERF_EVENT_STATE_OFF)
2532 return 0;
2533
2534 WRITE_ONCE(event->oncpu, smp_processor_id());
2535
2536
2537
2538
2539
2540 smp_wmb();
2541 perf_event_set_state(event, PERF_EVENT_STATE_ACTIVE);
2542
2543
2544
2545
2546
2547
2548 if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) {
2549 perf_log_throttle(event, 1);
2550 event->hw.interrupts = 0;
2551 }
2552
2553 perf_pmu_disable(event->pmu);
2554
2555 perf_set_shadow_time(event, ctx);
2556
2557 perf_log_itrace_start(event);
2558
2559 if (event->pmu->add(event, PERF_EF_START)) {
2560 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
2561 event->oncpu = -1;
2562 ret = -EAGAIN;
2563 goto out;
2564 }
2565
2566 if (!is_software_event(event))
2567 cpuctx->active_oncpu++;
2568 if (!ctx->nr_active++)
2569 perf_event_ctx_activate(ctx);
2570 if (event->attr.freq && event->attr.sample_freq)
2571 ctx->nr_freq++;
2572
2573 if (event->attr.exclusive)
2574 cpuctx->exclusive = 1;
2575
2576out:
2577 perf_pmu_enable(event->pmu);
2578
2579 return ret;
2580}
2581
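/*
 * Schedule a whole group onto the PMU within one transaction; if any member
 * fails, the partially scheduled group is unwound and -EAGAIN is returned.
 */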
2582static int
2583group_sched_in(struct perf_event *group_event,
2584 struct perf_cpu_context *cpuctx,
2585 struct perf_event_context *ctx)
2586{
2587 struct perf_event *event, *partial_group = NULL;
2588 struct pmu *pmu = ctx->pmu;
2589
2590 if (group_event->state == PERF_EVENT_STATE_OFF)
2591 return 0;
2592
2593 pmu->start_txn(pmu, PERF_PMU_TXN_ADD);
2594
2595 if (event_sched_in(group_event, cpuctx, ctx))
2596 goto error;
2597
2598
2599
2600
2601 for_each_sibling_event(event, group_event) {
2602 if (event_sched_in(event, cpuctx, ctx)) {
2603 partial_group = event;
2604 goto group_error;
2605 }
2606 }
2607
2608 if (!pmu->commit_txn(pmu))
2609 return 0;
2610
2611group_error:
2612
2613
2614
2615
2616
2617 for_each_sibling_event(event, group_event) {
2618 if (event == partial_group)
2619 break;
2620
2621 event_sched_out(event, cpuctx, ctx);
2622 }
2623 event_sched_out(group_event, cpuctx, ctx);
2624
2625error:
2626 pmu->cancel_txn(pmu);
2627 return -EAGAIN;
2628}
2629
2630
2631
2632
2633static int group_can_go_on(struct perf_event *event,
2634 struct perf_cpu_context *cpuctx,
2635 int can_add_hw)
2636{
2637
2638
2639
2640 if (event->group_caps & PERF_EV_CAP_SOFTWARE)
2641 return 1;
2642
2643
2644
2645
2646 if (cpuctx->exclusive)
2647 return 0;
2648
2649
2650
2651
2652 if (event->attr.exclusive && !list_empty(get_event_list(event)))
2653 return 0;
2654
2655
2656
2657
2658 return can_add_hw;
2659}
2660
2661static void add_event_to_ctx(struct perf_event *event,
2662 struct perf_event_context *ctx)
2663{
2664 list_add_event(event, ctx);
2665 perf_group_attach(event);
2666}
2667
2668static void ctx_sched_out(struct perf_event_context *ctx,
2669 struct perf_cpu_context *cpuctx,
2670 enum event_type_t event_type);
2671static void
2672ctx_sched_in(struct perf_event_context *ctx,
2673 struct perf_cpu_context *cpuctx,
2674 enum event_type_t event_type,
2675 struct task_struct *task);
2676
2677static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
2678 struct perf_event_context *ctx,
2679 enum event_type_t event_type)
2680{
2681 if (!cpuctx->task_ctx)
2682 return;
2683
2684 if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
2685 return;
2686
2687 ctx_sched_out(ctx, cpuctx, event_type);
2688}
2689
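/*
 * Schedule pinned groups before flexible ones, and the CPU context before
 * the task context, so pinned events get first pick of the hardware.
 */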
2690static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
2691 struct perf_event_context *ctx,
2692 struct task_struct *task)
2693{
2694 cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task);
2695 if (ctx)
2696 ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
2697 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
2698 if (ctx)
2699 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
2700}
2701
2702/*
2703 * We want to maintain the following priority of scheduling:
2704 *  - CPU pinned (EVENT_CPU | EVENT_PINNED)
2705 *  - task pinned (EVENT_PINNED)
2706 *  - CPU flexible (EVENT_CPU | EVENT_FLEXIBLE)
2707 *  - task flexible (EVENT_FLEXIBLE).
2708 *
2709 * In order to avoid unscheduling and scheduling back in everything every
2710 * time an event is added, only do it for the groups of equal priority and
2711 * below.
2712 *
2713 * This can be called after a batch operation on task events, in which case
2714 * event_type is a bit mask of the types of events involved. For CPU events,
2715 * event_type is only either EVENT_PINNED or EVENT_FLEXIBLE.
2716 */
2717static void ctx_resched(struct perf_cpu_context *cpuctx,
2718 struct perf_event_context *task_ctx,
2719 enum event_type_t event_type)
2720{
2721 enum event_type_t ctx_event_type;
2722 bool cpu_event = !!(event_type & EVENT_CPU);
2723
2724	/*
2725	 * If pinned groups are involved, flexible groups also need to be
2726	 * scheduled out.
2727	 */
2728 if (event_type & EVENT_PINNED)
2729 event_type |= EVENT_FLEXIBLE;
2730
2731 ctx_event_type = event_type & EVENT_ALL;
2732
2733 perf_pmu_disable(cpuctx->ctx.pmu);
2734 if (task_ctx)
2735 task_ctx_sched_out(cpuctx, task_ctx, event_type);
2736
2737	/*
2738	 * Decide which cpu ctx groups to schedule out based on the types
2739	 * of events that caused rescheduling:
2740	 *  - EVENT_CPU: schedule out corresponding groups;
2741	 *  - EVENT_PINNED task events: schedule out EVENT_FLEXIBLE groups;
2742	 *  - otherwise, do nothing more.
2743	 */
2744 if (cpu_event)
2745 cpu_ctx_sched_out(cpuctx, ctx_event_type);
2746 else if (ctx_event_type & EVENT_PINNED)
2747 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
2748
2749 perf_event_sched_in(cpuctx, task_ctx, current);
2750 perf_pmu_enable(cpuctx->ctx.pmu);
2751}
2752
2753void perf_pmu_resched(struct pmu *pmu)
2754{
2755 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
2756 struct perf_event_context *task_ctx = cpuctx->task_ctx;
2757
2758 perf_ctx_lock(cpuctx, task_ctx);
2759 ctx_resched(cpuctx, task_ctx, EVENT_ALL|EVENT_CPU);
2760 perf_ctx_unlock(cpuctx, task_ctx);
2761}
2762
2763/*
2764 * Cross CPU call to install and enable a performance event.
2765 *
2766 * Very similar to remote_function() + event_function() but cannot assume that
2767 * things like ctx->is_active and cpuctx->task_ctx are set.
2768 */
2769static int __perf_install_in_context(void *info)
2770{
2771 struct perf_event *event = info;
2772 struct perf_event_context *ctx = event->ctx;
2773 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2774 struct perf_event_context *task_ctx = cpuctx->task_ctx;
2775 bool reprogram = true;
2776 int ret = 0;
2777
2778 raw_spin_lock(&cpuctx->ctx.lock);
2779 if (ctx->task) {
2780 raw_spin_lock(&ctx->lock);
2781 task_ctx = ctx;
2782
2783 reprogram = (ctx->task == current);
2784
2785		/*
2786		 * If the task is running, it must be running on this CPU,
2787		 * otherwise we cannot reprogram things.
2788		 *
2789		 * If it is not running, we don't care; ctx->lock will
2790		 * serialize against it becoming runnable.
2791		 */
2792 if (task_curr(ctx->task) && !reprogram) {
2793 ret = -ESRCH;
2794 goto unlock;
2795 }
2796
2797 WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx);
2798 } else if (task_ctx) {
2799 raw_spin_lock(&task_ctx->lock);
2800 }
2801
2802#ifdef CONFIG_CGROUP_PERF
2803 if (event->state > PERF_EVENT_STATE_OFF && is_cgroup_event(event)) {
2804		/*
2805		 * If the current cgroup doesn't match the event's
2806		 * cgroup, we should not try to schedule it.
2807		 */
2808 struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
2809 reprogram = cgroup_is_descendant(cgrp->css.cgroup,
2810 event->cgrp->css.cgroup);
2811 }
2812#endif
2813
2814 if (reprogram) {
2815 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
2816 add_event_to_ctx(event, ctx);
2817 ctx_resched(cpuctx, task_ctx, get_event_type(event));
2818 } else {
2819 add_event_to_ctx(event, ctx);
2820 }
2821
2822unlock:
2823 perf_ctx_unlock(cpuctx, task_ctx);
2824
2825 return ret;
2826}
2827
2828static bool exclusive_event_installable(struct perf_event *event,
2829 struct perf_event_context *ctx);
2830
2831/*
2832 * Attach a performance event to a context.
2833 *
2834 * Very similar to event_function_call(), see the comment there.
2835 */
2836static void
2837perf_install_in_context(struct perf_event_context *ctx,
2838 struct perf_event *event,
2839 int cpu)
2840{
2841 struct task_struct *task = READ_ONCE(ctx->task);
2842
2843 lockdep_assert_held(&ctx->mutex);
2844
2845 WARN_ON_ONCE(!exclusive_event_installable(event, ctx));
2846
2847 if (event->cpu != -1)
2848 event->cpu = cpu;
2849
2850	/*
2851	 * Ensures that if we can observe event->ctx, both the event and ctx
2852	 * will be 'complete'. See perf_iterate_sb_cpu().
2853	 */
2854 smp_store_release(&event->ctx, ctx);
2855
2856	/*
2857	 * perf_event_attr::disabled events will not run and can be initialized
2858	 * without IPI. Except when this is the first event for the context, in
2859	 * that case we need the magic of the IPI to set ctx->is_active.
2860	 *
2861	 * The IOC_ENABLE that is sure to follow the creation of a disabled
2862	 * event will issue the IPI and reprogram the hardware.
2863	 */
2864 if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) {
2865 raw_spin_lock_irq(&ctx->lock);
2866 if (ctx->task == TASK_TOMBSTONE) {
2867 raw_spin_unlock_irq(&ctx->lock);
2868 return;
2869 }
2870 add_event_to_ctx(event, ctx);
2871 raw_spin_unlock_irq(&ctx->lock);
2872 return;
2873 }
2874
2875 if (!task) {
2876 cpu_function_call(cpu, __perf_install_in_context, event);
2877 return;
2878 }
2879
2880	/*
2881	 * Should not happen, we validate the ctx is still alive before calling.
2882	 */
2883 if (WARN_ON_ONCE(task == TASK_TOMBSTONE))
2884 return;
2885
2886	/*
2887	 * Installing events is tricky because we cannot rely on ctx->is_active
2888	 * to be set in case this is the nr_events 0 -> 1 transition.
2889	 *
2890	 * Instead we use task_curr(), which tells us if the task is running.
2891	 * However task_curr() can change at any moment, so the only reliable
2892	 * approach is to retry: send the IPI and, if it finds the task is no
2893	 * longer running, re-check under ctx->lock and either install the
2894	 * event directly (task really not running) or try the IPI again.
2895	 */
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916 smp_mb();
2917again:
2918 if (!task_function_call(task, __perf_install_in_context, event))
2919 return;
2920
2921 raw_spin_lock_irq(&ctx->lock);
2922 task = ctx->task;
2923 if (WARN_ON_ONCE(task == TASK_TOMBSTONE)) {
2924		/*
2925		 * Cannot happen because we already checked above (which also
2926		 * cannot happen), and we hold ctx->mutex, which serializes us
2927		 * against perf_event_exit_task_context().
2928		 */
2929 raw_spin_unlock_irq(&ctx->lock);
2930 return;
2931 }
2932
2933	/*
2934	 * If the task is not running, ctx->lock will keep it that way; install directly.
2935	 */
2936 if (task_curr(task)) {
2937 raw_spin_unlock_irq(&ctx->lock);
2938 goto again;
2939 }
2940 add_event_to_ctx(event, ctx);
2941 raw_spin_unlock_irq(&ctx->lock);
2942}
2943
2944/*
2945 * Cross CPU call to enable a performance event.
2946 */
2947static void __perf_event_enable(struct perf_event *event,
2948 struct perf_cpu_context *cpuctx,
2949 struct perf_event_context *ctx,
2950 void *info)
2951{
2952 struct perf_event *leader = event->group_leader;
2953 struct perf_event_context *task_ctx;
2954
2955 if (event->state >= PERF_EVENT_STATE_INACTIVE ||
2956 event->state <= PERF_EVENT_STATE_ERROR)
2957 return;
2958
2959 if (ctx->is_active)
2960 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
2961
2962 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
2963 perf_cgroup_event_enable(event, ctx);
2964
2965 if (!ctx->is_active)
2966 return;
2967
2968 if (!event_filter_match(event)) {
2969 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
2970 return;
2971 }
2972
2973	/*
2974	 * If the event is in a group and isn't the group leader,
2975	 * then don't put it on unless the group is on.
2976	 */
2977 if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
2978 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
2979 return;
2980 }
2981
2982 task_ctx = cpuctx->task_ctx;
2983 if (ctx->task)
2984 WARN_ON_ONCE(task_ctx != ctx);
2985
2986 ctx_resched(cpuctx, task_ctx, get_event_type(event));
2987}
2988
2989
2990/*
2991 * Enable an event.
2992 *
2993 * If event->ctx is a cloned context, callers must make sure that
2994 * every task struct that event->ctx->task could possibly point to
2995 * remains valid.  This condition is satisfied when called through
2996 * perf_event_for_each_child or perf_event_remove_from_context.
2997 */
2998static void _perf_event_enable(struct perf_event *event)
2999{
3000 struct perf_event_context *ctx = event->ctx;
3001
3002 raw_spin_lock_irq(&ctx->lock);
3003 if (event->state >= PERF_EVENT_STATE_INACTIVE ||
3004 event->state < PERF_EVENT_STATE_ERROR) {
3005out:
3006 raw_spin_unlock_irq(&ctx->lock);
3007 return;
3008 }
3009
3010	/*
3011	 * If the event is in error state, clear that first.
3012	 *
3013	 * That way, if we see the event in error state below, we know that it
3014	 * has gone back into error state, as distinct from the task having
3015	 * been scheduled away before the smp call arrived.
3016	 */
3017 if (event->state == PERF_EVENT_STATE_ERROR) {
3018		/*
3019		 * Detached SIBLING events cannot leave ERROR state.
3020		 */
3021 if (event->event_caps & PERF_EV_CAP_SIBLING &&
3022 event->group_leader == event)
3023 goto out;
3024
3025 event->state = PERF_EVENT_STATE_OFF;
3026 }
3027 raw_spin_unlock_irq(&ctx->lock);
3028
3029 event_function_call(event, __perf_event_enable, NULL);
3030}
3031
3032
3033
3034
3035void perf_event_enable(struct perf_event *event)
3036{
3037 struct perf_event_context *ctx;
3038
3039 ctx = perf_event_ctx_lock(event);
3040 _perf_event_enable(event);
3041 perf_event_ctx_unlock(event, ctx);
3042}
3043EXPORT_SYMBOL_GPL(perf_event_enable);
3044
3045struct stop_event_data {
3046 struct perf_event *event;
3047 unsigned int restart;
3048};
3049
3050static int __perf_event_stop(void *info)
3051{
3052 struct stop_event_data *sd = info;
3053 struct perf_event *event = sd->event;
3054
3055
3056 if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
3057 return 0;
3058
3059	/* matches smp_wmb() in event_sched_in() */
3060 smp_rmb();
3061
3062	/*
3063	 * There is a window with interrupts enabled before we get here,
3064	 * so we need to check again lest we try to stop another CPU's event.
3065	 */
3066 if (READ_ONCE(event->oncpu) != smp_processor_id())
3067 return -EAGAIN;
3068
3069 event->pmu->stop(event, PERF_EF_UPDATE);
3070
3071	/*
3072	 * May race with the actual stop (through perf_pmu_output_stop()),
3073	 * but it is only used for events with AUX ring buffers, and such
3074	 * events will refuse to restart because of rb::aux_mmap_count == 0,
3075	 * see comments in perf_aux_output_begin().
3076	 *
3077	 * Since this is happening on an event-local CPU, no trace is lost
3078	 * while restarting.
3079	 */
3080 if (sd->restart)
3081 event->pmu->start(event, 0);
3082
3083 return 0;
3084}
3085
3086static int perf_event_stop(struct perf_event *event, int restart)
3087{
3088 struct stop_event_data sd = {
3089 .event = event,
3090 .restart = restart,
3091 };
3092 int ret = 0;
3093
3094 do {
3095 if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
3096 return 0;
3097
3098		/* matches smp_wmb() in event_sched_in() */
3099 smp_rmb();
3100
3101		/*
3102		 * We only want to restart ACTIVE events, so if the event goes
3103		 * inactive here (event->oncpu == -1), there's nothing more to
3104		 * do; fall through with ret == -ENXIO from cpu_function_call().
3105		 */
3106 ret = cpu_function_call(READ_ONCE(event->oncpu),
3107 __perf_event_stop, &sd);
3108 } while (ret == -EAGAIN);
3109
3110 return ret;
3111}
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135void perf_event_addr_filters_sync(struct perf_event *event)
3136{
3137 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
3138
3139 if (!has_addr_filter(event))
3140 return;
3141
3142 raw_spin_lock(&ifh->lock);
3143 if (event->addr_filters_gen != event->hw.addr_filters_gen) {
3144 event->pmu->addr_filters_sync(event);
3145 event->hw.addr_filters_gen = event->addr_filters_gen;
3146 }
3147 raw_spin_unlock(&ifh->lock);
3148}
3149EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);
3150
3151static int _perf_event_refresh(struct perf_event *event, int refresh)
3152{
3153	/*
3154	 * not supported on inherited events
3155	 */
3156 if (event->attr.inherit || !is_sampling_event(event))
3157 return -EINVAL;
3158
3159 atomic_add(refresh, &event->event_limit);
3160 _perf_event_enable(event);
3161
3162 return 0;
3163}
3164
3165
3166
3167
3168int perf_event_refresh(struct perf_event *event, int refresh)
3169{
3170 struct perf_event_context *ctx;
3171 int ret;
3172
3173 ctx = perf_event_ctx_lock(event);
3174 ret = _perf_event_refresh(event, refresh);
3175 perf_event_ctx_unlock(event, ctx);
3176
3177 return ret;
3178}
3179EXPORT_SYMBOL_GPL(perf_event_refresh);
3180
3181static int perf_event_modify_breakpoint(struct perf_event *bp,
3182 struct perf_event_attr *attr)
3183{
3184 int err;
3185
3186 _perf_event_disable(bp);
3187
3188 err = modify_user_hw_breakpoint_check(bp, attr, true);
3189
3190 if (!bp->attr.disabled)
3191 _perf_event_enable(bp);
3192
3193 return err;
3194}
3195
3196static int perf_event_modify_attr(struct perf_event *event,
3197 struct perf_event_attr *attr)
3198{
3199 int (*func)(struct perf_event *, struct perf_event_attr *);
3200 struct perf_event *child;
3201 int err;
3202
3203 if (event->attr.type != attr->type)
3204 return -EINVAL;
3205
3206 switch (event->attr.type) {
3207 case PERF_TYPE_BREAKPOINT:
3208 func = perf_event_modify_breakpoint;
3209 break;
3210 default:
3211
3212 return -EOPNOTSUPP;
3213 }
3214
3215 WARN_ON_ONCE(event->ctx->parent_ctx);
3216
3217 mutex_lock(&event->child_mutex);
3218 err = func(event, attr);
3219 if (err)
3220 goto out;
3221 list_for_each_entry(child, &event->child_list, child_list) {
3222 err = func(child, attr);
3223 if (err)
3224 goto out;
3225 }
3226out:
3227 mutex_unlock(&event->child_mutex);
3228 return err;
3229}
3230
3231static void ctx_sched_out(struct perf_event_context *ctx,
3232 struct perf_cpu_context *cpuctx,
3233 enum event_type_t event_type)
3234{
3235 struct perf_event *event, *tmp;
3236 int is_active = ctx->is_active;
3237
3238 lockdep_assert_held(&ctx->lock);
3239
3240 if (likely(!ctx->nr_events)) {
3241
3242
3243
3244 WARN_ON_ONCE(ctx->is_active);
3245 if (ctx->task)
3246 WARN_ON_ONCE(cpuctx->task_ctx);
3247 return;
3248 }
3249
3250 ctx->is_active &= ~event_type;
3251 if (!(ctx->is_active & EVENT_ALL))
3252 ctx->is_active = 0;
3253
3254 if (ctx->task) {
3255 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
3256 if (!ctx->is_active)
3257 cpuctx->task_ctx = NULL;
3258 }
3259
3260	/*
3261	 * Always update time if it was set; not only when it changes.
3262	 * Otherwise we can 'forget' to update time for any but the last
3263	 * context we sched out. For example:
3264	 *
3265	 *   ctx_sched_out(.event_type = EVENT_FLEXIBLE)
3266	 *   ctx_sched_out(.event_type = EVENT_PINNED)
3267	 *
3268	 * would only update time for the pinned events.
3269	 */
3270 if (is_active & EVENT_TIME) {
3271
3272 update_context_time(ctx);
3273 update_cgrp_time_from_cpuctx(cpuctx);
3274 }
3275
3276 is_active ^= ctx->is_active;
3277
3278 if (!ctx->nr_active || !(is_active & EVENT_ALL))
3279 return;
3280
3281 perf_pmu_disable(ctx->pmu);
3282 if (is_active & EVENT_PINNED) {
3283 list_for_each_entry_safe(event, tmp, &ctx->pinned_active, active_list)
3284 group_sched_out(event, cpuctx, ctx);
3285 }
3286
3287 if (is_active & EVENT_FLEXIBLE) {
3288 list_for_each_entry_safe(event, tmp, &ctx->flexible_active, active_list)
3289 group_sched_out(event, cpuctx, ctx);
3290
3291
3292
3293
3294
3295
3296 ctx->rotate_necessary = 0;
3297 }
3298 perf_pmu_enable(ctx->pmu);
3299}
3300
3301/*
3302 * Test whether two contexts are equivalent, i.e. whether they have both been
3303 * cloned from the same version of the same context.
3304 *
3305 * Equivalence is measured using a generation number in the context that is
3306 * incremented on each modification to it; see unclone_ctx(), list_add_event()
3307 * and list_del_event().
3308 */
3309static int context_equiv(struct perf_event_context *ctx1,
3310 struct perf_event_context *ctx2)
3311{
3312 lockdep_assert_held(&ctx1->lock);
3313 lockdep_assert_held(&ctx2->lock);
3314
3315
3316 if (ctx1->pin_count || ctx2->pin_count)
3317 return 0;
3318
3319
3320 if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
3321 return 1;
3322
3323
3324 if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
3325 return 1;
3326
3327
3328
3329
3330
3331 if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
3332 ctx1->parent_gen == ctx2->parent_gen)
3333 return 1;
3334
3335
3336 return 0;
3337}
3338
3339static void __perf_event_sync_stat(struct perf_event *event,
3340 struct perf_event *next_event)
3341{
3342 u64 value;
3343
3344 if (!event->attr.inherit_stat)
3345 return;
3346
3347	/*
3348	 * Update the event value, we cannot use perf_event_read()
3349	 * because we're in the middle of a context switch and have IRQs
3350	 * disabled, which upsets smp_call_function_single(), however
3351	 * we know the event must be on the current CPU, therefore we
3352	 * don't need to use it.
3353	 */
3354 if (event->state == PERF_EVENT_STATE_ACTIVE)
3355 event->pmu->read(event);
3356
3357 perf_event_update_time(event);
3358
3359
3360
3361
3362
3363 value = local64_read(&next_event->count);
3364 value = local64_xchg(&event->count, value);
3365 local64_set(&next_event->count, value);
3366
3367 swap(event->total_time_enabled, next_event->total_time_enabled);
3368 swap(event->total_time_running, next_event->total_time_running);
3369
3370
3371
3372
3373 perf_event_update_userpage(event);
3374 perf_event_update_userpage(next_event);
3375}
3376
3377static void perf_event_sync_stat(struct perf_event_context *ctx,
3378 struct perf_event_context *next_ctx)
3379{
3380 struct perf_event *event, *next_event;
3381
3382 if (!ctx->nr_stat)
3383 return;
3384
3385 update_context_time(ctx);
3386
3387 event = list_first_entry(&ctx->event_list,
3388 struct perf_event, event_entry);
3389
3390 next_event = list_first_entry(&next_ctx->event_list,
3391 struct perf_event, event_entry);
3392
3393 while (&event->event_entry != &ctx->event_list &&
3394 &next_event->event_entry != &next_ctx->event_list) {
3395
3396 __perf_event_sync_stat(event, next_event);
3397
3398 event = list_next_entry(event, event_entry);
3399 next_event = list_next_entry(next_event, event_entry);
3400 }
3401}
3402
3403static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
3404 struct task_struct *next)
3405{
3406 struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
3407 struct perf_event_context *next_ctx;
3408 struct perf_event_context *parent, *next_parent;
3409 struct perf_cpu_context *cpuctx;
3410 int do_switch = 1;
3411 struct pmu *pmu;
3412
3413 if (likely(!ctx))
3414 return;
3415
3416 pmu = ctx->pmu;
3417 cpuctx = __get_cpu_context(ctx);
3418 if (!cpuctx->task_ctx)
3419 return;
3420
3421 rcu_read_lock();
3422 next_ctx = next->perf_event_ctxp[ctxn];
3423 if (!next_ctx)
3424 goto unlock;
3425
3426 parent = rcu_dereference(ctx->parent_ctx);
3427 next_parent = rcu_dereference(next_ctx->parent_ctx);
3428
3429
3430 if (!parent && !next_parent)
3431 goto unlock;
3432
3433 if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443 raw_spin_lock(&ctx->lock);
3444 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
3445 if (context_equiv(ctx, next_ctx)) {
3446
3447 WRITE_ONCE(ctx->task, next);
3448 WRITE_ONCE(next_ctx->task, task);
3449
3450 perf_pmu_disable(pmu);
3451
3452 if (cpuctx->sched_cb_usage && pmu->sched_task)
3453 pmu->sched_task(ctx, false);
3454
3455
3456
3457
3458
3459
3460
3461 if (pmu->swap_task_ctx)
3462 pmu->swap_task_ctx(ctx, next_ctx);
3463 else
3464 swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
3465
3466 perf_pmu_enable(pmu);
3467
3468
3469
3470
3471
3472
3473
3474
3475 RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], next_ctx);
3476 RCU_INIT_POINTER(next->perf_event_ctxp[ctxn], ctx);
3477
3478 do_switch = 0;
3479
3480 perf_event_sync_stat(ctx, next_ctx);
3481 }
3482 raw_spin_unlock(&next_ctx->lock);
3483 raw_spin_unlock(&ctx->lock);
3484 }
3485unlock:
3486 rcu_read_unlock();
3487
3488 if (do_switch) {
3489 raw_spin_lock(&ctx->lock);
3490 perf_pmu_disable(pmu);
3491
3492 if (cpuctx->sched_cb_usage && pmu->sched_task)
3493 pmu->sched_task(ctx, false);
3494 task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
3495
3496 perf_pmu_enable(pmu);
3497 raw_spin_unlock(&ctx->lock);
3498 }
3499}
3500
3501static DEFINE_PER_CPU(struct list_head, sched_cb_list);
3502
3503void perf_sched_cb_dec(struct pmu *pmu)
3504{
3505 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
3506
3507 this_cpu_dec(perf_sched_cb_usages);
3508
3509 if (!--cpuctx->sched_cb_usage)
3510 list_del(&cpuctx->sched_cb_entry);
3511}
3512
3513
3514void perf_sched_cb_inc(struct pmu *pmu)
3515{
3516 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
3517
3518 if (!cpuctx->sched_cb_usage++)
3519 list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
3520
3521 this_cpu_inc(perf_sched_cb_usages);
3522}
3523
3524/*
3525 * This function provides the context switch callback to the lower code
3526 * layer. It is invoked ONLY when the context switch callback is enabled.
3527 *
3528 * This callback is relevant even to per-cpu events; for example multi event
3529 * PEBS requires this to provide PID/TID information. This requires we flush
3530 * all queued PEBS records before we context switch to a new task.
3531 */
3532static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in)
3533{
3534 struct pmu *pmu;
3535
3536 pmu = cpuctx->ctx.pmu;
3537
3538 if (WARN_ON_ONCE(!pmu->sched_task))
3539 return;
3540
3541 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
3542 perf_pmu_disable(pmu);
3543
3544 pmu->sched_task(cpuctx->task_ctx, sched_in);
3545
3546 perf_pmu_enable(pmu);
3547 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
3548}
3549
3550static void perf_pmu_sched_task(struct task_struct *prev,
3551 struct task_struct *next,
3552 bool sched_in)
3553{
3554 struct perf_cpu_context *cpuctx;
3555
3556 if (prev == next)
3557 return;
3558
3559 list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
3560
3561 if (cpuctx->task_ctx)
3562 continue;
3563
3564 __perf_pmu_sched_task(cpuctx, sched_in);
3565 }
3566}
3567
3568static void perf_event_switch(struct task_struct *task,
3569 struct task_struct *next_prev, bool sched_in);
3570
3571#define for_each_task_context_nr(ctxn) \
3572 for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
3573
3574/*
3575 * Called from scheduler to remove the events of the current task,
3576 * with interrupts disabled.
3577 *
3578 * We stop each event and update the event value in event->count.
3579 *
3580 * This does not protect us against NMI, but disable()
3581 * sets the disabled bit in the control field of event _before_
3582 * we remove the event from the context after it has been marked
3583 * as INACTIVE.
3584 */
3585void __perf_event_task_sched_out(struct task_struct *task,
3586 struct task_struct *next)
3587{
3588 int ctxn;
3589
3590 if (__this_cpu_read(perf_sched_cb_usages))
3591 perf_pmu_sched_task(task, next, false);
3592
3593 if (atomic_read(&nr_switch_events))
3594 perf_event_switch(task, next, false);
3595
3596 for_each_task_context_nr(ctxn)
3597 perf_event_context_sched_out(task, ctxn, next);
3598
3599	/*
3600	 * If cgroup events exist on this CPU, then we need
3601	 * to check if we have to switch out PMU state.
3602	 * cgroup events are system-wide mode only.
3603	 */
3604 if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
3605 perf_cgroup_sched_out(task, next);
3606}
3607
3608/*
3609 * Called with IRQs disabled.
3610 */
3611static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
3612 enum event_type_t event_type)
3613{
3614 ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
3615}
3616
3617static bool perf_less_group_idx(const void *l, const void *r)
3618{
3619 const struct perf_event *le = *(const struct perf_event **)l;
3620 const struct perf_event *re = *(const struct perf_event **)r;
3621
3622 return le->group_index < re->group_index;
3623}
3624
3625static void swap_ptr(void *l, void *r)
3626{
3627 void **lp = l, **rp = r;
3628
3629 swap(*lp, *rp);
3630}
3631
3632static const struct min_heap_callbacks perf_min_heap = {
3633 .elem_size = sizeof(struct perf_event *),
3634 .less = perf_less_group_idx,
3635 .swp = swap_ptr,
3636};
3637
3638static void __heap_add(struct min_heap *heap, struct perf_event *event)
3639{
3640 struct perf_event **itrs = heap->data;
3641
3642 if (event) {
3643 itrs[heap->nr] = event;
3644 heap->nr++;
3645 }
3646}
3647
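/*
 * Merge-walk the event groups in @groups in group_index order and call
 * @func on each, stopping on the first non-zero return. The walk merges
 * several sorted sub-trees via a min-heap: the "any CPU" (cpu == -1) tree
 * for task contexts, the tree for @cpu, and (for CPU contexts with cgroups
 * enabled) one tree per cgroup ancestor.
 */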
3648static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
3649 struct perf_event_groups *groups, int cpu,
3650 int (*func)(struct perf_event *, void *),
3651 void *data)
3652{
3653#ifdef CONFIG_CGROUP_PERF
3654 struct cgroup_subsys_state *css = NULL;
3655#endif
3656
3657 struct perf_event *itrs[2];
3658 struct min_heap event_heap;
3659 struct perf_event **evt;
3660 int ret;
3661
3662 if (cpuctx) {
3663 event_heap = (struct min_heap){
3664 .data = cpuctx->heap,
3665 .nr = 0,
3666 .size = cpuctx->heap_size,
3667 };
3668
3669 lockdep_assert_held(&cpuctx->ctx.lock);
3670
3671#ifdef CONFIG_CGROUP_PERF
3672 if (cpuctx->cgrp)
3673 css = &cpuctx->cgrp->css;
3674#endif
3675 } else {
3676 event_heap = (struct min_heap){
3677 .data = itrs,
3678 .nr = 0,
3679 .size = ARRAY_SIZE(itrs),
3680 };
3681
3682 __heap_add(&event_heap, perf_event_groups_first(groups, -1, NULL));
3683 }
3684 evt = event_heap.data;
3685
3686 __heap_add(&event_heap, perf_event_groups_first(groups, cpu, NULL));
3687
3688#ifdef CONFIG_CGROUP_PERF
3689 for (; css; css = css->parent)
3690 __heap_add(&event_heap, perf_event_groups_first(groups, cpu, css->cgroup));
3691#endif
3692
3693 min_heapify_all(&event_heap, &perf_min_heap);
3694
3695 while (event_heap.nr) {
3696 ret = func(*evt, data);
3697 if (ret)
3698 return ret;
3699
3700 *evt = perf_event_groups_next(*evt);
3701 if (*evt)
3702 min_heapify(&event_heap, 0, &perf_min_heap);
3703 else
3704 min_heap_pop(&event_heap, &perf_min_heap);
3705 }
3706
3707 return 0;
3708}
3709
3710static inline bool event_update_userpage(struct perf_event *event)
3711{
3712 if (likely(!atomic_read(&event->mmap_count)))
3713 return false;
3714
3715 perf_event_update_time(event);
3716 perf_set_shadow_time(event, event->ctx);
3717 perf_event_update_userpage(event);
3718
3719 return true;
3720}
3721
3722static inline void group_update_userpage(struct perf_event *group_event)
3723{
3724 struct perf_event *event;
3725
3726 if (!event_update_userpage(group_event))
3727 return;
3728
3729 for_each_sibling_event(event, group_event)
3730 event_update_userpage(event);
3731}
3732
3733static int merge_sched_in(struct perf_event *event, void *data)
3734{
3735 struct perf_event_context *ctx = event->ctx;
3736 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
3737 int *can_add_hw = data;
3738
3739 if (event->state <= PERF_EVENT_STATE_OFF)
3740 return 0;
3741
3742 if (!event_filter_match(event))
3743 return 0;
3744
3745 if (group_can_go_on(event, cpuctx, *can_add_hw)) {
3746 if (!group_sched_in(event, cpuctx, ctx))
3747 list_add_tail(&event->active_list, get_event_list(event));
3748 }
3749
3750 if (event->state == PERF_EVENT_STATE_INACTIVE) {
3751 *can_add_hw = 0;
3752 if (event->attr.pinned) {
3753 perf_cgroup_event_disable(event, ctx);
3754 perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
3755 } else {
3756 ctx->rotate_necessary = 1;
3757 perf_mux_hrtimer_restart(cpuctx);
3758 group_update_userpage(event);
3759 }
3760 }
3761
3762 return 0;
3763}
3764
3765static void
3766ctx_pinned_sched_in(struct perf_event_context *ctx,
3767 struct perf_cpu_context *cpuctx)
3768{
3769 int can_add_hw = 1;
3770
3771 if (ctx != &cpuctx->ctx)
3772 cpuctx = NULL;
3773
3774 visit_groups_merge(cpuctx, &ctx->pinned_groups,
3775 smp_processor_id(),
3776 merge_sched_in, &can_add_hw);
3777}
3778
3779static void
3780ctx_flexible_sched_in(struct perf_event_context *ctx,
3781 struct perf_cpu_context *cpuctx)
3782{
3783 int can_add_hw = 1;
3784
3785 if (ctx != &cpuctx->ctx)
3786 cpuctx = NULL;
3787
3788 visit_groups_merge(cpuctx, &ctx->flexible_groups,
3789 smp_processor_id(),
3790 merge_sched_in, &can_add_hw);
3791}
3792
3793static void
3794ctx_sched_in(struct perf_event_context *ctx,
3795 struct perf_cpu_context *cpuctx,
3796 enum event_type_t event_type,
3797 struct task_struct *task)
3798{
3799 int is_active = ctx->is_active;
3800 u64 now;
3801
3802 lockdep_assert_held(&ctx->lock);
3803
3804 if (likely(!ctx->nr_events))
3805 return;
3806
3807 ctx->is_active |= (event_type | EVENT_TIME);
3808 if (ctx->task) {
3809 if (!is_active)
3810 cpuctx->task_ctx = ctx;
3811 else
3812 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
3813 }
3814
3815 is_active ^= ctx->is_active;
3816
3817 if (is_active & EVENT_TIME) {
3818
3819 now = perf_clock();
3820 ctx->timestamp = now;
3821 perf_cgroup_set_timestamp(task, ctx);
3822 }
3823
3824	/*
3825	 * First go through the list and put on any pinned groups
3826	 * in order to give them the best chance of going on.
3827	 */
3828 if (is_active & EVENT_PINNED)
3829 ctx_pinned_sched_in(ctx, cpuctx);
3830
3831
3832 if (is_active & EVENT_FLEXIBLE)
3833 ctx_flexible_sched_in(ctx, cpuctx);
3834}
3835
3836static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
3837 enum event_type_t event_type,
3838 struct task_struct *task)
3839{
3840 struct perf_event_context *ctx = &cpuctx->ctx;
3841
3842 ctx_sched_in(ctx, cpuctx, event_type, task);
3843}
3844
3845static void perf_event_context_sched_in(struct perf_event_context *ctx,
3846 struct task_struct *task)
3847{
3848 struct perf_cpu_context *cpuctx;
3849 struct pmu *pmu;
3850
3851 cpuctx = __get_cpu_context(ctx);
3852
3853
3854
3855
3856
3857 pmu = ctx->pmu = cpuctx->ctx.pmu;
3858
3859 if (cpuctx->task_ctx == ctx) {
3860 if (cpuctx->sched_cb_usage)
3861 __perf_pmu_sched_task(cpuctx, true);
3862 return;
3863 }
3864
3865 perf_ctx_lock(cpuctx, ctx);
3866	/*
3867	 * We must check ctx->nr_events while holding ctx->lock, such
3868	 * that we serialize against perf_install_in_context().
3869	 */
3870 if (!ctx->nr_events)
3871 goto unlock;
3872
3873 perf_pmu_disable(pmu);
3874
3875	/*
3876	 * We want to keep the following priority order:
3877	 * cpu pinned (that don't need to move), task pinned,
3878	 * cpu flexible, task flexible.
3879	 *
3880	 * If the task ctx has no pinned events, no need to flip cpuctx events.
3881	 */
3882 if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
3883 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
3884 perf_event_sched_in(cpuctx, ctx, task);
3885
3886 if (cpuctx->sched_cb_usage && pmu->sched_task)
3887 pmu->sched_task(cpuctx->task_ctx, true);
3888
3889 perf_pmu_enable(pmu);
3890
3891unlock:
3892 perf_ctx_unlock(cpuctx, ctx);
3893}
3894
3895
3896/*
3897 * Called from scheduler to add the events of the current task
3898 * with interrupts disabled.
3899 *
3900 * We restore the event value and then enable it.
3901 *
3902 * This does not protect us against NMI, but enable()
3903 * sets the enabled bit in the control field of event _before_
3904 * we enable the event in the hardware.
3905 */
3906void __perf_event_task_sched_in(struct task_struct *prev,
3907 struct task_struct *task)
3908{
3909 struct perf_event_context *ctx;
3910 int ctxn;
3911
3912	/*
3913	 * If cgroup events exist on this CPU, then we need to check if we have
3914	 * to switch in PMU state; cgroup events are system-wide mode only.
3915	 *
3916	 * Since cgroup events are CPU events, we must schedule these in before
3917	 * we schedule in the task events.
3918	 */
3919 if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
3920 perf_cgroup_sched_in(prev, task);
3921
3922 for_each_task_context_nr(ctxn) {
3923 ctx = task->perf_event_ctxp[ctxn];
3924 if (likely(!ctx))
3925 continue;
3926
3927 perf_event_context_sched_in(ctx, task);
3928 }
3929
3930 if (atomic_read(&nr_switch_events))
3931 perf_event_switch(task, prev, true);
3932
3933 if (__this_cpu_read(perf_sched_cb_usages))
3934 perf_pmu_sched_task(prev, task, true);
3935}
3936
3937static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
3938{
3939 u64 frequency = event->attr.sample_freq;
3940 u64 sec = NSEC_PER_SEC;
3941 u64 divisor, dividend;
3942
3943 int count_fls, nsec_fls, frequency_fls, sec_fls;
3944
3945 count_fls = fls64(count);
3946 nsec_fls = fls64(nsec);
3947 frequency_fls = fls64(frequency);
3948 sec_fls = 30;
3949
3950	/*
3951	 * We got @count in @nsec, with a target of sample_freq HZ
3952	 * the target period becomes:
3953	 *
3954	 *             @count * 10^9
3955	 * period = -------------------
3956	 *          @nsec * sample_freq
3957	 *
3958	 * To avoid 64-bit overflow in the computation below, reduce the
3959	 * operands (tracked via their fls) until both products fit in
3960	 * 64 bits, keeping as much relative precision as possible.
3961	 */
3962
3963
3964#define REDUCE_FLS(a, b) \
3965do { \
3966 if (a##_fls > b##_fls) { \
3967 a >>= 1; \
3968 a##_fls--; \
3969 } else { \
3970 b >>= 1; \
3971 b##_fls--; \
3972 } \
3973} while (0)
3974
3975
3976
3977
3978
3979 while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
3980 REDUCE_FLS(nsec, frequency);
3981 REDUCE_FLS(sec, count);
3982 }
3983
3984 if (count_fls + sec_fls > 64) {
3985 divisor = nsec * frequency;
3986
3987 while (count_fls + sec_fls > 64) {
3988 REDUCE_FLS(count, sec);
3989 divisor >>= 1;
3990 }
3991
3992 dividend = count * sec;
3993 } else {
3994 dividend = count * sec;
3995
3996 while (nsec_fls + frequency_fls > 64) {
3997 REDUCE_FLS(nsec, frequency);
3998 dividend >>= 1;
3999 }
4000
4001 divisor = nsec * frequency;
4002 }
4003
4004 if (!divisor)
4005 return dividend;
4006
4007 return div64_u64(dividend, divisor);
4008}
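/*
 * Worked example (illustrative numbers): with sample_freq = 1000 Hz and a
 * measurement of count = 2,000,000 events over nsec = 4,000,000 ns, the
 * event rate is 5e8/sec, so the returned period is
 * 2e6 * 1e9 / (4e6 * 1000) = 500,000 events per sample.
 */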
4009
4010static DEFINE_PER_CPU(int, perf_throttled_count);
4011static DEFINE_PER_CPU(u64, perf_throttled_seq);
4012
4013static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable)
4014{
4015 struct hw_perf_event *hwc = &event->hw;
4016 s64 period, sample_period;
4017 s64 delta;
4018
4019 period = perf_calculate_period(event, nsec, count);
4020
4021 delta = (s64)(period - hwc->sample_period);
4022 delta = (delta + 7) / 8;
4023
4024 sample_period = hwc->sample_period + delta;
4025
4026 if (!sample_period)
4027 sample_period = 1;
4028
4029 hwc->sample_period = sample_period;
4030
4031 if (local64_read(&hwc->period_left) > 8*sample_period) {
4032 if (disable)
4033 event->pmu->stop(event, PERF_EF_UPDATE);
4034
4035 local64_set(&hwc->period_left, 0);
4036
4037 if (disable)
4038 event->pmu->start(event, PERF_EF_RELOAD);
4039 }
4040}
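/*
 * Example of the damping above (illustrative numbers): if the current
 * sample_period is 100,000 and the freshly computed target period is
 * 180,000, then delta = 80,000 and only (80,000 + 7) / 8 = 10,000 is
 * applied this tick, giving 110,000; repeated ticks converge on the
 * target instead of oscillating around it.
 */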
4041
4042/*
4043 * Combine freq adjustment with unthrottling to avoid two passes over the
4044 * events. At the same time, make sure that having freq events does not
4045 * change the rate of unthrottling, as that would introduce bias.
4046 */
4047static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
4048 int needs_unthr)
4049{
4050 struct perf_event *event;
4051 struct hw_perf_event *hwc;
4052 u64 now, period = TICK_NSEC;
4053 s64 delta;
4054
4055	/*
4056	 * We only need to iterate over all events if:
4057	 * - the context has events in frequency mode (needs freq adjust),
4058	 * - there are events to unthrottle on this cpu.
4059	 */
4060 if (!(ctx->nr_freq || needs_unthr))
4061 return;
4062
4063 raw_spin_lock(&ctx->lock);
4064 perf_pmu_disable(ctx->pmu);
4065
4066 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
4067 if (event->state != PERF_EVENT_STATE_ACTIVE)
4068 continue;
4069
4070 if (!event_filter_match(event))
4071 continue;
4072
4073 perf_pmu_disable(event->pmu);
4074
4075 hwc = &event->hw;
4076
4077 if (hwc->interrupts == MAX_INTERRUPTS) {
4078 hwc->interrupts = 0;
4079 perf_log_throttle(event, 1);
4080 event->pmu->start(event, 0);
4081 }
4082
4083 if (!event->attr.freq || !event->attr.sample_freq)
4084 goto next;
4085
4086
4087
4088
4089 event->pmu->stop(event, PERF_EF_UPDATE);
4090
4091 now = local64_read(&event->count);
4092 delta = now - hwc->freq_count_stamp;
4093 hwc->freq_count_stamp = now;
4094
4095		/*
4096		 * Restart the event; reload only if the value has changed.
4097		 *
4098		 * We have already stopped the event, so tell
4099		 * perf_adjust_period() not to stop it a second
4100		 * time (disable == false).
4101		 */
4102 if (delta > 0)
4103 perf_adjust_period(event, period, delta, false);
4104
4105 event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
4106 next:
4107 perf_pmu_enable(event->pmu);
4108 }
4109
4110 perf_pmu_enable(ctx->pmu);
4111 raw_spin_unlock(&ctx->lock);
4112}
4113
4114
4115
4116
4117static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event)
4118{
4119	/*
4120	 * Rotate the first entry last of non-pinned groups. Rotation might be
4121	 * disabled by the inheritance code.
4122	 */
4123 if (ctx->rotate_disable)
4124 return;
4125
4126 perf_event_groups_delete(&ctx->flexible_groups, event);
4127 perf_event_groups_insert(&ctx->flexible_groups, event);
4128}
4129
4130
4131static inline struct perf_event *
4132ctx_event_to_rotate(struct perf_event_context *ctx)
4133{
4134 struct perf_event *event;
4135
4136
4137 event = list_first_entry_or_null(&ctx->flexible_active,
4138 struct perf_event, active_list);
4139
4140
4141 if (!event) {
4142 event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree),
4143 typeof(*event), group_node);
4144 }
4145
4146	/*
4147	 * Unconditionally clear rotate_necessary; if ctx_flexible_sched_in()
4148	 * finds there are unschedulable events, it will set it again.
4149	 */
4150 ctx->rotate_necessary = 0;
4151
4152 return event;
4153}
4154
4155static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
4156{
4157 struct perf_event *cpu_event = NULL, *task_event = NULL;
4158 struct perf_event_context *task_ctx = NULL;
4159 int cpu_rotate, task_rotate;
4160
4161
4162
4163
4164
4165
4166 cpu_rotate = cpuctx->ctx.rotate_necessary;
4167 task_ctx = cpuctx->task_ctx;
4168 task_rotate = task_ctx ? task_ctx->rotate_necessary : 0;
4169
4170 if (!(cpu_rotate || task_rotate))
4171 return false;
4172
4173 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
4174 perf_pmu_disable(cpuctx->ctx.pmu);
4175
4176 if (task_rotate)
4177 task_event = ctx_event_to_rotate(task_ctx);
4178 if (cpu_rotate)
4179 cpu_event = ctx_event_to_rotate(&cpuctx->ctx);
4180
4181	/*
4182	 * As per the order given at ctx_resched() first 'pop' task flexible
4183	 * and then, if needed, CPU flexible.
4184	 */
4185 if (task_event || (task_ctx && cpu_event))
4186 ctx_sched_out(task_ctx, cpuctx, EVENT_FLEXIBLE);
4187 if (cpu_event)
4188 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
4189
4190 if (task_event)
4191 rotate_ctx(task_ctx, task_event);
4192 if (cpu_event)
4193 rotate_ctx(&cpuctx->ctx, cpu_event);
4194
4195 perf_event_sched_in(cpuctx, task_ctx, current);
4196
4197 perf_pmu_enable(cpuctx->ctx.pmu);
4198 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
4199
4200 return true;
4201}
4202
4203void perf_event_task_tick(void)
4204{
4205 struct list_head *head = this_cpu_ptr(&active_ctx_list);
4206 struct perf_event_context *ctx, *tmp;
4207 int throttled;
4208
4209 lockdep_assert_irqs_disabled();
4210
4211 __this_cpu_inc(perf_throttled_seq);
4212 throttled = __this_cpu_xchg(perf_throttled_count, 0);
4213 tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
4214
4215 list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
4216 perf_adjust_freq_unthr_context(ctx, throttled);
4217}
4218
4219static int event_enable_on_exec(struct perf_event *event,
4220 struct perf_event_context *ctx)
4221{
4222 if (!event->attr.enable_on_exec)
4223 return 0;
4224
4225 event->attr.enable_on_exec = 0;
4226 if (event->state >= PERF_EVENT_STATE_INACTIVE)
4227 return 0;
4228
4229 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
4230
4231 return 1;
4232}
4233
4234/*
4235 * Enable all of a task's events that have been marked enable-on-exec.
4236 * This expects task == current.
4237 */
4238static void perf_event_enable_on_exec(int ctxn)
4239{
4240 struct perf_event_context *ctx, *clone_ctx = NULL;
4241 enum event_type_t event_type = 0;
4242 struct perf_cpu_context *cpuctx;
4243 struct perf_event *event;
4244 unsigned long flags;
4245 int enabled = 0;
4246
4247 local_irq_save(flags);
4248 ctx = current->perf_event_ctxp[ctxn];
4249 if (!ctx || !ctx->nr_events)
4250 goto out;
4251
4252 cpuctx = __get_cpu_context(ctx);
4253 perf_ctx_lock(cpuctx, ctx);
4254 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
4255 list_for_each_entry(event, &ctx->event_list, event_entry) {
4256 enabled |= event_enable_on_exec(event, ctx);
4257 event_type |= get_event_type(event);
4258 }
4259
4260
4261
4262
4263 if (enabled) {
4264 clone_ctx = unclone_ctx(ctx);
4265 ctx_resched(cpuctx, ctx, event_type);
4266 } else {
4267 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
4268 }
4269 perf_ctx_unlock(cpuctx, ctx);
4270
4271out:
4272 local_irq_restore(flags);
4273
4274 if (clone_ctx)
4275 put_ctx(clone_ctx);
4276}
4277
4278static void perf_remove_from_owner(struct perf_event *event);
4279static void perf_event_exit_event(struct perf_event *event,
4280 struct perf_event_context *ctx);
4281
4282/*
4283 * Removes all events from the current task that have been marked
4284 * remove-on-exec, and feeds their values back to parent events.
4285 */
4286static void perf_event_remove_on_exec(int ctxn)
4287{
4288 struct perf_event_context *ctx, *clone_ctx = NULL;
4289 struct perf_event *event, *next;
4290 LIST_HEAD(free_list);
4291 unsigned long flags;
4292 bool modified = false;
4293
4294 ctx = perf_pin_task_context(current, ctxn);
4295 if (!ctx)
4296 return;
4297
4298 mutex_lock(&ctx->mutex);
4299
4300 if (WARN_ON_ONCE(ctx->task != current))
4301 goto unlock;
4302
4303 list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) {
4304 if (!event->attr.remove_on_exec)
4305 continue;
4306
4307 if (!is_kernel_event(event))
4308 perf_remove_from_owner(event);
4309
4310 modified = true;
4311
4312 perf_event_exit_event(event, ctx);
4313 }
4314
4315 raw_spin_lock_irqsave(&ctx->lock, flags);
4316 if (modified)
4317 clone_ctx = unclone_ctx(ctx);
4318 --ctx->pin_count;
4319 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4320
4321unlock:
4322 mutex_unlock(&ctx->mutex);
4323
4324 put_ctx(ctx);
4325 if (clone_ctx)
4326 put_ctx(clone_ctx);
4327}
4328
4329struct perf_read_data {
4330 struct perf_event *event;
4331 bool group;
4332 int ret;
4333};
4334
4335static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
4336{
4337 u16 local_pkg, event_pkg;
4338
4339 if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
4340 int local_cpu = smp_processor_id();
4341
4342 event_pkg = topology_physical_package_id(event_cpu);
4343 local_pkg = topology_physical_package_id(local_cpu);
4344
4345 if (event_pkg == local_pkg)
4346 return local_cpu;
4347 }
4348
4349 return event_cpu;
4350}
4351
4352/*
4353 * Cross CPU call to read the hardware event.
4354 */
4355static void __perf_event_read(void *info)
4356{
4357 struct perf_read_data *data = info;
4358 struct perf_event *sub, *event = data->event;
4359 struct perf_event_context *ctx = event->ctx;
4360 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
4361 struct pmu *pmu = event->pmu;
4362
4363	/*
4364	 * If this is a task context, we need to check whether it is
4365	 * the current task context of this cpu.  If not it has been
4366	 * scheduled out before the smp call arrived.  In that case
4367	 * event->count would have been updated to a recent sample
4368	 * when the event was scheduled out.
4369	 */
4370 if (ctx->task && cpuctx->task_ctx != ctx)
4371 return;
4372
4373 raw_spin_lock(&ctx->lock);
4374 if (ctx->is_active & EVENT_TIME) {
4375 update_context_time(ctx);
4376 update_cgrp_time_from_event(event);
4377 }
4378
4379 perf_event_update_time(event);
4380 if (data->group)
4381 perf_event_update_sibling_time(event);
4382
4383 if (event->state != PERF_EVENT_STATE_ACTIVE)
4384 goto unlock;
4385
4386 if (!data->group) {
4387 pmu->read(event);
4388 data->ret = 0;
4389 goto unlock;
4390 }
4391
4392 pmu->start_txn(pmu, PERF_PMU_TXN_READ);
4393
4394 pmu->read(event);
4395
4396 for_each_sibling_event(sub, event) {
4397 if (sub->state == PERF_EVENT_STATE_ACTIVE) {
4398
4399
4400
4401
4402 sub->pmu->read(sub);
4403 }
4404 }
4405
4406 data->ret = pmu->commit_txn(pmu);
4407
4408unlock:
4409 raw_spin_unlock(&ctx->lock);
4410}
4411
4412static inline u64 perf_event_count(struct perf_event *event)
4413{
4414 return local64_read(&event->count) + atomic64_read(&event->child_count);
4415}
4416
4417/*
4418 * NMI-safe method to read a local event, that is an event that
4419 * is:
4420 *   - either for the current task, or for this CPU
4421 *   - does not have inherit set, for inherited task events
4422 *     will not be local and we cannot read them atomically
4423 *   - must not have a pmu::count method
4424 */
4425int perf_event_read_local(struct perf_event *event, u64 *value,
4426 u64 *enabled, u64 *running)
4427{
4428 unsigned long flags;
4429 int ret = 0;
4430
4431
4432
4433
4434
4435 local_irq_save(flags);
4436
4437
4438
4439
4440
4441 if (event->attr.inherit) {
4442 ret = -EOPNOTSUPP;
4443 goto out;
4444 }
4445
4446
4447 if ((event->attach_state & PERF_ATTACH_TASK) &&
4448 event->hw.target != current) {
4449 ret = -EINVAL;
4450 goto out;
4451 }
4452
4453
4454 if (!(event->attach_state & PERF_ATTACH_TASK) &&
4455 event->cpu != smp_processor_id()) {
4456 ret = -EINVAL;
4457 goto out;
4458 }
4459
4460
4461 if (event->attr.pinned && event->oncpu != smp_processor_id()) {
4462 ret = -EBUSY;
4463 goto out;
4464 }
4465
4466
4467
4468
4469
4470
4471 if (event->oncpu == smp_processor_id())
4472 event->pmu->read(event);
4473
4474 *value = local64_read(&event->count);
4475 if (enabled || running) {
4476 u64 now = event->shadow_ctx_time + perf_clock();
4477 u64 __enabled, __running;
4478
4479 __perf_update_times(event, now, &__enabled, &__running);
4480 if (enabled)
4481 *enabled = __enabled;
4482 if (running)
4483 *running = __running;
4484 }
4485out:
4486 local_irq_restore(flags);
4487
4488 return ret;
4489}
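/*
 * In-kernel users such as the BPF helpers (e.g. bpf_perf_event_read()) rely
 * on this NMI-safe path rather than perf_event_read(), since they may run
 * from contexts where IPIs and sleeping locks are not an option.
 */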
4490
4491static int perf_event_read(struct perf_event *event, bool group)
4492{
4493 enum perf_event_state state = READ_ONCE(event->state);
4494 int event_cpu, ret = 0;
4495
4496	/*
4497	 * If the event is enabled and currently active on a CPU, read the
4498	 * value from that CPU and update the value in the event structure:
4499	 */
4500again:
4501 if (state == PERF_EVENT_STATE_ACTIVE) {
4502 struct perf_read_data data;
4503
4504		/*
4505		 * Orders the ->state and ->oncpu loads such that if we see
4506		 * ACTIVE we must also see the right ->oncpu.
4507		 *
4508		 * Matches the smp_wmb() from event_sched_in().
4509		 */
4510 smp_rmb();
4511
4512 event_cpu = READ_ONCE(event->oncpu);
4513 if ((unsigned)event_cpu >= nr_cpu_ids)
4514 return 0;
4515
4516 data = (struct perf_read_data){
4517 .event = event,
4518 .group = group,
4519 .ret = 0,
4520 };
4521
4522 preempt_disable();
4523 event_cpu = __perf_event_read_cpu(event, event_cpu);
4524
4525		/*
4526		 * Purposely ignore the smp_call_function_single() return
4527		 * value.
4528		 *
4529		 * If event_cpu isn't a valid CPU it means the event got
4530		 * scheduled out and that will have updated the event count.
4531		 *
4532		 * Therefore, either way, we'll have an up-to-date event count
4533		 * after this.
4534		 */
4535 (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
4536 preempt_enable();
4537 ret = data.ret;
4538
4539 } else if (state == PERF_EVENT_STATE_INACTIVE) {
4540 struct perf_event_context *ctx = event->ctx;
4541 unsigned long flags;
4542
4543 raw_spin_lock_irqsave(&ctx->lock, flags);
4544 state = event->state;
4545 if (state != PERF_EVENT_STATE_INACTIVE) {
4546 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4547 goto again;
4548 }
4549
4550
4551
4552
4553
4554 if (ctx->is_active & EVENT_TIME) {
4555 update_context_time(ctx);
4556 update_cgrp_time_from_event(event);
4557 }
4558
4559 perf_event_update_time(event);
4560 if (group)
4561 perf_event_update_sibling_time(event);
4562 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4563 }
4564
4565 return ret;
4566}
4567
4568
4569
4570
4571static void __perf_event_init_context(struct perf_event_context *ctx)
4572{
4573 raw_spin_lock_init(&ctx->lock);
4574 mutex_init(&ctx->mutex);
4575 INIT_LIST_HEAD(&ctx->active_ctx_list);
4576 perf_event_groups_init(&ctx->pinned_groups);
4577 perf_event_groups_init(&ctx->flexible_groups);
4578 INIT_LIST_HEAD(&ctx->event_list);
4579 INIT_LIST_HEAD(&ctx->pinned_active);
4580 INIT_LIST_HEAD(&ctx->flexible_active);
4581 refcount_set(&ctx->refcount, 1);
4582}
4583
4584static struct perf_event_context *
4585alloc_perf_context(struct pmu *pmu, struct task_struct *task)
4586{
4587 struct perf_event_context *ctx;
4588
4589 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
4590 if (!ctx)
4591 return NULL;
4592
4593 __perf_event_init_context(ctx);
4594 if (task)
4595 ctx->task = get_task_struct(task);
4596 ctx->pmu = pmu;
4597
4598 return ctx;
4599}
4600
4601static struct task_struct *
4602find_lively_task_by_vpid(pid_t vpid)
4603{
4604 struct task_struct *task;
4605
4606 rcu_read_lock();
4607 if (!vpid)
4608 task = current;
4609 else
4610 task = find_task_by_vpid(vpid);
4611 if (task)
4612 get_task_struct(task);
4613 rcu_read_unlock();
4614
4615 if (!task)
4616 return ERR_PTR(-ESRCH);
4617
4618 return task;
4619}
4620
4621
4622
4623
4624static struct perf_event_context *
4625find_get_context(struct pmu *pmu, struct task_struct *task,
4626 struct perf_event *event)
4627{
4628 struct perf_event_context *ctx, *clone_ctx = NULL;
4629 struct perf_cpu_context *cpuctx;
4630 void *task_ctx_data = NULL;
4631 unsigned long flags;
4632 int ctxn, err;
4633 int cpu = event->cpu;
4634
4635 if (!task) {
4636
4637 err = perf_allow_cpu(&event->attr);
4638 if (err)
4639 return ERR_PTR(err);
4640
4641 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
4642 ctx = &cpuctx->ctx;
4643 get_ctx(ctx);
4644 raw_spin_lock_irqsave(&ctx->lock, flags);
4645 ++ctx->pin_count;
4646 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4647
4648 return ctx;
4649 }
4650
4651 err = -EINVAL;
4652 ctxn = pmu->task_ctx_nr;
4653 if (ctxn < 0)
4654 goto errout;
4655
4656 if (event->attach_state & PERF_ATTACH_TASK_DATA) {
4657 task_ctx_data = alloc_task_ctx_data(pmu);
4658 if (!task_ctx_data) {
4659 err = -ENOMEM;
4660 goto errout;
4661 }
4662 }
4663
4664retry:
4665 ctx = perf_lock_task_context(task, ctxn, &flags);
4666 if (ctx) {
4667 clone_ctx = unclone_ctx(ctx);
4668 ++ctx->pin_count;
4669
4670 if (task_ctx_data && !ctx->task_ctx_data) {
4671 ctx->task_ctx_data = task_ctx_data;
4672 task_ctx_data = NULL;
4673 }
4674 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4675
4676 if (clone_ctx)
4677 put_ctx(clone_ctx);
4678 } else {
4679 ctx = alloc_perf_context(pmu, task);
4680 err = -ENOMEM;
4681 if (!ctx)
4682 goto errout;
4683
4684 if (task_ctx_data) {
4685 ctx->task_ctx_data = task_ctx_data;
4686 task_ctx_data = NULL;
4687 }
4688
4689 err = 0;
4690 mutex_lock(&task->perf_event_mutex);
4691
4692
4693
4694
4695 if (task->flags & PF_EXITING)
4696 err = -ESRCH;
4697 else if (task->perf_event_ctxp[ctxn])
4698 err = -EAGAIN;
4699 else {
4700 get_ctx(ctx);
4701 ++ctx->pin_count;
4702 rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
4703 }
4704 mutex_unlock(&task->perf_event_mutex);
4705
4706 if (unlikely(err)) {
4707 put_ctx(ctx);
4708
4709 if (err == -EAGAIN)
4710 goto retry;
4711 goto errout;
4712 }
4713 }
4714
4715 free_task_ctx_data(pmu, task_ctx_data);
4716 return ctx;
4717
4718errout:
4719 free_task_ctx_data(pmu, task_ctx_data);
4720 return ERR_PTR(err);
4721}
4722
4723static void perf_event_free_filter(struct perf_event *event);
4724
4725static void free_event_rcu(struct rcu_head *head)
4726{
4727 struct perf_event *event;
4728
4729 event = container_of(head, struct perf_event, rcu_head);
4730 if (event->ns)
4731 put_pid_ns(event->ns);
4732 perf_event_free_filter(event);
4733 kmem_cache_free(perf_event_cache, event);
4734}
4735
4736static void ring_buffer_attach(struct perf_event *event,
4737 struct perf_buffer *rb);
4738
4739static void detach_sb_event(struct perf_event *event)
4740{
4741 struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
4742
4743 raw_spin_lock(&pel->lock);
4744 list_del_rcu(&event->sb_list);
4745 raw_spin_unlock(&pel->lock);
4746}
4747
4748static bool is_sb_event(struct perf_event *event)
4749{
4750 struct perf_event_attr *attr = &event->attr;
4751
4752 if (event->parent)
4753 return false;
4754
4755 if (event->attach_state & PERF_ATTACH_TASK)
4756 return false;
4757
4758 if (attr->mmap || attr->mmap_data || attr->mmap2 ||
4759 attr->comm || attr->comm_exec ||
4760 attr->task || attr->ksymbol ||
4761 attr->context_switch || attr->text_poke ||
4762 attr->bpf_event)
4763 return true;
4764 return false;
4765}
4766
4767static void unaccount_pmu_sb_event(struct perf_event *event)
4768{
4769 if (is_sb_event(event))
4770 detach_sb_event(event);
4771}
4772
4773static void unaccount_event_cpu(struct perf_event *event, int cpu)
4774{
4775 if (event->parent)
4776 return;
4777
4778 if (is_cgroup_event(event))
4779 atomic_dec(&per_cpu(perf_cgroup_events, cpu));
4780}
4781
4782#ifdef CONFIG_NO_HZ_FULL
4783static DEFINE_SPINLOCK(nr_freq_lock);
4784#endif
4785
4786static void unaccount_freq_event_nohz(void)
4787{
4788#ifdef CONFIG_NO_HZ_FULL
4789 spin_lock(&nr_freq_lock);
4790 if (atomic_dec_and_test(&nr_freq_events))
4791 tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
4792 spin_unlock(&nr_freq_lock);
4793#endif
4794}
4795
4796static void unaccount_freq_event(void)
4797{
4798 if (tick_nohz_full_enabled())
4799 unaccount_freq_event_nohz();
4800 else
4801 atomic_dec(&nr_freq_events);
4802}
4803
4804static void unaccount_event(struct perf_event *event)
4805{
4806 bool dec = false;
4807
4808 if (event->parent)
4809 return;
4810
4811 if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
4812 dec = true;
4813 if (event->attr.mmap || event->attr.mmap_data)
4814 atomic_dec(&nr_mmap_events);
4815 if (event->attr.build_id)
4816 atomic_dec(&nr_build_id_events);
4817 if (event->attr.comm)
4818 atomic_dec(&nr_comm_events);
4819 if (event->attr.namespaces)
4820 atomic_dec(&nr_namespaces_events);
4821 if (event->attr.cgroup)
4822 atomic_dec(&nr_cgroup_events);
4823 if (event->attr.task)
4824 atomic_dec(&nr_task_events);
4825 if (event->attr.freq)
4826 unaccount_freq_event();
4827 if (event->attr.context_switch) {
4828 dec = true;
4829 atomic_dec(&nr_switch_events);
4830 }
4831 if (is_cgroup_event(event))
4832 dec = true;
4833 if (has_branch_stack(event))
4834 dec = true;
4835 if (event->attr.ksymbol)
4836 atomic_dec(&nr_ksymbol_events);
4837 if (event->attr.bpf_event)
4838 atomic_dec(&nr_bpf_events);
4839 if (event->attr.text_poke)
4840 atomic_dec(&nr_text_poke_events);
4841
4842 if (dec) {
4843 if (!atomic_add_unless(&perf_sched_count, -1, 1))
4844 schedule_delayed_work(&perf_sched_work, HZ);
4845 }
4846
4847 unaccount_event_cpu(event, event->cpu);
4848
4849 unaccount_pmu_sb_event(event);
4850}
4851
4852static void perf_sched_delayed(struct work_struct *work)
4853{
4854 mutex_lock(&perf_sched_mutex);
4855 if (atomic_dec_and_test(&perf_sched_count))
4856 static_branch_disable(&perf_sched_events);
4857 mutex_unlock(&perf_sched_mutex);
4858}
4859
4860
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872static int exclusive_event_init(struct perf_event *event)
4873{
4874 struct pmu *pmu = event->pmu;
4875
4876 if (!is_exclusive_pmu(pmu))
4877 return 0;
4878
4879	/*
4880	 * Prevent co-existence of per-task and cpu-wide events on the
4881	 * same exclusive pmu.
4882	 *
4883	 * Negative pmu::exclusive_cnt means there are cpu-wide
4884	 * events on this "exclusive" pmu, positive means there are
4885	 * per-task events.
4886	 *
4887	 * Since this is called in perf_event_alloc() path, event::ctx
4888	 * doesn't exist yet; it is, however, safe to use PERF_ATTACH_TASK
4889	 * to mean "per-task event", because unlike other attach states it
4890	 * never gets cleared.
4891	 */
4892 if (event->attach_state & PERF_ATTACH_TASK) {
4893 if (!atomic_inc_unless_negative(&pmu->exclusive_cnt))
4894 return -EBUSY;
4895 } else {
4896 if (!atomic_dec_unless_positive(&pmu->exclusive_cnt))
4897 return -EBUSY;
4898 }
4899
4900 return 0;
4901}
4902
4903static void exclusive_event_destroy(struct perf_event *event)
4904{
4905 struct pmu *pmu = event->pmu;
4906
4907 if (!is_exclusive_pmu(pmu))
4908 return;
4909
4910
4911 if (event->attach_state & PERF_ATTACH_TASK)
4912 atomic_dec(&pmu->exclusive_cnt);
4913 else
4914 atomic_inc(&pmu->exclusive_cnt);
4915}
4916
4917static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
4918{
4919 if ((e1->pmu == e2->pmu) &&
4920 (e1->cpu == e2->cpu ||
4921 e1->cpu == -1 ||
4922 e2->cpu == -1))
4923 return true;
4924 return false;
4925}
4926
4927static bool exclusive_event_installable(struct perf_event *event,
4928 struct perf_event_context *ctx)
4929{
4930 struct perf_event *iter_event;
4931 struct pmu *pmu = event->pmu;
4932
4933 lockdep_assert_held(&ctx->mutex);
4934
4935 if (!is_exclusive_pmu(pmu))
4936 return true;
4937
4938 list_for_each_entry(iter_event, &ctx->event_list, event_entry) {
4939 if (exclusive_event_match(iter_event, event))
4940 return false;
4941 }
4942
4943 return true;
4944}
4945
4946static void perf_addr_filters_splice(struct perf_event *event,
4947 struct list_head *head);
4948
4949static void _free_event(struct perf_event *event)
4950{
4951 irq_work_sync(&event->pending);
4952
4953 unaccount_event(event);
4954
4955 security_perf_event_free(event);
4956
4957 if (event->rb) {
4958
4959
4960
4961
4962
4963
4964 mutex_lock(&event->mmap_mutex);
4965 ring_buffer_attach(event, NULL);
4966 mutex_unlock(&event->mmap_mutex);
4967 }
4968
4969 if (is_cgroup_event(event))
4970 perf_detach_cgroup(event);
4971
4972 if (!event->parent) {
4973 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
4974 put_callchain_buffers();
4975 }
4976
4977 perf_event_free_bpf_prog(event);
4978 perf_addr_filters_splice(event, NULL);
4979 kfree(event->addr_filter_ranges);
4980
4981 if (event->destroy)
4982 event->destroy(event);
4983
4984
4985
4986
4987
4988 if (event->hw.target)
4989 put_task_struct(event->hw.target);
4990
4991
4992
4993
4994
4995 if (event->ctx)
4996 put_ctx(event->ctx);
4997
4998 exclusive_event_destroy(event);
4999 module_put(event->pmu->module);
5000
5001 call_rcu(&event->rcu_head, free_event_rcu);
5002}
5003
5004
5005
5006
5007
5008static void free_event(struct perf_event *event)
5009{
5010 if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
5011 "unexpected event refcount: %ld; ptr=%p\n",
5012 atomic_long_read(&event->refcount), event)) {
5013
5014 return;
5015 }
5016
5017 _free_event(event);
5018}
5019
5020
5021
5022
5023static void perf_remove_from_owner(struct perf_event *event)
5024{
5025 struct task_struct *owner;
5026
5027 rcu_read_lock();
5028
5029
5030
5031
5032
5033
5034 owner = READ_ONCE(event->owner);
5035 if (owner) {
5036
5037
5038
5039
5040
5041 get_task_struct(owner);
5042 }
5043 rcu_read_unlock();
5044
5045 if (owner) {
5046
5047
5048
5049
5050
5051
5052
5053
5054 mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING);
5055
5056
5057
5058
5059
5060
5061
5062 if (event->owner) {
5063 list_del_init(&event->owner_entry);
5064 smp_store_release(&event->owner, NULL);
5065 }
5066 mutex_unlock(&owner->perf_event_mutex);
5067 put_task_struct(owner);
5068 }
5069}
5070
5071static void put_event(struct perf_event *event)
5072{
5073 if (!atomic_long_dec_and_test(&event->refcount))
5074 return;
5075
5076 _free_event(event);
5077}
5078
5079/*
5080 * Kill an event dead; while event:refcount will preserve the event
5081 * object, it will not preserve its functionality. Once the last 'user'
5082 * gives up the object, we'll destroy the thing.
5083 */
5084int perf_event_release_kernel(struct perf_event *event)
5085{
5086 struct perf_event_context *ctx = event->ctx;
5087 struct perf_event *child, *tmp;
5088 LIST_HEAD(free_list);
5089
5090
5091
5092
5093
5094 if (!ctx) {
5095 WARN_ON_ONCE(event->attach_state &
5096 (PERF_ATTACH_CONTEXT|PERF_ATTACH_GROUP));
5097 goto no_ctx;
5098 }
5099
5100 if (!is_kernel_event(event))
5101 perf_remove_from_owner(event);
5102
5103 ctx = perf_event_ctx_lock(event);
5104 WARN_ON_ONCE(ctx->parent_ctx);
5105 perf_remove_from_context(event, DETACH_GROUP);
5106
5107 raw_spin_lock_irq(&ctx->lock);
5108	/*
5109	 * Mark this event as STATE_DEAD, there is no external reference to it
5110	 * anymore.
5111	 *
5112	 * Anybody acquiring event->child_mutex after the below loop _must_
5113	 * also see this, most importantly inherit_event() which will avoid
5114	 * placing more children on the list.
5115	 *
5116	 * Thus this guarantees that we will in fact observe and kill _ALL_
5117	 * child events.
5118	 */
5119 event->state = PERF_EVENT_STATE_DEAD;
5120 raw_spin_unlock_irq(&ctx->lock);
5121
5122 perf_event_ctx_unlock(event, ctx);
5123
5124again:
5125 mutex_lock(&event->child_mutex);
5126 list_for_each_entry(child, &event->child_list, child_list) {
5127
5128
5129
5130
5131
5132 ctx = READ_ONCE(child->ctx);
5133
5134
5135
5136
5137
5138
5139
5140
5141 get_ctx(ctx);
5142
5143
5144
5145
5146
5147
5148 mutex_unlock(&event->child_mutex);
5149 mutex_lock(&ctx->mutex);
5150 mutex_lock(&event->child_mutex);
5151
5152
5153
5154
5155
5156
5157 tmp = list_first_entry_or_null(&event->child_list,
5158 struct perf_event, child_list);
5159 if (tmp == child) {
5160 perf_remove_from_context(child, DETACH_GROUP);
5161 list_move(&child->child_list, &free_list);
5162
5163
5164
5165
5166 put_event(event);
5167 }
5168
5169 mutex_unlock(&event->child_mutex);
5170 mutex_unlock(&ctx->mutex);
5171 put_ctx(ctx);
5172 goto again;
5173 }
5174 mutex_unlock(&event->child_mutex);
5175
5176 list_for_each_entry_safe(child, tmp, &free_list, child_list) {
5177 void *var = &child->ctx->refcount;
5178
5179 list_del(&child->child_list);
5180 free_event(child);
5181
5182
5183
5184
5185
5186 smp_mb();
5187 wake_up_var(var);
5188 }
5189
5190no_ctx:
5191 put_event(event);
5192 return 0;
5193}
5194EXPORT_SYMBOL_GPL(perf_event_release_kernel);
5195
5196/*
5197 * Called when the last reference to the file is gone.
5198 */
5199static int perf_release(struct inode *inode, struct file *file)
5200{
5201 perf_event_release_kernel(file->private_data);
5202 return 0;
5203}
5204
5205static u64 __perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
5206{
5207 struct perf_event *child;
5208 u64 total = 0;
5209
5210 *enabled = 0;
5211 *running = 0;
5212
5213 mutex_lock(&event->child_mutex);
5214
5215 (void)perf_event_read(event, false);
5216 total += perf_event_count(event);
5217
5218 *enabled += event->total_time_enabled +
5219 atomic64_read(&event->child_total_time_enabled);
5220 *running += event->total_time_running +
5221 atomic64_read(&event->child_total_time_running);
5222
5223 list_for_each_entry(child, &event->child_list, child_list) {
5224 (void)perf_event_read(child, false);
5225 total += perf_event_count(child);
5226 *enabled += child->total_time_enabled;
5227 *running += child->total_time_running;
5228 }
5229 mutex_unlock(&event->child_mutex);
5230
5231 return total;
5232}
5233
5234u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
5235{
5236 struct perf_event_context *ctx;
5237 u64 count;
5238
5239 ctx = perf_event_ctx_lock(event);
5240 count = __perf_event_read_value(event, enabled, running);
5241 perf_event_ctx_unlock(event, ctx);
5242
5243 return count;
5244}
5245EXPORT_SYMBOL_GPL(perf_event_read_value);
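/*
 * A minimal in-kernel usage sketch (illustrative only; attr setup and error
 * handling trimmed):
 *
 *	struct perf_event_attr attr = {
 *		.type	= PERF_TYPE_HARDWARE,
 *		.config	= PERF_COUNT_HW_CPU_CYCLES,
 *		.size	= sizeof(attr),
 *	};
 *	struct perf_event *event;
 *	u64 count, enabled, running;
 *
 *	event = perf_event_create_kernel_counter(&attr, 0, NULL, NULL, NULL);
 *	if (!IS_ERR(event)) {
 *		count = perf_event_read_value(event, &enabled, &running);
 *		perf_event_release_kernel(event);
 *	}
 */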
5246
5247static int __perf_read_group_add(struct perf_event *leader,
5248 u64 read_format, u64 *values)
5249{
5250 struct perf_event_context *ctx = leader->ctx;
5251 struct perf_event *sub;
5252 unsigned long flags;
5253 int n = 1;
5254 int ret;
5255
5256 ret = perf_event_read(leader, true);
5257 if (ret)
5258 return ret;
5259
5260 raw_spin_lock_irqsave(&ctx->lock, flags);
5261
5262	/*
5263	 * Since we co-schedule groups, {enabled,running} times of siblings
5264	 * will be identical to those of the group leader, and so we only need
5265	 * to compute these for the group leader.
5266	 */
5267 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
5268 values[n++] += leader->total_time_enabled +
5269 atomic64_read(&leader->child_total_time_enabled);
5270 }
5271
5272 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
5273 values[n++] += leader->total_time_running +
5274 atomic64_read(&leader->child_total_time_running);
5275 }
5276
5277
5278
5279
5280 values[n++] += perf_event_count(leader);
5281 if (read_format & PERF_FORMAT_ID)
5282 values[n++] = primary_event_id(leader);
5283
5284 for_each_sibling_event(sub, leader) {
5285 values[n++] += perf_event_count(sub);
5286 if (read_format & PERF_FORMAT_ID)
5287 values[n++] = primary_event_id(sub);
5288 }
5289
5290 raw_spin_unlock_irqrestore(&ctx->lock, flags);
5291 return 0;
5292}
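/*
 * The resulting buffer layout for PERF_FORMAT_GROUP reads is therefore:
 *
 *	{ u64 nr;
 *	  { u64 time_enabled; }	&& PERF_FORMAT_TOTAL_TIME_ENABLED
 *	  { u64 time_running; }	&& PERF_FORMAT_TOTAL_TIME_RUNNING
 *	  { u64 value; { u64 id; } && PERF_FORMAT_ID } cntr[nr];
 *	}
 */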
5293
5294static int perf_read_group(struct perf_event *event,
5295 u64 read_format, char __user *buf)
5296{
5297 struct perf_event *leader = event->group_leader, *child;
5298 struct perf_event_context *ctx = leader->ctx;
5299 int ret;
5300 u64 *values;
5301
5302 lockdep_assert_held(&ctx->mutex);
5303
5304 values = kzalloc(event->read_size, GFP_KERNEL);
5305 if (!values)
5306 return -ENOMEM;
5307
5308 values[0] = 1 + leader->nr_siblings;
5309
5310
5311
5312
5313
5314 mutex_lock(&leader->child_mutex);
5315
5316 ret = __perf_read_group_add(leader, read_format, values);
5317 if (ret)
5318 goto unlock;
5319
5320 list_for_each_entry(child, &leader->child_list, child_list) {
5321 ret = __perf_read_group_add(child, read_format, values);
5322 if (ret)
5323 goto unlock;
5324 }
5325
5326 mutex_unlock(&leader->child_mutex);
5327
5328 ret = event->read_size;
5329 if (copy_to_user(buf, values, event->read_size))
5330 ret = -EFAULT;
5331 goto out;
5332
5333unlock:
5334 mutex_unlock(&leader->child_mutex);
5335out:
5336 kfree(values);
5337 return ret;
5338}
5339
5340static int perf_read_one(struct perf_event *event,
5341 u64 read_format, char __user *buf)
5342{
5343 u64 enabled, running;
5344 u64 values[4];
5345 int n = 0;
5346
5347 values[n++] = __perf_event_read_value(event, &enabled, &running);
5348 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
5349 values[n++] = enabled;
5350 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
5351 values[n++] = running;
5352 if (read_format & PERF_FORMAT_ID)
5353 values[n++] = primary_event_id(event);
5354
5355 if (copy_to_user(buf, values, n * sizeof(u64)))
5356 return -EFAULT;
5357
5358 return n * sizeof(u64);
5359}
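/*
 * For non-group reads the buffer layout mirrors the code above:
 *
 *	{ u64 value;
 *	  { u64 time_enabled; }	&& PERF_FORMAT_TOTAL_TIME_ENABLED
 *	  { u64 time_running; }	&& PERF_FORMAT_TOTAL_TIME_RUNNING
 *	  { u64 id;           }	&& PERF_FORMAT_ID
 *	}
 */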
5360
5361static bool is_event_hup(struct perf_event *event)
5362{
5363 bool no_children;
5364
5365 if (event->state > PERF_EVENT_STATE_EXIT)
5366 return false;
5367
5368 mutex_lock(&event->child_mutex);
5369 no_children = list_empty(&event->child_list);
5370 mutex_unlock(&event->child_mutex);
5371 return no_children;
5372}
5373
5374
5375
5376
5377static ssize_t
5378__perf_read(struct perf_event *event, char __user *buf, size_t count)
5379{
5380 u64 read_format = event->attr.read_format;
5381 int ret;
5382
5383	/*
5384	 * Return end-of-file for a read on an event that is in
5385	 * error state (i.e. because it was pinned but it couldn't be
5386	 * scheduled on to the CPU at some point).
5387	 */
5388 if (event->state == PERF_EVENT_STATE_ERROR)
5389 return 0;
5390
5391 if (count < event->read_size)
5392 return -ENOSPC;
5393
5394 WARN_ON_ONCE(event->ctx->parent_ctx);
5395 if (read_format & PERF_FORMAT_GROUP)
5396 ret = perf_read_group(event, read_format, buf);
5397 else
5398 ret = perf_read_one(event, read_format, buf);
5399
5400 return ret;
5401}
5402
5403static ssize_t
5404perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
5405{
5406 struct perf_event *event = file->private_data;
5407 struct perf_event_context *ctx;
5408 int ret;
5409
5410 ret = security_perf_event_read(event);
5411 if (ret)
5412 return ret;
5413
5414 ctx = perf_event_ctx_lock(event);
5415 ret = __perf_read(event, buf, count);
5416 perf_event_ctx_unlock(event, ctx);
5417
5418 return ret;
5419}
5420
5421static __poll_t perf_poll(struct file *file, poll_table *wait)
5422{
5423 struct perf_event *event = file->private_data;
5424 struct perf_buffer *rb;
5425 __poll_t events = EPOLLHUP;
5426
5427 poll_wait(file, &event->waitq, wait);
5428
5429 if (is_event_hup(event))
5430 return events;
5431
5432
5433
5434
5435
5436 mutex_lock(&event->mmap_mutex);
5437 rb = event->rb;
5438 if (rb)
5439 events = atomic_xchg(&rb->poll, 0);
5440 mutex_unlock(&event->mmap_mutex);
5441 return events;
5442}
5443
5444static void _perf_event_reset(struct perf_event *event)
5445{
5446 (void)perf_event_read(event, false);
5447 local64_set(&event->count, 0);
5448 perf_event_update_userpage(event);
5449}
5450
5451
5452u64 perf_event_pause(struct perf_event *event, bool reset)
5453{
5454 struct perf_event_context *ctx;
5455 u64 count;
5456
5457 ctx = perf_event_ctx_lock(event);
5458 WARN_ON_ONCE(event->attr.inherit);
5459 _perf_event_disable(event);
5460 count = local64_read(&event->count);
5461 if (reset)
5462 local64_set(&event->count, 0);
5463 perf_event_ctx_unlock(event, ctx);
5464
5465 return count;
5466}
5467EXPORT_SYMBOL_GPL(perf_event_pause);
5468
5469
5470
5471
5472
5473
5474
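/*
 * Apply @func to an event and to every inherited child event, under the
 * event's child_mutex so the child list cannot change underneath us.
 */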
5475static void perf_event_for_each_child(struct perf_event *event,
5476 void (*func)(struct perf_event *))
5477{
5478 struct perf_event *child;
5479
5480 WARN_ON_ONCE(event->ctx->parent_ctx);
5481
5482 mutex_lock(&event->child_mutex);
5483 func(event);
5484 list_for_each_entry(child, &event->child_list, child_list)
5485 func(child);
5486 mutex_unlock(&event->child_mutex);
5487}
5488
5489static void perf_event_for_each(struct perf_event *event,
5490 void (*func)(struct perf_event *))
5491{
5492 struct perf_event_context *ctx = event->ctx;
5493 struct perf_event *sibling;
5494
5495 lockdep_assert_held(&ctx->mutex);
5496
5497 event = event->group_leader;
5498
5499 perf_event_for_each_child(event, func);
5500 for_each_sibling_event(sibling, event)
5501 perf_event_for_each_child(sibling, func);
5502}
5503
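/*
 * Update the sampling period (or frequency) of an event.  Runs via
 * event_function_call() on the event's CPU: an active event is stopped,
 * its pending period is cleared, and it is restarted with the new value.
 */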
5504static void __perf_event_period(struct perf_event *event,
5505 struct perf_cpu_context *cpuctx,
5506 struct perf_event_context *ctx,
5507 void *info)
5508{
5509 u64 value = *((u64 *)info);
5510 bool active;
5511
5512 if (event->attr.freq) {
5513 event->attr.sample_freq = value;
5514 } else {
5515 event->attr.sample_period = value;
5516 event->hw.sample_period = value;
5517 }
5518
5519 active = (event->state == PERF_EVENT_STATE_ACTIVE);
5520 if (active) {
5521 perf_pmu_disable(ctx->pmu);
5522
5523
5524
5525
5526 if (event->hw.interrupts == MAX_INTERRUPTS) {
5527 event->hw.interrupts = 0;
5528 perf_log_throttle(event, 1);
5529 }
5530 event->pmu->stop(event, PERF_EF_UPDATE);
5531 }
5532
5533 local64_set(&event->hw.period_left, 0);
5534
5535 if (active) {
5536 event->pmu->start(event, PERF_EF_RELOAD);
5537 perf_pmu_enable(ctx->pmu);
5538 }
5539}
5540
5541static int perf_event_check_period(struct perf_event *event, u64 value)
5542{
5543 return event->pmu->check_period(event, value);
5544}
5545
5546static int _perf_event_period(struct perf_event *event, u64 value)
5547{
5548 if (!is_sampling_event(event))
5549 return -EINVAL;
5550
5551 if (!value)
5552 return -EINVAL;
5553
5554 if (event->attr.freq && value > sysctl_perf_event_sample_rate)
5555 return -EINVAL;
5556
5557 if (perf_event_check_period(event, value))
5558 return -EINVAL;
5559
5560 if (!event->attr.freq && (value & (1ULL << 63)))
5561 return -EINVAL;
5562
5563 event_function_call(event, __perf_event_period, &value);
5564
5565 return 0;
5566}
5567
5568int perf_event_period(struct perf_event *event, u64 value)
5569{
5570 struct perf_event_context *ctx;
5571 int ret;
5572
5573 ctx = perf_event_ctx_lock(event);
5574 ret = _perf_event_period(event, value);
5575 perf_event_ctx_unlock(event, ctx);
5576
5577 return ret;
5578}
5579EXPORT_SYMBOL_GPL(perf_event_period);
5580
5581static const struct file_operations perf_fops;
5582
5583static inline int perf_fget_light(int fd, struct fd *p)
5584{
5585 struct fd f = fdget(fd);
5586 if (!f.file)
5587 return -EBADF;
5588
5589 if (f.file->f_op != &perf_fops) {
5590 fdput(f);
5591 return -EBADF;
5592 }
5593 *p = f;
5594 return 0;
5595}
5596
5597static int perf_event_set_output(struct perf_event *event,
5598 struct perf_event *output_event);
5599static int perf_event_set_filter(struct perf_event *event, void __user *arg);
5600static int perf_copy_attr(struct perf_event_attr __user *uattr,
5601 struct perf_event_attr *attr);
5602
5603static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
5604{
5605 void (*func)(struct perf_event *);
5606 u32 flags = arg;
5607
5608 switch (cmd) {
5609 case PERF_EVENT_IOC_ENABLE:
5610 func = _perf_event_enable;
5611 break;
5612 case PERF_EVENT_IOC_DISABLE:
5613 func = _perf_event_disable;
5614 break;
5615 case PERF_EVENT_IOC_RESET:
5616 func = _perf_event_reset;
5617 break;
5618
5619 case PERF_EVENT_IOC_REFRESH:
5620 return _perf_event_refresh(event, arg);
5621
5622 case PERF_EVENT_IOC_PERIOD:
5623 {
5624 u64 value;
5625
5626 if (copy_from_user(&value, (u64 __user *)arg, sizeof(value)))
5627 return -EFAULT;
5628
5629 return _perf_event_period(event, value);
5630 }
5631 case PERF_EVENT_IOC_ID:
5632 {
5633 u64 id = primary_event_id(event);
5634
5635 if (copy_to_user((void __user *)arg, &id, sizeof(id)))
5636 return -EFAULT;
5637 return 0;
5638 }
5639
5640 case PERF_EVENT_IOC_SET_OUTPUT:
5641 {
5642 int ret;
5643 if (arg != -1) {
5644 struct perf_event *output_event;
5645 struct fd output;
5646 ret = perf_fget_light(arg, &output);
5647 if (ret)
5648 return ret;
5649 output_event = output.file->private_data;
5650 ret = perf_event_set_output(event, output_event);
5651 fdput(output);
5652 } else {
5653 ret = perf_event_set_output(event, NULL);
5654 }
5655 return ret;
5656 }
5657
5658 case PERF_EVENT_IOC_SET_FILTER:
5659 return perf_event_set_filter(event, (void __user *)arg);
5660
5661 case PERF_EVENT_IOC_SET_BPF:
5662 {
5663 struct bpf_prog *prog;
5664 int err;
5665
5666 prog = bpf_prog_get(arg);
5667 if (IS_ERR(prog))
5668 return PTR_ERR(prog);
5669
5670 err = perf_event_set_bpf_prog(event, prog, 0);
5671 if (err) {
5672 bpf_prog_put(prog);
5673 return err;
5674 }
5675
5676 return 0;
5677 }
5678
5679 case PERF_EVENT_IOC_PAUSE_OUTPUT: {
5680 struct perf_buffer *rb;
5681
5682 rcu_read_lock();
5683 rb = rcu_dereference(event->rb);
5684 if (!rb || !rb->nr_pages) {
5685 rcu_read_unlock();
5686 return -EINVAL;
5687 }
5688 rb_toggle_paused(rb, !!arg);
5689 rcu_read_unlock();
5690 return 0;
5691 }
5692
5693 case PERF_EVENT_IOC_QUERY_BPF:
5694 return perf_event_query_prog_array(event, (void __user *)arg);
5695
5696 case PERF_EVENT_IOC_MODIFY_ATTRIBUTES: {
5697 struct perf_event_attr new_attr;
5698 int err = perf_copy_attr((struct perf_event_attr __user *)arg,
5699 &new_attr);
5700
5701 if (err)
5702 return err;
5703
5704 return perf_event_modify_attr(event, &new_attr);
5705 }
5706 default:
5707 return -ENOTTY;
5708 }
5709
5710 if (flags & PERF_IOC_FLAG_GROUP)
5711 perf_event_for_each(event, func);
5712 else
5713 perf_event_for_each_child(event, func);
5714
5715 return 0;
5716}
5717
5718static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
5719{
5720 struct perf_event *event = file->private_data;
5721 struct perf_event_context *ctx;
5722 long ret;
5723
5724
5725 ret = security_perf_event_write(event);
5726 if (ret)
5727 return ret;
5728
5729 ctx = perf_event_ctx_lock(event);
5730 ret = _perf_ioctl(event, cmd, arg);
5731 perf_event_ctx_unlock(event, ctx);
5732
5733 return ret;
5734}
5735
5736#ifdef CONFIG_COMPAT
5737static long perf_compat_ioctl(struct file *file, unsigned int cmd,
5738 unsigned long arg)
5739{
5740 switch (_IOC_NR(cmd)) {
5741 case _IOC_NR(PERF_EVENT_IOC_SET_FILTER):
5742 case _IOC_NR(PERF_EVENT_IOC_ID):
5743 case _IOC_NR(PERF_EVENT_IOC_QUERY_BPF):
5744 case _IOC_NR(PERF_EVENT_IOC_MODIFY_ATTRIBUTES):
5745
5746 if (_IOC_SIZE(cmd) == sizeof(compat_uptr_t)) {
5747 cmd &= ~IOCSIZE_MASK;
5748 cmd |= sizeof(void *) << IOCSIZE_SHIFT;
5749 }
5750 break;
5751 }
5752 return perf_ioctl(file, cmd, arg);
5753}
5754#else
5755# define perf_compat_ioctl NULL
5756#endif
5757
5758int perf_event_task_enable(void)
5759{
5760 struct perf_event_context *ctx;
5761 struct perf_event *event;
5762
5763 mutex_lock(&current->perf_event_mutex);
5764 list_for_each_entry(event, &current->perf_event_list, owner_entry) {
5765 ctx = perf_event_ctx_lock(event);
5766 perf_event_for_each_child(event, _perf_event_enable);
5767 perf_event_ctx_unlock(event, ctx);
5768 }
5769 mutex_unlock(&current->perf_event_mutex);
5770
5771 return 0;
5772}
5773
5774int perf_event_task_disable(void)
5775{
5776 struct perf_event_context *ctx;
5777 struct perf_event *event;
5778
5779 mutex_lock(&current->perf_event_mutex);
5780 list_for_each_entry(event, &current->perf_event_list, owner_entry) {
5781 ctx = perf_event_ctx_lock(event);
5782 perf_event_for_each_child(event, _perf_event_disable);
5783 perf_event_ctx_unlock(event, ctx);
5784 }
5785 mutex_unlock(&current->perf_event_mutex);
5786
5787 return 0;
5788}
5789
5790static int perf_event_index(struct perf_event *event)
5791{
5792 if (event->hw.state & PERF_HES_STOPPED)
5793 return 0;
5794
5795 if (event->state != PERF_EVENT_STATE_ACTIVE)
5796 return 0;
5797
5798 return event->pmu->event_idx(event);
5799}
5800
5801static void calc_timer_values(struct perf_event *event,
5802 u64 *now,
5803 u64 *enabled,
5804 u64 *running)
5805{
5806 u64 ctx_time;
5807
5808 *now = perf_clock();
5809 ctx_time = event->shadow_ctx_time + *now;
5810 __perf_update_times(event, ctx_time, enabled, running);
5811}
5812
5813static void perf_event_init_userpage(struct perf_event *event)
5814{
5815 struct perf_event_mmap_page *userpg;
5816 struct perf_buffer *rb;
5817
5818 rcu_read_lock();
5819 rb = rcu_dereference(event->rb);
5820 if (!rb)
5821 goto unlock;
5822
5823 userpg = rb->user_page;
5824
5825
5826 userpg->cap_bit0_is_deprecated = 1;
5827 userpg->size = offsetof(struct perf_event_mmap_page, __reserved);
5828 userpg->data_offset = PAGE_SIZE;
5829 userpg->data_size = perf_data_size(rb);
5830
5831unlock:
5832 rcu_read_unlock();
5833}
5834
5835void __weak arch_perf_update_userpage(
5836 struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now)
5837{
5838}
5839
5840
5841
5842
5843
5844
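/*
 * Refresh the mmap()ed user page with the event's current index, count and
 * time values.  userpg->lock is bumped before and after the update (with
 * barriers) so userspace can detect a concurrent update and retry its read.
 */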
5845void perf_event_update_userpage(struct perf_event *event)
5846{
5847 struct perf_event_mmap_page *userpg;
5848 struct perf_buffer *rb;
5849 u64 enabled, running, now;
5850
5851 rcu_read_lock();
5852 rb = rcu_dereference(event->rb);
5853 if (!rb)
5854 goto unlock;
5855
5856
5857
5858
5859
5860
5861
5862
5863
5864
5865 calc_timer_values(event, &now, &enabled, &running);
5866
5867 userpg = rb->user_page;
5868
5869
5870
5871
5872 preempt_disable();
5873 ++userpg->lock;
5874 barrier();
5875 userpg->index = perf_event_index(event);
5876 userpg->offset = perf_event_count(event);
5877 if (userpg->index)
5878 userpg->offset -= local64_read(&event->hw.prev_count);
5879
5880 userpg->time_enabled = enabled +
5881 atomic64_read(&event->child_total_time_enabled);
5882
5883 userpg->time_running = running +
5884 atomic64_read(&event->child_total_time_running);
5885
5886 arch_perf_update_userpage(event, userpg, now);
5887
5888 barrier();
5889 ++userpg->lock;
5890 preempt_enable();
5891unlock:
5892 rcu_read_unlock();
5893}
5894EXPORT_SYMBOL_GPL(perf_event_update_userpage);
5895
5896static vm_fault_t perf_mmap_fault(struct vm_fault *vmf)
5897{
5898 struct perf_event *event = vmf->vma->vm_file->private_data;
5899 struct perf_buffer *rb;
5900 vm_fault_t ret = VM_FAULT_SIGBUS;
5901
5902 if (vmf->flags & FAULT_FLAG_MKWRITE) {
5903 if (vmf->pgoff == 0)
5904 ret = 0;
5905 return ret;
5906 }
5907
5908 rcu_read_lock();
5909 rb = rcu_dereference(event->rb);
5910 if (!rb)
5911 goto unlock;
5912
5913 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
5914 goto unlock;
5915
5916 vmf->page = perf_mmap_to_page(rb, vmf->pgoff);
5917 if (!vmf->page)
5918 goto unlock;
5919
5920 get_page(vmf->page);
5921 vmf->page->mapping = vmf->vma->vm_file->f_mapping;
5922 vmf->page->index = vmf->pgoff;
5923
5924 ret = 0;
5925unlock:
5926 rcu_read_unlock();
5927
5928 return ret;
5929}
5930
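/*
 * Attach an event to a ring buffer (or detach it when @rb is NULL): move the
 * event between rb->event_list lists and publish the new buffer pointer with
 * RCU.  Waiters on the old buffer are woken so they don't sleep on a buffer
 * that is going away.
 */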
5931static void ring_buffer_attach(struct perf_event *event,
5932 struct perf_buffer *rb)
5933{
5934 struct perf_buffer *old_rb = NULL;
5935 unsigned long flags;
5936
5937 if (event->rb) {
5938
5939
5940
5941
5942 WARN_ON_ONCE(event->rcu_pending);
5943
5944 old_rb = event->rb;
5945 spin_lock_irqsave(&old_rb->event_lock, flags);
5946 list_del_rcu(&event->rb_entry);
5947 spin_unlock_irqrestore(&old_rb->event_lock, flags);
5948
5949 event->rcu_batches = get_state_synchronize_rcu();
5950 event->rcu_pending = 1;
5951 }
5952
5953 if (rb) {
5954 if (event->rcu_pending) {
5955 cond_synchronize_rcu(event->rcu_batches);
5956 event->rcu_pending = 0;
5957 }
5958
5959 spin_lock_irqsave(&rb->event_lock, flags);
5960 list_add_rcu(&event->rb_entry, &rb->event_list);
5961 spin_unlock_irqrestore(&rb->event_lock, flags);
5962 }
5963
5964
5965
5966
5967
5968
5969
5970
5971
5972
5973
5974 if (has_aux(event))
5975 perf_event_stop(event, 0);
5976
5977 rcu_assign_pointer(event->rb, rb);
5978
5979 if (old_rb) {
5980 ring_buffer_put(old_rb);
5981
5982
5983
5984
5985
5986 wake_up_all(&event->waitq);
5987 }
5988}
5989
5990static void ring_buffer_wakeup(struct perf_event *event)
5991{
5992 struct perf_buffer *rb;
5993
5994 rcu_read_lock();
5995 rb = rcu_dereference(event->rb);
5996 if (rb) {
5997 list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
5998 wake_up_all(&event->waitq);
5999 }
6000 rcu_read_unlock();
6001}
6002
6003struct perf_buffer *ring_buffer_get(struct perf_event *event)
6004{
6005 struct perf_buffer *rb;
6006
6007 rcu_read_lock();
6008 rb = rcu_dereference(event->rb);
6009 if (rb) {
6010 if (!refcount_inc_not_zero(&rb->refcount))
6011 rb = NULL;
6012 }
6013 rcu_read_unlock();
6014
6015 return rb;
6016}
6017
6018void ring_buffer_put(struct perf_buffer *rb)
6019{
6020 if (!refcount_dec_and_test(&rb->refcount))
6021 return;
6022
6023 WARN_ON_ONCE(!list_empty(&rb->event_list));
6024
6025 call_rcu(&rb->rcu_head, rb_free_rcu);
6026}
6027
6028static void perf_mmap_open(struct vm_area_struct *vma)
6029{
6030 struct perf_event *event = vma->vm_file->private_data;
6031
6032 atomic_inc(&event->mmap_count);
6033 atomic_inc(&event->rb->mmap_count);
6034
6035 if (vma->vm_pgoff)
6036 atomic_inc(&event->rb->aux_mmap_count);
6037
6038 if (event->pmu->event_mapped)
6039 event->pmu->event_mapped(event, vma->vm_mm);
6040}
6041
6042static void perf_pmu_output_stop(struct perf_event *event);
6043
6044
6045
6046
6047
6048
6049
6050
6051
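/*
 * Tear down one mmap() of the buffer: drop the AUX area if this was the last
 * AUX mapping, detach this event, and once the last mapping of the buffer is
 * gone, detach every other event still using it and undo the locked-memory
 * accounting.
 */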
6052static void perf_mmap_close(struct vm_area_struct *vma)
6053{
6054 struct perf_event *event = vma->vm_file->private_data;
6055 struct perf_buffer *rb = ring_buffer_get(event);
6056 struct user_struct *mmap_user = rb->mmap_user;
6057 int mmap_locked = rb->mmap_locked;
6058 unsigned long size = perf_data_size(rb);
6059 bool detach_rest = false;
6060
6061 if (event->pmu->event_unmapped)
6062 event->pmu->event_unmapped(event, vma->vm_mm);
6063
6064
6065
6066
6067
6068
6069 if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
6070 atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
6071
6072
6073
6074
6075
6076
6077 perf_pmu_output_stop(event);
6078
6079
6080 atomic_long_sub(rb->aux_nr_pages - rb->aux_mmap_locked, &mmap_user->locked_vm);
6081 atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
6082
6083
6084 rb_free_aux(rb);
6085 WARN_ON_ONCE(refcount_read(&rb->aux_refcount));
6086
6087 mutex_unlock(&event->mmap_mutex);
6088 }
6089
6090 if (atomic_dec_and_test(&rb->mmap_count))
6091 detach_rest = true;
6092
6093 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
6094 goto out_put;
6095
6096 ring_buffer_attach(event, NULL);
6097 mutex_unlock(&event->mmap_mutex);
6098
6099
6100 if (!detach_rest)
6101 goto out_put;
6102
6103
6104
6105
6106
6107
6108again:
6109 rcu_read_lock();
6110 list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
6111 if (!atomic_long_inc_not_zero(&event->refcount)) {
6112
6113
6114
6115
6116 continue;
6117 }
6118 rcu_read_unlock();
6119
6120 mutex_lock(&event->mmap_mutex);
6121
6122
6123
6124
6125
6126
6127
6128
6129
6130
6131 if (event->rb == rb)
6132 ring_buffer_attach(event, NULL);
6133
6134 mutex_unlock(&event->mmap_mutex);
6135 put_event(event);
6136
6137
6138
6139
6140
6141 goto again;
6142 }
6143 rcu_read_unlock();
6144
6145
6146
6147
6148
6149
6150
6151
6152
6153
6154 atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
6155 &mmap_user->locked_vm);
6156 atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
6157 free_uid(mmap_user);
6158
6159out_put:
6160 ring_buffer_put(rb);
6161}
6162
6163static const struct vm_operations_struct perf_mmap_vmops = {
6164 .open = perf_mmap_open,
6165 .close = perf_mmap_close,
6166 .fault = perf_mmap_fault,
6167 .page_mkwrite = perf_mmap_fault,
6168};
6169
6170static int perf_mmap(struct file *file, struct vm_area_struct *vma)
6171{
6172 struct perf_event *event = file->private_data;
6173 unsigned long user_locked, user_lock_limit;
6174 struct user_struct *user = current_user();
6175 struct perf_buffer *rb = NULL;
6176 unsigned long locked, lock_limit;
6177 unsigned long vma_size;
6178 unsigned long nr_pages;
6179 long user_extra = 0, extra = 0;
6180 int ret = 0, flags = 0;
6181
6182
6183
6184
6185
6186
6187 if (event->cpu == -1 && event->attr.inherit)
6188 return -EINVAL;
6189
6190 if (!(vma->vm_flags & VM_SHARED))
6191 return -EINVAL;
6192
6193 ret = security_perf_event_read(event);
6194 if (ret)
6195 return ret;
6196
6197 vma_size = vma->vm_end - vma->vm_start;
6198
6199 if (vma->vm_pgoff == 0) {
6200 nr_pages = (vma_size / PAGE_SIZE) - 1;
6201 } else {
6202
6203
6204
6205
6206
6207 u64 aux_offset, aux_size;
6208
6209 if (!event->rb)
6210 return -EINVAL;
6211
6212 nr_pages = vma_size / PAGE_SIZE;
6213
6214 mutex_lock(&event->mmap_mutex);
6215 ret = -EINVAL;
6216
6217 rb = event->rb;
6218 if (!rb)
6219 goto aux_unlock;
6220
6221 aux_offset = READ_ONCE(rb->user_page->aux_offset);
6222 aux_size = READ_ONCE(rb->user_page->aux_size);
6223
6224 if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
6225 goto aux_unlock;
6226
6227 if (aux_offset != vma->vm_pgoff << PAGE_SHIFT)
6228 goto aux_unlock;
6229
6230
6231 if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff)
6232 goto aux_unlock;
6233
6234 if (aux_size != vma_size || aux_size != nr_pages * PAGE_SIZE)
6235 goto aux_unlock;
6236
6237
6238 if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages)
6239 goto aux_unlock;
6240
6241 if (!is_power_of_2(nr_pages))
6242 goto aux_unlock;
6243
6244 if (!atomic_inc_not_zero(&rb->mmap_count))
6245 goto aux_unlock;
6246
6247 if (rb_has_aux(rb)) {
6248 atomic_inc(&rb->aux_mmap_count);
6249 ret = 0;
6250 goto unlock;
6251 }
6252
6253 atomic_set(&rb->aux_mmap_count, 1);
6254 user_extra = nr_pages;
6255
6256 goto accounting;
6257 }
6258
6259
6260
6261
6262
6263 if (nr_pages != 0 && !is_power_of_2(nr_pages))
6264 return -EINVAL;
6265
6266 if (vma_size != PAGE_SIZE * (1 + nr_pages))
6267 return -EINVAL;
6268
6269 WARN_ON_ONCE(event->ctx->parent_ctx);
6270again:
6271 mutex_lock(&event->mmap_mutex);
6272 if (event->rb) {
6273 if (event->rb->nr_pages != nr_pages) {
6274 ret = -EINVAL;
6275 goto unlock;
6276 }
6277
6278 if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
6279
6280
6281
6282
6283
6284 mutex_unlock(&event->mmap_mutex);
6285 goto again;
6286 }
6287
6288 goto unlock;
6289 }
6290
6291 user_extra = nr_pages + 1;
6292
6293accounting:
6294 user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
6295
6296
6297
6298
6299 user_lock_limit *= num_online_cpus();
6300
6301 user_locked = atomic_long_read(&user->locked_vm);
6302
6303
6304
6305
6306
6307 if (user_locked > user_lock_limit)
6308 user_locked = user_lock_limit;
6309 user_locked += user_extra;
6310
6311 if (user_locked > user_lock_limit) {
6312
6313
6314
6315
6316 extra = user_locked - user_lock_limit;
6317 user_extra -= extra;
6318 }
6319
6320 lock_limit = rlimit(RLIMIT_MEMLOCK);
6321 lock_limit >>= PAGE_SHIFT;
6322 locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
6323
6324 if ((locked > lock_limit) && perf_is_paranoid() &&
6325 !capable(CAP_IPC_LOCK)) {
6326 ret = -EPERM;
6327 goto unlock;
6328 }
6329
6330 WARN_ON(!rb && event->rb);
6331
6332 if (vma->vm_flags & VM_WRITE)
6333 flags |= RING_BUFFER_WRITABLE;
6334
6335 if (!rb) {
6336 rb = rb_alloc(nr_pages,
6337 event->attr.watermark ? event->attr.wakeup_watermark : 0,
6338 event->cpu, flags);
6339
6340 if (!rb) {
6341 ret = -ENOMEM;
6342 goto unlock;
6343 }
6344
6345 atomic_set(&rb->mmap_count, 1);
6346 rb->mmap_user = get_current_user();
6347 rb->mmap_locked = extra;
6348
6349 ring_buffer_attach(event, rb);
6350
6351 perf_event_update_time(event);
6352 perf_set_shadow_time(event, event->ctx);
6353 perf_event_init_userpage(event);
6354 perf_event_update_userpage(event);
6355 } else {
6356 ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
6357 event->attr.aux_watermark, flags);
6358 if (!ret)
6359 rb->aux_mmap_locked = extra;
6360 }
6361
6362unlock:
6363 if (!ret) {
6364 atomic_long_add(user_extra, &user->locked_vm);
6365 atomic64_add(extra, &vma->vm_mm->pinned_vm);
6366
6367 atomic_inc(&event->mmap_count);
6368 } else if (rb) {
6369 atomic_dec(&rb->mmap_count);
6370 }
6371aux_unlock:
6372 mutex_unlock(&event->mmap_mutex);
6373
6374
6375
6376
6377
6378 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
6379 vma->vm_ops = &perf_mmap_vmops;
6380
6381 if (event->pmu->event_mapped)
6382 event->pmu->event_mapped(event, vma->vm_mm);
6383
6384 return ret;
6385}
6386
6387static int perf_fasync(int fd, struct file *filp, int on)
6388{
6389 struct inode *inode = file_inode(filp);
6390 struct perf_event *event = filp->private_data;
6391 int retval;
6392
6393 inode_lock(inode);
6394 retval = fasync_helper(fd, filp, on, &event->fasync);
6395 inode_unlock(inode);
6396
6397 if (retval < 0)
6398 return retval;
6399
6400 return 0;
6401}
6402
6403static const struct file_operations perf_fops = {
6404 .llseek = no_llseek,
6405 .release = perf_release,
6406 .read = perf_read,
6407 .poll = perf_poll,
6408 .unlocked_ioctl = perf_ioctl,
6409 .compat_ioctl = perf_compat_ioctl,
6410 .mmap = perf_mmap,
6411 .fasync = perf_fasync,
6412};
6413
6414
6415
6416
6417
6418
6419
6420
6421static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
6422{
6423
6424 if (event->parent)
6425 event = event->parent;
6426 return &event->fasync;
6427}
6428
6429void perf_event_wakeup(struct perf_event *event)
6430{
6431 ring_buffer_wakeup(event);
6432
6433 if (event->pending_kill) {
6434 kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill);
6435 event->pending_kill = 0;
6436 }
6437}
6438
6439static void perf_sigtrap(struct perf_event *event)
6440{
6441
6442
6443
6444
6445
6446 if (WARN_ON_ONCE(event->ctx->task != current))
6447 return;
6448
6449
6450
6451
6452 if (current->flags & PF_EXITING)
6453 return;
6454
6455 force_sig_perf((void __user *)event->pending_addr,
6456 event->attr.type, event->attr.sig_data);
6457}
6458
6459static void perf_pending_event_disable(struct perf_event *event)
6460{
6461 int cpu = READ_ONCE(event->pending_disable);
6462
6463 if (cpu < 0)
6464 return;
6465
6466 if (cpu == smp_processor_id()) {
6467 WRITE_ONCE(event->pending_disable, -1);
6468
6469 if (event->attr.sigtrap) {
6470 perf_sigtrap(event);
6471 atomic_set_release(&event->event_limit, 1);
6472 return;
6473 }
6474
6475 perf_event_disable_local(event);
6476 return;
6477 }
6478
6479
6480
6481
6482
6483
6484
6485
6486
6487
6488
6489
6490
6491
6492
6493
6494
6495
6496
6497
6498
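	/*
	 * The event is active on another CPU; queue the irq_work there so
	 * the disable (or sigtrap delivery) happens on the CPU the event
	 * runs on.
	 */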
6499 irq_work_queue_on(&event->pending, cpu);
6500}
6501
6502static void perf_pending_event(struct irq_work *entry)
6503{
6504 struct perf_event *event = container_of(entry, struct perf_event, pending);
6505 int rctx;
6506
6507 rctx = perf_swevent_get_recursion_context();
6508
6509
6510
6511
6512
6513 perf_pending_event_disable(event);
6514
6515 if (event->pending_wakeup) {
6516 event->pending_wakeup = 0;
6517 perf_event_wakeup(event);
6518 }
6519
6520 if (rctx >= 0)
6521 perf_swevent_put_recursion_context(rctx);
6522}
6523
6524
6525
6526
6527
6528
6529struct perf_guest_info_callbacks *perf_guest_cbs;
6530
6531int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
6532{
6533 perf_guest_cbs = cbs;
6534 return 0;
6535}
6536EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
6537
6538int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
6539{
6540 perf_guest_cbs = NULL;
6541 return 0;
6542}
6543EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
6544
6545static void
6546perf_output_sample_regs(struct perf_output_handle *handle,
6547 struct pt_regs *regs, u64 mask)
6548{
6549 int bit;
6550 DECLARE_BITMAP(_mask, 64);
6551
6552 bitmap_from_u64(_mask, mask);
6553 for_each_set_bit(bit, _mask, sizeof(mask) * BITS_PER_BYTE) {
6554 u64 val;
6555
6556 val = perf_reg_value(regs, bit);
6557 perf_output_put(handle, val);
6558 }
6559}
6560
6561static void perf_sample_regs_user(struct perf_regs *regs_user,
6562 struct pt_regs *regs)
6563{
6564 if (user_mode(regs)) {
6565 regs_user->abi = perf_reg_abi(current);
6566 regs_user->regs = regs;
6567 } else if (!(current->flags & PF_KTHREAD)) {
6568 perf_get_regs_user(regs_user, regs);
6569 } else {
6570 regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
6571 regs_user->regs = NULL;
6572 }
6573}
6574
6575static void perf_sample_regs_intr(struct perf_regs *regs_intr,
6576 struct pt_regs *regs)
6577{
6578 regs_intr->regs = regs;
6579 regs_intr->abi = perf_reg_abi(current);
6580}
6581
6582
6583
6584
6585
6586
6587
6588
6589
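/*
 * Return the amount of address space between the user stack pointer and
 * TASK_SIZE, which bounds how much user stack can be dumped; 0 means the
 * stack pointer is unavailable.
 */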
6590static u64 perf_ustack_task_size(struct pt_regs *regs)
6591{
6592 unsigned long addr = perf_user_stack_pointer(regs);
6593
6594 if (!addr || addr >= TASK_SIZE)
6595 return 0;
6596
6597 return TASK_SIZE - addr;
6598}
6599
6600static u16
6601perf_sample_ustack_size(u16 stack_size, u16 header_size,
6602 struct pt_regs *regs)
6603{
6604 u64 task_size;
6605
6606
6607 if (!regs)
6608 return 0;
6609
6610
6611
6612
6613
6614
6615
6616
6617
6618
6619
6620 task_size = min((u64) USHRT_MAX, perf_ustack_task_size(regs));
6621 stack_size = min(stack_size, (u16) task_size);
6622
6623
6624 header_size += 2 * sizeof(u64);
6625
6626
6627 if ((u16) (header_size + stack_size) < header_size) {
6628
6629
6630
6631
6632 stack_size = USHRT_MAX - header_size - sizeof(u64);
6633 stack_size = round_up(stack_size, sizeof(u64));
6634 }
6635
6636 return stack_size;
6637}
6638
6639static void
6640perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
6641 struct pt_regs *regs)
6642{
6643
6644 if (!regs) {
6645 u64 size = 0;
6646 perf_output_put(handle, size);
6647 } else {
6648 unsigned long sp;
6649 unsigned int rem;
6650 u64 dyn_size;
6651 mm_segment_t fs;
6652
6653
6654
6655
6656
6657
6658
6659
6660
6661
6662
6663
6664
6665 perf_output_put(handle, dump_size);
6666
6667
6668 sp = perf_user_stack_pointer(regs);
6669 fs = force_uaccess_begin();
6670 rem = __output_copy_user(handle, (void *) sp, dump_size);
6671 force_uaccess_end(fs);
6672 dyn_size = dump_size - rem;
6673
6674 perf_output_skip(handle, rem);
6675
6676
6677 perf_output_put(handle, dyn_size);
6678 }
6679}
6680
6681static unsigned long perf_prepare_sample_aux(struct perf_event *event,
6682 struct perf_sample_data *data,
6683 size_t size)
6684{
6685 struct perf_event *sampler = event->aux_event;
6686 struct perf_buffer *rb;
6687
6688 data->aux_size = 0;
6689
6690 if (!sampler)
6691 goto out;
6692
6693 if (WARN_ON_ONCE(READ_ONCE(sampler->state) != PERF_EVENT_STATE_ACTIVE))
6694 goto out;
6695
6696 if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id()))
6697 goto out;
6698
6699 rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
6700 if (!rb)
6701 goto out;
6702
6703
6704
6705
6706
6707 if (READ_ONCE(rb->aux_in_sampling)) {
6708 data->aux_size = 0;
6709 } else {
6710 size = min_t(size_t, size, perf_aux_size(rb));
6711 data->aux_size = ALIGN(size, sizeof(u64));
6712 }
6713 ring_buffer_put(rb);
6714
6715out:
6716 return data->aux_size;
6717}
6718
6719static long perf_pmu_snapshot_aux(struct perf_buffer *rb,
6720 struct perf_event *event,
6721 struct perf_output_handle *handle,
6722 unsigned long size)
6723{
6724 unsigned long flags;
6725 long ret;
6726
6727
6728
6729
6730
6731
6732
6733
6734
6735
6736 local_irq_save(flags);
6737
6738
6739
6740
6741 WRITE_ONCE(rb->aux_in_sampling, 1);
6742 barrier();
6743
6744 ret = event->pmu->snapshot_aux(event, handle, size);
6745
6746 barrier();
6747 WRITE_ONCE(rb->aux_in_sampling, 0);
6748 local_irq_restore(flags);
6749
6750 return ret;
6751}
6752
6753static void perf_aux_sample_output(struct perf_event *event,
6754 struct perf_output_handle *handle,
6755 struct perf_sample_data *data)
6756{
6757 struct perf_event *sampler = event->aux_event;
6758 struct perf_buffer *rb;
6759 unsigned long pad;
6760 long size;
6761
6762 if (WARN_ON_ONCE(!sampler || !data->aux_size))
6763 return;
6764
6765 rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
6766 if (!rb)
6767 return;
6768
6769 size = perf_pmu_snapshot_aux(rb, sampler, handle, data->aux_size);
6770
6771
6772
6773
6774
6775
6776
6777 if (WARN_ON_ONCE(size < 0))
6778 goto out_put;
6779
6780
6781
6782
6783
6784 pad = data->aux_size - size;
6785 if (WARN_ON_ONCE(pad >= sizeof(u64)))
6786 pad = 8;
6787
6788 if (pad) {
6789 u64 zero = 0;
6790 perf_output_copy(handle, &zero, pad);
6791 }
6792
6793out_put:
6794 ring_buffer_put(rb);
6795}
6796
6797static void __perf_event_header__init_id(struct perf_event_header *header,
6798 struct perf_sample_data *data,
6799 struct perf_event *event)
6800{
6801 u64 sample_type = event->attr.sample_type;
6802
6803 data->type = sample_type;
6804 header->size += event->id_header_size;
6805
6806 if (sample_type & PERF_SAMPLE_TID) {
6807
6808 data->tid_entry.pid = perf_event_pid(event, current);
6809 data->tid_entry.tid = perf_event_tid(event, current);
6810 }
6811
6812 if (sample_type & PERF_SAMPLE_TIME)
6813 data->time = perf_event_clock(event);
6814
6815 if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
6816 data->id = primary_event_id(event);
6817
6818 if (sample_type & PERF_SAMPLE_STREAM_ID)
6819 data->stream_id = event->id;
6820
6821 if (sample_type & PERF_SAMPLE_CPU) {
6822 data->cpu_entry.cpu = raw_smp_processor_id();
6823 data->cpu_entry.reserved = 0;
6824 }
6825}
6826
6827void perf_event_header__init_id(struct perf_event_header *header,
6828 struct perf_sample_data *data,
6829 struct perf_event *event)
6830{
6831 if (event->attr.sample_id_all)
6832 __perf_event_header__init_id(header, data, event);
6833}
6834
6835static void __perf_event__output_id_sample(struct perf_output_handle *handle,
6836 struct perf_sample_data *data)
6837{
6838 u64 sample_type = data->type;
6839
6840 if (sample_type & PERF_SAMPLE_TID)
6841 perf_output_put(handle, data->tid_entry);
6842
6843 if (sample_type & PERF_SAMPLE_TIME)
6844 perf_output_put(handle, data->time);
6845
6846 if (sample_type & PERF_SAMPLE_ID)
6847 perf_output_put(handle, data->id);
6848
6849 if (sample_type & PERF_SAMPLE_STREAM_ID)
6850 perf_output_put(handle, data->stream_id);
6851
6852 if (sample_type & PERF_SAMPLE_CPU)
6853 perf_output_put(handle, data->cpu_entry);
6854
6855 if (sample_type & PERF_SAMPLE_IDENTIFIER)
6856 perf_output_put(handle, data->id);
6857}
6858
6859void perf_event__output_id_sample(struct perf_event *event,
6860 struct perf_output_handle *handle,
6861 struct perf_sample_data *sample)
6862{
6863 if (event->attr.sample_id_all)
6864 __perf_event__output_id_sample(handle, sample);
6865}
6866
6867static void perf_output_read_one(struct perf_output_handle *handle,
6868 struct perf_event *event,
6869 u64 enabled, u64 running)
6870{
6871 u64 read_format = event->attr.read_format;
6872 u64 values[4];
6873 int n = 0;
6874
6875 values[n++] = perf_event_count(event);
6876 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
6877 values[n++] = enabled +
6878 atomic64_read(&event->child_total_time_enabled);
6879 }
6880 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
6881 values[n++] = running +
6882 atomic64_read(&event->child_total_time_running);
6883 }
6884 if (read_format & PERF_FORMAT_ID)
6885 values[n++] = primary_event_id(event);
6886
6887 __output_copy(handle, values, n * sizeof(u64));
6888}
6889
6890static void perf_output_read_group(struct perf_output_handle *handle,
6891 struct perf_event *event,
6892 u64 enabled, u64 running)
6893{
6894 struct perf_event *leader = event->group_leader, *sub;
6895 u64 read_format = event->attr.read_format;
6896 u64 values[5];
6897 int n = 0;
6898
6899 values[n++] = 1 + leader->nr_siblings;
6900
6901 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
6902 values[n++] = enabled;
6903
6904 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
6905 values[n++] = running;
6906
6907 if ((leader != event) &&
6908 (leader->state == PERF_EVENT_STATE_ACTIVE))
6909 leader->pmu->read(leader);
6910
6911 values[n++] = perf_event_count(leader);
6912 if (read_format & PERF_FORMAT_ID)
6913 values[n++] = primary_event_id(leader);
6914
6915 __output_copy(handle, values, n * sizeof(u64));
6916
6917 for_each_sibling_event(sub, leader) {
6918 n = 0;
6919
6920 if ((sub != event) &&
6921 (sub->state == PERF_EVENT_STATE_ACTIVE))
6922 sub->pmu->read(sub);
6923
6924 values[n++] = perf_event_count(sub);
6925 if (read_format & PERF_FORMAT_ID)
6926 values[n++] = primary_event_id(sub);
6927
6928 __output_copy(handle, values, n * sizeof(u64));
6929 }
6930}
6931
6932#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
6933 PERF_FORMAT_TOTAL_TIME_RUNNING)
6934
6935
6936
6937
6938
6939
6940
6941
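/*
 * Emit the read_format payload for a sample.  The enabled/running times are
 * computed from snapshot values here rather than by updating the context
 * time, since this can run from NMI context where taking the context lock is
 * not an option.
 */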
6942static void perf_output_read(struct perf_output_handle *handle,
6943 struct perf_event *event)
6944{
6945 u64 enabled = 0, running = 0, now;
6946 u64 read_format = event->attr.read_format;
6947
6948
6949
6950
6951
6952
6953
6954
6955
6956
6957 if (read_format & PERF_FORMAT_TOTAL_TIMES)
6958 calc_timer_values(event, &now, &enabled, &running);
6959
6960 if (event->attr.read_format & PERF_FORMAT_GROUP)
6961 perf_output_read_group(handle, event, enabled, running);
6962 else
6963 perf_output_read_one(handle, event, enabled, running);
6964}
6965
6966static inline bool perf_sample_save_hw_index(struct perf_event *event)
6967{
6968 return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
6969}
6970
6971void perf_output_sample(struct perf_output_handle *handle,
6972 struct perf_event_header *header,
6973 struct perf_sample_data *data,
6974 struct perf_event *event)
6975{
6976 u64 sample_type = data->type;
6977
6978 perf_output_put(handle, *header);
6979
6980 if (sample_type & PERF_SAMPLE_IDENTIFIER)
6981 perf_output_put(handle, data->id);
6982
6983 if (sample_type & PERF_SAMPLE_IP)
6984 perf_output_put(handle, data->ip);
6985
6986 if (sample_type & PERF_SAMPLE_TID)
6987 perf_output_put(handle, data->tid_entry);
6988
6989 if (sample_type & PERF_SAMPLE_TIME)
6990 perf_output_put(handle, data->time);
6991
6992 if (sample_type & PERF_SAMPLE_ADDR)
6993 perf_output_put(handle, data->addr);
6994
6995 if (sample_type & PERF_SAMPLE_ID)
6996 perf_output_put(handle, data->id);
6997
6998 if (sample_type & PERF_SAMPLE_STREAM_ID)
6999 perf_output_put(handle, data->stream_id);
7000
7001 if (sample_type & PERF_SAMPLE_CPU)
7002 perf_output_put(handle, data->cpu_entry);
7003
7004 if (sample_type & PERF_SAMPLE_PERIOD)
7005 perf_output_put(handle, data->period);
7006
7007 if (sample_type & PERF_SAMPLE_READ)
7008 perf_output_read(handle, event);
7009
7010 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
7011 int size = 1;
7012
7013 size += data->callchain->nr;
7014 size *= sizeof(u64);
7015 __output_copy(handle, data->callchain, size);
7016 }
7017
7018 if (sample_type & PERF_SAMPLE_RAW) {
7019 struct perf_raw_record *raw = data->raw;
7020
7021 if (raw) {
7022 struct perf_raw_frag *frag = &raw->frag;
7023
7024 perf_output_put(handle, raw->size);
7025 do {
7026 if (frag->copy) {
7027 __output_custom(handle, frag->copy,
7028 frag->data, frag->size);
7029 } else {
7030 __output_copy(handle, frag->data,
7031 frag->size);
7032 }
7033 if (perf_raw_frag_last(frag))
7034 break;
7035 frag = frag->next;
7036 } while (1);
7037 if (frag->pad)
7038 __output_skip(handle, NULL, frag->pad);
7039 } else {
7040 struct {
7041 u32 size;
7042 u32 data;
7043 } raw = {
7044 .size = sizeof(u32),
7045 .data = 0,
7046 };
7047 perf_output_put(handle, raw);
7048 }
7049 }
7050
7051 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
7052 if (data->br_stack) {
7053 size_t size;
7054
7055 size = data->br_stack->nr
7056 * sizeof(struct perf_branch_entry);
7057
7058 perf_output_put(handle, data->br_stack->nr);
7059 if (perf_sample_save_hw_index(event))
7060 perf_output_put(handle, data->br_stack->hw_idx);
7061 perf_output_copy(handle, data->br_stack->entries, size);
7062 } else {
7063
7064
7065
7066 u64 nr = 0;
7067 perf_output_put(handle, nr);
7068 }
7069 }
7070
7071 if (sample_type & PERF_SAMPLE_REGS_USER) {
7072 u64 abi = data->regs_user.abi;
7073
7074
7075
7076
7077
7078 perf_output_put(handle, abi);
7079
7080 if (abi) {
7081 u64 mask = event->attr.sample_regs_user;
7082 perf_output_sample_regs(handle,
7083 data->regs_user.regs,
7084 mask);
7085 }
7086 }
7087
7088 if (sample_type & PERF_SAMPLE_STACK_USER) {
7089 perf_output_sample_ustack(handle,
7090 data->stack_user_size,
7091 data->regs_user.regs);
7092 }
7093
7094 if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
7095 perf_output_put(handle, data->weight.full);
7096
7097 if (sample_type & PERF_SAMPLE_DATA_SRC)
7098 perf_output_put(handle, data->data_src.val);
7099
7100 if (sample_type & PERF_SAMPLE_TRANSACTION)
7101 perf_output_put(handle, data->txn);
7102
7103 if (sample_type & PERF_SAMPLE_REGS_INTR) {
7104 u64 abi = data->regs_intr.abi;
7105
7106
7107
7108
7109 perf_output_put(handle, abi);
7110
7111 if (abi) {
7112 u64 mask = event->attr.sample_regs_intr;
7113
7114 perf_output_sample_regs(handle,
7115 data->regs_intr.regs,
7116 mask);
7117 }
7118 }
7119
7120 if (sample_type & PERF_SAMPLE_PHYS_ADDR)
7121 perf_output_put(handle, data->phys_addr);
7122
7123 if (sample_type & PERF_SAMPLE_CGROUP)
7124 perf_output_put(handle, data->cgroup);
7125
7126 if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
7127 perf_output_put(handle, data->data_page_size);
7128
7129 if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
7130 perf_output_put(handle, data->code_page_size);
7131
7132 if (sample_type & PERF_SAMPLE_AUX) {
7133 perf_output_put(handle, data->aux_size);
7134
7135 if (data->aux_size)
7136 perf_aux_sample_output(event, handle, data);
7137 }
7138
7139 if (!event->attr.watermark) {
7140 int wakeup_events = event->attr.wakeup_events;
7141
7142 if (wakeup_events) {
7143 struct perf_buffer *rb = handle->rb;
7144 int events = local_inc_return(&rb->events);
7145
7146 if (events >= wakeup_events) {
7147 local_sub(wakeup_events, &rb->events);
7148 local_inc(&rb->wakeup);
7149 }
7150 }
7151 }
7152}
7153
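/*
 * Translate a virtual address to a physical address for PERF_SAMPLE_PHYS_ADDR.
 * Valid kernel addresses are translated directly; user addresses are resolved
 * with a non-faulting get_user_page_fast_only() so this is safe from the
 * sampling path.
 */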
7154static u64 perf_virt_to_phys(u64 virt)
7155{
7156 u64 phys_addr = 0;
7157
7158 if (!virt)
7159 return 0;
7160
7161 if (virt >= TASK_SIZE) {
7162
7163 if (virt_addr_valid((void *)(uintptr_t)virt) &&
7164 !(virt >= VMALLOC_START && virt < VMALLOC_END))
7165 phys_addr = (u64)virt_to_phys((void *)(uintptr_t)virt);
7166 } else {
7167
7168
7169
7170
7171
7172
7173
7174 if (current->mm != NULL) {
7175 struct page *p;
7176
7177 pagefault_disable();
7178 if (get_user_page_fast_only(virt, 0, &p)) {
7179 phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
7180 put_page(p);
7181 }
7182 pagefault_enable();
7183 }
7184 }
7185
7186 return phys_addr;
7187}
7188
7189
7190
7191
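/*
 * Walk the page tables locklessly and return the leaf page size that maps
 * @addr in @mm, or 0 if no mapping is present.
 */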
7192static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr)
7193{
7194 u64 size = 0;
7195
7196#ifdef CONFIG_HAVE_FAST_GUP
7197 pgd_t *pgdp, pgd;
7198 p4d_t *p4dp, p4d;
7199 pud_t *pudp, pud;
7200 pmd_t *pmdp, pmd;
7201 pte_t *ptep, pte;
7202
7203 pgdp = pgd_offset(mm, addr);
7204 pgd = READ_ONCE(*pgdp);
7205 if (pgd_none(pgd))
7206 return 0;
7207
7208 if (pgd_leaf(pgd))
7209 return pgd_leaf_size(pgd);
7210
7211 p4dp = p4d_offset_lockless(pgdp, pgd, addr);
7212 p4d = READ_ONCE(*p4dp);
7213 if (!p4d_present(p4d))
7214 return 0;
7215
7216 if (p4d_leaf(p4d))
7217 return p4d_leaf_size(p4d);
7218
7219 pudp = pud_offset_lockless(p4dp, p4d, addr);
7220 pud = READ_ONCE(*pudp);
7221 if (!pud_present(pud))
7222 return 0;
7223
7224 if (pud_leaf(pud))
7225 return pud_leaf_size(pud);
7226
7227 pmdp = pmd_offset_lockless(pudp, pud, addr);
7228 pmd = READ_ONCE(*pmdp);
7229 if (!pmd_present(pmd))
7230 return 0;
7231
7232 if (pmd_leaf(pmd))
7233 return pmd_leaf_size(pmd);
7234
7235 ptep = pte_offset_map(&pmd, addr);
7236 pte = ptep_get_lockless(ptep);
7237 if (pte_present(pte))
7238 size = pte_leaf_size(pte);
7239 pte_unmap(ptep);
7240#endif
7241
7242 return size;
7243}
7244
7245static u64 perf_get_page_size(unsigned long addr)
7246{
7247 struct mm_struct *mm;
7248 unsigned long flags;
7249 u64 size;
7250
7251 if (!addr)
7252 return 0;
7253
7254
7255
7256
7257
7258 local_irq_save(flags);
7259
7260 mm = current->mm;
7261 if (!mm) {
7262
7263
7264
7265
7266 mm = &init_mm;
7267 }
7268
7269 size = perf_get_pgtable_size(mm, addr);
7270
7271 local_irq_restore(flags);
7272
7273 return size;
7274}
7275
7276static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
7277
7278struct perf_callchain_entry *
7279perf_callchain(struct perf_event *event, struct pt_regs *regs)
7280{
7281 bool kernel = !event->attr.exclude_callchain_kernel;
7282 bool user = !event->attr.exclude_callchain_user;
7283
7284 bool crosstask = event->ctx->task && event->ctx->task != current;
7285 const u32 max_stack = event->attr.sample_max_stack;
7286 struct perf_callchain_entry *callchain;
7287
7288 if (!kernel && !user)
7289 return &__empty_callchain;
7290
7291 callchain = get_perf_callchain(regs, 0, kernel, user,
7292 max_stack, crosstask, true);
7293 return callchain ?: &__empty_callchain;
7294}
7295
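/*
 * Gather the data for a sample and compute the resulting record size in
 * header->size, so perf_output_sample() can emit it without recomputing.
 */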
7296void perf_prepare_sample(struct perf_event_header *header,
7297 struct perf_sample_data *data,
7298 struct perf_event *event,
7299 struct pt_regs *regs)
7300{
7301 u64 sample_type = event->attr.sample_type;
7302
7303 header->type = PERF_RECORD_SAMPLE;
7304 header->size = sizeof(*header) + event->header_size;
7305
7306 header->misc = 0;
7307 header->misc |= perf_misc_flags(regs);
7308
7309 __perf_event_header__init_id(header, data, event);
7310
7311 if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
7312 data->ip = perf_instruction_pointer(regs);
7313
7314 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
7315 int size = 1;
7316
7317 if (!(sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
7318 data->callchain = perf_callchain(event, regs);
7319
7320 size += data->callchain->nr;
7321
7322 header->size += size * sizeof(u64);
7323 }
7324
7325 if (sample_type & PERF_SAMPLE_RAW) {
7326 struct perf_raw_record *raw = data->raw;
7327 int size;
7328
7329 if (raw) {
7330 struct perf_raw_frag *frag = &raw->frag;
7331 u32 sum = 0;
7332
7333 do {
7334 sum += frag->size;
7335 if (perf_raw_frag_last(frag))
7336 break;
7337 frag = frag->next;
7338 } while (1);
7339
7340 size = round_up(sum + sizeof(u32), sizeof(u64));
7341 raw->size = size - sizeof(u32);
7342 frag->pad = raw->size - sum;
7343 } else {
7344 size = sizeof(u64);
7345 }
7346
7347 header->size += size;
7348 }
7349
7350 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
7351 int size = sizeof(u64);
7352 if (data->br_stack) {
7353 if (perf_sample_save_hw_index(event))
7354 size += sizeof(u64);
7355
7356 size += data->br_stack->nr
7357 * sizeof(struct perf_branch_entry);
7358 }
7359 header->size += size;
7360 }
7361
7362 if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
7363 perf_sample_regs_user(&data->regs_user, regs);
7364
7365 if (sample_type & PERF_SAMPLE_REGS_USER) {
7366
7367 int size = sizeof(u64);
7368
7369 if (data->regs_user.regs) {
7370 u64 mask = event->attr.sample_regs_user;
7371 size += hweight64(mask) * sizeof(u64);
7372 }
7373
7374 header->size += size;
7375 }
7376
7377 if (sample_type & PERF_SAMPLE_STACK_USER) {
7378
7379
7380
7381
7382
7383
7384 u16 stack_size = event->attr.sample_stack_user;
7385 u16 size = sizeof(u64);
7386
7387 stack_size = perf_sample_ustack_size(stack_size, header->size,
7388 data->regs_user.regs);
7389
7390
7391
7392
7393
7394
7395 if (stack_size)
7396 size += sizeof(u64) + stack_size;
7397
7398 data->stack_user_size = stack_size;
7399 header->size += size;
7400 }
7401
7402 if (sample_type & PERF_SAMPLE_REGS_INTR) {
7403
7404 int size = sizeof(u64);
7405
7406 perf_sample_regs_intr(&data->regs_intr, regs);
7407
7408 if (data->regs_intr.regs) {
7409 u64 mask = event->attr.sample_regs_intr;
7410
7411 size += hweight64(mask) * sizeof(u64);
7412 }
7413
7414 header->size += size;
7415 }
7416
7417 if (sample_type & PERF_SAMPLE_PHYS_ADDR)
7418 data->phys_addr = perf_virt_to_phys(data->addr);
7419
7420#ifdef CONFIG_CGROUP_PERF
7421 if (sample_type & PERF_SAMPLE_CGROUP) {
7422 struct cgroup *cgrp;
7423
7424
7425 cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup;
7426 data->cgroup = cgroup_id(cgrp);
7427 }
7428#endif
7429
7430
7431
7432
7433
7434
7435 if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
7436 data->data_page_size = perf_get_page_size(data->addr);
7437
7438 if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
7439 data->code_page_size = perf_get_page_size(data->ip);
7440
7441 if (sample_type & PERF_SAMPLE_AUX) {
7442 u64 size;
7443
7444 header->size += sizeof(u64);
7445
7446
7447
7448
7449
7450
7451
7452 size = min_t(size_t, U16_MAX - header->size,
7453 event->attr.aux_sample_size);
7454 size = rounddown(size, 8);
7455 size = perf_prepare_sample_aux(event, data, size);
7456
7457 WARN_ON_ONCE(size + header->size > U16_MAX);
7458 header->size += size;
7459 }
7460
7461
7462
7463
7464
7465
7466
7467
7468 WARN_ON_ONCE(header->size & 7);
7469}
7470
7471static __always_inline int
7472__perf_event_output(struct perf_event *event,
7473 struct perf_sample_data *data,
7474 struct pt_regs *regs,
7475 int (*output_begin)(struct perf_output_handle *,
7476 struct perf_sample_data *,
7477 struct perf_event *,
7478 unsigned int))
7479{
7480 struct perf_output_handle handle;
7481 struct perf_event_header header;
7482 int err;
7483
7484
7485 rcu_read_lock();
7486
7487 perf_prepare_sample(&header, data, event, regs);
7488
7489 err = output_begin(&handle, data, event, header.size);
7490 if (err)
7491 goto exit;
7492
7493 perf_output_sample(&handle, &header, data, event);
7494
7495 perf_output_end(&handle);
7496
7497exit:
7498 rcu_read_unlock();
7499 return err;
7500}
7501
7502void
7503perf_event_output_forward(struct perf_event *event,
7504 struct perf_sample_data *data,
7505 struct pt_regs *regs)
7506{
7507 __perf_event_output(event, data, regs, perf_output_begin_forward);
7508}
7509
7510void
7511perf_event_output_backward(struct perf_event *event,
7512 struct perf_sample_data *data,
7513 struct pt_regs *regs)
7514{
7515 __perf_event_output(event, data, regs, perf_output_begin_backward);
7516}
7517
7518int
7519perf_event_output(struct perf_event *event,
7520 struct perf_sample_data *data,
7521 struct pt_regs *regs)
7522{
7523 return __perf_event_output(event, data, regs, perf_output_begin);
7524}
7525
7526
7527
7528
7529
7530struct perf_read_event {
7531 struct perf_event_header header;
7532
7533 u32 pid;
7534 u32 tid;
7535};
7536
7537static void
7538perf_event_read_event(struct perf_event *event,
7539 struct task_struct *task)
7540{
7541 struct perf_output_handle handle;
7542 struct perf_sample_data sample;
7543 struct perf_read_event read_event = {
7544 .header = {
7545 .type = PERF_RECORD_READ,
7546 .misc = 0,
7547 .size = sizeof(read_event) + event->read_size,
7548 },
7549 .pid = perf_event_pid(event, task),
7550 .tid = perf_event_tid(event, task),
7551 };
7552 int ret;
7553
7554 perf_event_header__init_id(&read_event.header, &sample, event);
7555 ret = perf_output_begin(&handle, &sample, event, read_event.header.size);
7556 if (ret)
7557 return;
7558
7559 perf_output_put(&handle, read_event);
7560 perf_output_read(&handle, event);
7561 perf_event__output_id_sample(event, &handle, &sample);
7562
7563 perf_output_end(&handle);
7564}
7565
7566typedef void (perf_iterate_f)(struct perf_event *event, void *data);
7567
7568static void
7569perf_iterate_ctx(struct perf_event_context *ctx,
7570 perf_iterate_f output,
7571 void *data, bool all)
7572{
7573 struct perf_event *event;
7574
7575 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
7576 if (!all) {
7577 if (event->state < PERF_EVENT_STATE_INACTIVE)
7578 continue;
7579 if (!event_filter_match(event))
7580 continue;
7581 }
7582
7583 output(event, data);
7584 }
7585}
7586
7587static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
7588{
7589 struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events);
7590 struct perf_event *event;
7591
7592 list_for_each_entry_rcu(event, &pel->list, sb_list) {
7593
7594
7595
7596
7597
7598 if (!smp_load_acquire(&event->ctx))
7599 continue;
7600
7601 if (event->state < PERF_EVENT_STATE_INACTIVE)
7602 continue;
7603 if (!event_filter_match(event))
7604 continue;
7605 output(event, data);
7606 }
7607}
7608
7609
7610
7611
7612
7613
7614
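/*
 * Iterate all events that need to receive side-band records: the per-CPU
 * sb_list plus the current task's contexts, or just @task_ctx when one is
 * supplied.
 */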
7615static void
7616perf_iterate_sb(perf_iterate_f output, void *data,
7617 struct perf_event_context *task_ctx)
7618{
7619 struct perf_event_context *ctx;
7620 int ctxn;
7621
7622 rcu_read_lock();
7623 preempt_disable();
7624
7625
7626
7627
7628
7629
7630 if (task_ctx) {
7631 perf_iterate_ctx(task_ctx, output, data, false);
7632 goto done;
7633 }
7634
7635 perf_iterate_sb_cpu(output, data);
7636
7637 for_each_task_context_nr(ctxn) {
7638 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
7639 if (ctx)
7640 perf_iterate_ctx(ctx, output, data, false);
7641 }
7642done:
7643 preempt_enable();
7644 rcu_read_unlock();
7645}
7646
7647
7648
7649
7650
7651static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
7652{
7653 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
7654 struct perf_addr_filter *filter;
7655 unsigned int restart = 0, count = 0;
7656 unsigned long flags;
7657
7658 if (!has_addr_filter(event))
7659 return;
7660
7661 raw_spin_lock_irqsave(&ifh->lock, flags);
7662 list_for_each_entry(filter, &ifh->list, entry) {
7663 if (filter->path.dentry) {
7664 event->addr_filter_ranges[count].start = 0;
7665 event->addr_filter_ranges[count].size = 0;
7666 restart++;
7667 }
7668
7669 count++;
7670 }
7671
7672 if (restart)
7673 event->addr_filters_gen++;
7674 raw_spin_unlock_irqrestore(&ifh->lock, flags);
7675
7676 if (restart)
7677 perf_event_stop(event, 1);
7678}
7679
7680void perf_event_exec(void)
7681{
7682 struct perf_event_context *ctx;
7683 int ctxn;
7684
7685 for_each_task_context_nr(ctxn) {
7686 perf_event_enable_on_exec(ctxn);
7687 perf_event_remove_on_exec(ctxn);
7688
7689 rcu_read_lock();
7690 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
7691 if (ctx) {
7692 perf_iterate_ctx(ctx, perf_event_addr_filters_exec,
7693 NULL, true);
7694 }
7695 rcu_read_unlock();
7696 }
7697}
7698
7699struct remote_output {
7700 struct perf_buffer *rb;
7701 int err;
7702};
7703
7704static void __perf_event_output_stop(struct perf_event *event, void *data)
7705{
7706 struct perf_event *parent = event->parent;
7707 struct remote_output *ro = data;
7708 struct perf_buffer *rb = ro->rb;
7709 struct stop_event_data sd = {
7710 .event = event,
7711 };
7712
7713 if (!has_aux(event))
7714 return;
7715
7716 if (!parent)
7717 parent = event;
7718
7719
7720
7721
7722
7723
7724
7725
7726
7727
7728
7729 if (rcu_dereference(parent->rb) == rb)
7730 ro->err = __perf_event_stop(&sd);
7731}
7732
7733static int __perf_pmu_output_stop(void *info)
7734{
7735 struct perf_event *event = info;
7736 struct pmu *pmu = event->ctx->pmu;
7737 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
7738 struct remote_output ro = {
7739 .rb = event->rb,
7740 };
7741
7742 rcu_read_lock();
7743 perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
7744 if (cpuctx->task_ctx)
7745 perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop,
7746 &ro, false);
7747 rcu_read_unlock();
7748
7749 return ro.err;
7750}
7751
7752static void perf_pmu_output_stop(struct perf_event *event)
7753{
7754 struct perf_event *iter;
7755 int err, cpu;
7756
7757restart:
7758 rcu_read_lock();
7759 list_for_each_entry_rcu(iter, &event->rb->event_list, rb_entry) {
7760
7761
7762
7763
7764
7765
7766 cpu = iter->cpu;
7767 if (cpu == -1)
7768 cpu = READ_ONCE(iter->oncpu);
7769
7770 if (cpu == -1)
7771 continue;
7772
7773 err = cpu_function_call(cpu, __perf_pmu_output_stop, event);
7774 if (err == -EAGAIN) {
7775 rcu_read_unlock();
7776 goto restart;
7777 }
7778 }
7779 rcu_read_unlock();
7780}
7781
7782
7783
7784
7785
7786
7787
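/*
 * task tracking -- fork/exit
 *
 * enabled by: attr.comm | attr.mmap | attr.mmap2 | attr.mmap_data | attr.task
 */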
7788struct perf_task_event {
7789 struct task_struct *task;
7790 struct perf_event_context *task_ctx;
7791
7792 struct {
7793 struct perf_event_header header;
7794
7795 u32 pid;
7796 u32 ppid;
7797 u32 tid;
7798 u32 ptid;
7799 u64 time;
7800 } event_id;
7801};
7802
7803static int perf_event_task_match(struct perf_event *event)
7804{
7805 return event->attr.comm || event->attr.mmap ||
7806 event->attr.mmap2 || event->attr.mmap_data ||
7807 event->attr.task;
7808}
7809
7810static void perf_event_task_output(struct perf_event *event,
7811 void *data)
7812{
7813 struct perf_task_event *task_event = data;
7814 struct perf_output_handle handle;
7815 struct perf_sample_data sample;
7816 struct task_struct *task = task_event->task;
7817 int ret, size = task_event->event_id.header.size;
7818
7819 if (!perf_event_task_match(event))
7820 return;
7821
7822 perf_event_header__init_id(&task_event->event_id.header, &sample, event);
7823
7824 ret = perf_output_begin(&handle, &sample, event,
7825 task_event->event_id.header.size);
7826 if (ret)
7827 goto out;
7828
7829 task_event->event_id.pid = perf_event_pid(event, task);
7830 task_event->event_id.tid = perf_event_tid(event, task);
7831
7832 if (task_event->event_id.header.type == PERF_RECORD_EXIT) {
7833 task_event->event_id.ppid = perf_event_pid(event,
7834 task->real_parent);
7835 task_event->event_id.ptid = perf_event_pid(event,
7836 task->real_parent);
7837 } else {
7838 task_event->event_id.ppid = perf_event_pid(event, current);
7839 task_event->event_id.ptid = perf_event_tid(event, current);
7840 }
7841
7842 task_event->event_id.time = perf_event_clock(event);
7843
7844 perf_output_put(&handle, task_event->event_id);
7845
7846 perf_event__output_id_sample(event, &handle, &sample);
7847
7848 perf_output_end(&handle);
7849out:
7850 task_event->event_id.header.size = size;
7851}
7852
7853static void perf_event_task(struct task_struct *task,
7854 struct perf_event_context *task_ctx,
7855 int new)
7856{
7857 struct perf_task_event task_event;
7858
7859 if (!atomic_read(&nr_comm_events) &&
7860 !atomic_read(&nr_mmap_events) &&
7861 !atomic_read(&nr_task_events))
7862 return;
7863
7864 task_event = (struct perf_task_event){
7865 .task = task,
7866 .task_ctx = task_ctx,
7867 .event_id = {
7868 .header = {
7869 .type = new ? PERF_RECORD_FORK : PERF_RECORD_EXIT,
7870 .misc = 0,
7871 .size = sizeof(task_event.event_id),
7872 },
7873
7874
7875
7876
7877
7878 },
7879 };
7880
7881 perf_iterate_sb(perf_event_task_output,
7882 &task_event,
7883 task_ctx);
7884}
7885
7886void perf_event_fork(struct task_struct *task)
7887{
7888 perf_event_task(task, NULL, 1);
7889 perf_event_namespaces(task);
7890}
7891
7892
7893
7894
7895
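/*
 * comm tracking -- emitted when a task's comm changes, e.g. on exec().
 */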
7896struct perf_comm_event {
7897 struct task_struct *task;
7898 char *comm;
7899 int comm_size;
7900
7901 struct {
7902 struct perf_event_header header;
7903
7904 u32 pid;
7905 u32 tid;
7906 } event_id;
7907};
7908
7909static int perf_event_comm_match(struct perf_event *event)
7910{
7911 return event->attr.comm;
7912}
7913
7914static void perf_event_comm_output(struct perf_event *event,
7915 void *data)
7916{
7917 struct perf_comm_event *comm_event = data;
7918 struct perf_output_handle handle;
7919 struct perf_sample_data sample;
7920 int size = comm_event->event_id.header.size;
7921 int ret;
7922
7923 if (!perf_event_comm_match(event))
7924 return;
7925
7926 perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
7927 ret = perf_output_begin(&handle, &sample, event,
7928 comm_event->event_id.header.size);
7929
7930 if (ret)
7931 goto out;
7932
7933 comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
7934 comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
7935
7936 perf_output_put(&handle, comm_event->event_id);
7937 __output_copy(&handle, comm_event->comm,
7938 comm_event->comm_size);
7939
7940 perf_event__output_id_sample(event, &handle, &sample);
7941
7942 perf_output_end(&handle);
7943out:
7944 comm_event->event_id.header.size = size;
7945}
7946
7947static void perf_event_comm_event(struct perf_comm_event *comm_event)
7948{
7949 char comm[TASK_COMM_LEN];
7950 unsigned int size;
7951
7952 memset(comm, 0, sizeof(comm));
7953 strlcpy(comm, comm_event->task->comm, sizeof(comm));
7954 size = ALIGN(strlen(comm)+1, sizeof(u64));
7955
7956 comm_event->comm = comm;
7957 comm_event->comm_size = size;
7958
7959 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
7960
7961 perf_iterate_sb(perf_event_comm_output,
7962 comm_event,
7963 NULL);
7964}
7965
7966void perf_event_comm(struct task_struct *task, bool exec)
7967{
7968 struct perf_comm_event comm_event;
7969
7970 if (!atomic_read(&nr_comm_events))
7971 return;
7972
7973 comm_event = (struct perf_comm_event){
7974 .task = task,
7975
7976
7977 .event_id = {
7978 .header = {
7979 .type = PERF_RECORD_COMM,
7980 .misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
7981
7982 },
7983
7984
7985 },
7986 };
7987
7988 perf_event_comm_event(&comm_event);
7989}
7990
7991
7992
7993
7994
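/*
 * namespaces tracking
 */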
7995struct perf_namespaces_event {
7996 struct task_struct *task;
7997
7998 struct {
7999 struct perf_event_header header;
8000
8001 u32 pid;
8002 u32 tid;
8003 u64 nr_namespaces;
8004 struct perf_ns_link_info link_info[NR_NAMESPACES];
8005 } event_id;
8006};
8007
8008static int perf_event_namespaces_match(struct perf_event *event)
8009{
8010 return event->attr.namespaces;
8011}
8012
8013static void perf_event_namespaces_output(struct perf_event *event,
8014 void *data)
8015{
8016 struct perf_namespaces_event *namespaces_event = data;
8017 struct perf_output_handle handle;
8018 struct perf_sample_data sample;
8019 u16 header_size = namespaces_event->event_id.header.size;
8020 int ret;
8021
8022 if (!perf_event_namespaces_match(event))
8023 return;
8024
8025 perf_event_header__init_id(&namespaces_event->event_id.header,
8026 &sample, event);
8027 ret = perf_output_begin(&handle, &sample, event,
8028 namespaces_event->event_id.header.size);
8029 if (ret)
8030 goto out;
8031
8032 namespaces_event->event_id.pid = perf_event_pid(event,
8033 namespaces_event->task);
8034 namespaces_event->event_id.tid = perf_event_tid(event,
8035 namespaces_event->task);
8036
8037 perf_output_put(&handle, namespaces_event->event_id);
8038
8039 perf_event__output_id_sample(event, &handle, &sample);
8040
8041 perf_output_end(&handle);
8042out:
8043 namespaces_event->event_id.header.size = header_size;
8044}
8045
8046static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
8047 struct task_struct *task,
8048 const struct proc_ns_operations *ns_ops)
8049{
8050 struct path ns_path;
8051 struct inode *ns_inode;
8052 int error;
8053
8054 error = ns_get_path(&ns_path, task, ns_ops);
8055 if (!error) {
8056 ns_inode = ns_path.dentry->d_inode;
8057 ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
8058 ns_link_info->ino = ns_inode->i_ino;
8059 path_put(&ns_path);
8060 }
8061}
8062
8063void perf_event_namespaces(struct task_struct *task)
8064{
8065 struct perf_namespaces_event namespaces_event;
8066 struct perf_ns_link_info *ns_link_info;
8067
8068 if (!atomic_read(&nr_namespaces_events))
8069 return;
8070
8071 namespaces_event = (struct perf_namespaces_event){
8072 .task = task,
8073 .event_id = {
8074 .header = {
8075 .type = PERF_RECORD_NAMESPACES,
8076 .misc = 0,
8077 .size = sizeof(namespaces_event.event_id),
8078 },
8079
8080
8081 .nr_namespaces = NR_NAMESPACES,
8082
8083 },
8084 };
8085
8086 ns_link_info = namespaces_event.event_id.link_info;
8087
8088 perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
8089 task, &mntns_operations);
8090
8091#ifdef CONFIG_USER_NS
8092 perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
8093 task, &userns_operations);
8094#endif
8095#ifdef CONFIG_NET_NS
8096 perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
8097 task, &netns_operations);
8098#endif
8099#ifdef CONFIG_UTS_NS
8100 perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
8101 task, &utsns_operations);
8102#endif
8103#ifdef CONFIG_IPC_NS
8104 perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
8105 task, &ipcns_operations);
8106#endif
8107#ifdef CONFIG_PID_NS
8108 perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
8109 task, &pidns_operations);
8110#endif
8111#ifdef CONFIG_CGROUPS
8112 perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
8113 task, &cgroupns_operations);
8114#endif
8115
8116 perf_iterate_sb(perf_event_namespaces_output,
8117 &namespaces_event,
8118 NULL);
8119}
8120
8121
8122
8123
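/*
 * cgroup tracking
 */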
8124#ifdef CONFIG_CGROUP_PERF
8125
8126struct perf_cgroup_event {
8127 char *path;
8128 int path_size;
8129 struct {
8130 struct perf_event_header header;
8131 u64 id;
8132 char path[];
8133 } event_id;
8134};
8135
8136static int perf_event_cgroup_match(struct perf_event *event)
8137{
8138 return event->attr.cgroup;
8139}
8140
8141static void perf_event_cgroup_output(struct perf_event *event, void *data)
8142{
8143 struct perf_cgroup_event *cgroup_event = data;
8144 struct perf_output_handle handle;
8145 struct perf_sample_data sample;
8146 u16 header_size = cgroup_event->event_id.header.size;
8147 int ret;
8148
8149 if (!perf_event_cgroup_match(event))
8150 return;
8151
8152 perf_event_header__init_id(&cgroup_event->event_id.header,
8153 &sample, event);
8154 ret = perf_output_begin(&handle, &sample, event,
8155 cgroup_event->event_id.header.size);
8156 if (ret)
8157 goto out;
8158
8159 perf_output_put(&handle, cgroup_event->event_id);
8160 __output_copy(&handle, cgroup_event->path, cgroup_event->path_size);
8161
8162 perf_event__output_id_sample(event, &handle, &sample);
8163
8164 perf_output_end(&handle);
8165out:
8166 cgroup_event->event_id.header.size = header_size;
8167}
8168
8169static void perf_event_cgroup(struct cgroup *cgrp)
8170{
8171 struct perf_cgroup_event cgroup_event;
8172 char path_enomem[16] = "//enomem";
8173 char *pathname;
8174 size_t size;
8175
8176 if (!atomic_read(&nr_cgroup_events))
8177 return;
8178
8179 cgroup_event = (struct perf_cgroup_event){
8180 .event_id = {
8181 .header = {
8182 .type = PERF_RECORD_CGROUP,
8183 .misc = 0,
8184 .size = sizeof(cgroup_event.event_id),
8185 },
8186 .id = cgroup_id(cgrp),
8187 },
8188 };
8189
8190 pathname = kmalloc(PATH_MAX, GFP_KERNEL);
8191 if (pathname == NULL) {
8192 cgroup_event.path = path_enomem;
8193 } else {
8194
8195 cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64));
8196 cgroup_event.path = pathname;
8197 }
8198
8199
8200
8201
8202
8203
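	/*
	 * The output buffer works in u64 units; pad the cgroup path with NUL
	 * bytes up to the next multiple of 8 so no uninitialized bytes leak
	 * to userspace.
	 */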
8204 size = strlen(cgroup_event.path) + 1;
8205 while (!IS_ALIGNED(size, sizeof(u64)))
8206 cgroup_event.path[size++] = '\0';
8207
8208 cgroup_event.event_id.header.size += size;
8209 cgroup_event.path_size = size;
8210
8211 perf_iterate_sb(perf_event_cgroup_output,
8212 &cgroup_event,
8213 NULL);
8214
8215 kfree(pathname);
8216}
8217
8218#endif
8219
8220
8221
8222
8223
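/*
 * mmap tracking
 */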
8224struct perf_mmap_event {
8225 struct vm_area_struct *vma;
8226
8227 const char *file_name;
8228 int file_size;
8229 int maj, min;
8230 u64 ino;
8231 u64 ino_generation;
8232 u32 prot, flags;
8233 u8 build_id[BUILD_ID_SIZE_MAX];
8234 u32 build_id_size;
8235
8236 struct {
8237 struct perf_event_header header;
8238
8239 u32 pid;
8240 u32 tid;
8241 u64 start;
8242 u64 len;
8243 u64 pgoff;
8244 } event_id;
8245};
8246
8247static int perf_event_mmap_match(struct perf_event *event,
8248 void *data)
8249{
8250 struct perf_mmap_event *mmap_event = data;
8251 struct vm_area_struct *vma = mmap_event->vma;
8252 int executable = vma->vm_flags & VM_EXEC;
8253
8254 return (!executable && event->attr.mmap_data) ||
8255 (executable && (event->attr.mmap || event->attr.mmap2));
8256}
8257
8258static void perf_event_mmap_output(struct perf_event *event,
8259 void *data)
8260{
8261 struct perf_mmap_event *mmap_event = data;
8262 struct perf_output_handle handle;
8263 struct perf_sample_data sample;
8264 int size = mmap_event->event_id.header.size;
8265 u32 type = mmap_event->event_id.header.type;
8266 bool use_build_id;
8267 int ret;
8268
8269 if (!perf_event_mmap_match(event, data))
8270 return;
8271
8272 if (event->attr.mmap2) {
8273 mmap_event->event_id.header.type = PERF_RECORD_MMAP2;
8274 mmap_event->event_id.header.size += sizeof(mmap_event->maj);
8275 mmap_event->event_id.header.size += sizeof(mmap_event->min);
8276 mmap_event->event_id.header.size += sizeof(mmap_event->ino);
8277 mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
8278 mmap_event->event_id.header.size += sizeof(mmap_event->prot);
8279 mmap_event->event_id.header.size += sizeof(mmap_event->flags);
8280 }
8281
8282 perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
8283 ret = perf_output_begin(&handle, &sample, event,
8284 mmap_event->event_id.header.size);
8285 if (ret)
8286 goto out;
8287
8288 mmap_event->event_id.pid = perf_event_pid(event, current);
8289 mmap_event->event_id.tid = perf_event_tid(event, current);
8290
8291 use_build_id = event->attr.build_id && mmap_event->build_id_size;
8292
8293 if (event->attr.mmap2 && use_build_id)
8294 mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID;
8295
8296 perf_output_put(&handle, mmap_event->event_id);
8297
8298 if (event->attr.mmap2) {
8299 if (use_build_id) {
8300 u8 size[4] = { (u8) mmap_event->build_id_size, 0, 0, 0 };
8301
8302 __output_copy(&handle, size, 4);
8303 __output_copy(&handle, mmap_event->build_id, BUILD_ID_SIZE_MAX);
8304 } else {
8305 perf_output_put(&handle, mmap_event->maj);
8306 perf_output_put(&handle, mmap_event->min);
8307 perf_output_put(&handle, mmap_event->ino);
8308 perf_output_put(&handle, mmap_event->ino_generation);
8309 }
8310 perf_output_put(&handle, mmap_event->prot);
8311 perf_output_put(&handle, mmap_event->flags);
8312 }
8313
8314 __output_copy(&handle, mmap_event->file_name,
8315 mmap_event->file_size);
8316
8317 perf_event__output_id_sample(event, &handle, &sample);
8318
8319 perf_output_end(&handle);
8320out:
8321 mmap_event->event_id.header.size = size;
8322 mmap_event->event_id.header.type = type;
8323}
8324
8325static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
8326{
8327 struct vm_area_struct *vma = mmap_event->vma;
8328 struct file *file = vma->vm_file;
8329 int maj = 0, min = 0;
8330 u64 ino = 0, gen = 0;
8331 u32 prot = 0, flags = 0;
8332 unsigned int size;
8333 char tmp[16];
8334 char *buf = NULL;
8335 char *name;
8336
8337 if (vma->vm_flags & VM_READ)
8338 prot |= PROT_READ;
8339 if (vma->vm_flags & VM_WRITE)
8340 prot |= PROT_WRITE;
8341 if (vma->vm_flags & VM_EXEC)
8342 prot |= PROT_EXEC;
8343
8344 if (vma->vm_flags & VM_MAYSHARE)
8345 flags = MAP_SHARED;
8346 else
8347 flags = MAP_PRIVATE;
8348
8349 if (vma->vm_flags & VM_LOCKED)
8350 flags |= MAP_LOCKED;
8351 if (is_vm_hugetlb_page(vma))
8352 flags |= MAP_HUGETLB;
8353
8354 if (file) {
8355 struct inode *inode;
8356 dev_t dev;
8357
8358 buf = kmalloc(PATH_MAX, GFP_KERNEL);
8359 if (!buf) {
8360 name = "//enomem";
8361 goto cpy_name;
8362 }
8363
8364
8365
8366
8367
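		/*
		 * Reserve sizeof(u64) bytes of the buffer so the u64
		 * alignment padding added after got_name always fits.
		 */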
8368 name = file_path(file, buf, PATH_MAX - sizeof(u64));
8369 if (IS_ERR(name)) {
8370 name = "//toolong";
8371 goto cpy_name;
8372 }
8373 inode = file_inode(vma->vm_file);
8374 dev = inode->i_sb->s_dev;
8375 ino = inode->i_ino;
8376 gen = inode->i_generation;
8377 maj = MAJOR(dev);
8378 min = MINOR(dev);
8379
8380 goto got_name;
8381 } else {
8382 if (vma->vm_ops && vma->vm_ops->name) {
8383 name = (char *) vma->vm_ops->name(vma);
8384 if (name)
8385 goto cpy_name;
8386 }
8387
8388 name = (char *)arch_vma_name(vma);
8389 if (name)
8390 goto cpy_name;
8391
8392 if (vma->vm_start <= vma->vm_mm->start_brk &&
8393 vma->vm_end >= vma->vm_mm->brk) {
8394 name = "[heap]";
8395 goto cpy_name;
8396 }
8397 if (vma->vm_start <= vma->vm_mm->start_stack &&
8398 vma->vm_end >= vma->vm_mm->start_stack) {
8399 name = "[stack]";
8400 goto cpy_name;
8401 }
8402
8403 name = "//anon";
8404 goto cpy_name;
8405 }
8406
8407cpy_name:
8408 strlcpy(tmp, name, sizeof(tmp));
8409 name = tmp;
8410got_name:
8411
8412
8413
8414
8415
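	/*
	 * The output buffer works in u64 units; pad the name with NUL bytes
	 * up to the next multiple of 8 so no stack garbage leaks to userspace.
	 */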
8416 size = strlen(name)+1;
8417 while (!IS_ALIGNED(size, sizeof(u64)))
8418 name[size++] = '\0';
8419
8420 mmap_event->file_name = name;
8421 mmap_event->file_size = size;
8422 mmap_event->maj = maj;
8423 mmap_event->min = min;
8424 mmap_event->ino = ino;
8425 mmap_event->ino_generation = gen;
8426 mmap_event->prot = prot;
8427 mmap_event->flags = flags;
8428
8429 if (!(vma->vm_flags & VM_EXEC))
8430 mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
8431
8432 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
8433
8434 if (atomic_read(&nr_build_id_events))
8435 build_id_parse(vma, mmap_event->build_id, &mmap_event->build_id_size);
8436
8437 perf_iterate_sb(perf_event_mmap_output,
8438 mmap_event,
8439 NULL);
8440
8441 kfree(buf);
8442}
8443
8444
8445
8446
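/*
 * Check whether inode and address range match filter criteria.
 */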
8447static bool perf_addr_filter_match(struct perf_addr_filter *filter,
8448 struct file *file, unsigned long offset,
8449 unsigned long size)
8450{
8451
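	/* Kernel-address filters have no dentry and never match a file mapping */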
8452 if (!filter->path.dentry)
8453 return false;
8454
8455 if (d_inode(filter->path.dentry) != file_inode(file))
8456 return false;
8457
8458 if (filter->offset > offset + size)
8459 return false;
8460
8461 if (filter->offset + filter->size < offset)
8462 return false;
8463
8464 return true;
8465}
8466
8467static bool perf_addr_filter_vma_adjust(struct perf_addr_filter *filter,
8468 struct vm_area_struct *vma,
8469 struct perf_addr_filter_range *fr)
8470{
8471 unsigned long vma_size = vma->vm_end - vma->vm_start;
8472 unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
8473 struct file *file = vma->vm_file;
8474
8475 if (!perf_addr_filter_match(filter, file, off, vma_size))
8476 return false;
8477
8478 if (filter->offset < off) {
8479 fr->start = vma->vm_start;
8480 fr->size = min(vma_size, filter->size - (off - filter->offset));
8481 } else {
8482 fr->start = vma->vm_start + filter->offset - off;
8483 fr->size = min(vma->vm_end - fr->start, filter->size);
8484 }
8485
8486 return true;
8487}
8488
8489static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
8490{
8491 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
8492 struct vm_area_struct *vma = data;
8493 struct perf_addr_filter *filter;
8494 unsigned int restart = 0, count = 0;
8495 unsigned long flags;
8496
8497 if (!has_addr_filter(event))
8498 return;
8499
8500 if (!vma->vm_file)
8501 return;
8502
8503 raw_spin_lock_irqsave(&ifh->lock, flags);
8504 list_for_each_entry(filter, &ifh->list, entry) {
8505 if (perf_addr_filter_vma_adjust(filter, vma,
8506 &event->addr_filter_ranges[count]))
8507 restart++;
8508
8509 count++;
8510 }
8511
8512 if (restart)
8513 event->addr_filters_gen++;
8514 raw_spin_unlock_irqrestore(&ifh->lock, flags);
8515
8516 if (restart)
8517 perf_event_stop(event, 1);
8518}
8519
8520
8521
8522
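/*
 * Adjust all task's events' address filters to the newly mapped vma.
 */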
8523static void perf_addr_filters_adjust(struct vm_area_struct *vma)
8524{
8525 struct perf_event_context *ctx;
8526 int ctxn;
8527
8528
8529
8530
8531
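	/*
	 * Data tracing isn't supported yet, so only executable mappings are
	 * of interest here.
	 */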
8532 if (!(vma->vm_flags & VM_EXEC))
8533 return;
8534
8535 rcu_read_lock();
8536 for_each_task_context_nr(ctxn) {
8537 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
8538 if (!ctx)
8539 continue;
8540
8541 perf_iterate_ctx(ctx, __perf_addr_filters_adjust, vma, true);
8542 }
8543 rcu_read_unlock();
8544}
8545
8546void perf_event_mmap(struct vm_area_struct *vma)
8547{
8548 struct perf_mmap_event mmap_event;
8549
8550 if (!atomic_read(&nr_mmap_events))
8551 return;
8552
8553 mmap_event = (struct perf_mmap_event){
8554 .vma = vma,
8555
8556
8557 .event_id = {
8558 .header = {
8559 .type = PERF_RECORD_MMAP,
8560 .misc = PERF_RECORD_MISC_USER,
8561
8562 },
8563
8564
8565 .start = vma->vm_start,
8566 .len = vma->vm_end - vma->vm_start,
8567 .pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT,
8568 },
8569
8570
8571
8572
8573
8574
8575 };
8576
8577 perf_addr_filters_adjust(vma);
8578 perf_event_mmap_event(&mmap_event);
8579}
8580
8581void perf_event_aux_event(struct perf_event *event, unsigned long head,
8582 unsigned long size, u64 flags)
8583{
8584 struct perf_output_handle handle;
8585 struct perf_sample_data sample;
8586 struct perf_aux_event {
8587 struct perf_event_header header;
8588 u64 offset;
8589 u64 size;
8590 u64 flags;
8591 } rec = {
8592 .header = {
8593 .type = PERF_RECORD_AUX,
8594 .misc = 0,
8595 .size = sizeof(rec),
8596 },
8597 .offset = head,
8598 .size = size,
8599 .flags = flags,
8600 };
8601 int ret;
8602
8603 perf_event_header__init_id(&rec.header, &sample, event);
8604 ret = perf_output_begin(&handle, &sample, event, rec.header.size);
8605
8606 if (ret)
8607 return;
8608
8609 perf_output_put(&handle, rec);
8610 perf_event__output_id_sample(event, &handle, &sample);
8611
8612 perf_output_end(&handle);
8613}
8614
8615
8616
8617
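/*
 * Lost/dropped samples logging
 */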
8618void perf_log_lost_samples(struct perf_event *event, u64 lost)
8619{
8620 struct perf_output_handle handle;
8621 struct perf_sample_data sample;
8622 int ret;
8623
8624 struct {
8625 struct perf_event_header header;
8626 u64 lost;
8627 } lost_samples_event = {
8628 .header = {
8629 .type = PERF_RECORD_LOST_SAMPLES,
8630 .misc = 0,
8631 .size = sizeof(lost_samples_event),
8632 },
8633 .lost = lost,
8634 };
8635
8636 perf_event_header__init_id(&lost_samples_event.header, &sample, event);
8637
8638 ret = perf_output_begin(&handle, &sample, event,
8639 lost_samples_event.header.size);
8640 if (ret)
8641 return;
8642
8643 perf_output_put(&handle, lost_samples_event);
8644 perf_event__output_id_sample(event, &handle, &sample);
8645 perf_output_end(&handle);
8646}
8647
8648
8649
8650
8651
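/*
 * context_switch tracking
 */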
8652struct perf_switch_event {
8653 struct task_struct *task;
8654 struct task_struct *next_prev;
8655
8656 struct {
8657 struct perf_event_header header;
8658 u32 next_prev_pid;
8659 u32 next_prev_tid;
8660 } event_id;
8661};
8662
8663static int perf_event_switch_match(struct perf_event *event)
8664{
8665 return event->attr.context_switch;
8666}
8667
8668static void perf_event_switch_output(struct perf_event *event, void *data)
8669{
8670 struct perf_switch_event *se = data;
8671 struct perf_output_handle handle;
8672 struct perf_sample_data sample;
8673 int ret;
8674
8675 if (!perf_event_switch_match(event))
8676 return;
8677
8678
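	/* Task events carry only the header; CPU-wide events also carry next/prev pid/tid */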
8679 if (event->ctx->task) {
8680 se->event_id.header.type = PERF_RECORD_SWITCH;
8681 se->event_id.header.size = sizeof(se->event_id.header);
8682 } else {
8683 se->event_id.header.type = PERF_RECORD_SWITCH_CPU_WIDE;
8684 se->event_id.header.size = sizeof(se->event_id);
8685 se->event_id.next_prev_pid =
8686 perf_event_pid(event, se->next_prev);
8687 se->event_id.next_prev_tid =
8688 perf_event_tid(event, se->next_prev);
8689 }
8690
8691 perf_event_header__init_id(&se->event_id.header, &sample, event);
8692
8693 ret = perf_output_begin(&handle, &sample, event, se->event_id.header.size);
8694 if (ret)
8695 return;
8696
8697 if (event->ctx->task)
8698 perf_output_put(&handle, se->event_id.header);
8699 else
8700 perf_output_put(&handle, se->event_id);
8701
8702 perf_event__output_id_sample(event, &handle, &sample);
8703
8704 perf_output_end(&handle);
8705}
8706
8707static void perf_event_switch(struct task_struct *task,
8708 struct task_struct *next_prev, bool sched_in)
8709{
8710 struct perf_switch_event switch_event;
8711
8712
8713
8714 switch_event = (struct perf_switch_event){
8715 .task = task,
8716 .next_prev = next_prev,
8717 .event_id = {
8718 .header = {
8719
8720 .misc = sched_in ? 0 : PERF_RECORD_MISC_SWITCH_OUT,
8721
8722 },
8723
8724
8725 },
8726 };
8727
8728 if (!sched_in && task->on_rq) {
8729 switch_event.event_id.header.misc |=
8730 PERF_RECORD_MISC_SWITCH_OUT_PREEMPT;
8731 }
8732
8733 perf_iterate_sb(perf_event_switch_output, &switch_event, NULL);
8734}
8735
8736
8737
8738
8739
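/*
 * IRQ throttle logging
 */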
8740static void perf_log_throttle(struct perf_event *event, int enable)
8741{
8742 struct perf_output_handle handle;
8743 struct perf_sample_data sample;
8744 int ret;
8745
8746 struct {
8747 struct perf_event_header header;
8748 u64 time;
8749 u64 id;
8750 u64 stream_id;
8751 } throttle_event = {
8752 .header = {
8753 .type = PERF_RECORD_THROTTLE,
8754 .misc = 0,
8755 .size = sizeof(throttle_event),
8756 },
8757 .time = perf_event_clock(event),
8758 .id = primary_event_id(event),
8759 .stream_id = event->id,
8760 };
8761
8762 if (enable)
8763 throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
8764
8765 perf_event_header__init_id(&throttle_event.header, &sample, event);
8766
8767 ret = perf_output_begin(&handle, &sample, event,
8768 throttle_event.header.size);
8769 if (ret)
8770 return;
8771
8772 perf_output_put(&handle, throttle_event);
8773 perf_event__output_id_sample(event, &handle, &sample);
8774 perf_output_end(&handle);
8775}
8776
8777
8778
8779
8780
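/*
 * ksymbol register/unregister tracking
 */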
8781struct perf_ksymbol_event {
8782 const char *name;
8783 int name_len;
8784 struct {
8785 struct perf_event_header header;
8786 u64 addr;
8787 u32 len;
8788 u16 ksym_type;
8789 u16 flags;
8790 } event_id;
8791};
8792
8793static int perf_event_ksymbol_match(struct perf_event *event)
8794{
8795 return event->attr.ksymbol;
8796}
8797
8798static void perf_event_ksymbol_output(struct perf_event *event, void *data)
8799{
8800 struct perf_ksymbol_event *ksymbol_event = data;
8801 struct perf_output_handle handle;
8802 struct perf_sample_data sample;
8803 int ret;
8804
8805 if (!perf_event_ksymbol_match(event))
8806 return;
8807
8808 perf_event_header__init_id(&ksymbol_event->event_id.header,
8809 &sample, event);
8810 ret = perf_output_begin(&handle, &sample, event,
8811 ksymbol_event->event_id.header.size);
8812 if (ret)
8813 return;
8814
8815 perf_output_put(&handle, ksymbol_event->event_id);
8816 __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len);
8817 perf_event__output_id_sample(event, &handle, &sample);
8818
8819 perf_output_end(&handle);
8820}
8821
8822void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
8823 const char *sym)
8824{
8825 struct perf_ksymbol_event ksymbol_event;
8826 char name[KSYM_NAME_LEN];
8827 u16 flags = 0;
8828 int name_len;
8829
8830 if (!atomic_read(&nr_ksymbol_events))
8831 return;
8832
8833 if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX ||
8834 ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN)
8835 goto err;
8836
8837 strlcpy(name, sym, KSYM_NAME_LEN);
8838 name_len = strlen(name) + 1;
8839 while (!IS_ALIGNED(name_len, sizeof(u64)))
8840 name[name_len++] = '\0';
8841 BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64));
8842
8843 if (unregister)
8844 flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER;
8845
8846 ksymbol_event = (struct perf_ksymbol_event){
8847 .name = name,
8848 .name_len = name_len,
8849 .event_id = {
8850 .header = {
8851 .type = PERF_RECORD_KSYMBOL,
8852 .size = sizeof(ksymbol_event.event_id) +
8853 name_len,
8854 },
8855 .addr = addr,
8856 .len = len,
8857 .ksym_type = ksym_type,
8858 .flags = flags,
8859 },
8860 };
8861
8862 perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL);
8863 return;
8864err:
8865 WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
8866}
8867
8868
8869
8870
8871
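/*
 * bpf program load/unload tracking
 */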
8872struct perf_bpf_event {
8873 struct bpf_prog *prog;
8874 struct {
8875 struct perf_event_header header;
8876 u16 type;
8877 u16 flags;
8878 u32 id;
8879 u8 tag[BPF_TAG_SIZE];
8880 } event_id;
8881};
8882
8883static int perf_event_bpf_match(struct perf_event *event)
8884{
8885 return event->attr.bpf_event;
8886}
8887
8888static void perf_event_bpf_output(struct perf_event *event, void *data)
8889{
8890 struct perf_bpf_event *bpf_event = data;
8891 struct perf_output_handle handle;
8892 struct perf_sample_data sample;
8893 int ret;
8894
8895 if (!perf_event_bpf_match(event))
8896 return;
8897
8898 perf_event_header__init_id(&bpf_event->event_id.header,
8899 &sample, event);
8900 ret = perf_output_begin(&handle, &sample, event,
8901 bpf_event->event_id.header.size);
8902 if (ret)
8903 return;
8904
8905 perf_output_put(&handle, bpf_event->event_id);
8906 perf_event__output_id_sample(event, &handle, &sample);
8907
8908 perf_output_end(&handle);
8909}
8910
8911static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
8912 enum perf_bpf_event_type type)
8913{
8914 bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
8915 int i;
8916
8917 if (prog->aux->func_cnt == 0) {
8918 perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
8919 (u64)(unsigned long)prog->bpf_func,
8920 prog->jited_len, unregister,
8921 prog->aux->ksym.name);
8922 } else {
8923 for (i = 0; i < prog->aux->func_cnt; i++) {
8924 struct bpf_prog *subprog = prog->aux->func[i];
8925
8926 perf_event_ksymbol(
8927 PERF_RECORD_KSYMBOL_TYPE_BPF,
8928 (u64)(unsigned long)subprog->bpf_func,
8929 subprog->jited_len, unregister,
8930 prog->aux->ksym.name);
8931 }
8932 }
8933}
8934
8935void perf_event_bpf_event(struct bpf_prog *prog,
8936 enum perf_bpf_event_type type,
8937 u16 flags)
8938{
8939 struct perf_bpf_event bpf_event;
8940
8941 if (type <= PERF_BPF_EVENT_UNKNOWN ||
8942 type >= PERF_BPF_EVENT_MAX)
8943 return;
8944
8945 switch (type) {
8946 case PERF_BPF_EVENT_PROG_LOAD:
8947 case PERF_BPF_EVENT_PROG_UNLOAD:
8948 if (atomic_read(&nr_ksymbol_events))
8949 perf_event_bpf_emit_ksymbols(prog, type);
8950 break;
8951 default:
8952 break;
8953 }
8954
8955 if (!atomic_read(&nr_bpf_events))
8956 return;
8957
8958 bpf_event = (struct perf_bpf_event){
8959 .prog = prog,
8960 .event_id = {
8961 .header = {
8962 .type = PERF_RECORD_BPF_EVENT,
8963 .size = sizeof(bpf_event.event_id),
8964 },
8965 .type = type,
8966 .flags = flags,
8967 .id = prog->aux->id,
8968 },
8969 };
8970
8971 BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64));
8972
8973 memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
8974 perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
8975}
8976
8977struct perf_text_poke_event {
8978 const void *old_bytes;
8979 const void *new_bytes;
8980 size_t pad;
8981 u16 old_len;
8982 u16 new_len;
8983
8984 struct {
8985 struct perf_event_header header;
8986
8987 u64 addr;
8988 } event_id;
8989};
8990
8991static int perf_event_text_poke_match(struct perf_event *event)
8992{
8993 return event->attr.text_poke;
8994}
8995
8996static void perf_event_text_poke_output(struct perf_event *event, void *data)
8997{
8998 struct perf_text_poke_event *text_poke_event = data;
8999 struct perf_output_handle handle;
9000 struct perf_sample_data sample;
9001 u64 padding = 0;
9002 int ret;
9003
9004 if (!perf_event_text_poke_match(event))
9005 return;
9006
9007 perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event);
9008
9009 ret = perf_output_begin(&handle, &sample, event,
9010 text_poke_event->event_id.header.size);
9011 if (ret)
9012 return;
9013
9014 perf_output_put(&handle, text_poke_event->event_id);
9015 perf_output_put(&handle, text_poke_event->old_len);
9016 perf_output_put(&handle, text_poke_event->new_len);
9017
9018 __output_copy(&handle, text_poke_event->old_bytes, text_poke_event->old_len);
9019 __output_copy(&handle, text_poke_event->new_bytes, text_poke_event->new_len);
9020
9021 if (text_poke_event->pad)
9022 __output_copy(&handle, &padding, text_poke_event->pad);
9023
9024 perf_event__output_id_sample(event, &handle, &sample);
9025
9026 perf_output_end(&handle);
9027}
9028
9029void perf_event_text_poke(const void *addr, const void *old_bytes,
9030 size_t old_len, const void *new_bytes, size_t new_len)
9031{
9032 struct perf_text_poke_event text_poke_event;
9033 size_t tot, pad;
9034
9035 if (!atomic_read(&nr_text_poke_events))
9036 return;
9037
9038 tot = sizeof(text_poke_event.old_len) + old_len;
9039 tot += sizeof(text_poke_event.new_len) + new_len;
9040 pad = ALIGN(tot, sizeof(u64)) - tot;
9041
9042 text_poke_event = (struct perf_text_poke_event){
9043 .old_bytes = old_bytes,
9044 .new_bytes = new_bytes,
9045 .pad = pad,
9046 .old_len = old_len,
9047 .new_len = new_len,
9048 .event_id = {
9049 .header = {
9050 .type = PERF_RECORD_TEXT_POKE,
9051 .misc = PERF_RECORD_MISC_KERNEL,
9052 .size = sizeof(text_poke_event.event_id) + tot + pad,
9053 },
9054 .addr = (unsigned long)addr,
9055 },
9056 };
9057
9058 perf_iterate_sb(perf_event_text_poke_output, &text_poke_event, NULL);
9059}
9060
9061void perf_event_itrace_started(struct perf_event *event)
9062{
9063 event->attach_state |= PERF_ATTACH_ITRACE;
9064}
9065
9066static void perf_log_itrace_start(struct perf_event *event)
9067{
9068 struct perf_output_handle handle;
9069 struct perf_sample_data sample;
9070 struct perf_aux_event {
9071 struct perf_event_header header;
9072 u32 pid;
9073 u32 tid;
9074 } rec;
9075 int ret;
9076
9077 if (event->parent)
9078 event = event->parent;
9079
9080 if (!(event->pmu->capabilities & PERF_PMU_CAP_ITRACE) ||
9081 event->attach_state & PERF_ATTACH_ITRACE)
9082 return;
9083
9084 rec.header.type = PERF_RECORD_ITRACE_START;
9085 rec.header.misc = 0;
9086 rec.header.size = sizeof(rec);
9087 rec.pid = perf_event_pid(event, current);
9088 rec.tid = perf_event_tid(event, current);
9089
9090 perf_event_header__init_id(&rec.header, &sample, event);
9091 ret = perf_output_begin(&handle, &sample, event, rec.header.size);
9092
9093 if (ret)
9094 return;
9095
9096 perf_output_put(&handle, rec);
9097 perf_event__output_id_sample(event, &handle, &sample);
9098
9099 perf_output_end(&handle);
9100}
9101
9102void perf_report_aux_output_id(struct perf_event *event, u64 hw_id)
9103{
9104 struct perf_output_handle handle;
9105 struct perf_sample_data sample;
9106 struct perf_aux_event {
9107 struct perf_event_header header;
9108 u64 hw_id;
9109 } rec;
9110 int ret;
9111
9112 if (event->parent)
9113 event = event->parent;
9114
9115 rec.header.type = PERF_RECORD_AUX_OUTPUT_HW_ID;
9116 rec.header.misc = 0;
9117 rec.header.size = sizeof(rec);
9118 rec.hw_id = hw_id;
9119
9120 perf_event_header__init_id(&rec.header, &sample, event);
9121 ret = perf_output_begin(&handle, &sample, event, rec.header.size);
9122
9123 if (ret)
9124 return;
9125
9126 perf_output_put(&handle, rec);
9127 perf_event__output_id_sample(event, &handle, &sample);
9128
9129 perf_output_end(&handle);
9130}
9131
9132static int
9133__perf_event_account_interrupt(struct perf_event *event, int throttle)
9134{
9135 struct hw_perf_event *hwc = &event->hw;
9136 int ret = 0;
9137 u64 seq;
9138
9139 seq = __this_cpu_read(perf_throttled_seq);
9140 if (seq != hwc->interrupts_seq) {
9141 hwc->interrupts_seq = seq;
9142 hwc->interrupts = 1;
9143 } else {
9144 hwc->interrupts++;
9145 if (unlikely(throttle
9146 && hwc->interrupts >= max_samples_per_tick)) {
9147 __this_cpu_inc(perf_throttled_count);
9148 tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
9149 hwc->interrupts = MAX_INTERRUPTS;
9150 perf_log_throttle(event, 0);
9151 ret = 1;
9152 }
9153 }
9154
9155 if (event->attr.freq) {
9156 u64 now = perf_clock();
9157 s64 delta = now - hwc->freq_time_stamp;
9158
9159 hwc->freq_time_stamp = now;
9160
9161 if (delta > 0 && delta < 2*TICK_NSEC)
9162 perf_adjust_period(event, delta, hwc->last_period, true);
9163 }
9164
9165 return ret;
9166}
9167
9168int perf_event_account_interrupt(struct perf_event *event)
9169{
9170 return __perf_event_account_interrupt(event, 1);
9171}
9172
9173
9174
9175
9176
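/*
 * Generic event overflow handling, sampling.
 */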
9177static int __perf_event_overflow(struct perf_event *event,
9178 int throttle, struct perf_sample_data *data,
9179 struct pt_regs *regs)
9180{
9181 int events = atomic_read(&event->event_limit);
9182 int ret = 0;
9183
9184
9185
9186
9187
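	/*
	 * Non-sampling counters might still use the PMI to fold short
	 * hardware counters, ignore those.
	 */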
9188 if (unlikely(!is_sampling_event(event)))
9189 return 0;
9190
9191 ret = __perf_event_account_interrupt(event, throttle);
9192
9193
9194
9195
9196
9197
9198 event->pending_kill = POLL_IN;
9199 if (events && atomic_dec_and_test(&event->event_limit)) {
9200 ret = 1;
9201 event->pending_kill = POLL_HUP;
9202 event->pending_addr = data->addr;
9203
9204 perf_event_disable_inatomic(event);
9205 }
9206
9207 READ_ONCE(event->overflow_handler)(event, data, regs);
9208
9209 if (*perf_event_fasync(event) && event->pending_kill) {
9210 event->pending_wakeup = 1;
9211 irq_work_queue(&event->pending);
9212 }
9213
9214 return ret;
9215}
9216
9217int perf_event_overflow(struct perf_event *event,
9218 struct perf_sample_data *data,
9219 struct pt_regs *regs)
9220{
9221 return __perf_event_overflow(event, 1, data, regs);
9222}
9223
9224
9225
9226
9227
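/*
 * Generic software event infrastructure
 */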
9228struct swevent_htable {
9229 struct swevent_hlist *swevent_hlist;
9230 struct mutex hlist_mutex;
9231 int hlist_refcount;
9232
9233
9234 int recursion[PERF_NR_CONTEXTS];
9235};
9236
9237static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
9238
9239
9240
9241
9242
9243
9244
9245
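/*
 * hw.period_left is kept in the range [-sample_period, 0] so that its sign
 * can act as the overflow trigger; report how many periods have elapsed and
 * rearm it for the next interval.
 */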
9246u64 perf_swevent_set_period(struct perf_event *event)
9247{
9248 struct hw_perf_event *hwc = &event->hw;
9249 u64 period = hwc->last_period;
9250 u64 nr, offset;
9251 s64 old, val;
9252
9253 hwc->last_period = hwc->sample_period;
9254
9255again:
9256 old = val = local64_read(&hwc->period_left);
9257 if (val < 0)
9258 return 0;
9259
9260 nr = div64_u64(period + val, period);
9261 offset = nr * period;
9262 val -= offset;
9263 if (local64_cmpxchg(&hwc->period_left, old, val) != old)
9264 goto again;
9265
9266 return nr;
9267}
9268
9269static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
9270 struct perf_sample_data *data,
9271 struct pt_regs *regs)
9272{
9273 struct hw_perf_event *hwc = &event->hw;
9274 int throttle = 0;
9275
9276 if (!overflow)
9277 overflow = perf_swevent_set_period(event);
9278
9279 if (hwc->interrupts == MAX_INTERRUPTS)
9280 return;
9281
9282 for (; overflow; overflow--) {
9283 if (__perf_event_overflow(event, throttle,
9284 data, regs)) {
9285
9286
9287
9288
9289 break;
9290 }
9291 throttle = 1;
9292 }
9293}
9294
9295static void perf_swevent_event(struct perf_event *event, u64 nr,
9296 struct perf_sample_data *data,
9297 struct pt_regs *regs)
9298{
9299 struct hw_perf_event *hwc = &event->hw;
9300
9301 local64_add(nr, &event->count);
9302
9303 if (!regs)
9304 return;
9305
9306 if (!is_sampling_event(event))
9307 return;
9308
9309 if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
9310 data->period = nr;
9311 return perf_swevent_overflow(event, 1, data, regs);
9312 } else
9313 data->period = event->hw.last_period;
9314
9315 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
9316 return perf_swevent_overflow(event, 1, data, regs);
9317
9318 if (local64_add_negative(nr, &hwc->period_left))
9319 return;
9320
9321 perf_swevent_overflow(event, 0, data, regs);
9322}
9323
9324static int perf_exclude_event(struct perf_event *event,
9325 struct pt_regs *regs)
9326{
9327 if (event->hw.state & PERF_HES_STOPPED)
9328 return 1;
9329
9330 if (regs) {
9331 if (event->attr.exclude_user && user_mode(regs))
9332 return 1;
9333
9334 if (event->attr.exclude_kernel && !user_mode(regs))
9335 return 1;
9336 }
9337
9338 return 0;
9339}
9340
9341static int perf_swevent_match(struct perf_event *event,
9342 enum perf_type_id type,
9343 u32 event_id,
9344 struct perf_sample_data *data,
9345 struct pt_regs *regs)
9346{
9347 if (event->attr.type != type)
9348 return 0;
9349
9350 if (event->attr.config != event_id)
9351 return 0;
9352
9353 if (perf_exclude_event(event, regs))
9354 return 0;
9355
9356 return 1;
9357}
9358
9359static inline u64 swevent_hash(u64 type, u32 event_id)
9360{
9361 u64 val = event_id | (type << 32);
9362
9363 return hash_64(val, SWEVENT_HLIST_BITS);
9364}
9365
9366static inline struct hlist_head *
9367__find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id)
9368{
9369 u64 hash = swevent_hash(type, event_id);
9370
9371 return &hlist->heads[hash];
9372}
9373
9374
9375static inline struct hlist_head *
9376find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id)
9377{
9378 struct swevent_hlist *hlist;
9379
9380 hlist = rcu_dereference(swhash->swevent_hlist);
9381 if (!hlist)
9382 return NULL;
9383
9384 return __find_swevent_head(hlist, type, event_id);
9385}
9386
9387
9388static inline struct hlist_head *
9389find_swevent_head(struct swevent_htable *swhash, struct perf_event *event)
9390{
9391 struct swevent_hlist *hlist;
9392 u32 event_id = event->attr.config;
9393 u64 type = event->attr.type;
9394
9395
9396
9397
9398
9399
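	/*
	 * Event scheduling is serialized against hlist allocation and
	 * release; holding ctx->lock makes the protected dereference safe.
	 */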
9400 hlist = rcu_dereference_protected(swhash->swevent_hlist,
9401 lockdep_is_held(&event->ctx->lock));
9402 if (!hlist)
9403 return NULL;
9404
9405 return __find_swevent_head(hlist, type, event_id);
9406}
9407
9408static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
9409 u64 nr,
9410 struct perf_sample_data *data,
9411 struct pt_regs *regs)
9412{
9413 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
9414 struct perf_event *event;
9415 struct hlist_head *head;
9416
9417 rcu_read_lock();
9418 head = find_swevent_head_rcu(swhash, type, event_id);
9419 if (!head)
9420 goto end;
9421
9422 hlist_for_each_entry_rcu(event, head, hlist_entry) {
9423 if (perf_swevent_match(event, type, event_id, data, regs))
9424 perf_swevent_event(event, nr, data, regs);
9425 }
9426end:
9427 rcu_read_unlock();
9428}
9429
9430DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);
9431
9432int perf_swevent_get_recursion_context(void)
9433{
9434 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
9435
9436 return get_recursion_context(swhash->recursion);
9437}
9438EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
9439
9440void perf_swevent_put_recursion_context(int rctx)
9441{
9442 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
9443
9444 put_recursion_context(swhash->recursion, rctx);
9445}
9446
9447void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
9448{
9449 struct perf_sample_data data;
9450
9451 if (WARN_ON_ONCE(!regs))
9452 return;
9453
9454 perf_sample_data_init(&data, addr, 0);
9455 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
9456}
9457
9458void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
9459{
9460 int rctx;
9461
9462 preempt_disable_notrace();
9463 rctx = perf_swevent_get_recursion_context();
9464 if (unlikely(rctx < 0))
9465 goto fail;
9466
9467 ___perf_sw_event(event_id, nr, regs, addr);
9468
9469 perf_swevent_put_recursion_context(rctx);
9470fail:
9471 preempt_enable_notrace();
9472}
9473
9474static void perf_swevent_read(struct perf_event *event)
9475{
9476}
9477
9478static int perf_swevent_add(struct perf_event *event, int flags)
9479{
9480 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
9481 struct hw_perf_event *hwc = &event->hw;
9482 struct hlist_head *head;
9483
9484 if (is_sampling_event(event)) {
9485 hwc->last_period = hwc->sample_period;
9486 perf_swevent_set_period(event);
9487 }
9488
9489 hwc->state = !(flags & PERF_EF_START);
9490
9491 head = find_swevent_head(swhash, event);
9492 if (WARN_ON_ONCE(!head))
9493 return -EINVAL;
9494
9495 hlist_add_head_rcu(&event->hlist_entry, head);
9496 perf_event_update_userpage(event);
9497
9498 return 0;
9499}
9500
9501static void perf_swevent_del(struct perf_event *event, int flags)
9502{
9503 hlist_del_rcu(&event->hlist_entry);
9504}
9505
9506static void perf_swevent_start(struct perf_event *event, int flags)
9507{
9508 event->hw.state = 0;
9509}
9510
9511static void perf_swevent_stop(struct perf_event *event, int flags)
9512{
9513 event->hw.state = PERF_HES_STOPPED;
9514}
9515
9516
9517static inline struct swevent_hlist *
9518swevent_hlist_deref(struct swevent_htable *swhash)
9519{
9520 return rcu_dereference_protected(swhash->swevent_hlist,
9521 lockdep_is_held(&swhash->hlist_mutex));
9522}
9523
9524static void swevent_hlist_release(struct swevent_htable *swhash)
9525{
9526 struct swevent_hlist *hlist = swevent_hlist_deref(swhash);
9527
9528 if (!hlist)
9529 return;
9530
9531 RCU_INIT_POINTER(swhash->swevent_hlist, NULL);
9532 kfree_rcu(hlist, rcu_head);
9533}
9534
9535static void swevent_hlist_put_cpu(int cpu)
9536{
9537 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
9538
9539 mutex_lock(&swhash->hlist_mutex);
9540
9541 if (!--swhash->hlist_refcount)
9542 swevent_hlist_release(swhash);
9543
9544 mutex_unlock(&swhash->hlist_mutex);
9545}
9546
9547static void swevent_hlist_put(void)
9548{
9549 int cpu;
9550
9551 for_each_possible_cpu(cpu)
9552 swevent_hlist_put_cpu(cpu);
9553}
9554
9555static int swevent_hlist_get_cpu(int cpu)
9556{
9557 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
9558 int err = 0;
9559
9560 mutex_lock(&swhash->hlist_mutex);
9561 if (!swevent_hlist_deref(swhash) &&
9562 cpumask_test_cpu(cpu, perf_online_mask)) {
9563 struct swevent_hlist *hlist;
9564
9565 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
9566 if (!hlist) {
9567 err = -ENOMEM;
9568 goto exit;
9569 }
9570 rcu_assign_pointer(swhash->swevent_hlist, hlist);
9571 }
9572 swhash->hlist_refcount++;
9573exit:
9574 mutex_unlock(&swhash->hlist_mutex);
9575
9576 return err;
9577}
9578
9579static int swevent_hlist_get(void)
9580{
9581 int err, cpu, failed_cpu;
9582
9583 mutex_lock(&pmus_lock);
9584 for_each_possible_cpu(cpu) {
9585 err = swevent_hlist_get_cpu(cpu);
9586 if (err) {
9587 failed_cpu = cpu;
9588 goto fail;
9589 }
9590 }
9591 mutex_unlock(&pmus_lock);
9592 return 0;
9593fail:
9594 for_each_possible_cpu(cpu) {
9595 if (cpu == failed_cpu)
9596 break;
9597 swevent_hlist_put_cpu(cpu);
9598 }
9599 mutex_unlock(&pmus_lock);
9600 return err;
9601}
9602
9603struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
9604
9605static void sw_perf_event_destroy(struct perf_event *event)
9606{
9607 u64 event_id = event->attr.config;
9608
9609 WARN_ON(event->parent);
9610
9611 static_key_slow_dec(&perf_swevent_enabled[event_id]);
9612 swevent_hlist_put();
9613}
9614
9615static int perf_swevent_init(struct perf_event *event)
9616{
9617 u64 event_id = event->attr.config;
9618
9619 if (event->attr.type != PERF_TYPE_SOFTWARE)
9620 return -ENOENT;
9621
9622
9623
9624
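	/* no branch sampling for software events */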
9625 if (has_branch_stack(event))
9626 return -EOPNOTSUPP;
9627
9628 switch (event_id) {
9629 case PERF_COUNT_SW_CPU_CLOCK:
9630 case PERF_COUNT_SW_TASK_CLOCK:
9631 return -ENOENT;
9632
9633 default:
9634 break;
9635 }
9636
9637 if (event_id >= PERF_COUNT_SW_MAX)
9638 return -ENOENT;
9639
9640 if (!event->parent) {
9641 int err;
9642
9643 err = swevent_hlist_get();
9644 if (err)
9645 return err;
9646
9647 static_key_slow_inc(&perf_swevent_enabled[event_id]);
9648 event->destroy = sw_perf_event_destroy;
9649 }
9650
9651 return 0;
9652}
9653
9654static struct pmu perf_swevent = {
9655 .task_ctx_nr = perf_sw_context,
9656
9657 .capabilities = PERF_PMU_CAP_NO_NMI,
9658
9659 .event_init = perf_swevent_init,
9660 .add = perf_swevent_add,
9661 .del = perf_swevent_del,
9662 .start = perf_swevent_start,
9663 .stop = perf_swevent_stop,
9664 .read = perf_swevent_read,
9665};
9666
9667#ifdef CONFIG_EVENT_TRACING
9668
9669static int perf_tp_filter_match(struct perf_event *event,
9670 struct perf_sample_data *data)
9671{
9672 void *record = data->raw->frag.data;
9673
9674
9675 if (event->parent)
9676 event = event->parent;
9677
9678 if (likely(!event->filter) || filter_match_preds(event->filter, record))
9679 return 1;
9680 return 0;
9681}
9682
9683static int perf_tp_event_match(struct perf_event *event,
9684 struct perf_sample_data *data,
9685 struct pt_regs *regs)
9686{
9687 if (event->hw.state & PERF_HES_STOPPED)
9688 return 0;
9689
9690
9691
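	/*
	 * If exclude_kernel is set, only let user-space tracepoints
	 * (uprobes) through.
	 */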
9692 if (event->attr.exclude_kernel && !user_mode(regs))
9693 return 0;
9694
9695 if (!perf_tp_filter_match(event, data))
9696 return 0;
9697
9698 return 1;
9699}
9700
9701void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
9702 struct trace_event_call *call, u64 count,
9703 struct pt_regs *regs, struct hlist_head *head,
9704 struct task_struct *task)
9705{
9706 if (bpf_prog_array_valid(call)) {
9707 *(struct pt_regs **)raw_data = regs;
9708 if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
9709 perf_swevent_put_recursion_context(rctx);
9710 return;
9711 }
9712 }
9713 perf_tp_event(call->event.type, count, raw_data, size, regs, head,
9714 rctx, task);
9715}
9716EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit);
9717
9718void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
9719 struct pt_regs *regs, struct hlist_head *head, int rctx,
9720 struct task_struct *task)
9721{
9722 struct perf_sample_data data;
9723 struct perf_event *event;
9724
9725 struct perf_raw_record raw = {
9726 .frag = {
9727 .size = entry_size,
9728 .data = record,
9729 },
9730 };
9731
9732 perf_sample_data_init(&data, 0, 0);
9733 data.raw = &raw;
9734
9735 perf_trace_buf_update(record, event_type);
9736
9737 hlist_for_each_entry_rcu(event, head, hlist_entry) {
9738 if (perf_tp_event_match(event, &data, regs))
9739 perf_swevent_event(event, count, &data, regs);
9740 }
9741
9742
9743
9744
9745
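	/*
	 * If we got specified a target task, also iterate its context and
	 * deliver this event there too.
	 */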
9746 if (task && task != current) {
9747 struct perf_event_context *ctx;
9748 struct trace_entry *entry = record;
9749
9750 rcu_read_lock();
9751 ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]);
9752 if (!ctx)
9753 goto unlock;
9754
9755 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
9756 if (event->cpu != smp_processor_id())
9757 continue;
9758 if (event->attr.type != PERF_TYPE_TRACEPOINT)
9759 continue;
9760 if (event->attr.config != entry->type)
9761 continue;
9762
9763 if (event->attr.sigtrap)
9764 continue;
9765 if (perf_tp_event_match(event, &data, regs))
9766 perf_swevent_event(event, count, &data, regs);
9767 }
9768unlock:
9769 rcu_read_unlock();
9770 }
9771
9772 perf_swevent_put_recursion_context(rctx);
9773}
9774EXPORT_SYMBOL_GPL(perf_tp_event);
9775
9776static void tp_perf_event_destroy(struct perf_event *event)
9777{
9778 perf_trace_destroy(event);
9779}
9780
9781static int perf_tp_event_init(struct perf_event *event)
9782{
9783 int err;
9784
9785 if (event->attr.type != PERF_TYPE_TRACEPOINT)
9786 return -ENOENT;
9787
9788
9789
9790
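	/* no branch sampling for tracepoint events */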
9791 if (has_branch_stack(event))
9792 return -EOPNOTSUPP;
9793
9794 err = perf_trace_init(event);
9795 if (err)
9796 return err;
9797
9798 event->destroy = tp_perf_event_destroy;
9799
9800 return 0;
9801}
9802
9803static struct pmu perf_tracepoint = {
9804 .task_ctx_nr = perf_sw_context,
9805
9806 .event_init = perf_tp_event_init,
9807 .add = perf_trace_add,
9808 .del = perf_trace_del,
9809 .start = perf_swevent_start,
9810 .stop = perf_swevent_stop,
9811 .read = perf_swevent_read,
9812};
9813
9814#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
9815
9816
9817
9818
9819
9820
9821
9822
9823
9824
9825
9826
9827
9828
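/*
 * Layout of attr.config for the dynamically registered kprobe/uprobe PMUs.
 * Bit 0 selects a return probe (kretprobe/uretprobe); for uprobes the upper
 * 32 bits carry the reference counter (semaphore) offset, matching the
 * PMU_FORMAT_ATTR() definitions below.
 */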
9829enum perf_probe_config {
9830 PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0,
9831 PERF_UPROBE_REF_CTR_OFFSET_BITS = 32,
9832 PERF_UPROBE_REF_CTR_OFFSET_SHIFT = 64 - PERF_UPROBE_REF_CTR_OFFSET_BITS,
9833};
9834
9835PMU_FORMAT_ATTR(retprobe, "config:0");
9836#endif
9837
9838#ifdef CONFIG_KPROBE_EVENTS
9839static struct attribute *kprobe_attrs[] = {
9840 &format_attr_retprobe.attr,
9841 NULL,
9842};
9843
9844static struct attribute_group kprobe_format_group = {
9845 .name = "format",
9846 .attrs = kprobe_attrs,
9847};
9848
9849static const struct attribute_group *kprobe_attr_groups[] = {
9850 &kprobe_format_group,
9851 NULL,
9852};
9853
9854static int perf_kprobe_event_init(struct perf_event *event);
9855static struct pmu perf_kprobe = {
9856 .task_ctx_nr = perf_sw_context,
9857 .event_init = perf_kprobe_event_init,
9858 .add = perf_trace_add,
9859 .del = perf_trace_del,
9860 .start = perf_swevent_start,
9861 .stop = perf_swevent_stop,
9862 .read = perf_swevent_read,
9863 .attr_groups = kprobe_attr_groups,
9864};
9865
9866static int perf_kprobe_event_init(struct perf_event *event)
9867{
9868 int err;
9869 bool is_retprobe;
9870
9871 if (event->attr.type != perf_kprobe.type)
9872 return -ENOENT;
9873
9874 if (!perfmon_capable())
9875 return -EACCES;
9876
9877
9878
9879
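	/* no branch sampling for kprobe events */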
9880 if (has_branch_stack(event))
9881 return -EOPNOTSUPP;
9882
9883 is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
9884 err = perf_kprobe_init(event, is_retprobe);
9885 if (err)
9886 return err;
9887
9888 event->destroy = perf_kprobe_destroy;
9889
9890 return 0;
9891}
9892#endif
9893
9894#ifdef CONFIG_UPROBE_EVENTS
9895PMU_FORMAT_ATTR(ref_ctr_offset, "config:32-63");
9896
9897static struct attribute *uprobe_attrs[] = {
9898 &format_attr_retprobe.attr,
9899 &format_attr_ref_ctr_offset.attr,
9900 NULL,
9901};
9902
9903static struct attribute_group uprobe_format_group = {
9904 .name = "format",
9905 .attrs = uprobe_attrs,
9906};
9907
9908static const struct attribute_group *uprobe_attr_groups[] = {
9909 &uprobe_format_group,
9910 NULL,
9911};
9912
9913static int perf_uprobe_event_init(struct perf_event *event);
9914static struct pmu perf_uprobe = {
9915 .task_ctx_nr = perf_sw_context,
9916 .event_init = perf_uprobe_event_init,
9917 .add = perf_trace_add,
9918 .del = perf_trace_del,
9919 .start = perf_swevent_start,
9920 .stop = perf_swevent_stop,
9921 .read = perf_swevent_read,
9922 .attr_groups = uprobe_attr_groups,
9923};
9924
9925static int perf_uprobe_event_init(struct perf_event *event)
9926{
9927 int err;
9928 unsigned long ref_ctr_offset;
9929 bool is_retprobe;
9930
9931 if (event->attr.type != perf_uprobe.type)
9932 return -ENOENT;
9933
9934 if (!perfmon_capable())
9935 return -EACCES;
9936
9937
9938
9939
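	/* no branch sampling for uprobe events */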
9940 if (has_branch_stack(event))
9941 return -EOPNOTSUPP;
9942
9943 is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
9944 ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
9945 err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe);
9946 if (err)
9947 return err;
9948
9949 event->destroy = perf_uprobe_destroy;
9950
9951 return 0;
9952}
9953#endif
9954
9955static inline void perf_tp_register(void)
9956{
9957 perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
9958#ifdef CONFIG_KPROBE_EVENTS
9959 perf_pmu_register(&perf_kprobe, "kprobe", -1);
9960#endif
9961#ifdef CONFIG_UPROBE_EVENTS
9962 perf_pmu_register(&perf_uprobe, "uprobe", -1);
9963#endif
9964}
9965
9966static void perf_event_free_filter(struct perf_event *event)
9967{
9968 ftrace_profile_free_filter(event);
9969}
9970
9971#ifdef CONFIG_BPF_SYSCALL
9972static void bpf_overflow_handler(struct perf_event *event,
9973 struct perf_sample_data *data,
9974 struct pt_regs *regs)
9975{
9976 struct bpf_perf_event_data_kern ctx = {
9977 .data = data,
9978 .event = event,
9979 };
9980 struct bpf_prog *prog;
9981 int ret = 0;
9982
9983 ctx.regs = perf_arch_bpf_user_pt_regs(regs);
9984 if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
9985 goto out;
9986 rcu_read_lock();
9987 prog = READ_ONCE(event->prog);
9988 if (prog)
9989 ret = bpf_prog_run(prog, &ctx);
9990 rcu_read_unlock();
9991out:
9992 __this_cpu_dec(bpf_prog_active);
9993 if (!ret)
9994 return;
9995
9996 event->orig_overflow_handler(event, data, regs);
9997}
9998
9999static int perf_event_set_bpf_handler(struct perf_event *event,
10000 struct bpf_prog *prog,
10001 u64 bpf_cookie)
10002{
10003 if (event->overflow_handler_context)
10004
10005 return -EINVAL;
10006
10007 if (event->prog)
10008 return -EEXIST;
10009
10010 if (prog->type != BPF_PROG_TYPE_PERF_EVENT)
10011 return -EINVAL;
10012
10013 if (event->attr.precise_ip &&
10014 prog->call_get_stack &&
10015 (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY) ||
10016 event->attr.exclude_callchain_kernel ||
10017 event->attr.exclude_callchain_user)) {
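		/*
		 * A program that calls bpf_get_stack()/bpf_get_stackid() on a
		 * precise_ip event relies on the full (kernel and user)
		 * callchain being attached to the sample data; refuse the
		 * attach otherwise.
		 */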
10018
10019
10020
10021
10022
10023
10024
10025
10026
10027 return -EPROTO;
10028 }
10029
10030 event->prog = prog;
10031 event->bpf_cookie = bpf_cookie;
10032 event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
10033 WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
10034 return 0;
10035}
10036
10037static void perf_event_free_bpf_handler(struct perf_event *event)
10038{
10039 struct bpf_prog *prog = event->prog;
10040
10041 if (!prog)
10042 return;
10043
10044 WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
10045 event->prog = NULL;
10046 bpf_prog_put(prog);
10047}
10048#else
10049static int perf_event_set_bpf_handler(struct perf_event *event,
10050 struct bpf_prog *prog,
10051 u64 bpf_cookie)
10052{
10053 return -EOPNOTSUPP;
10054}
10055static void perf_event_free_bpf_handler(struct perf_event *event)
10056{
10057}
10058#endif
10059
10060
10061
10062
10063
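/*
 * Returns true if the event uses the tracepoint, kprobe or uprobe PMU.
 */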
10064static inline bool perf_event_is_tracing(struct perf_event *event)
10065{
10066 if (event->pmu == &perf_tracepoint)
10067 return true;
10068#ifdef CONFIG_KPROBE_EVENTS
10069 if (event->pmu == &perf_kprobe)
10070 return true;
10071#endif
10072#ifdef CONFIG_UPROBE_EVENTS
10073 if (event->pmu == &perf_uprobe)
10074 return true;
10075#endif
10076 return false;
10077}
10078
10079int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
10080 u64 bpf_cookie)
10081{
10082 bool is_kprobe, is_tracepoint, is_syscall_tp;
10083
10084 if (!perf_event_is_tracing(event))
10085 return perf_event_set_bpf_handler(event, prog, bpf_cookie);
10086
10087 is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
10088 is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
10089 is_syscall_tp = is_syscall_trace_event(event->tp_event);
10090 if (!is_kprobe && !is_tracepoint && !is_syscall_tp)
10091
10092 return -EINVAL;
10093
10094 if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
10095 (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) ||
10096 (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT))
10097 return -EINVAL;
10098
10099
10100 if (prog->kprobe_override &&
10101 !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
10102 return -EINVAL;
10103
10104 if (is_tracepoint || is_syscall_tp) {
10105 int off = trace_event_get_offsets(event->tp_event);
10106
10107 if (prog->aux->max_ctx_offset > off)
10108 return -EACCES;
10109 }
10110
10111 return perf_event_attach_bpf_prog(event, prog, bpf_cookie);
10112}
10113
10114void perf_event_free_bpf_prog(struct perf_event *event)
10115{
10116 if (!perf_event_is_tracing(event)) {
10117 perf_event_free_bpf_handler(event);
10118 return;
10119 }
10120 perf_event_detach_bpf_prog(event);
10121}
10122
10123#else
10124
10125static inline void perf_tp_register(void)
10126{
10127}
10128
10129static void perf_event_free_filter(struct perf_event *event)
10130{
10131}
10132
10133int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
10134 u64 bpf_cookie)
10135{
10136 return -ENOENT;
10137}
10138
10139void perf_event_free_bpf_prog(struct perf_event *event)
10140{
10141}
10142#endif
10143
10144#ifdef CONFIG_HAVE_HW_BREAKPOINT
10145void perf_bp_event(struct perf_event *bp, void *data)
10146{
10147 struct perf_sample_data sample;
10148 struct pt_regs *regs = data;
10149
10150 perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
10151
10152 if (!bp->hw.state && !perf_exclude_event(bp, regs))
10153 perf_swevent_event(bp, 1, &sample, regs);
10154}
10155#endif
10156
10157
10158
10159
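/*
 * Allocate a new address filter and queue it on the event's filter list.
 */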
10160static struct perf_addr_filter *
10161perf_addr_filter_new(struct perf_event *event, struct list_head *filters)
10162{
10163 int node = cpu_to_node(event->cpu == -1 ? 0 : event->cpu);
10164 struct perf_addr_filter *filter;
10165
10166 filter = kzalloc_node(sizeof(*filter), GFP_KERNEL, node);
10167 if (!filter)
10168 return NULL;
10169
10170 INIT_LIST_HEAD(&filter->entry);
10171 list_add_tail(&filter->entry, filters);
10172
10173 return filter;
10174}
10175
10176static void free_filters_list(struct list_head *filters)
10177{
10178 struct perf_addr_filter *filter, *iter;
10179
10180 list_for_each_entry_safe(filter, iter, filters, entry) {
10181 path_put(&filter->path);
10182 list_del(&filter->entry);
10183 kfree(filter);
10184 }
10185}
10186
10187
10188
10189
10190static void perf_addr_filters_splice(struct perf_event *event,
10191 struct list_head *head)
10192{
10193 unsigned long flags;
10194 LIST_HEAD(list);
10195
10196 if (!has_addr_filter(event))
10197 return;
10198
10199
10200 if (event->parent)
10201 return;
10202
10203 raw_spin_lock_irqsave(&event->addr_filters.lock, flags);
10204
10205 list_splice_init(&event->addr_filters.list, &list);
10206 if (head)
10207 list_splice(head, &event->addr_filters.list);
10208
10209 raw_spin_unlock_irqrestore(&event->addr_filters.lock, flags);
10210
10211 free_filters_list(&list);
10212}
10213
10214
10215
10216
10217
10218
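/*
 * Scan through mm's vmas and see if one of them matches @filter; if so,
 * adjust the filter's address range. Called with mm::mmap_lock held for
 * reading.
 */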
10219static void perf_addr_filter_apply(struct perf_addr_filter *filter,
10220 struct mm_struct *mm,
10221 struct perf_addr_filter_range *fr)
10222{
10223 struct vm_area_struct *vma;
10224
10225 for (vma = mm->mmap; vma; vma = vma->vm_next) {
10226 if (!vma->vm_file)
10227 continue;
10228
10229 if (perf_addr_filter_vma_adjust(filter, vma, fr))
10230 return;
10231 }
10232}
10233
10234
10235
10236
10237
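/*
 * Recalculate this event's address filter ranges against the target task's
 * current memory map, bump the filter generation and restart the event so
 * the PMU reprograms its filters.
 */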
10238static void perf_event_addr_filters_apply(struct perf_event *event)
10239{
10240 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
10241 struct task_struct *task = READ_ONCE(event->ctx->task);
10242 struct perf_addr_filter *filter;
10243 struct mm_struct *mm = NULL;
10244 unsigned int count = 0;
10245 unsigned long flags;
10246
10247
10248
10249
10250
10251 if (task == TASK_TOMBSTONE)
10252 return;
10253
10254 if (ifh->nr_file_filters) {
10255 mm = get_task_mm(task);
10256 if (!mm)
10257 goto restart;
10258
10259 mmap_read_lock(mm);
10260 }
10261
10262 raw_spin_lock_irqsave(&ifh->lock, flags);
10263 list_for_each_entry(filter, &ifh->list, entry) {
10264 if (filter->path.dentry) {
10265
10266
10267
10268
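/*
 * File-backed filter: the effective range depends on where the object
 * is mapped, so start from an empty range and resolve it against the VMAs.
 */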
10269 event->addr_filter_ranges[count].start = 0;
10270 event->addr_filter_ranges[count].size = 0;
10271
10272 perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]);
10273 } else {
10274 event->addr_filter_ranges[count].start = filter->offset;
10275 event->addr_filter_ranges[count].size = filter->size;
10276 }
10277
10278 count++;
10279 }
10280
10281 event->addr_filters_gen++;
10282 raw_spin_unlock_irqrestore(&ifh->lock, flags);
10283
10284 if (ifh->nr_file_filters) {
10285 mmap_read_unlock(mm);
10286
10287 mmput(mm);
10288 }
10289
10290restart:
10291 perf_event_stop(event, 1);
10292}
10293
10294
10295
10296
10297
10298
10299
10300
10301
10302
10303
10304
10305
10306
10307
10308
10309
10310
10311
10312
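/*
 * Address range filtering: limit instruction tracing to certain address
 * ranges. Filters arrive from userspace as ASCII strings of the form
 *
 *	ACTION RANGE_SPEC
 *
 * where ACTION is "filter", "start" or "stop", and RANGE_SPEC is either
 * <start>[/<size>] for kernel addresses or <start>[/<size>]@<object file>
 * for file-backed ranges (see the if_tokens table below). A missing or
 * zero <size> is only accepted for the "start" and "stop" actions.
 */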
10313enum {
10314 IF_ACT_NONE = -1,
10315 IF_ACT_FILTER,
10316 IF_ACT_START,
10317 IF_ACT_STOP,
10318 IF_SRC_FILE,
10319 IF_SRC_KERNEL,
10320 IF_SRC_FILEADDR,
10321 IF_SRC_KERNELADDR,
10322};
10323
10324enum {
10325 IF_STATE_ACTION = 0,
10326 IF_STATE_SOURCE,
10327 IF_STATE_END,
10328};
10329
10330static const match_table_t if_tokens = {
10331 { IF_ACT_FILTER, "filter" },
10332 { IF_ACT_START, "start" },
10333 { IF_ACT_STOP, "stop" },
10334 { IF_SRC_FILE, "%u/%u@%s" },
10335 { IF_SRC_KERNEL, "%u/%u" },
10336 { IF_SRC_FILEADDR, "%u@%s" },
10337 { IF_SRC_KERNELADDR, "%u" },
10338 { IF_ACT_NONE, NULL },
10339};
10340
10341
10342
10343
10344static int
10345perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
10346 struct list_head *filters)
10347{
10348 struct perf_addr_filter *filter = NULL;
10349 char *start, *orig, *filename = NULL;
10350 substring_t args[MAX_OPT_ARGS];
10351 int state = IF_STATE_ACTION, token;
10352 unsigned int kernel = 0;
10353 int ret = -EINVAL;
10354
10355 orig = fstr = kstrdup(fstr, GFP_KERNEL);
10356 if (!fstr)
10357 return -ENOMEM;
10358
10359 while ((start = strsep(&fstr, " ,\n")) != NULL) {
10360 static const enum perf_addr_filter_action_t actions[] = {
10361 [IF_ACT_FILTER] = PERF_ADDR_FILTER_ACTION_FILTER,
10362 [IF_ACT_START] = PERF_ADDR_FILTER_ACTION_START,
10363 [IF_ACT_STOP] = PERF_ADDR_FILTER_ACTION_STOP,
10364 };
10365 ret = -EINVAL;
10366
10367 if (!*start)
10368 continue;
10369
10370
10371 if (state == IF_STATE_ACTION) {
10372 filter = perf_addr_filter_new(event, filters);
10373 if (!filter)
10374 goto fail;
10375 }
10376
10377 token = match_token(start, if_tokens, args);
10378 switch (token) {
10379 case IF_ACT_FILTER:
10380 case IF_ACT_START:
10381 case IF_ACT_STOP:
10382 if (state != IF_STATE_ACTION)
10383 goto fail;
10384
10385 filter->action = actions[token];
10386 state = IF_STATE_SOURCE;
10387 break;
10388
10389 case IF_SRC_KERNELADDR:
10390 case IF_SRC_KERNEL:
10391 kernel = 1;
10392 fallthrough;
10393
10394 case IF_SRC_FILEADDR:
10395 case IF_SRC_FILE:
10396 if (state != IF_STATE_SOURCE)
10397 goto fail;
10398
10399 *args[0].to = 0;
10400 ret = kstrtoul(args[0].from, 0, &filter->offset);
10401 if (ret)
10402 goto fail;
10403
10404 if (token == IF_SRC_KERNEL || token == IF_SRC_FILE) {
10405 *args[1].to = 0;
10406 ret = kstrtoul(args[1].from, 0, &filter->size);
10407 if (ret)
10408 goto fail;
10409 }
10410
10411 if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
10412 int fpos = token == IF_SRC_FILE ? 2 : 1;
10413
10414 kfree(filename);
10415 filename = match_strdup(&args[fpos]);
10416 if (!filename) {
10417 ret = -ENOMEM;
10418 goto fail;
10419 }
10420 }
10421
10422 state = IF_STATE_END;
10423 break;
10424
10425 default:
10426 goto fail;
10427 }
10428
10429
10430
10431
10432
10433
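/*
 * Filter definition is fully parsed; validate that it does not contradict
 * the event's attributes before moving on to the next one.
 */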
10434 if (state == IF_STATE_END) {
10435 ret = -EINVAL;
10436 if (kernel && event->attr.exclude_kernel)
10437 goto fail;
10438
10439
10440
10441
10442
10443 if (filter->action == PERF_ADDR_FILTER_ACTION_FILTER &&
10444 !filter->size)
10445 goto fail;
10446
10447 if (!kernel) {
10448 if (!filename)
10449 goto fail;
10450
10451
10452
10453
10454
10455
10456
10457
10458
10459 ret = -EOPNOTSUPP;
10460 if (!event->ctx->task)
10461 goto fail;
10462
10463
10464 ret = kern_path(filename, LOOKUP_FOLLOW,
10465 &filter->path);
10466 if (ret)
10467 goto fail;
10468
10469 ret = -EINVAL;
10470 if (!filter->path.dentry ||
10471 !S_ISREG(d_inode(filter->path.dentry)
10472 ->i_mode))
10473 goto fail;
10474
10475 event->addr_filters.nr_file_filters++;
10476 }
10477
10478
10479 state = IF_STATE_ACTION;
10480 filter = NULL;
10481 }
10482 }
10483
10484 if (state != IF_STATE_ACTION)
10485 goto fail;
10486
10487 kfree(filename);
10488 kfree(orig);
10489
10490 return 0;
10491
10492fail:
10493 kfree(filename);
10494 free_filters_list(filters);
10495 kfree(orig);
10496
10497 return ret;
10498}
10499
10500static int
10501perf_event_set_addr_filter(struct perf_event *event, char *filter_str)
10502{
10503 LIST_HEAD(filters);
10504 int ret;
10505
10506
10507
10508
10509
10510 lockdep_assert_held(&event->ctx->mutex);
10511
10512 if (WARN_ON_ONCE(event->parent))
10513 return -EINVAL;
10514
10515 ret = perf_event_parse_addr_filter(event, filter_str, &filters);
10516 if (ret)
10517 goto fail_clear_files;
10518
10519 ret = event->pmu->addr_filters_validate(&filters);
10520 if (ret)
10521 goto fail_free_filters;
10522
10523
10524 perf_addr_filters_splice(event, &filters);
10525
10526
10527 perf_event_for_each_child(event, perf_event_addr_filters_apply);
10528
10529 return ret;
10530
10531fail_free_filters:
10532 free_filters_list(&filters);
10533
10534fail_clear_files:
10535 event->addr_filters.nr_file_filters = 0;
10536
10537 return ret;
10538}
10539
10540static int perf_event_set_filter(struct perf_event *event, void __user *arg)
10541{
10542 int ret = -EINVAL;
10543 char *filter_str;
10544
10545 filter_str = strndup_user(arg, PAGE_SIZE);
10546 if (IS_ERR(filter_str))
10547 return PTR_ERR(filter_str);
10548
10549#ifdef CONFIG_EVENT_TRACING
10550 if (perf_event_is_tracing(event)) {
10551 struct perf_event_context *ctx = event->ctx;
10552
10553
10554
10555
10556
10557
10558
10559
10560
10561
10562
10563
10564 mutex_unlock(&ctx->mutex);
10565 ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
10566 mutex_lock(&ctx->mutex);
10567 } else
10568#endif
10569 if (has_addr_filter(event))
10570 ret = perf_event_set_addr_filter(event, filter_str);
10571
10572 kfree(filter_str);
10573 return ret;
10574}
10575
10576
10577
10578
10579
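/*
 * hrtimer callback used to drive sampling for the software clock events.
 */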
10580static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
10581{
10582 enum hrtimer_restart ret = HRTIMER_RESTART;
10583 struct perf_sample_data data;
10584 struct pt_regs *regs;
10585 struct perf_event *event;
10586 u64 period;
10587
10588 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
10589
10590 if (event->state != PERF_EVENT_STATE_ACTIVE)
10591 return HRTIMER_NORESTART;
10592
10593 event->pmu->read(event);
10594
10595 perf_sample_data_init(&data, 0, event->hw.last_period);
10596 regs = get_irq_regs();
10597
10598 if (regs && !perf_exclude_event(event, regs)) {
10599 if (!(event->attr.exclude_idle && is_idle_task(current)))
10600 if (__perf_event_overflow(event, 1, &data, regs))
10601 ret = HRTIMER_NORESTART;
10602 }
10603
10604 period = max_t(u64, 10000, event->hw.sample_period);
10605 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
10606
10607 return ret;
10608}
10609
10610static void perf_swevent_start_hrtimer(struct perf_event *event)
10611{
10612 struct hw_perf_event *hwc = &event->hw;
10613 s64 period;
10614
10615 if (!is_sampling_event(event))
10616 return;
10617
10618 period = local64_read(&hwc->period_left);
10619 if (period) {
10620 if (period < 0)
10621 period = 10000;
10622
10623 local64_set(&hwc->period_left, 0);
10624 } else {
10625 period = max_t(u64, 10000, hwc->sample_period);
10626 }
10627 hrtimer_start(&hwc->hrtimer, ns_to_ktime(period),
10628 HRTIMER_MODE_REL_PINNED_HARD);
10629}
10630
10631static void perf_swevent_cancel_hrtimer(struct perf_event *event)
10632{
10633 struct hw_perf_event *hwc = &event->hw;
10634
10635 if (is_sampling_event(event)) {
10636 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
10637 local64_set(&hwc->period_left, ktime_to_ns(remaining));
10638
10639 hrtimer_cancel(&hwc->hrtimer);
10640 }
10641}
10642
10643static void perf_swevent_init_hrtimer(struct perf_event *event)
10644{
10645 struct hw_perf_event *hwc = &event->hw;
10646
10647 if (!is_sampling_event(event))
10648 return;
10649
10650 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
10651 hwc->hrtimer.function = perf_swevent_hrtimer;
10652
10653
10654
10655
10656
10657 if (event->attr.freq) {
10658 long freq = event->attr.sample_freq;
10659
10660 event->attr.sample_period = NSEC_PER_SEC / freq;
10661 hwc->sample_period = event->attr.sample_period;
10662 local64_set(&hwc->period_left, hwc->sample_period);
10663 hwc->last_period = hwc->sample_period;
10664 event->attr.freq = 0;
10665 }
10666}
10667
10668
10669
10670
10671
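/*
 * Software event: cpu wall-clock timer
 */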
10672static void cpu_clock_event_update(struct perf_event *event)
10673{
10674 s64 prev;
10675 u64 now;
10676
10677 now = local_clock();
10678 prev = local64_xchg(&event->hw.prev_count, now);
10679 local64_add(now - prev, &event->count);
10680}
10681
10682static void cpu_clock_event_start(struct perf_event *event, int flags)
10683{
10684 local64_set(&event->hw.prev_count, local_clock());
10685 perf_swevent_start_hrtimer(event);
10686}
10687
10688static void cpu_clock_event_stop(struct perf_event *event, int flags)
10689{
10690 perf_swevent_cancel_hrtimer(event);
10691 cpu_clock_event_update(event);
10692}
10693
10694static int cpu_clock_event_add(struct perf_event *event, int flags)
10695{
10696 if (flags & PERF_EF_START)
10697 cpu_clock_event_start(event, flags);
10698 perf_event_update_userpage(event);
10699
10700 return 0;
10701}
10702
10703static void cpu_clock_event_del(struct perf_event *event, int flags)
10704{
10705 cpu_clock_event_stop(event, flags);
10706}
10707
10708static void cpu_clock_event_read(struct perf_event *event)
10709{
10710 cpu_clock_event_update(event);
10711}
10712
10713static int cpu_clock_event_init(struct perf_event *event)
10714{
10715 if (event->attr.type != PERF_TYPE_SOFTWARE)
10716 return -ENOENT;
10717
10718 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
10719 return -ENOENT;
10720
10721
10722
10723
10724 if (has_branch_stack(event))
10725 return -EOPNOTSUPP;
10726
10727 perf_swevent_init_hrtimer(event);
10728
10729 return 0;
10730}
10731
10732static struct pmu perf_cpu_clock = {
10733 .task_ctx_nr = perf_sw_context,
10734
10735 .capabilities = PERF_PMU_CAP_NO_NMI,
10736
10737 .event_init = cpu_clock_event_init,
10738 .add = cpu_clock_event_add,
10739 .del = cpu_clock_event_del,
10740 .start = cpu_clock_event_start,
10741 .stop = cpu_clock_event_stop,
10742 .read = cpu_clock_event_read,
10743};
10744
10745
10746
10747
10748
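/*
 * Software event: task time clock
 */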
10749static void task_clock_event_update(struct perf_event *event, u64 now)
10750{
10751 u64 prev;
10752 s64 delta;
10753
10754 prev = local64_xchg(&event->hw.prev_count, now);
10755 delta = now - prev;
10756 local64_add(delta, &event->count);
10757}
10758
10759static void task_clock_event_start(struct perf_event *event, int flags)
10760{
10761 local64_set(&event->hw.prev_count, event->ctx->time);
10762 perf_swevent_start_hrtimer(event);
10763}
10764
10765static void task_clock_event_stop(struct perf_event *event, int flags)
10766{
10767 perf_swevent_cancel_hrtimer(event);
10768 task_clock_event_update(event, event->ctx->time);
10769}
10770
10771static int task_clock_event_add(struct perf_event *event, int flags)
10772{
10773 if (flags & PERF_EF_START)
10774 task_clock_event_start(event, flags);
10775 perf_event_update_userpage(event);
10776
10777 return 0;
10778}
10779
10780static void task_clock_event_del(struct perf_event *event, int flags)
10781{
10782 task_clock_event_stop(event, PERF_EF_UPDATE);
10783}
10784
10785static void task_clock_event_read(struct perf_event *event)
10786{
10787 u64 now = perf_clock();
10788 u64 delta = now - event->ctx->timestamp;
10789 u64 time = event->ctx->time + delta;
10790
10791 task_clock_event_update(event, time);
10792}
10793
10794static int task_clock_event_init(struct perf_event *event)
10795{
10796 if (event->attr.type != PERF_TYPE_SOFTWARE)
10797 return -ENOENT;
10798
10799 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
10800 return -ENOENT;
10801
10802
10803
10804
10805 if (has_branch_stack(event))
10806 return -EOPNOTSUPP;
10807
10808 perf_swevent_init_hrtimer(event);
10809
10810 return 0;
10811}
10812
10813static struct pmu perf_task_clock = {
10814 .task_ctx_nr = perf_sw_context,
10815
10816 .capabilities = PERF_PMU_CAP_NO_NMI,
10817
10818 .event_init = task_clock_event_init,
10819 .add = task_clock_event_add,
10820 .del = task_clock_event_del,
10821 .start = task_clock_event_start,
10822 .stop = task_clock_event_stop,
10823 .read = task_clock_event_read,
10824};
10825
10826static void perf_pmu_nop_void(struct pmu *pmu)
10827{
10828}
10829
10830static void perf_pmu_nop_txn(struct pmu *pmu, unsigned int flags)
10831{
10832}
10833
10834static int perf_pmu_nop_int(struct pmu *pmu)
10835{
10836 return 0;
10837}
10838
10839static int perf_event_nop_int(struct perf_event *event, u64 value)
10840{
10841 return 0;
10842}
10843
10844static DEFINE_PER_CPU(unsigned int, nop_txn_flags);
10845
10846static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags)
10847{
10848 __this_cpu_write(nop_txn_flags, flags);
10849
10850 if (flags & ~PERF_PMU_TXN_ADD)
10851 return;
10852
10853 perf_pmu_disable(pmu);
10854}
10855
10856static int perf_pmu_commit_txn(struct pmu *pmu)
10857{
10858 unsigned int flags = __this_cpu_read(nop_txn_flags);
10859
10860 __this_cpu_write(nop_txn_flags, 0);
10861
10862 if (flags & ~PERF_PMU_TXN_ADD)
10863 return 0;
10864
10865 perf_pmu_enable(pmu);
10866 return 0;
10867}
10868
10869static void perf_pmu_cancel_txn(struct pmu *pmu)
10870{
10871 unsigned int flags = __this_cpu_read(nop_txn_flags);
10872
10873 __this_cpu_write(nop_txn_flags, 0);
10874
10875 if (flags & ~PERF_PMU_TXN_ADD)
10876 return;
10877
10878 perf_pmu_enable(pmu);
10879}
10880
10881static int perf_event_idx_default(struct perf_event *event)
10882{
10883 return 0;
10884}
10885
10886
10887
10888
10889
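/*
 * Ensure all contexts with the same task_ctx_nr share the same
 * pmu_cpu_context.
 */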
10890static struct perf_cpu_context __percpu *find_pmu_context(int ctxn)
10891{
10892 struct pmu *pmu;
10893
10894 if (ctxn < 0)
10895 return NULL;
10896
10897 list_for_each_entry(pmu, &pmus, entry) {
10898 if (pmu->task_ctx_nr == ctxn)
10899 return pmu->pmu_cpu_context;
10900 }
10901
10902 return NULL;
10903}
10904
10905static void free_pmu_context(struct pmu *pmu)
10906{
10907
10908
10909
10910
10911
10912 if (pmu->task_ctx_nr > perf_invalid_context)
10913 return;
10914
10915 free_percpu(pmu->pmu_cpu_context);
10916}
10917
10918
10919
10920
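/* Expose the number of address filters this PMU supports to userspace: */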
10921static ssize_t nr_addr_filters_show(struct device *dev,
10922 struct device_attribute *attr,
10923 char *page)
10924{
10925 struct pmu *pmu = dev_get_drvdata(dev);
10926
10927 return snprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters);
10928}
10929DEVICE_ATTR_RO(nr_addr_filters);
10930
10931static struct idr pmu_idr;
10932
10933static ssize_t
10934type_show(struct device *dev, struct device_attribute *attr, char *page)
10935{
10936 struct pmu *pmu = dev_get_drvdata(dev);
10937
10938 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
10939}
10940static DEVICE_ATTR_RO(type);
10941
10942static ssize_t
10943perf_event_mux_interval_ms_show(struct device *dev,
10944 struct device_attribute *attr,
10945 char *page)
10946{
10947 struct pmu *pmu = dev_get_drvdata(dev);
10948
10949 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms);
10950}
10951
10952static DEFINE_MUTEX(mux_interval_mutex);
10953
10954static ssize_t
10955perf_event_mux_interval_ms_store(struct device *dev,
10956 struct device_attribute *attr,
10957 const char *buf, size_t count)
10958{
10959 struct pmu *pmu = dev_get_drvdata(dev);
10960 int timer, cpu, ret;
10961
10962 ret = kstrtoint(buf, 0, &timer);
10963 if (ret)
10964 return ret;
10965
10966 if (timer < 1)
10967 return -EINVAL;
10968
10969
10970 if (timer == pmu->hrtimer_interval_ms)
10971 return count;
10972
10973 mutex_lock(&mux_interval_mutex);
10974 pmu->hrtimer_interval_ms = timer;
10975
10976
10977 cpus_read_lock();
10978 for_each_online_cpu(cpu) {
10979 struct perf_cpu_context *cpuctx;
10980 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
10981 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
10982
10983 cpu_function_call(cpu,
10984 (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
10985 }
10986 cpus_read_unlock();
10987 mutex_unlock(&mux_interval_mutex);
10988
10989 return count;
10990}
10991static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
10992
10993static struct attribute *pmu_dev_attrs[] = {
10994 &dev_attr_type.attr,
10995 &dev_attr_perf_event_mux_interval_ms.attr,
10996 NULL,
10997};
10998ATTRIBUTE_GROUPS(pmu_dev);
10999
11000static int pmu_bus_running;
11001static struct bus_type pmu_bus = {
11002 .name = "event_source",
11003 .dev_groups = pmu_dev_groups,
11004};
11005
11006static void pmu_dev_release(struct device *dev)
11007{
11008 kfree(dev);
11009}
11010
11011static int pmu_dev_alloc(struct pmu *pmu)
11012{
11013 int ret = -ENOMEM;
11014
11015 pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
11016 if (!pmu->dev)
11017 goto out;
11018
11019 pmu->dev->groups = pmu->attr_groups;
11020 device_initialize(pmu->dev);
11021 ret = dev_set_name(pmu->dev, "%s", pmu->name);
11022 if (ret)
11023 goto free_dev;
11024
11025 dev_set_drvdata(pmu->dev, pmu);
11026 pmu->dev->bus = &pmu_bus;
11027 pmu->dev->release = pmu_dev_release;
11028 ret = device_add(pmu->dev);
11029 if (ret)
11030 goto free_dev;
11031
11032
11033 if (pmu->nr_addr_filters)
11034 ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters);
11035
11036 if (ret)
11037 goto del_dev;
11038
11039 if (pmu->attr_update)
11040 ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update);
11041
11042 if (ret)
11043 goto del_dev;
11044
11045out:
11046 return ret;
11047
11048del_dev:
11049 device_del(pmu->dev);
11050
11051free_dev:
11052 put_device(pmu->dev);
11053 goto out;
11054}
11055
11056static struct lock_class_key cpuctx_mutex;
11057static struct lock_class_key cpuctx_lock;
11058
11059int perf_pmu_register(struct pmu *pmu, const char *name, int type)
11060{
11061 int cpu, ret, max = PERF_TYPE_MAX;
11062
11063 mutex_lock(&pmus_lock);
11064 ret = -ENOMEM;
11065 pmu->pmu_disable_count = alloc_percpu(int);
11066 if (!pmu->pmu_disable_count)
11067 goto unlock;
11068
11069 pmu->type = -1;
11070 if (!name)
11071 goto skip_type;
11072 pmu->name = name;
11073
11074 if (type != PERF_TYPE_SOFTWARE) {
11075 if (type >= 0)
11076 max = type;
11077
11078 ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL);
11079 if (ret < 0)
11080 goto free_pdc;
11081
11082 WARN_ON(type >= 0 && ret != type);
11083
11084 type = ret;
11085 }
11086 pmu->type = type;
11087
11088 if (pmu_bus_running) {
11089 ret = pmu_dev_alloc(pmu);
11090 if (ret)
11091 goto free_idr;
11092 }
11093
11094skip_type:
11095 if (pmu->task_ctx_nr == perf_hw_context) {
11096 static int hw_context_taken = 0;
11097
11098
11099
11100
11101
11102
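/*
 * Other than on systems with heterogeneous CPUs, it never makes sense for
 * two PMUs to share perf_hw_context; uncore PMUs must use
 * perf_invalid_context.
 */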
11103 if (WARN_ON_ONCE(hw_context_taken &&
11104 !(pmu->capabilities & PERF_PMU_CAP_HETEROGENEOUS_CPUS)))
11105 pmu->task_ctx_nr = perf_invalid_context;
11106
11107 hw_context_taken = 1;
11108 }
11109
11110 pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
11111 if (pmu->pmu_cpu_context)
11112 goto got_cpu_context;
11113
11114 ret = -ENOMEM;
11115 pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
11116 if (!pmu->pmu_cpu_context)
11117 goto free_dev;
11118
11119 for_each_possible_cpu(cpu) {
11120 struct perf_cpu_context *cpuctx;
11121
11122 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
11123 __perf_event_init_context(&cpuctx->ctx);
11124 lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
11125 lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
11126 cpuctx->ctx.pmu = pmu;
11127 cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
11128
11129 __perf_mux_hrtimer_init(cpuctx, cpu);
11130
11131 cpuctx->heap_size = ARRAY_SIZE(cpuctx->heap_default);
11132 cpuctx->heap = cpuctx->heap_default;
11133 }
11134
11135got_cpu_context:
11136 if (!pmu->start_txn) {
11137 if (pmu->pmu_enable) {
11138
11139
11140
11141
11142
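/*
 * The PMU has pmu_enable/pmu_disable, so use transaction stubs that
 * batch the hardware accesses between start_txn and commit/cancel.
 */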
11143 pmu->start_txn = perf_pmu_start_txn;
11144 pmu->commit_txn = perf_pmu_commit_txn;
11145 pmu->cancel_txn = perf_pmu_cancel_txn;
11146 } else {
11147 pmu->start_txn = perf_pmu_nop_txn;
11148 pmu->commit_txn = perf_pmu_nop_int;
11149 pmu->cancel_txn = perf_pmu_nop_void;
11150 }
11151 }
11152
11153 if (!pmu->pmu_enable) {
11154 pmu->pmu_enable = perf_pmu_nop_void;
11155 pmu->pmu_disable = perf_pmu_nop_void;
11156 }
11157
11158 if (!pmu->check_period)
11159 pmu->check_period = perf_event_nop_int;
11160
11161 if (!pmu->event_idx)
11162 pmu->event_idx = perf_event_idx_default;
11163
11164
11165
11166
11167
11168
11169 if (type == PERF_TYPE_SOFTWARE || !name)
11170 list_add_rcu(&pmu->entry, &pmus);
11171 else
11172 list_add_tail_rcu(&pmu->entry, &pmus);
11173
11174 atomic_set(&pmu->exclusive_cnt, 0);
11175 ret = 0;
11176unlock:
11177 mutex_unlock(&pmus_lock);
11178
11179 return ret;
11180
11181free_dev:
11182 device_del(pmu->dev);
11183 put_device(pmu->dev);
11184
11185free_idr:
11186 if (pmu->type != PERF_TYPE_SOFTWARE)
11187 idr_remove(&pmu_idr, pmu->type);
11188
11189free_pdc:
11190 free_percpu(pmu->pmu_disable_count);
11191 goto unlock;
11192}
11193EXPORT_SYMBOL_GPL(perf_pmu_register);
11194
11195void perf_pmu_unregister(struct pmu *pmu)
11196{
11197 mutex_lock(&pmus_lock);
11198 list_del_rcu(&pmu->entry);
11199
11200
11201
11202
11203
11204 synchronize_srcu(&pmus_srcu);
11205 synchronize_rcu();
11206
11207 free_percpu(pmu->pmu_disable_count);
11208 if (pmu->type != PERF_TYPE_SOFTWARE)
11209 idr_remove(&pmu_idr, pmu->type);
11210 if (pmu_bus_running) {
11211 if (pmu->nr_addr_filters)
11212 device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
11213 device_del(pmu->dev);
11214 put_device(pmu->dev);
11215 }
11216 free_pmu_context(pmu);
11217 mutex_unlock(&pmus_lock);
11218}
11219EXPORT_SYMBOL_GPL(perf_pmu_unregister);
11220
11221static inline bool has_extended_regs(struct perf_event *event)
11222{
11223 return (event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK) ||
11224 (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK);
11225}
11226
11227static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
11228{
11229 struct perf_event_context *ctx = NULL;
11230 int ret;
11231
11232 if (!try_module_get(pmu->module))
11233 return -ENODEV;
11234
11235
11236
11237
11238
11239
11240
11241 if (event->group_leader != event && pmu->task_ctx_nr != perf_sw_context) {
11242
11243
11244
11245
11246 ctx = perf_event_ctx_lock_nested(event->group_leader,
11247 SINGLE_DEPTH_NESTING);
11248 BUG_ON(!ctx);
11249 }
11250
11251 event->pmu = pmu;
11252 ret = pmu->event_init(event);
11253
11254 if (ctx)
11255 perf_event_ctx_unlock(event->group_leader, ctx);
11256
11257 if (!ret) {
11258 if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) &&
11259 has_extended_regs(event))
11260 ret = -EOPNOTSUPP;
11261
11262 if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE &&
11263 event_has_any_exclude_flag(event))
11264 ret = -EINVAL;
11265
11266 if (ret && event->destroy)
11267 event->destroy(event);
11268 }
11269
11270 if (ret)
11271 module_put(pmu->module);
11272
11273 return ret;
11274}
11275
11276static struct pmu *perf_init_event(struct perf_event *event)
11277{
11278 bool extended_type = false;
11279 int idx, type, ret;
11280 struct pmu *pmu;
11281
11282 idx = srcu_read_lock(&pmus_srcu);
11283
11284
11285 if (event->parent && event->parent->pmu) {
11286 pmu = event->parent->pmu;
11287 ret = perf_try_init_event(pmu, event);
11288 if (!ret)
11289 goto unlock;
11290 }
11291
11292
11293
11294
11295
11296 type = event->attr.type;
11297 if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) {
11298 type = event->attr.config >> PERF_PMU_TYPE_SHIFT;
11299 if (!type) {
11300 type = PERF_TYPE_RAW;
11301 } else {
11302 extended_type = true;
11303 event->attr.config &= PERF_HW_EVENT_MASK;
11304 }
11305 }
11306
11307again:
11308 rcu_read_lock();
11309 pmu = idr_find(&pmu_idr, type);
11310 rcu_read_unlock();
11311 if (pmu) {
11312 if (event->attr.type != type && type != PERF_TYPE_RAW &&
11313 !(pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE))
11314 goto fail;
11315
11316 ret = perf_try_init_event(pmu, event);
11317 if (ret == -ENOENT && event->attr.type != type && !extended_type) {
11318 type = event->attr.type;
11319 goto again;
11320 }
11321
11322 if (ret)
11323 pmu = ERR_PTR(ret);
11324
11325 goto unlock;
11326 }
11327
11328 list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) {
11329 ret = perf_try_init_event(pmu, event);
11330 if (!ret)
11331 goto unlock;
11332
11333 if (ret != -ENOENT) {
11334 pmu = ERR_PTR(ret);
11335 goto unlock;
11336 }
11337 }
11338fail:
11339 pmu = ERR_PTR(-ENOENT);
11340unlock:
11341 srcu_read_unlock(&pmus_srcu, idx);
11342
11343 return pmu;
11344}
11345
11346static void attach_sb_event(struct perf_event *event)
11347{
11348 struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
11349
11350 raw_spin_lock(&pel->lock);
11351 list_add_rcu(&event->sb_list, &pel->list);
11352 raw_spin_unlock(&pel->lock);
11353}
11354
11355
11356
11357
11358
11359
11360
11361
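/*
 * Events that need side-band records (mmap, comm, context switches, ...)
 * are kept on a per-cpu list so that delivery does not have to scan every
 * PMU context.
 */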
11362static void account_pmu_sb_event(struct perf_event *event)
11363{
11364 if (is_sb_event(event))
11365 attach_sb_event(event);
11366}
11367
11368static void account_event_cpu(struct perf_event *event, int cpu)
11369{
11370 if (event->parent)
11371 return;
11372
11373 if (is_cgroup_event(event))
11374 atomic_inc(&per_cpu(perf_cgroup_events, cpu));
11375}
11376
11377
11378static void account_freq_event_nohz(void)
11379{
11380#ifdef CONFIG_NO_HZ_FULL
11381
11382 spin_lock(&nr_freq_lock);
11383 if (atomic_inc_return(&nr_freq_events) == 1)
11384 tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
11385 spin_unlock(&nr_freq_lock);
11386#endif
11387}
11388
11389static void account_freq_event(void)
11390{
11391 if (tick_nohz_full_enabled())
11392 account_freq_event_nohz();
11393 else
11394 atomic_inc(&nr_freq_events);
11395}
11396
11397
11398static void account_event(struct perf_event *event)
11399{
11400 bool inc = false;
11401
11402 if (event->parent)
11403 return;
11404
11405 if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
11406 inc = true;
11407 if (event->attr.mmap || event->attr.mmap_data)
11408 atomic_inc(&nr_mmap_events);
11409 if (event->attr.build_id)
11410 atomic_inc(&nr_build_id_events);
11411 if (event->attr.comm)
11412 atomic_inc(&nr_comm_events);
11413 if (event->attr.namespaces)
11414 atomic_inc(&nr_namespaces_events);
11415 if (event->attr.cgroup)
11416 atomic_inc(&nr_cgroup_events);
11417 if (event->attr.task)
11418 atomic_inc(&nr_task_events);
11419 if (event->attr.freq)
11420 account_freq_event();
11421 if (event->attr.context_switch) {
11422 atomic_inc(&nr_switch_events);
11423 inc = true;
11424 }
11425 if (has_branch_stack(event))
11426 inc = true;
11427 if (is_cgroup_event(event))
11428 inc = true;
11429 if (event->attr.ksymbol)
11430 atomic_inc(&nr_ksymbol_events);
11431 if (event->attr.bpf_event)
11432 atomic_inc(&nr_bpf_events);
11433 if (event->attr.text_poke)
11434 atomic_inc(&nr_text_poke_events);
11435
11436 if (inc) {
11437
11438
11439
11440
11441
11442 if (atomic_inc_not_zero(&perf_sched_count))
11443 goto enabled;
11444
11445 mutex_lock(&perf_sched_mutex);
11446 if (!atomic_read(&perf_sched_count)) {
11447 static_branch_enable(&perf_sched_events);
11448
11449
11450
11451
11452
11453 synchronize_rcu();
11454 }
11455
11456
11457
11458
11459 atomic_inc(&perf_sched_count);
11460 mutex_unlock(&perf_sched_mutex);
11461 }
11462enabled:
11463
11464 account_event_cpu(event, event->cpu);
11465
11466 account_pmu_sb_event(event);
11467}
11468
11469
11470
11471
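/*
 * Allocate and initialize an event structure.
 */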
11472static struct perf_event *
11473perf_event_alloc(struct perf_event_attr *attr, int cpu,
11474 struct task_struct *task,
11475 struct perf_event *group_leader,
11476 struct perf_event *parent_event,
11477 perf_overflow_handler_t overflow_handler,
11478 void *context, int cgroup_fd)
11479{
11480 struct pmu *pmu;
11481 struct perf_event *event;
11482 struct hw_perf_event *hwc;
11483 long err = -EINVAL;
11484 int node;
11485
11486 if ((unsigned)cpu >= nr_cpu_ids) {
11487 if (!task || cpu != -1)
11488 return ERR_PTR(-EINVAL);
11489 }
11490 if (attr->sigtrap && !task) {
11491
11492 return ERR_PTR(-EINVAL);
11493 }
11494
11495 node = (cpu >= 0) ? cpu_to_node(cpu) : -1;
11496 event = kmem_cache_alloc_node(perf_event_cache, GFP_KERNEL | __GFP_ZERO,
11497 node);
11498 if (!event)
11499 return ERR_PTR(-ENOMEM);
11500
11501
11502
11503
11504
11505 if (!group_leader)
11506 group_leader = event;
11507
11508 mutex_init(&event->child_mutex);
11509 INIT_LIST_HEAD(&event->child_list);
11510
11511 INIT_LIST_HEAD(&event->event_entry);
11512 INIT_LIST_HEAD(&event->sibling_list);
11513 INIT_LIST_HEAD(&event->active_list);
11514 init_event_group(event);
11515 INIT_LIST_HEAD(&event->rb_entry);
11516 INIT_LIST_HEAD(&event->active_entry);
11517 INIT_LIST_HEAD(&event->addr_filters.list);
11518 INIT_HLIST_NODE(&event->hlist_entry);
11519
11520
11521 init_waitqueue_head(&event->waitq);
11522 event->pending_disable = -1;
11523 init_irq_work(&event->pending, perf_pending_event);
11524
11525 mutex_init(&event->mmap_mutex);
11526 raw_spin_lock_init(&event->addr_filters.lock);
11527
11528 atomic_long_set(&event->refcount, 1);
11529 event->cpu = cpu;
11530 event->attr = *attr;
11531 event->group_leader = group_leader;
11532 event->pmu = NULL;
11533 event->oncpu = -1;
11534
11535 event->parent = parent_event;
11536
11537 event->ns = get_pid_ns(task_active_pid_ns(current));
11538 event->id = atomic64_inc_return(&perf_event_id);
11539
11540 event->state = PERF_EVENT_STATE_INACTIVE;
11541
11542 if (event->attr.sigtrap)
11543 atomic_set(&event->event_limit, 1);
11544
11545 if (task) {
11546 event->attach_state = PERF_ATTACH_TASK;
11547
11548
11549
11550
11551
11552 event->hw.target = get_task_struct(task);
11553 }
11554
11555 event->clock = &local_clock;
11556 if (parent_event)
11557 event->clock = parent_event->clock;
11558
11559 if (!overflow_handler && parent_event) {
11560 overflow_handler = parent_event->overflow_handler;
11561 context = parent_event->overflow_handler_context;
11562#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
11563 if (overflow_handler == bpf_overflow_handler) {
11564 struct bpf_prog *prog = parent_event->prog;
11565
11566 bpf_prog_inc(prog);
11567 event->prog = prog;
11568 event->orig_overflow_handler =
11569 parent_event->orig_overflow_handler;
11570 }
11571#endif
11572 }
11573
11574 if (overflow_handler) {
11575 event->overflow_handler = overflow_handler;
11576 event->overflow_handler_context = context;
11577 } else if (is_write_backward(event)){
11578 event->overflow_handler = perf_event_output_backward;
11579 event->overflow_handler_context = NULL;
11580 } else {
11581 event->overflow_handler = perf_event_output_forward;
11582 event->overflow_handler_context = NULL;
11583 }
11584
11585 perf_event__state_init(event);
11586
11587 pmu = NULL;
11588
11589 hwc = &event->hw;
11590 hwc->sample_period = attr->sample_period;
11591 if (attr->freq && attr->sample_freq)
11592 hwc->sample_period = 1;
11593 hwc->last_period = hwc->sample_period;
11594
11595 local64_set(&hwc->period_left, hwc->sample_period);
11596
11597
11598
11599
11600
11601 if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ))
11602 goto err_ns;
11603
11604 if (!has_branch_stack(event))
11605 event->attr.branch_sample_type = 0;
11606
11607 pmu = perf_init_event(event);
11608 if (IS_ERR(pmu)) {
11609 err = PTR_ERR(pmu);
11610 goto err_ns;
11611 }
11612
11613
11614
11615
11616
11617 if (pmu->task_ctx_nr == perf_invalid_context && cgroup_fd != -1) {
11618 err = -EINVAL;
11619 goto err_pmu;
11620 }
11621
11622 if (event->attr.aux_output &&
11623 !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
11624 err = -EOPNOTSUPP;
11625 goto err_pmu;
11626 }
11627
11628 if (cgroup_fd != -1) {
11629 err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader);
11630 if (err)
11631 goto err_pmu;
11632 }
11633
11634 err = exclusive_event_init(event);
11635 if (err)
11636 goto err_pmu;
11637
11638 if (has_addr_filter(event)) {
11639 event->addr_filter_ranges = kcalloc(pmu->nr_addr_filters,
11640 sizeof(struct perf_addr_filter_range),
11641 GFP_KERNEL);
11642 if (!event->addr_filter_ranges) {
11643 err = -ENOMEM;
11644 goto err_per_task;
11645 }
11646
11647
11648
11649
11650
11651 if (event->parent) {
11652 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
11653
11654 raw_spin_lock_irq(&ifh->lock);
11655 memcpy(event->addr_filter_ranges,
11656 event->parent->addr_filter_ranges,
11657 pmu->nr_addr_filters * sizeof(struct perf_addr_filter_range));
11658 raw_spin_unlock_irq(&ifh->lock);
11659 }
11660
11661
11662 event->addr_filters_gen = 1;
11663 }
11664
11665 if (!event->parent) {
11666 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
11667 err = get_callchain_buffers(attr->sample_max_stack);
11668 if (err)
11669 goto err_addr_filters;
11670 }
11671 }
11672
11673 err = security_perf_event_alloc(event);
11674 if (err)
11675 goto err_callchain_buffer;
11676
11677
11678 account_event(event);
11679
11680 return event;
11681
11682err_callchain_buffer:
11683 if (!event->parent) {
11684 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
11685 put_callchain_buffers();
11686 }
11687err_addr_filters:
11688 kfree(event->addr_filter_ranges);
11689
11690err_per_task:
11691 exclusive_event_destroy(event);
11692
11693err_pmu:
11694 if (is_cgroup_event(event))
11695 perf_detach_cgroup(event);
11696 if (event->destroy)
11697 event->destroy(event);
11698 module_put(pmu->module);
11699err_ns:
11700 if (event->ns)
11701 put_pid_ns(event->ns);
11702 if (event->hw.target)
11703 put_task_struct(event->hw.target);
11704 kmem_cache_free(perf_event_cache, event);
11705
11706 return ERR_PTR(err);
11707}
11708
11709static int perf_copy_attr(struct perf_event_attr __user *uattr,
11710 struct perf_event_attr *attr)
11711{
11712 u32 size;
11713 int ret;
11714
11715
11716 memset(attr, 0, sizeof(*attr));
11717
11718 ret = get_user(size, &uattr->size);
11719 if (ret)
11720 return ret;
11721
11722
11723 if (!size)
11724 size = PERF_ATTR_SIZE_VER0;
11725 if (size < PERF_ATTR_SIZE_VER0 || size > PAGE_SIZE)
11726 goto err_size;
11727
11728 ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size);
11729 if (ret) {
11730 if (ret == -E2BIG)
11731 goto err_size;
11732 return ret;
11733 }
11734
11735 attr->size = size;
11736
11737 if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
11738 return -EINVAL;
11739
11740 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
11741 return -EINVAL;
11742
11743 if (attr->read_format & ~(PERF_FORMAT_MAX-1))
11744 return -EINVAL;
11745
11746 if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
11747 u64 mask = attr->branch_sample_type;
11748
11749
11750 if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))
11751 return -EINVAL;
11752
11753
11754 if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
11755 return -EINVAL;
11756
11757
11758 if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
11759
11760
11761 if (!attr->exclude_kernel)
11762 mask |= PERF_SAMPLE_BRANCH_KERNEL;
11763
11764 if (!attr->exclude_user)
11765 mask |= PERF_SAMPLE_BRANCH_USER;
11766
11767 if (!attr->exclude_hv)
11768 mask |= PERF_SAMPLE_BRANCH_HV;
11769
11770
11771
11772 attr->branch_sample_type = mask;
11773 }
11774
11775 if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) {
11776 ret = perf_allow_kernel(attr);
11777 if (ret)
11778 return ret;
11779 }
11780 }
11781
11782 if (attr->sample_type & PERF_SAMPLE_REGS_USER) {
11783 ret = perf_reg_validate(attr->sample_regs_user);
11784 if (ret)
11785 return ret;
11786 }
11787
11788 if (attr->sample_type & PERF_SAMPLE_STACK_USER) {
11789 if (!arch_perf_have_user_stack_dump())
11790 return -ENOSYS;
11791
11792
11793
11794
11795
11796
11797 if (attr->sample_stack_user >= USHRT_MAX)
11798 return -EINVAL;
11799 else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64)))
11800 return -EINVAL;
11801 }
11802
11803 if (!attr->sample_max_stack)
11804 attr->sample_max_stack = sysctl_perf_event_max_stack;
11805
11806 if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
11807 ret = perf_reg_validate(attr->sample_regs_intr);
11808
11809#ifndef CONFIG_CGROUP_PERF
11810 if (attr->sample_type & PERF_SAMPLE_CGROUP)
11811 return -EINVAL;
11812#endif
11813 if ((attr->sample_type & PERF_SAMPLE_WEIGHT) &&
11814 (attr->sample_type & PERF_SAMPLE_WEIGHT_STRUCT))
11815 return -EINVAL;
11816
11817 if (!attr->inherit && attr->inherit_thread)
11818 return -EINVAL;
11819
11820 if (attr->remove_on_exec && attr->enable_on_exec)
11821 return -EINVAL;
11822
11823 if (attr->sigtrap && !attr->remove_on_exec)
11824 return -EINVAL;
11825
11826out:
11827 return ret;
11828
11829err_size:
11830 put_user(sizeof(*attr), &uattr->size);
11831 ret = -E2BIG;
11832 goto out;
11833}
11834
11835static int
11836perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
11837{
11838 struct perf_buffer *rb = NULL;
11839 int ret = -EINVAL;
11840
11841 if (!output_event)
11842 goto set;
11843
11844
11845 if (event == output_event)
11846 goto out;
11847
11848
11849
11850
11851 if (output_event->cpu != event->cpu)
11852 goto out;
11853
11854
11855
11856
11857 if (output_event->cpu == -1 && output_event->ctx != event->ctx)
11858 goto out;
11859
11860
11861
11862
11863 if (output_event->clock != event->clock)
11864 goto out;
11865
11866
11867
11868
11869
11870 if (is_write_backward(output_event) != is_write_backward(event))
11871 goto out;
11872
11873
11874
11875
11876 if (has_aux(event) && has_aux(output_event) &&
11877 event->pmu != output_event->pmu)
11878 goto out;
11879
11880set:
11881 mutex_lock(&event->mmap_mutex);
11882
11883 if (atomic_read(&event->mmap_count))
11884 goto unlock;
11885
11886 if (output_event) {
11887
11888 rb = ring_buffer_get(output_event);
11889 if (!rb)
11890 goto unlock;
11891 }
11892
11893 ring_buffer_attach(event, rb);
11894
11895 ret = 0;
11896unlock:
11897 mutex_unlock(&event->mmap_mutex);
11898
11899out:
11900 return ret;
11901}
11902
11903static void mutex_lock_double(struct mutex *a, struct mutex *b)
11904{
11905 if (b < a)
11906 swap(a, b);
11907
11908 mutex_lock(a);
11909 mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
11910}
11911
11912static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
11913{
11914 bool nmi_safe = false;
11915
11916 switch (clk_id) {
11917 case CLOCK_MONOTONIC:
11918 event->clock = &ktime_get_mono_fast_ns;
11919 nmi_safe = true;
11920 break;
11921
11922 case CLOCK_MONOTONIC_RAW:
11923 event->clock = &ktime_get_raw_fast_ns;
11924 nmi_safe = true;
11925 break;
11926
11927 case CLOCK_REALTIME:
11928 event->clock = &ktime_get_real_ns;
11929 break;
11930
11931 case CLOCK_BOOTTIME:
11932 event->clock = &ktime_get_boottime_ns;
11933 break;
11934
11935 case CLOCK_TAI:
11936 event->clock = &ktime_get_clocktai_ns;
11937 break;
11938
11939 default:
11940 return -EINVAL;
11941 }
11942
11943 if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI))
11944 return -EINVAL;
11945
11946 return 0;
11947}
11948
11949
11950
11951
11952
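/*
 * Variation on perf_event_ctx_lock_nested() that pins and locks the group
 * leader's context in addition to our own.
 */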
11953static struct perf_event_context *
11954__perf_event_ctx_lock_double(struct perf_event *group_leader,
11955 struct perf_event_context *ctx)
11956{
11957 struct perf_event_context *gctx;
11958
11959again:
11960 rcu_read_lock();
11961 gctx = READ_ONCE(group_leader->ctx);
11962 if (!refcount_inc_not_zero(&gctx->refcount)) {
11963 rcu_read_unlock();
11964 goto again;
11965 }
11966 rcu_read_unlock();
11967
11968 mutex_lock_double(&gctx->mutex, &ctx->mutex);
11969
11970 if (group_leader->ctx != gctx) {
11971 mutex_unlock(&ctx->mutex);
11972 mutex_unlock(&gctx->mutex);
11973 put_ctx(gctx);
11974 goto again;
11975 }
11976
11977 return gctx;
11978}
11979
11980static bool
11981perf_check_permission(struct perf_event_attr *attr, struct task_struct *task)
11982{
11983 unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS;
11984 bool is_capable = perfmon_capable();
11985
11986 if (attr->sigtrap) {
11987
11988
11989
11990
11991 rcu_read_lock();
11992 is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL);
11993 rcu_read_unlock();
11994
11995
11996
11997
11998
11999
12000 ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS;
12001 }
12002
12003
12004
12005
12006
12007
12008 return is_capable || ptrace_may_access(task, ptrace_mode);
12009}
12010
12011
12012
12013
12014
12015
12016
12017
12018
12019
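/**
 * sys_perf_event_open - open a performance event, associate it to a task/cpu
 * @attr_uptr: event attributes for monitoring/sampling
 * @pid: target pid
 * @cpu: target cpu
 * @group_fd: group leader event fd
 * @flags: perf event open flags
 */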
12020SYSCALL_DEFINE5(perf_event_open,
12021 struct perf_event_attr __user *, attr_uptr,
12022 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
12023{
12024 struct perf_event *group_leader = NULL, *output_event = NULL;
12025 struct perf_event *event, *sibling;
12026 struct perf_event_attr attr;
12027 struct perf_event_context *ctx, *gctx;
12028 struct file *event_file = NULL;
12029 struct fd group = {NULL, 0};
12030 struct task_struct *task = NULL;
12031 struct pmu *pmu;
12032 int event_fd;
12033 int move_group = 0;
12034 int err;
12035 int f_flags = O_RDWR;
12036 int cgroup_fd = -1;
12037
12038
12039 if (flags & ~PERF_FLAG_ALL)
12040 return -EINVAL;
12041
12042
12043 err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
12044 if (err)
12045 return err;
12046
12047 err = perf_copy_attr(attr_uptr, &attr);
12048 if (err)
12049 return err;
12050
12051 if (!attr.exclude_kernel) {
12052 err = perf_allow_kernel(&attr);
12053 if (err)
12054 return err;
12055 }
12056
12057 if (attr.namespaces) {
12058 if (!perfmon_capable())
12059 return -EACCES;
12060 }
12061
12062 if (attr.freq) {
12063 if (attr.sample_freq > sysctl_perf_event_sample_rate)
12064 return -EINVAL;
12065 } else {
12066 if (attr.sample_period & (1ULL << 63))
12067 return -EINVAL;
12068 }
12069
12070
12071 if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) {
12072 err = perf_allow_kernel(&attr);
12073 if (err)
12074 return err;
12075 }
12076
12077
12078 if (attr.sample_type & PERF_SAMPLE_REGS_INTR) {
12079 err = security_locked_down(LOCKDOWN_PERF);
12080 if (err)
12081 return err;
12082 }
12083
12084
12085
12086
12087
12088
12089
12090 if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1))
12091 return -EINVAL;
12092
12093 if (flags & PERF_FLAG_FD_CLOEXEC)
12094 f_flags |= O_CLOEXEC;
12095
12096 event_fd = get_unused_fd_flags(f_flags);
12097 if (event_fd < 0)
12098 return event_fd;
12099
12100 if (group_fd != -1) {
12101 err = perf_fget_light(group_fd, &group);
12102 if (err)
12103 goto err_fd;
12104 group_leader = group.file->private_data;
12105 if (flags & PERF_FLAG_FD_OUTPUT)
12106 output_event = group_leader;
12107 if (flags & PERF_FLAG_FD_NO_GROUP)
12108 group_leader = NULL;
12109 }
12110
12111 if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) {
12112 task = find_lively_task_by_vpid(pid);
12113 if (IS_ERR(task)) {
12114 err = PTR_ERR(task);
12115 goto err_group_fd;
12116 }
12117 }
12118
12119 if (task && group_leader &&
12120 group_leader->attr.inherit != attr.inherit) {
12121 err = -EINVAL;
12122 goto err_task;
12123 }
12124
12125 if (flags & PERF_FLAG_PID_CGROUP)
12126 cgroup_fd = pid;
12127
12128 event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
12129 NULL, NULL, cgroup_fd);
12130 if (IS_ERR(event)) {
12131 err = PTR_ERR(event);
12132 goto err_task;
12133 }
12134
12135 if (is_sampling_event(event)) {
12136 if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
12137 err = -EOPNOTSUPP;
12138 goto err_alloc;
12139 }
12140 }
12141
12142
12143
12144
12145
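/*
 * Special case software events and allow them to be part of any hardware
 * group.
 */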
12146 pmu = event->pmu;
12147
12148 if (attr.use_clockid) {
12149 err = perf_event_set_clock(event, attr.clockid);
12150 if (err)
12151 goto err_alloc;
12152 }
12153
12154 if (pmu->task_ctx_nr == perf_sw_context)
12155 event->event_caps |= PERF_EV_CAP_SOFTWARE;
12156
12157 if (group_leader) {
12158 if (is_software_event(event) &&
12159 !in_software_context(group_leader)) {
12160
12161
12162
12163
12164
12165
12166
12167
12168 pmu = group_leader->ctx->pmu;
12169 } else if (!is_software_event(event) &&
12170 is_software_event(group_leader) &&
12171 (group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
12172
12173
12174
12175
12176
12177 move_group = 1;
12178 }
12179 }
12180
12181
12182
12183
12184 ctx = find_get_context(pmu, task, event);
12185 if (IS_ERR(ctx)) {
12186 err = PTR_ERR(ctx);
12187 goto err_alloc;
12188 }
12189
12190
12191
12192
12193 if (group_leader) {
12194 err = -EINVAL;
12195
12196
12197
12198
12199
12200 if (group_leader->group_leader != group_leader)
12201 goto err_context;
12202
12203
12204 if (group_leader->clock != event->clock)
12205 goto err_context;
12206
12207
12208
12209
12210
12211
12212 if (group_leader->cpu != event->cpu)
12213 goto err_context;
12214
12215
12216
12217
12218
12219 if (group_leader->ctx->task != ctx->task)
12220 goto err_context;
12221
12222
12223
12224
12225
12226
12227 if (!move_group && group_leader->ctx != ctx)
12228 goto err_context;
12229
12230
12231
12232
12233 if (attr.exclusive || attr.pinned)
12234 goto err_context;
12235 }
12236
12237 if (output_event) {
12238 err = perf_event_set_output(event, output_event);
12239 if (err)
12240 goto err_context;
12241 }
12242
12243 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event,
12244 f_flags);
12245 if (IS_ERR(event_file)) {
12246 err = PTR_ERR(event_file);
12247 event_file = NULL;
12248 goto err_context;
12249 }
12250
12251 if (task) {
12252 err = down_read_interruptible(&task->signal->exec_update_lock);
12253 if (err)
12254 goto err_file;
12255
12256
12257
12258
12259
12260
12261
12262 err = -EACCES;
12263 if (!perf_check_permission(&attr, task))
12264 goto err_cred;
12265 }
12266
12267 if (move_group) {
12268 gctx = __perf_event_ctx_lock_double(group_leader, ctx);
12269
12270 if (gctx->task == TASK_TOMBSTONE) {
12271 err = -ESRCH;
12272 goto err_locked;
12273 }
12274
12275
12276
12277
12278
12279 if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
12280
12281
12282
12283
12284
12285 if (gctx != ctx) {
12286 err = -EINVAL;
12287 goto err_locked;
12288 } else {
12289 perf_event_ctx_unlock(group_leader, gctx);
12290 move_group = 0;
12291 }
12292 }
12293
12294
12295
12296
12297 err = -EBUSY;
12298 if (!exclusive_event_installable(group_leader, ctx))
12299 goto err_locked;
12300
12301 for_each_sibling_event(sibling, group_leader) {
12302 if (!exclusive_event_installable(sibling, ctx))
12303 goto err_locked;
12304 }
12305 } else {
12306 mutex_lock(&ctx->mutex);
12307 }
12308
12309 if (ctx->task == TASK_TOMBSTONE) {
12310 err = -ESRCH;
12311 goto err_locked;
12312 }
12313
12314 if (!perf_event_validate_size(event)) {
12315 err = -E2BIG;
12316 goto err_locked;
12317 }
12318
12319 if (!task) {
12320
12321
12322
12323
12324
12325
12326 struct perf_cpu_context *cpuctx =
12327 container_of(ctx, struct perf_cpu_context, ctx);
12328
12329 if (!cpuctx->online) {
12330 err = -ENODEV;
12331 goto err_locked;
12332 }
12333 }
12334
12335 if (perf_need_aux_event(event) && !perf_get_aux_event(event, group_leader)) {
12336 err = -EINVAL;
12337 goto err_locked;
12338 }
12339
12340
12341
12342
12343
12344 if (!exclusive_event_installable(event, ctx)) {
12345 err = -EBUSY;
12346 goto err_locked;
12347 }
12348
12349 WARN_ON_ONCE(ctx->parent_ctx);
12350
12351
12352
12353
12354
12355
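/*
 * This is the point of no return; we cannot fail beyond here and start
 * modifying current state below.
 */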
12356 if (move_group) {
12357
12358
12359
12360
12361 perf_remove_from_context(group_leader, 0);
12362 put_ctx(gctx);
12363
12364 for_each_sibling_event(sibling, group_leader) {
12365 perf_remove_from_context(sibling, 0);
12366 put_ctx(gctx);
12367 }
12368
12369
12370
12371
12372
12373 synchronize_rcu();
12374
12375
12376
12377
12378
12379
12380
12381
12382
12383
12384
12385 for_each_sibling_event(sibling, group_leader) {
12386 perf_event__state_init(sibling);
12387 perf_install_in_context(ctx, sibling, sibling->cpu);
12388 get_ctx(ctx);
12389 }
12390
12391
12392
12393
12394
12395
12396 perf_event__state_init(group_leader);
12397 perf_install_in_context(ctx, group_leader, group_leader->cpu);
12398 get_ctx(ctx);
12399 }
12400
12401
12402
12403
12404
12405
12406
12407 perf_event__header_size(event);
12408 perf_event__id_header_size(event);
12409
12410 event->owner = current;
12411
12412 perf_install_in_context(ctx, event, event->cpu);
12413 perf_unpin_context(ctx);
12414
12415 if (move_group)
12416 perf_event_ctx_unlock(group_leader, gctx);
12417 mutex_unlock(&ctx->mutex);
12418
12419 if (task) {
12420 up_read(&task->signal->exec_update_lock);
12421 put_task_struct(task);
12422 }
12423
12424 mutex_lock(&current->perf_event_mutex);
12425 list_add_tail(&event->owner_entry, &current->perf_event_list);
12426 mutex_unlock(&current->perf_event_mutex);
12427
12428
12429
12430
12431
12432
12433
12434 fdput(group);
12435 fd_install(event_fd, event_file);
12436 return event_fd;
12437
12438err_locked:
12439 if (move_group)
12440 perf_event_ctx_unlock(group_leader, gctx);
12441 mutex_unlock(&ctx->mutex);
12442err_cred:
12443 if (task)
12444 up_read(&task->signal->exec_update_lock);
12445err_file:
12446 fput(event_file);
12447err_context:
12448 perf_unpin_context(ctx);
12449 put_ctx(ctx);
12450err_alloc:
12451
12452
12453
12454
12455 if (!event_file)
12456 free_event(event);
12457err_task:
12458 if (task)
12459 put_task_struct(task);
12460err_group_fd:
12461 fdput(group);
12462err_fd:
12463 put_unused_fd(event_fd);
12464 return err;
12465}
12466
12467
12468
12469
12470
12471
12472
12473
12474
12475
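/**
 * perf_event_create_kernel_counter - create an in-kernel event
 * @attr: attributes of the counter to create
 * @cpu: cpu on which the counter is bound
 * @task: task to profile (NULL for per-cpu)
 * @overflow_handler: callback invoked on overflow
 * @context: opaque data passed back to the overflow handler
 */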
12476struct perf_event *
12477perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
12478 struct task_struct *task,
12479 perf_overflow_handler_t overflow_handler,
12480 void *context)
12481{
12482 struct perf_event_context *ctx;
12483 struct perf_event *event;
12484 int err;
12485
12486
12487
12488
12489
12490 if (attr->aux_output)
12491 return ERR_PTR(-EINVAL);
12492
12493 event = perf_event_alloc(attr, cpu, task, NULL, NULL,
12494 overflow_handler, context, -1);
12495 if (IS_ERR(event)) {
12496 err = PTR_ERR(event);
12497 goto err;
12498 }
12499
12500
12501 event->owner = TASK_TOMBSTONE;
12502
12503
12504
12505
12506 ctx = find_get_context(event->pmu, task, event);
12507 if (IS_ERR(ctx)) {
12508 err = PTR_ERR(ctx);
12509 goto err_free;
12510 }
12511
12512 WARN_ON_ONCE(ctx->parent_ctx);
12513 mutex_lock(&ctx->mutex);
12514 if (ctx->task == TASK_TOMBSTONE) {
12515 err = -ESRCH;
12516 goto err_unlock;
12517 }
12518
12519 if (!task) {
12520
12521
12522
12523
12524
12525
12526 struct perf_cpu_context *cpuctx =
12527 container_of(ctx, struct perf_cpu_context, ctx);
12528 if (!cpuctx->online) {
12529 err = -ENODEV;
12530 goto err_unlock;
12531 }
12532 }
12533
12534 if (!exclusive_event_installable(event, ctx)) {
12535 err = -EBUSY;
12536 goto err_unlock;
12537 }
12538
12539 perf_install_in_context(ctx, event, event->cpu);
12540 perf_unpin_context(ctx);
12541 mutex_unlock(&ctx->mutex);
12542
12543 return event;
12544
12545err_unlock:
12546 mutex_unlock(&ctx->mutex);
12547 perf_unpin_context(ctx);
12548 put_ctx(ctx);
12549err_free:
12550 free_event(event);
12551err:
12552 return ERR_PTR(err);
12553}
12554EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
12555
12556void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
12557{
12558 struct perf_event_context *src_ctx;
12559 struct perf_event_context *dst_ctx;
12560 struct perf_event *event, *tmp;
12561 LIST_HEAD(events);
12562
12563 src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
12564 dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
12565
12566
12567
12568
12569
12570 mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
12571 list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
12572 event_entry) {
12573 perf_remove_from_context(event, 0);
12574 unaccount_event_cpu(event, src_cpu);
12575 put_ctx(src_ctx);
12576 list_add(&event->migrate_entry, &events);
12577 }
12578
12579
12580
12581
12582 synchronize_rcu();
12583
12584
12585
12586
12587
12588
12589
12590
12591
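/*
 * Re-instate the events in two passes: install siblings first (they will
 * not be enabled without a leader), then the group leaders.
 */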
12592 list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
12593 if (event->group_leader == event)
12594 continue;
12595
12596 list_del(&event->migrate_entry);
12597 if (event->state >= PERF_EVENT_STATE_OFF)
12598 event->state = PERF_EVENT_STATE_INACTIVE;
12599 account_event_cpu(event, dst_cpu);
12600 perf_install_in_context(dst_ctx, event, dst_cpu);
12601 get_ctx(dst_ctx);
12602 }
12603
12604
12605
12606
12607
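/*
 * Once all the siblings are set up, install the group leaders to make the
 * groups go.
 */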
12608 list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
12609 list_del(&event->migrate_entry);
12610 if (event->state >= PERF_EVENT_STATE_OFF)
12611 event->state = PERF_EVENT_STATE_INACTIVE;
12612 account_event_cpu(event, dst_cpu);
12613 perf_install_in_context(dst_ctx, event, dst_cpu);
12614 get_ctx(dst_ctx);
12615 }
12616 mutex_unlock(&dst_ctx->mutex);
12617 mutex_unlock(&src_ctx->mutex);
12618}
12619EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
12620
12621static void sync_child_event(struct perf_event *child_event)
12622{
12623 struct perf_event *parent_event = child_event->parent;
12624 u64 child_val;
12625
12626 if (child_event->attr.inherit_stat) {
12627 struct task_struct *task = child_event->ctx->task;
12628
12629 if (task && task != TASK_TOMBSTONE)
12630 perf_event_read_event(child_event, task);
12631 }
12632
12633 child_val = perf_event_count(child_event);
12634
12635
12636
12637
12638 atomic64_add(child_val, &parent_event->child_count);
12639 atomic64_add(child_event->total_time_enabled,
12640 &parent_event->child_total_time_enabled);
12641 atomic64_add(child_event->total_time_running,
12642 &parent_event->child_total_time_running);
12643}
12644
12645static void
12646perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
12647{
12648 struct perf_event *parent_event = event->parent;
12649 unsigned long detach_flags = 0;
12650
12651 if (parent_event) {
12652
12653
12654
12655
12656
12657
12658
12659
12660
12661
12662
12663
12664 detach_flags = DETACH_GROUP | DETACH_CHILD;
12665 mutex_lock(&parent_event->child_mutex);
12666 }
12667
12668 perf_remove_from_context(event, detach_flags);
12669
12670 raw_spin_lock_irq(&ctx->lock);
12671 if (event->state > PERF_EVENT_STATE_EXIT)
12672 perf_event_set_state(event, PERF_EVENT_STATE_EXIT);
12673 raw_spin_unlock_irq(&ctx->lock);
12674
12675
12676
12677
12678 if (parent_event) {
12679 mutex_unlock(&parent_event->child_mutex);
12680
12681
12682
12683 perf_event_wakeup(parent_event);
12684 free_event(event);
12685 put_event(parent_event);
12686 return;
12687 }
12688
12689
12690
12691
12692 perf_event_wakeup(event);
12693}
12694
12695static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
12696{
12697 struct perf_event_context *child_ctx, *clone_ctx = NULL;
12698 struct perf_event *child_event, *next;
12699
12700 WARN_ON_ONCE(child != current);
12701
12702 child_ctx = perf_pin_task_context(child, ctxn);
12703 if (!child_ctx)
12704 return;
12705
12706
12707
12708
12709
12710
12711
12712
12713
12714
12715
12716 mutex_lock(&child_ctx->mutex);
12717
12718
12719
12720
12721
12722
12723 raw_spin_lock_irq(&child_ctx->lock);
12724 task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx, EVENT_ALL);
12725
12726
12727
12728
12729
12730 RCU_INIT_POINTER(child->perf_event_ctxp[ctxn], NULL);
12731 put_ctx(child_ctx);
12732 WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
12733 put_task_struct(current);
12734
12735 clone_ctx = unclone_ctx(child_ctx);
12736 raw_spin_unlock_irq(&child_ctx->lock);
12737
12738 if (clone_ctx)
12739 put_ctx(clone_ctx);
12740
12741
12742
12743
12744
12745
12746 perf_event_task(child, child_ctx, 0);
12747
12748 list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
12749 perf_event_exit_event(child_event, child_ctx);
12750
12751 mutex_unlock(&child_ctx->mutex);
12752
12753 put_ctx(child_ctx);
12754}
12755
12756
12757
12758
12759
12760
12761
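/*
 * When a child task exits, feed back event values to parent events.
 */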
12762void perf_event_exit_task(struct task_struct *child)
12763{
12764 struct perf_event *event, *tmp;
12765 int ctxn;
12766
12767 mutex_lock(&child->perf_event_mutex);
12768 list_for_each_entry_safe(event, tmp, &child->perf_event_list,
12769 owner_entry) {
12770 list_del_init(&event->owner_entry);
12771
 /*
 * Ensure the list deletion is visible before we clear
 * the owner; this closes a race against perf_release() where
 * we need to serialize on the owner->perf_event_mutex.
 */
12777 smp_store_release(&event->owner, NULL);
12778 }
12779 mutex_unlock(&child->perf_event_mutex);
12780
12781 for_each_task_context_nr(ctxn)
12782 perf_event_exit_task_context(child, ctxn);
12783
 /*
 * perf_event_exit_task_context() calls perf_event_task()
 * with the child's task_ctx, which generates EXIT events for
 * child contexts and sets child->perf_event_ctxp[] to NULL.
 * At this point we need to send EXIT events to cpu contexts.
 */
12790 perf_event_task(child, NULL, 0);
12791}
12792
12793static void perf_free_event(struct perf_event *event,
12794 struct perf_event_context *ctx)
12795{
12796 struct perf_event *parent = event->parent;
12797
12798 if (WARN_ON_ONCE(!parent))
12799 return;
12800
12801 mutex_lock(&parent->child_mutex);
12802 list_del_init(&event->child_list);
12803 mutex_unlock(&parent->child_mutex);
12804
12805 put_event(parent);
12806
12807 raw_spin_lock_irq(&ctx->lock);
12808 perf_group_detach(event);
12809 list_del_event(event, ctx);
12810 raw_spin_unlock_irq(&ctx->lock);
12811 free_event(event);
12812}
12813
/*
 * Free a context as created by inheritance by perf_event_init_task() below,
 * used by fork() in case of failure.
 *
 * Even though the task has never lived, the context and events have been
 * exposed through the child_list, so we must take care tearing it all down.
 */
12821void perf_event_free_task(struct task_struct *task)
12822{
12823 struct perf_event_context *ctx;
12824 struct perf_event *event, *tmp;
12825 int ctxn;
12826
12827 for_each_task_context_nr(ctxn) {
12828 ctx = task->perf_event_ctxp[ctxn];
12829 if (!ctx)
12830 continue;
12831
12832 mutex_lock(&ctx->mutex);
12833 raw_spin_lock_irq(&ctx->lock);
 /*
 * Destroy the task <-> ctx relation and mark the context dead.
 *
 * This is important because even though the task hasn't been
 * exposed yet, the context has been (through child_list).
 */
12840 RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL);
12841 WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
12842 put_task_struct(task);
12843 raw_spin_unlock_irq(&ctx->lock);
12844
12845 list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry)
12846 perf_free_event(event, ctx);
12847
12848 mutex_unlock(&ctx->mutex);
12849
 /*
 * perf_event_release_kernel() could've stolen some of our
 * child events and still have them on its free_list. In that
 * case we must wait for these events to have been freed (in
 * particular all their references to this task must've been
 * dropped).
 *
 * Without this, copy_process() will unconditionally free this
 * task (irrespective of its reference count) and
 * _free_event()'s put_task_struct(event->hw.target) will be a
 * use-after-free.
 *
 * Wait for all events to drop their context reference.
 */
12864 wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1);
12865 put_ctx(ctx);
12866 }
12867}
12868
12869void perf_event_delayed_put(struct task_struct *task)
12870{
12871 int ctxn;
12872
12873 for_each_task_context_nr(ctxn)
12874 WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
12875}
12876
12877struct file *perf_event_get(unsigned int fd)
12878{
12879 struct file *file = fget(fd);
12880 if (!file)
12881 return ERR_PTR(-EBADF);
12882
12883 if (file->f_op != &perf_fops) {
12884 fput(file);
12885 return ERR_PTR(-EBADF);
12886 }
12887
12888 return file;
12889}
12890
12891const struct perf_event *perf_get_event(struct file *file)
12892{
12893 if (file->f_op != &perf_fops)
12894 return ERR_PTR(-EINVAL);
12895
12896 return file->private_data;
12897}
12898
12899const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
12900{
12901 if (!event)
12902 return ERR_PTR(-EINVAL);
12903
12904 return &event->attr;
12905}
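
/*
 * A minimal sketch of how an in-kernel consumer might resolve a perf event
 * file descriptor with the three helpers above (hypothetical caller, error
 * handling trimmed):
 *
 *	struct file *file = perf_event_get(fd);
 *	const struct perf_event *event;
 *
 *	if (IS_ERR(file))
 *		return PTR_ERR(file);
 *	event = perf_get_event(file);
 *	if (!IS_ERR(event))
 *		pr_debug("type=%u config=0x%llx\n",
 *			 event->attr.type, event->attr.config);
 *	fput(file);
 */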
12906
/*
 * Inherit an event from parent task to child task.
 *
 * Returns:
 *  - valid pointer on success
 *  - NULL for orphaned events
 *  - IS_ERR() on error
 */
12915static struct perf_event *
12916inherit_event(struct perf_event *parent_event,
12917 struct task_struct *parent,
12918 struct perf_event_context *parent_ctx,
12919 struct task_struct *child,
12920 struct perf_event *group_leader,
12921 struct perf_event_context *child_ctx)
12922{
12923 enum perf_event_state parent_state = parent_event->state;
12924 struct perf_event *child_event;
12925 unsigned long flags;
12926
 /*
 * Instead of creating recursive hierarchies of events,
 * we link inherited events back to the original parent,
 * which has a filp for sure, which we use as the reference
 * count:
 */
12933 if (parent_event->parent)
12934 parent_event = parent_event->parent;
12935
12936 child_event = perf_event_alloc(&parent_event->attr,
12937 parent_event->cpu,
12938 child,
12939 group_leader, parent_event,
12940 NULL, NULL, -1);
12941 if (IS_ERR(child_event))
12942 return child_event;
12943
12944
12945 if ((child_event->attach_state & PERF_ATTACH_TASK_DATA) &&
12946 !child_ctx->task_ctx_data) {
12947 struct pmu *pmu = child_event->pmu;
12948
12949 child_ctx->task_ctx_data = alloc_task_ctx_data(pmu);
12950 if (!child_ctx->task_ctx_data) {
12951 free_event(child_event);
12952 return ERR_PTR(-ENOMEM);
12953 }
12954 }
12955
 /*
 * is_orphaned_event() and list_add_tail(&parent_event->child_list)
 * must both be under the same lock in order to serialize against
 * perf_event_release_kernel(), such that either we must observe
 * PERF_ATTACH_CHILD or release_kernel() must see an empty child_list.
 */
12962 mutex_lock(&parent_event->child_mutex);
12963 if (is_orphaned_event(parent_event) ||
12964 !atomic_long_inc_not_zero(&parent_event->refcount)) {
12965 mutex_unlock(&parent_event->child_mutex);
 /* task_ctx_data is freed with child_ctx */
12967 free_event(child_event);
12968 return NULL;
12969 }
12970
12971 get_ctx(child_ctx);
12972
 /*
 * Make the child state follow the state of the parent event,
 * not its attr.disabled bit. We hold the parent's mutex,
 * so we won't race with perf_event_{en, dis}able_family.
 */
12978 if (parent_state >= PERF_EVENT_STATE_INACTIVE)
12979 child_event->state = PERF_EVENT_STATE_INACTIVE;
12980 else
12981 child_event->state = PERF_EVENT_STATE_OFF;
12982
12983 if (parent_event->attr.freq) {
12984 u64 sample_period = parent_event->hw.sample_period;
12985 struct hw_perf_event *hwc = &child_event->hw;
12986
12987 hwc->sample_period = sample_period;
12988 hwc->last_period = sample_period;
12989
12990 local64_set(&hwc->period_left, sample_period);
12991 }
12992
12993 child_event->ctx = child_ctx;
12994 child_event->overflow_handler = parent_event->overflow_handler;
12995 child_event->overflow_handler_context
12996 = parent_event->overflow_handler_context;
12997
 /*
 * Precalculate sample_data sizes:
 */
13001 perf_event__header_size(child_event);
13002 perf_event__id_header_size(child_event);
13003
 /*
 * Link it up in the child's context:
 */
13007 raw_spin_lock_irqsave(&child_ctx->lock, flags);
13008 add_event_to_ctx(child_event, child_ctx);
13009 child_event->attach_state |= PERF_ATTACH_CHILD;
13010 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
13011
 /*
 * Link this into the parent event's child list:
 */
13015 list_add_tail(&child_event->child_list, &parent_event->child_list);
13016 mutex_unlock(&parent_event->child_mutex);
13017
13018 return child_event;
13019}
13020
/*
 * Inherits an event group.
 *
 * This will quietly suppress orphaned events; !inherit_event() is not an error.
 * This matches with perf_event_release_kernel() removing all child events.
 *
 * Returns:
 *  - 0 on success
 *  - <0 on error
 */
13031static int inherit_group(struct perf_event *parent_event,
13032 struct task_struct *parent,
13033 struct perf_event_context *parent_ctx,
13034 struct task_struct *child,
13035 struct perf_event_context *child_ctx)
13036{
13037 struct perf_event *leader;
13038 struct perf_event *sub;
13039 struct perf_event *child_ctr;
13040
13041 leader = inherit_event(parent_event, parent, parent_ctx,
13042 child, NULL, child_ctx);
13043 if (IS_ERR(leader))
13044 return PTR_ERR(leader);
13045
 /*
 * Inherit the rest of the group: create a child counter for each
 * sibling of the parent group, attached to the new group leader.
 */
13050 for_each_sibling_event(sub, parent_event) {
13051 child_ctr = inherit_event(sub, parent, parent_ctx,
13052 child, leader, child_ctx);
13053 if (IS_ERR(child_ctr))
13054 return PTR_ERR(child_ctr);
13055
13056 if (sub->aux_event == parent_event && child_ctr &&
13057 !perf_get_aux_event(child_ctr, leader))
13058 return -EINVAL;
13059 }
13060 return 0;
13061}
13062
/*
 * Creates the child task context and tries to inherit the event-group.
 *
 * Clears @inherited_all on !attr.inherit or error. Note that inherited_all
 * stays set when we 'fail' to inherit an orphaned event, since that is not
 * treated as an error by inherit_group().
 *
 * Returns:
 *  - 0 on success
 *  - <0 on error
 */
13074static int
13075inherit_task_group(struct perf_event *event, struct task_struct *parent,
13076 struct perf_event_context *parent_ctx,
13077 struct task_struct *child, int ctxn,
13078 u64 clone_flags, int *inherited_all)
13079{
13080 int ret;
13081 struct perf_event_context *child_ctx;
13082
13083 if (!event->attr.inherit ||
13084 (event->attr.inherit_thread && !(clone_flags & CLONE_THREAD)) ||
 /* Do not inherit if sigtrap and signal handlers were cleared. */
13086 (event->attr.sigtrap && (clone_flags & CLONE_CLEAR_SIGHAND))) {
13087 *inherited_all = 0;
13088 return 0;
13089 }
13090
13091 child_ctx = child->perf_event_ctxp[ctxn];
13092 if (!child_ctx) {
 /*
 * This is executed from the parent task context, so
 * inherit events that have been marked for cloning.
 * First allocate and initialize a context for the
 * child.
 */
13099 child_ctx = alloc_perf_context(parent_ctx->pmu, child);
13100 if (!child_ctx)
13101 return -ENOMEM;
13102
13103 child->perf_event_ctxp[ctxn] = child_ctx;
13104 }
13105
13106 ret = inherit_group(event, parent, parent_ctx,
13107 child, child_ctx);
13108
13109 if (ret)
13110 *inherited_all = 0;
13111
13112 return ret;
13113}
13114
/*
 * Initialize the perf_event context in task_struct
 */
13118static int perf_event_init_context(struct task_struct *child, int ctxn,
13119 u64 clone_flags)
13120{
13121 struct perf_event_context *child_ctx, *parent_ctx;
13122 struct perf_event_context *cloned_ctx;
13123 struct perf_event *event;
13124 struct task_struct *parent = current;
13125 int inherited_all = 1;
13126 unsigned long flags;
13127 int ret = 0;
13128
13129 if (likely(!parent->perf_event_ctxp[ctxn]))
13130 return 0;
13131
 /*
 * If the parent's context is a clone, pin it so it won't get
 * swapped under us.
 */
13136 parent_ctx = perf_pin_task_context(parent, ctxn);
13137 if (!parent_ctx)
13138 return 0;
13139
 /*
 * No need to check if parent_ctx != NULL here; since we saw
 * it non-NULL earlier, the only reason for it to become NULL
 * is if we exit, and since we're currently in the middle of
 * a fork we can't be exiting at the same time.
 */

 /*
 * Lock the parent list. No need to lock the child - not PID
 * hashed yet and not running, so nobody can access it.
 */
13151 mutex_lock(&parent_ctx->mutex);
13152
 /*
 * We don't have to disable NMIs - we are only looking at
 * the list, not manipulating it:
 */
13157 perf_event_groups_for_each(event, &parent_ctx->pinned_groups) {
13158 ret = inherit_task_group(event, parent, parent_ctx,
13159 child, ctxn, clone_flags,
13160 &inherited_all);
13161 if (ret)
13162 goto out_unlock;
13163 }
13164
 /*
 * We can't hold ctx->lock when iterating the ->flexible_groups list due
 * to allocations, but we need to prevent rotation because
 * rotate_ctx() will change the list from interrupt context.
 */
13170 raw_spin_lock_irqsave(&parent_ctx->lock, flags);
13171 parent_ctx->rotate_disable = 1;
13172 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
13173
13174 perf_event_groups_for_each(event, &parent_ctx->flexible_groups) {
13175 ret = inherit_task_group(event, parent, parent_ctx,
13176 child, ctxn, clone_flags,
13177 &inherited_all);
13178 if (ret)
13179 goto out_unlock;
13180 }
13181
13182 raw_spin_lock_irqsave(&parent_ctx->lock, flags);
13183 parent_ctx->rotate_disable = 0;
13184
13185 child_ctx = child->perf_event_ctxp[ctxn];
13186
13187 if (child_ctx && inherited_all) {
 /*
 * Mark the child context as a clone of the parent
 * context, or of whatever the parent is a clone of.
 *
 * Note that if the parent is a clone, holding
 * parent_ctx->lock keeps it from being uncloned.
 */
13195 cloned_ctx = parent_ctx->parent_ctx;
13196 if (cloned_ctx) {
13197 child_ctx->parent_ctx = cloned_ctx;
13198 child_ctx->parent_gen = parent_ctx->parent_gen;
13199 } else {
13200 child_ctx->parent_ctx = parent_ctx;
13201 child_ctx->parent_gen = parent_ctx->generation;
13202 }
13203 get_ctx(child_ctx->parent_ctx);
13204 }
13205
13206 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
13207out_unlock:
13208 mutex_unlock(&parent_ctx->mutex);
13209
13210 perf_unpin_context(parent_ctx);
13211 put_ctx(parent_ctx);
13212
13213 return ret;
13214}
13215
/*
 * Initialize the perf_event context in task_struct
 */
13219int perf_event_init_task(struct task_struct *child, u64 clone_flags)
13220{
13221 int ctxn, ret;
13222
13223 memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp));
13224 mutex_init(&child->perf_event_mutex);
13225 INIT_LIST_HEAD(&child->perf_event_list);
13226
13227 for_each_task_context_nr(ctxn) {
13228 ret = perf_event_init_context(child, ctxn, clone_flags);
13229 if (ret) {
13230 perf_event_free_task(child);
13231 return ret;
13232 }
13233 }
13234
13235 return 0;
13236}
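
/*
 * Call-site summary (for orientation; see kernel/fork.c and kernel/exit.c):
 * copy_process() calls perf_event_init_task() to inherit counters into the
 * new child, uses perf_event_free_task() on its error path, and do_exit()
 * tears the inheritance down again via perf_event_exit_task() above.
 */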
13237
13238static void __init perf_event_init_all_cpus(void)
13239{
13240 struct swevent_htable *swhash;
13241 int cpu;
13242
13243 zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
13244
13245 for_each_possible_cpu(cpu) {
13246 swhash = &per_cpu(swevent_htable, cpu);
13247 mutex_init(&swhash->hlist_mutex);
13248 INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
13249
13250 INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
13251 raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
13252
13253#ifdef CONFIG_CGROUP_PERF
13254 INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
13255#endif
13256 INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
13257 }
13258}
13259
13260static void perf_swevent_init_cpu(unsigned int cpu)
13261{
13262 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
13263
13264 mutex_lock(&swhash->hlist_mutex);
13265 if (swhash->hlist_refcount > 0 && !swevent_hlist_deref(swhash)) {
13266 struct swevent_hlist *hlist;
13267
13268 hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
13269 WARN_ON(!hlist);
13270 rcu_assign_pointer(swhash->swevent_hlist, hlist);
13271 }
13272 mutex_unlock(&swhash->hlist_mutex);
13273}
13274
13275#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
13276static void __perf_event_exit_context(void *__info)
13277{
13278 struct perf_event_context *ctx = __info;
13279 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
13280 struct perf_event *event;
13281
13282 raw_spin_lock(&ctx->lock);
13283 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
13284 list_for_each_entry(event, &ctx->event_list, event_entry)
13285 __perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
13286 raw_spin_unlock(&ctx->lock);
13287}
13288
13289static void perf_event_exit_cpu_context(int cpu)
13290{
13291 struct perf_cpu_context *cpuctx;
13292 struct perf_event_context *ctx;
13293 struct pmu *pmu;
13294
13295 mutex_lock(&pmus_lock);
13296 list_for_each_entry(pmu, &pmus, entry) {
13297 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
13298 ctx = &cpuctx->ctx;
13299
13300 mutex_lock(&ctx->mutex);
13301 smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
13302 cpuctx->online = 0;
13303 mutex_unlock(&ctx->mutex);
13304 }
13305 cpumask_clear_cpu(cpu, perf_online_mask);
13306 mutex_unlock(&pmus_lock);
13307}
13308#else
13309
13310static void perf_event_exit_cpu_context(int cpu) { }
13311
13312#endif
13313
13314int perf_event_init_cpu(unsigned int cpu)
13315{
13316 struct perf_cpu_context *cpuctx;
13317 struct perf_event_context *ctx;
13318 struct pmu *pmu;
13319
13320 perf_swevent_init_cpu(cpu);
13321
13322 mutex_lock(&pmus_lock);
13323 cpumask_set_cpu(cpu, perf_online_mask);
13324 list_for_each_entry(pmu, &pmus, entry) {
13325 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
13326 ctx = &cpuctx->ctx;
13327
13328 mutex_lock(&ctx->mutex);
13329 cpuctx->online = 1;
13330 mutex_unlock(&ctx->mutex);
13331 }
13332 mutex_unlock(&pmus_lock);
13333
13334 return 0;
13335}
13336
13337int perf_event_exit_cpu(unsigned int cpu)
13338{
13339 perf_event_exit_cpu_context(cpu);
13340 return 0;
13341}
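
/*
 * perf_event_init_cpu() and perf_event_exit_cpu() are the CPU-hotplug
 * callbacks for perf; they are wired up via the cpuhp state machine (the
 * CPUHP_PERF_PREPARE and CPUHP_AP_PERF_ONLINE entries in kernel/cpu.c) and
 * maintain perf_online_mask as CPUs come and go.
 */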
13342
13343static int
13344perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
13345{
13346 int cpu;
13347
13348 for_each_online_cpu(cpu)
13349 perf_event_exit_cpu(cpu);
13350
13351 return NOTIFY_OK;
13352}
13353
/*
 * Run the perf reboot notifier at the very last possible moment
 * (lowest priority), so that perf keeps working for as long as
 * possible during shutdown.
 */
13358static struct notifier_block perf_reboot_notifier = {
13359 .notifier_call = perf_reboot,
13360 .priority = INT_MIN,
13361};
13362
13363void __init perf_event_init(void)
13364{
13365 int ret;
13366
13367 idr_init(&pmu_idr);
13368
13369 perf_event_init_all_cpus();
13370 init_srcu_struct(&pmus_srcu);
13371 perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
13372 perf_pmu_register(&perf_cpu_clock, NULL, -1);
13373 perf_pmu_register(&perf_task_clock, NULL, -1);
13374 perf_tp_register();
13375 perf_event_init_cpu(smp_processor_id());
13376 register_reboot_notifier(&perf_reboot_notifier);
13377
13378 ret = init_hw_breakpoint();
13379 WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
13380
13381 perf_event_cache = KMEM_CACHE(perf_event, SLAB_PANIC);
13382
 /*
 * Build time assertion that we keep the data_head at the intended
 * location. IOW, validation we got the __reserved[] size right.
 */
13387 BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head))
13388 != 1024);
13389}
13390
13391ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
13392 char *page)
13393{
13394 struct perf_pmu_events_attr *pmu_attr =
13395 container_of(attr, struct perf_pmu_events_attr, attr);
13396
13397 if (pmu_attr->event_str)
13398 return sprintf(page, "%s\n", pmu_attr->event_str);
13399
13400 return 0;
13401}
13402EXPORT_SYMBOL_GPL(perf_event_sysfs_show);
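
/*
 * perf_event_sysfs_show() is the ->show() method that PMU drivers typically
 * plug into their sysfs event attributes, for example via (hypothetical
 * event name and encoding):
 *
 *	PMU_EVENT_ATTR_STRING(cycles, evattr_cycles, "event=0x3c");
 *
 * after which reading <sysfs>/devices/<pmu>/events/cycles yields "event=0x3c".
 */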
13403
13404static int __init perf_event_sysfs_init(void)
13405{
13406 struct pmu *pmu;
13407 int ret;
13408
13409 mutex_lock(&pmus_lock);
13410
13411 ret = bus_register(&pmu_bus);
13412 if (ret)
13413 goto unlock;
13414
13415 list_for_each_entry(pmu, &pmus, entry) {
13416 if (!pmu->name || pmu->type < 0)
13417 continue;
13418
13419 ret = pmu_dev_alloc(pmu);
13420 WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
13421 }
13422 pmu_bus_running = 1;
13423 ret = 0;
13424
13425unlock:
13426 mutex_unlock(&pmus_lock);
13427
13428 return ret;
13429}
13430device_initcall(perf_event_sysfs_init);
13431
13432#ifdef CONFIG_CGROUP_PERF
13433static struct cgroup_subsys_state *
13434perf_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
13435{
13436 struct perf_cgroup *jc;
13437
13438 jc = kzalloc(sizeof(*jc), GFP_KERNEL);
13439 if (!jc)
13440 return ERR_PTR(-ENOMEM);
13441
13442 jc->info = alloc_percpu(struct perf_cgroup_info);
13443 if (!jc->info) {
13444 kfree(jc);
13445 return ERR_PTR(-ENOMEM);
13446 }
13447
13448 return &jc->css;
13449}
13450
13451static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
13452{
13453 struct perf_cgroup *jc = container_of(css, struct perf_cgroup, css);
13454
13455 free_percpu(jc->info);
13456 kfree(jc);
13457}
13458
13459static int perf_cgroup_css_online(struct cgroup_subsys_state *css)
13460{
13461 perf_event_cgroup(css->cgroup);
13462 return 0;
13463}
13464
13465static int __perf_cgroup_move(void *info)
13466{
13467 struct task_struct *task = info;
13468 rcu_read_lock();
13469 perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
13470 rcu_read_unlock();
13471 return 0;
13472}
13473
13474static void perf_cgroup_attach(struct cgroup_taskset *tset)
13475{
13476 struct task_struct *task;
13477 struct cgroup_subsys_state *css;
13478
13479 cgroup_taskset_for_each(task, css, tset)
13480 task_function_call(task, __perf_cgroup_move, task);
13481}
13482
13483struct cgroup_subsys perf_event_cgrp_subsys = {
13484 .css_alloc = perf_cgroup_css_alloc,
13485 .css_free = perf_cgroup_css_free,
13486 .css_online = perf_cgroup_css_online,
13487 .attach = perf_cgroup_attach,
 /*
 * Implicitly enable on the default hierarchy so that perf events can
 * always be filtered by cgroup2 path as long as the perf_event
 * controller is not mounted on a legacy hierarchy.
 */
13493 .implicit_on_dfl = true,
13494 .threaded = true,
13495};
13496#endif
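
/*
 * The perf_event cgroup controller registered above exposes no knobs of its
 * own; it exists so events can be scoped to a cgroup, e.g. from userspace
 * (illustrative):
 *
 *	perf stat -e cycles -a -G mygroup -- sleep 1
 */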
13497
13498DEFINE_STATIC_CALL_RET0(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
13499