// SPDX-License-Identifier: GPL-2.0
/*
 * Performance events core code.
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/idr.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/tick.h>
#include <linux/sysfs.h>
#include <linux/dcache.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/reboot.h>
#include <linux/vmstat.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/hardirq.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/anon_inodes.h>
#include <linux/kernel_stat.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>
#include <linux/trace_events.h>
#include <linux/hw_breakpoint.h>
#include <linux/mm_types.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/compat.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/namei.h>
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/proc_ns.h>
#include <linux/mount.h>
#include <linux/min_heap.h>
#include <linux/buildid.h>
#include <linux/highmem.h>

#include "internal.h"

#include <asm/irq_regs.h>

#include <linux/rh_features.h>

typedef int (*remote_function_f)(void *);

struct remote_function_call {
	struct task_struct	*p;
	remote_function_f	func;
	void			*info;
	int			ret;
};

static void remote_function(void *data)
{
	struct remote_function_call *tfc = data;
	struct task_struct *p = tfc->p;

	if (p) {
		/* The task migrated; leave ret at -EAGAIN so the caller retries. */
		if (task_cpu(p) != smp_processor_id())
			return;

		/*
		 * Now that we're on the right CPU with IRQs disabled, we can
		 * test whether we hit the right task without races.
		 */
		tfc->ret = -ESRCH;	/* No such (running) process */
		if (p != current)
			return;
	}

	tfc->ret = tfc->func(tfc->info);
}

/*
 * task_function_call - call a function on the CPU on which a task runs
 * @p:    the task to evaluate
 * @func: the function to be called
 * @info: the function call argument
 *
 * Calls @func on the CPU where @p is currently running, retrying when the
 * IPI races with the task migrating away (remote_function() leaves the
 * preset -EAGAIN in place in that case).
 *
 * Returns the @func return value, or -ESRCH when @p is not running there.
 */
static int
task_function_call(struct task_struct *p, remote_function_f func, void *info)
{
	struct remote_function_call data = {
		.p	= p,
		.func	= func,
		.info	= info,
		.ret	= -EAGAIN,
	};
	int ret;

	for (;;) {
		ret = smp_call_function_single(task_cpu(p), remote_function,
					       &data, 1);
		if (!ret)
			ret = data.ret;

		if (ret != -EAGAIN)
			break;

		cond_resched();
	}

	return ret;
}
132
/*
 * cpu_function_call - call a function on a given CPU
 * @cpu:  target CPU to run @func on
 * @func: the function to be called
 * @info: the function call argument
 *
 * Calls @func on @cpu via an IPI.
 *
 * Returns the @func return value, or -ENXIO when the CPU is not online.
 */
142static int cpu_function_call(int cpu, remote_function_f func, void *info)
143{
144 struct remote_function_call data = {
145 .p = NULL,
146 .func = func,
147 .info = info,
148 .ret = -ENXIO,
149 };
150
151 smp_call_function_single(cpu, remote_function, &data, 1);
152
153 return data.ret;
154}
155
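/*
 * Resolve the per-CPU context belonging to @ctx's PMU on this CPU.
 */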
156static inline struct perf_cpu_context *
157__get_cpu_context(struct perf_event_context *ctx)
158{
159 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
160}
161
162static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
163 struct perf_event_context *ctx)
164{
165 raw_spin_lock(&cpuctx->ctx.lock);
166 if (ctx)
167 raw_spin_lock(&ctx->lock);
168}
169
170static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
171 struct perf_event_context *ctx)
172{
173 if (ctx)
174 raw_spin_unlock(&ctx->lock);
175 raw_spin_unlock(&cpuctx->ctx.lock);
176}
177
178#define TASK_TOMBSTONE ((void *)-1L)
179
180static bool is_kernel_event(struct perf_event *event)
181{
182 return READ_ONCE(event->owner) == TASK_TOMBSTONE;
183}
184
/*
 * Event scheduling on task contexts:
 *
 * A task context with !ctx->nr_events is never scheduled in, which lets the
 * scheduler hooks be skipped for it. That creates two special cases:
 *
 *  - removing the last event from a task ctx; this is relatively
 *    straightforward and handled in __perf_remove_from_context().
 *
 *  - adding the first event to a task ctx; here we cannot rely on
 *    ctx->is_active and therefore cannot use event_function_call(),
 *    see perf_install_in_context().
 *
 * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
 */
204typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
205 struct perf_event_context *, void *);
206
207struct event_function_struct {
208 struct perf_event *event;
209 event_f func;
210 void *data;
211};
212
213static int event_function(void *info)
214{
215 struct event_function_struct *efs = info;
216 struct perf_event *event = efs->event;
217 struct perf_event_context *ctx = event->ctx;
218 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
219 struct perf_event_context *task_ctx = cpuctx->task_ctx;
220 int ret = 0;
221
222 lockdep_assert_irqs_disabled();
223
224 perf_ctx_lock(cpuctx, task_ctx);
225
226
227
228
229 if (ctx->task) {
230 if (ctx->task != current) {
231 ret = -ESRCH;
232 goto unlock;
233 }
234
235
236
237
238
239
240
241
242 WARN_ON_ONCE(!ctx->is_active);
243
244
245
246
247 WARN_ON_ONCE(task_ctx != ctx);
248 } else {
249 WARN_ON_ONCE(&cpuctx->ctx != ctx);
250 }
251
252 efs->func(event, cpuctx, ctx, efs->data);
253unlock:
254 perf_ctx_unlock(cpuctx, task_ctx);
255
256 return ret;
257}
258
259static void event_function_call(struct perf_event *event, event_f func, void *data)
260{
261 struct perf_event_context *ctx = event->ctx;
262 struct task_struct *task = READ_ONCE(ctx->task);
263 struct event_function_struct efs = {
264 .event = event,
265 .func = func,
266 .data = data,
267 };
268
269 if (!event->parent) {
		/*
		 * If this is a !child event, we must hold ctx::mutex to
		 * stabilize the event->ctx relation. See
		 * perf_event_ctx_lock().
		 */
275 lockdep_assert_held(&ctx->mutex);
276 }
277
278 if (!task) {
279 cpu_function_call(event->cpu, event_function, &efs);
280 return;
281 }
282
283 if (task == TASK_TOMBSTONE)
284 return;
285
286again:
287 if (!task_function_call(task, event_function, &efs))
288 return;
289
290 raw_spin_lock_irq(&ctx->lock);
291
292
293
294
295 task = ctx->task;
296 if (task == TASK_TOMBSTONE) {
297 raw_spin_unlock_irq(&ctx->lock);
298 return;
299 }
300 if (ctx->is_active) {
301 raw_spin_unlock_irq(&ctx->lock);
302 goto again;
303 }
304 func(event, NULL, ctx, data);
305 raw_spin_unlock_irq(&ctx->lock);
306}
307
308
309
310
311
312static void event_function_local(struct perf_event *event, event_f func, void *data)
313{
314 struct perf_event_context *ctx = event->ctx;
315 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
316 struct task_struct *task = READ_ONCE(ctx->task);
317 struct perf_event_context *task_ctx = NULL;
318
319 lockdep_assert_irqs_disabled();
320
321 if (task) {
322 if (task == TASK_TOMBSTONE)
323 return;
324
325 task_ctx = ctx;
326 }
327
328 perf_ctx_lock(cpuctx, task_ctx);
329
330 task = ctx->task;
331 if (task == TASK_TOMBSTONE)
332 goto unlock;
333
334 if (task) {
335
336
337
338
339
340 if (ctx->is_active) {
341 if (WARN_ON_ONCE(task != current))
342 goto unlock;
343
344 if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
345 goto unlock;
346 }
347 } else {
348 WARN_ON_ONCE(&cpuctx->ctx != ctx);
349 }
350
351 func(event, cpuctx, ctx, data);
352unlock:
353 perf_ctx_unlock(cpuctx, task_ctx);
354}
355
356#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
357 PERF_FLAG_FD_OUTPUT |\
358 PERF_FLAG_PID_CGROUP |\
359 PERF_FLAG_FD_CLOEXEC)
360
/*
 * branch priv levels that need permission checks
 */
364#define PERF_SAMPLE_BRANCH_PERM_PLM \
365 (PERF_SAMPLE_BRANCH_KERNEL |\
366 PERF_SAMPLE_BRANCH_HV)
367
368enum event_type_t {
369 EVENT_FLEXIBLE = 0x1,
370 EVENT_PINNED = 0x2,
371 EVENT_TIME = 0x4,
372
373 EVENT_CPU = 0x8,
374 EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
375};
376
/*
 * perf_sched_events : >0 events exist
 * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
 */
382static void perf_sched_delayed(struct work_struct *work);
383DEFINE_STATIC_KEY_FALSE(perf_sched_events);
384static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
385static DEFINE_MUTEX(perf_sched_mutex);
386static atomic_t perf_sched_count;
387
388static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
389static DEFINE_PER_CPU(int, perf_sched_cb_usages);
390static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
391
392static atomic_t nr_mmap_events __read_mostly;
393static atomic_t nr_comm_events __read_mostly;
394static atomic_t nr_namespaces_events __read_mostly;
395static atomic_t nr_task_events __read_mostly;
396static atomic_t nr_freq_events __read_mostly;
397static atomic_t nr_switch_events __read_mostly;
398static atomic_t nr_ksymbol_events __read_mostly;
399static atomic_t nr_bpf_events __read_mostly;
400static atomic_t nr_cgroup_events __read_mostly;
401static atomic_t nr_text_poke_events __read_mostly;
402static atomic_t nr_build_id_events __read_mostly;
403
404static LIST_HEAD(pmus);
405static DEFINE_MUTEX(pmus_lock);
406static struct srcu_struct pmus_srcu;
407static cpumask_var_t perf_online_mask;
408
/*
 * perf event paranoia level:
 *  -1 - not paranoid at all
 *   0 - disallow raw tracepoint access for unpriv
 *   1 - disallow cpu events for unpriv
 *   2 - disallow kernel profiling for unpriv
 */
416int sysctl_perf_event_paranoid __read_mostly = 2;
417
/* Minimum for 512 kiB + 1 user control page */
419int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024);
420
/*
 * max perf event sample rate
 */
424#define DEFAULT_MAX_SAMPLE_RATE 100000
425#define DEFAULT_SAMPLE_PERIOD_NS (NSEC_PER_SEC / DEFAULT_MAX_SAMPLE_RATE)
426#define DEFAULT_CPU_TIME_MAX_PERCENT 25
427
428int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
429
430static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
431static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;
432
433static int perf_sample_allowed_ns __read_mostly =
434 DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;
435
436static void update_perf_cpu_limits(void)
437{
438 u64 tmp = perf_sample_period_ns;
439
440 tmp *= sysctl_perf_cpu_time_max_percent;
441 tmp = div_u64(tmp, 100);
442 if (!tmp)
443 tmp = 1;
444
445 WRITE_ONCE(perf_sample_allowed_ns, tmp);
446}
447
448static bool perf_rotate_context(struct perf_cpu_context *cpuctx);
449
450int perf_proc_update_handler(struct ctl_table *table, int write,
451 void __user *buffer, size_t *lenp,
452 loff_t *ppos)
453{
454 int ret;
455 int perf_cpu = sysctl_perf_cpu_time_max_percent;
456
457
458
459 if (write && (perf_cpu == 100 || perf_cpu == 0))
460 return -EINVAL;
461
462 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
463 if (ret || !write)
464 return ret;
465
466 max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
467 perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
468 update_perf_cpu_limits();
469
470 return 0;
471}
472
473int sysctl_perf_cpu_time_max_percent __read_mostly = DEFAULT_CPU_TIME_MAX_PERCENT;
474
475int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
476 void __user *buffer, size_t *lenp,
477 loff_t *ppos)
478{
479 int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
480
481 if (ret || !write)
482 return ret;
483
484 if (sysctl_perf_cpu_time_max_percent == 100 ||
485 sysctl_perf_cpu_time_max_percent == 0) {
486 printk(KERN_WARNING
487 "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
488 WRITE_ONCE(perf_sample_allowed_ns, 0);
489 } else {
490 update_perf_cpu_limits();
491 }
492
493 return 0;
494}
495
/*
 * perf samples are done in some very critical code paths (NMIs).
 * If they take too much CPU time, the system can lock up and not
 * get any real work done.  This will drop the sample rate when
 * we detect that events are taking too long.
 */
502#define NR_ACCUMULATED_SAMPLES 128
503static DEFINE_PER_CPU(u64, running_sample_length);
504
505static u64 __report_avg;
506static u64 __report_allowed;
507
508static void perf_duration_warn(struct irq_work *w)
509{
510 printk_ratelimited(KERN_INFO
511 "perf: interrupt took too long (%lld > %lld), lowering "
512 "kernel.perf_event_max_sample_rate to %d\n",
513 __report_avg, __report_allowed,
514 sysctl_perf_event_sample_rate);
515}
516
517static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn);
518
519void perf_sample_event_took(u64 sample_len_ns)
520{
521 u64 max_len = READ_ONCE(perf_sample_allowed_ns);
522 u64 running_len;
523 u64 avg_len;
524 u32 max;
525
526 if (max_len == 0)
527 return;
528
529
530 running_len = __this_cpu_read(running_sample_length);
531 running_len -= running_len/NR_ACCUMULATED_SAMPLES;
532 running_len += sample_len_ns;
533 __this_cpu_write(running_sample_length, running_len);
534
535
536
537
538
539
540 avg_len = running_len/NR_ACCUMULATED_SAMPLES;
541 if (avg_len <= max_len)
542 return;
543
544 __report_avg = avg_len;
545 __report_allowed = max_len;
546
547
548
549
550 avg_len += avg_len / 4;
551 max = (TICK_NSEC / 100) * sysctl_perf_cpu_time_max_percent;
552 if (avg_len < max)
553 max /= (u32)avg_len;
554 else
555 max = 1;
556
557 WRITE_ONCE(perf_sample_allowed_ns, avg_len);
558 WRITE_ONCE(max_samples_per_tick, max);
559
560 sysctl_perf_event_sample_rate = max * HZ;
561 perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
562
563 if (!irq_work_queue(&perf_duration_work)) {
564 early_printk("perf: interrupt took too long (%lld > %lld), lowering "
565 "kernel.perf_event_max_sample_rate to %d\n",
566 __report_avg, __report_allowed,
567 sysctl_perf_event_sample_rate);
568 }
569}
570
571static atomic64_t perf_event_id;
572
573static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
574 enum event_type_t event_type);
575
576static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
577 enum event_type_t event_type,
578 struct task_struct *task);
579
580static void update_context_time(struct perf_event_context *ctx);
581static u64 perf_event_time(struct perf_event *event);
582
583void __weak perf_event_print_debug(void) { }
584
585extern __weak const char *perf_pmu_name(void)
586{
587 return "pmu";
588}
589
590static inline u64 perf_clock(void)
591{
592 return local_clock();
593}
594
595static inline u64 perf_event_clock(struct perf_event *event)
596{
597 return event->clock();
598}
599
/*
 * State based event timekeeping.
 *
 * Event times are updated lazily at state changes: __perf_update_times()
 * folds the time since the last transition (event->tstamp) into the totals,
 * where total_time_enabled accrues while the event is at least INACTIVE and
 * total_time_running only while it is ACTIVE.
 *
 * The effective state of a group member follows its leader: when the leader
 * is OFF (or worse) the member is accounted with the leader's state, see
 * __perf_effective_state().
 */
622static __always_inline enum perf_event_state
623__perf_effective_state(struct perf_event *event)
624{
625 struct perf_event *leader = event->group_leader;
626
627 if (leader->state <= PERF_EVENT_STATE_OFF)
628 return leader->state;
629
630 return event->state;
631}
632
633static __always_inline void
634__perf_update_times(struct perf_event *event, u64 now, u64 *enabled, u64 *running)
635{
636 enum perf_event_state state = __perf_effective_state(event);
637 u64 delta = now - event->tstamp;
638
639 *enabled = event->total_time_enabled;
640 if (state >= PERF_EVENT_STATE_INACTIVE)
641 *enabled += delta;
642
643 *running = event->total_time_running;
644 if (state >= PERF_EVENT_STATE_ACTIVE)
645 *running += delta;
646}
647
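/*
 * Fold the time since the last state change (event->tstamp) into the
 * total_time_enabled/total_time_running counts and advance the timestamp.
 */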
648static void perf_event_update_time(struct perf_event *event)
649{
650 u64 now = perf_event_time(event);
651
652 __perf_update_times(event, now, &event->total_time_enabled,
653 &event->total_time_running);
654 event->tstamp = now;
655}
656
657static void perf_event_update_sibling_time(struct perf_event *leader)
658{
659 struct perf_event *sibling;
660
661 for_each_sibling_event(sibling, leader)
662 perf_event_update_time(sibling);
663}
664
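/*
 * Move @event to @state, updating its time accounting first; sibling times
 * are refreshed when crossing into or out of a negative (error/exit) state.
 */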
665static void
666perf_event_set_state(struct perf_event *event, enum perf_event_state state)
667{
668 if (event->state == state)
669 return;
670
671 perf_event_update_time(event);
672
673
674
675
676 if ((event->state < 0) ^ (state < 0))
677 perf_event_update_sibling_time(event);
678
679 WRITE_ONCE(event->state, state);
680}
681
682#ifdef CONFIG_CGROUP_PERF
683
684static inline bool
685perf_cgroup_match(struct perf_event *event)
686{
687 struct perf_event_context *ctx = event->ctx;
688 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
689
690
691 if (!event->cgrp)
692 return true;
693
694
695 if (!cpuctx->cgrp)
696 return false;
697
698
699
700
701
702
703
704 return cgroup_is_descendant(cpuctx->cgrp->css.cgroup,
705 event->cgrp->css.cgroup);
706}
707
708static inline void perf_detach_cgroup(struct perf_event *event)
709{
710 css_put(&event->cgrp->css);
711 event->cgrp = NULL;
712}
713
714static inline int is_cgroup_event(struct perf_event *event)
715{
716 return event->cgrp != NULL;
717}
718
719static inline u64 perf_cgroup_event_time(struct perf_event *event)
720{
721 struct perf_cgroup_info *t;
722
723 t = per_cpu_ptr(event->cgrp->info, event->cpu);
724 return t->time;
725}
726
727static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
728{
729 struct perf_cgroup_info *info;
730 u64 now;
731
732 now = perf_clock();
733
734 info = this_cpu_ptr(cgrp->info);
735
736 info->time += now - info->timestamp;
737 info->timestamp = now;
738}
739
740static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
741{
742 struct perf_cgroup *cgrp = cpuctx->cgrp;
743 struct cgroup_subsys_state *css;
744
745 if (cgrp) {
746 for (css = &cgrp->css; css; css = css->parent) {
747 cgrp = container_of(css, struct perf_cgroup, css);
748 __update_cgrp_time(cgrp);
749 }
750 }
751}
752
753static inline void update_cgrp_time_from_event(struct perf_event *event)
754{
755 struct perf_cgroup *cgrp;
756
757
758
759
760
761 if (!is_cgroup_event(event))
762 return;
763
764 cgrp = perf_cgroup_from_task(current, event->ctx);
765
766
767
768 if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
769 __update_cgrp_time(event->cgrp);
770}
771
772static inline void
773perf_cgroup_set_timestamp(struct task_struct *task,
774 struct perf_event_context *ctx)
775{
776 struct perf_cgroup *cgrp;
777 struct perf_cgroup_info *info;
778 struct cgroup_subsys_state *css;
779
780
781
782
783
784
785 if (!task || !ctx->nr_cgroups)
786 return;
787
788 cgrp = perf_cgroup_from_task(task, ctx);
789
790 for (css = &cgrp->css; css; css = css->parent) {
791 cgrp = container_of(css, struct perf_cgroup, css);
792 info = this_cpu_ptr(cgrp->info);
793 info->timestamp = ctx->timestamp;
794 }
795}
796
797static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list);
798
799#define PERF_CGROUP_SWOUT 0x1
800#define PERF_CGROUP_SWIN 0x2
801
802
803
804
805
806
807
808static void perf_cgroup_switch(struct task_struct *task, int mode)
809{
810 struct perf_cpu_context *cpuctx;
811 struct list_head *list;
812 unsigned long flags;
813
814
815
816
817
818 local_irq_save(flags);
819
820 list = this_cpu_ptr(&cgrp_cpuctx_list);
821 list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) {
822 WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
823
824 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
825 perf_pmu_disable(cpuctx->ctx.pmu);
826
827 if (mode & PERF_CGROUP_SWOUT) {
828 cpu_ctx_sched_out(cpuctx, EVENT_ALL);
829
830
831
832
833 cpuctx->cgrp = NULL;
834 }
835
836 if (mode & PERF_CGROUP_SWIN) {
837 WARN_ON_ONCE(cpuctx->cgrp);
838
839
840
841
842
843
844
845 cpuctx->cgrp = perf_cgroup_from_task(task,
846 &cpuctx->ctx);
847 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
848 }
849 perf_pmu_enable(cpuctx->ctx.pmu);
850 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
851 }
852
853 local_irq_restore(flags);
854}
855
856static inline void perf_cgroup_sched_out(struct task_struct *task,
857 struct task_struct *next)
858{
859 struct perf_cgroup *cgrp1;
860 struct perf_cgroup *cgrp2 = NULL;
861
862 rcu_read_lock();
863
864
865
866
867
868 cgrp1 = perf_cgroup_from_task(task, NULL);
869 cgrp2 = perf_cgroup_from_task(next, NULL);
870
871
872
873
874
875
876 if (cgrp1 != cgrp2)
877 perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
878
879 rcu_read_unlock();
880}
881
882static inline void perf_cgroup_sched_in(struct task_struct *prev,
883 struct task_struct *task)
884{
885 struct perf_cgroup *cgrp1;
886 struct perf_cgroup *cgrp2 = NULL;
887
888 rcu_read_lock();
889
890
891
892
893
894 cgrp1 = perf_cgroup_from_task(task, NULL);
895 cgrp2 = perf_cgroup_from_task(prev, NULL);
896
897
898
899
900
901
902 if (cgrp1 != cgrp2)
903 perf_cgroup_switch(task, PERF_CGROUP_SWIN);
904
905 rcu_read_unlock();
906}
907
908static int perf_cgroup_ensure_storage(struct perf_event *event,
909 struct cgroup_subsys_state *css)
910{
911 struct perf_cpu_context *cpuctx;
912 struct perf_event **storage;
913 int cpu, heap_size, ret = 0;
914
915
916
917
918
919 for (heap_size = 1; css; css = css->parent)
920 heap_size++;
921
922 for_each_possible_cpu(cpu) {
923 cpuctx = per_cpu_ptr(event->pmu->pmu_cpu_context, cpu);
924 if (heap_size <= cpuctx->heap_size)
925 continue;
926
927 storage = kmalloc_node(heap_size * sizeof(struct perf_event *),
928 GFP_KERNEL, cpu_to_node(cpu));
929 if (!storage) {
930 ret = -ENOMEM;
931 break;
932 }
933
934 raw_spin_lock_irq(&cpuctx->ctx.lock);
935 if (cpuctx->heap_size < heap_size) {
936 swap(cpuctx->heap, storage);
937 if (storage == cpuctx->heap_default)
938 storage = NULL;
939 cpuctx->heap_size = heap_size;
940 }
941 raw_spin_unlock_irq(&cpuctx->ctx.lock);
942
943 kfree(storage);
944 }
945
946 return ret;
947}
948
949static inline int perf_cgroup_connect(int fd, struct perf_event *event,
950 struct perf_event_attr *attr,
951 struct perf_event *group_leader)
952{
953 struct perf_cgroup *cgrp;
954 struct cgroup_subsys_state *css;
955 struct fd f = fdget(fd);
956 int ret = 0;
957
958 if (!f.file)
959 return -EBADF;
960
961 css = css_tryget_online_from_dir(f.file->f_path.dentry,
962 &perf_event_cgrp_subsys);
963 if (IS_ERR(css)) {
964 ret = PTR_ERR(css);
965 goto out;
966 }
967
968 ret = perf_cgroup_ensure_storage(event, css);
969 if (ret)
970 goto out;
971
972 cgrp = container_of(css, struct perf_cgroup, css);
973 event->cgrp = cgrp;
974
975
976
977
978
979
980 if (group_leader && group_leader->cgrp != cgrp) {
981 perf_detach_cgroup(event);
982 ret = -EINVAL;
983 }
984out:
985 fdput(f);
986 return ret;
987}
988
989static inline void
990perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
991{
992 struct perf_cgroup_info *t;
993 t = per_cpu_ptr(event->cgrp->info, event->cpu);
994 event->shadow_ctx_time = now - t->timestamp;
995}
996
997static inline void
998perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
999{
1000 struct perf_cpu_context *cpuctx;
1001
1002 if (!is_cgroup_event(event))
1003 return;
1004
1005
1006
1007
1008
1009 cpuctx = container_of(ctx, struct perf_cpu_context, ctx);
1010
1011
1012
1013
1014
1015
1016
1017 if (ctx->is_active && !cpuctx->cgrp) {
1018 struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
1019
1020 if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
1021 cpuctx->cgrp = cgrp;
1022 }
1023
1024 if (ctx->nr_cgroups++)
1025 return;
1026
1027 list_add(&cpuctx->cgrp_cpuctx_entry,
1028 per_cpu_ptr(&cgrp_cpuctx_list, event->cpu));
1029}
1030
1031static inline void
1032perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
1033{
1034 struct perf_cpu_context *cpuctx;
1035
1036 if (!is_cgroup_event(event))
1037 return;
1038
1039
1040
1041
1042
1043 cpuctx = container_of(ctx, struct perf_cpu_context, ctx);
1044
1045 if (--ctx->nr_cgroups)
1046 return;
1047
1048 if (ctx->is_active && cpuctx->cgrp)
1049 cpuctx->cgrp = NULL;
1050
1051 list_del(&cpuctx->cgrp_cpuctx_entry);
1052}
1053
1054#else
1055
1056static inline bool
1057perf_cgroup_match(struct perf_event *event)
1058{
1059 return true;
1060}
1061
1062static inline void perf_detach_cgroup(struct perf_event *event)
1063{}
1064
1065static inline int is_cgroup_event(struct perf_event *event)
1066{
1067 return 0;
1068}
1069
1070static inline void update_cgrp_time_from_event(struct perf_event *event)
1071{
1072}
1073
1074static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
1075{
1076}
1077
1078static inline void perf_cgroup_sched_out(struct task_struct *task,
1079 struct task_struct *next)
1080{
1081}
1082
1083static inline void perf_cgroup_sched_in(struct task_struct *prev,
1084 struct task_struct *task)
1085{
1086}
1087
1088static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
1089 struct perf_event_attr *attr,
1090 struct perf_event *group_leader)
1091{
1092 return -EINVAL;
1093}
1094
1095static inline void
1096perf_cgroup_set_timestamp(struct task_struct *task,
1097 struct perf_event_context *ctx)
1098{
1099}
1100
1101static inline void
1102perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
1103{
1104}
1105
1106static inline void
1107perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
1108{
1109}
1110
1111static inline u64 perf_cgroup_event_time(struct perf_event *event)
1112{
1113 return 0;
1114}
1115
1116static inline void
1117perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
1118{
1119}
1120
1121static inline void
1122perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
1123{
1124}
1125#endif
1126
1127
1128
1129
1130
1131#define PERF_CPU_HRTIMER (1000 / HZ)
1132
1133
1134
1135static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
1136{
1137 struct perf_cpu_context *cpuctx;
1138 bool rotations;
1139
1140 lockdep_assert_irqs_disabled();
1141
1142 cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
1143 rotations = perf_rotate_context(cpuctx);
1144
1145 raw_spin_lock(&cpuctx->hrtimer_lock);
1146 if (rotations)
1147 hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
1148 else
1149 cpuctx->hrtimer_active = 0;
1150 raw_spin_unlock(&cpuctx->hrtimer_lock);
1151
1152 return rotations ? HRTIMER_RESTART : HRTIMER_NORESTART;
1153}
1154
1155static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
1156{
1157 struct hrtimer *timer = &cpuctx->hrtimer;
1158 struct pmu *pmu = cpuctx->ctx.pmu;
1159 u64 interval;
1160
1161
1162 if (pmu->task_ctx_nr == perf_sw_context)
1163 return;
1164
1165
1166
1167
1168
1169 interval = pmu->hrtimer_interval_ms;
1170 if (interval < 1)
1171 interval = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;
1172
1173 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
1174
1175 raw_spin_lock_init(&cpuctx->hrtimer_lock);
1176 hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1177 timer->function = perf_mux_hrtimer_handler;
1178}
1179
1180static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
1181{
1182 struct hrtimer *timer = &cpuctx->hrtimer;
1183 struct pmu *pmu = cpuctx->ctx.pmu;
1184 unsigned long flags;
1185
1186
1187 if (pmu->task_ctx_nr == perf_sw_context)
1188 return 0;
1189
1190 raw_spin_lock_irqsave(&cpuctx->hrtimer_lock, flags);
1191 if (!cpuctx->hrtimer_active) {
1192 cpuctx->hrtimer_active = 1;
1193 hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
1194 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
1195 }
1196 raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
1197
1198 return 0;
1199}
1200
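/*
 * perf_pmu_disable()/perf_pmu_enable() nest via a per-CPU count so that
 * only the outermost pair actually touches the PMU.
 */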
1201void perf_pmu_disable(struct pmu *pmu)
1202{
1203 int *count = this_cpu_ptr(pmu->pmu_disable_count);
1204 if (!(*count)++)
1205 pmu->pmu_disable(pmu);
1206}
1207
1208void perf_pmu_enable(struct pmu *pmu)
1209{
1210 int *count = this_cpu_ptr(pmu->pmu_disable_count);
1211 if (!--(*count))
1212 pmu->pmu_enable(pmu);
1213}
1214
1215static DEFINE_PER_CPU(struct list_head, active_ctx_list);
1216
1217
1218
1219
1220
1221
1222
1223static void perf_event_ctx_activate(struct perf_event_context *ctx)
1224{
1225 struct list_head *head = this_cpu_ptr(&active_ctx_list);
1226
1227 lockdep_assert_irqs_disabled();
1228
1229 WARN_ON(!list_empty(&ctx->active_ctx_list));
1230
1231 list_add(&ctx->active_ctx_list, head);
1232}
1233
1234static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
1235{
1236 lockdep_assert_irqs_disabled();
1237
1238 WARN_ON(list_empty(&ctx->active_ctx_list));
1239
1240 list_del_init(&ctx->active_ctx_list);
1241}
1242
1243static void get_ctx(struct perf_event_context *ctx)
1244{
1245 refcount_inc(&ctx->refcount);
1246}
1247
1248static void *alloc_task_ctx_data(struct pmu *pmu)
1249{
1250 if (pmu->task_ctx_cache)
1251 return kmem_cache_zalloc(pmu->task_ctx_cache, GFP_KERNEL);
1252
1253 return NULL;
1254}
1255
1256static void free_task_ctx_data(struct pmu *pmu, void *task_ctx_data)
1257{
1258 if (pmu->task_ctx_cache && task_ctx_data)
1259 kmem_cache_free(pmu->task_ctx_cache, task_ctx_data);
1260}
1261
1262static void free_ctx(struct rcu_head *head)
1263{
1264 struct perf_event_context *ctx;
1265
1266 ctx = container_of(head, struct perf_event_context, rcu_head);
1267 free_task_ctx_data(ctx->pmu, ctx->task_ctx_data);
1268 kfree(ctx);
1269}
1270
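/*
 * Drop a context reference; the final put releases the parent context and
 * task references and frees the context after an RCU grace period.
 */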
1271static void put_ctx(struct perf_event_context *ctx)
1272{
1273 if (refcount_dec_and_test(&ctx->refcount)) {
1274 if (ctx->parent_ctx)
1275 put_ctx(ctx->parent_ctx);
1276 if (ctx->task && ctx->task != TASK_TOMBSTONE)
1277 put_task_struct(ctx->task);
1278 call_rcu(&ctx->rcu_head, free_ctx);
1279 }
1280}
1281
/*
 * Because of perf_event::ctx migration in sys_perf_event_open()::move_group
 * and perf_pmu_migrate_context(), obtaining a stable event->ctx needs care.
 *
 * Sites that change perf_event::ctx hold both the 'old' and the 'new'
 * ctx::mutex (ordered by mutex address) and effect the change with:
 *
 *   perf_remove_from_context();
 *   synchronize_rcu();
 *   perf_install_in_context();
 *
 * perf_event_ctx_lock_nested() therefore takes a reference on the context it
 * observes under RCU, acquires ctx->mutex and re-checks event->ctx, retrying
 * if the context changed underneath it.
 *
 * ctx::mutex also nests parent vs. child contexts during task exit and event
 * inheritance; the apparent lock-order inversion between those two paths is
 * safe because lifetime rules separate them: an exiting task cannot fork and
 * a forking task cannot (yet) exit.
 */
1348static struct perf_event_context *
1349perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
1350{
1351 struct perf_event_context *ctx;
1352
1353again:
1354 rcu_read_lock();
1355 ctx = READ_ONCE(event->ctx);
1356 if (!refcount_inc_not_zero(&ctx->refcount)) {
1357 rcu_read_unlock();
1358 goto again;
1359 }
1360 rcu_read_unlock();
1361
1362 mutex_lock_nested(&ctx->mutex, nesting);
1363 if (event->ctx != ctx) {
1364 mutex_unlock(&ctx->mutex);
1365 put_ctx(ctx);
1366 goto again;
1367 }
1368
1369 return ctx;
1370}
1371
1372static inline struct perf_event_context *
1373perf_event_ctx_lock(struct perf_event *event)
1374{
1375 return perf_event_ctx_lock_nested(event, 0);
1376}
1377
1378static void perf_event_ctx_unlock(struct perf_event *event,
1379 struct perf_event_context *ctx)
1380{
1381 mutex_unlock(&ctx->mutex);
1382 put_ctx(ctx);
1383}
1384
1385
1386
1387
1388
1389
1390static __must_check struct perf_event_context *
1391unclone_ctx(struct perf_event_context *ctx)
1392{
1393 struct perf_event_context *parent_ctx = ctx->parent_ctx;
1394
1395 lockdep_assert_held(&ctx->lock);
1396
1397 if (parent_ctx)
1398 ctx->parent_ctx = NULL;
1399 ctx->generation++;
1400
1401 return parent_ctx;
1402}
1403
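/*
 * Translate @p's pid of the given @type into the pid namespace of the
 * event's owner (the parent event for inherited events).
 */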
1404static u32 perf_event_pid_type(struct perf_event *event, struct task_struct *p,
1405 enum pid_type type)
1406{
1407 u32 nr;
1408
1409
1410
1411 if (event->parent)
1412 event = event->parent;
1413
1414 nr = __task_pid_nr_ns(p, type, event->ns);
1415
1416 if (!nr && !pid_alive(p))
1417 nr = -1;
1418 return nr;
1419}
1420
1421static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
1422{
1423 return perf_event_pid_type(event, p, PIDTYPE_TGID);
1424}
1425
1426static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
1427{
1428 return perf_event_pid_type(event, p, PIDTYPE_PID);
1429}
1430
1431
1432
1433
1434
1435static u64 primary_event_id(struct perf_event *event)
1436{
1437 u64 id = event->id;
1438
1439 if (event->parent)
1440 id = event->parent->id;
1441
1442 return id;
1443}
1444
/*
 * Get the perf_event_context for a task and lock it.
 *
 * This has to cope with the fact that until it is locked,
 * the context could get moved to another task.
 */
1451static struct perf_event_context *
1452perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
1453{
1454 struct perf_event_context *ctx;
1455
1456retry:
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466 local_irq_save(*flags);
1467 rcu_read_lock();
1468 ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
1469 if (ctx) {
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480 raw_spin_lock(&ctx->lock);
1481 if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
1482 raw_spin_unlock(&ctx->lock);
1483 rcu_read_unlock();
1484 local_irq_restore(*flags);
1485 goto retry;
1486 }
1487
1488 if (ctx->task == TASK_TOMBSTONE ||
1489 !refcount_inc_not_zero(&ctx->refcount)) {
1490 raw_spin_unlock(&ctx->lock);
1491 ctx = NULL;
1492 } else {
1493 WARN_ON_ONCE(ctx->task != task);
1494 }
1495 }
1496 rcu_read_unlock();
1497 if (!ctx)
1498 local_irq_restore(*flags);
1499 return ctx;
1500}
1501
/*
 * Get the context for a task and increment its pin_count (as well as taking
 * a reference) so the context can neither get swapped to another task nor
 * get freed.
 */
1507static struct perf_event_context *
1508perf_pin_task_context(struct task_struct *task, int ctxn)
1509{
1510 struct perf_event_context *ctx;
1511 unsigned long flags;
1512
1513 ctx = perf_lock_task_context(task, ctxn, &flags);
1514 if (ctx) {
1515 ++ctx->pin_count;
1516 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1517 }
1518 return ctx;
1519}
1520
1521static void perf_unpin_context(struct perf_event_context *ctx)
1522{
1523 unsigned long flags;
1524
1525 raw_spin_lock_irqsave(&ctx->lock, flags);
1526 --ctx->pin_count;
1527 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1528}
1529
/*
 * Update the record of the current time in a context.
 */
1533static void update_context_time(struct perf_event_context *ctx)
1534{
1535 u64 now = perf_clock();
1536
1537 ctx->time += now - ctx->timestamp;
1538 ctx->timestamp = now;
1539}
1540
1541static u64 perf_event_time(struct perf_event *event)
1542{
1543 struct perf_event_context *ctx = event->ctx;
1544
1545 if (is_cgroup_event(event))
1546 return perf_cgroup_event_time(event);
1547
1548 return ctx ? ctx->time : 0;
1549}
1550
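/*
 * Classify an event (via its group leader) as pinned or flexible, and tag
 * events on CPU contexts with EVENT_CPU.
 */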
1551static enum event_type_t get_event_type(struct perf_event *event)
1552{
1553 struct perf_event_context *ctx = event->ctx;
1554 enum event_type_t event_type;
1555
1556 lockdep_assert_held(&ctx->lock);
1557
1558
1559
1560
1561
1562 if (event->group_leader != event)
1563 event = event->group_leader;
1564
1565 event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
1566 if (!ctx->task)
1567 event_type |= EVENT_CPU;
1568
1569 return event_type;
1570}
1571
1572
1573
1574
1575static void init_event_group(struct perf_event *event)
1576{
1577 RB_CLEAR_NODE(&event->group_node);
1578 event->group_index = 0;
1579}
1580
1581
1582
1583
1584
1585static struct perf_event_groups *
1586get_event_groups(struct perf_event *event, struct perf_event_context *ctx)
1587{
1588 if (event->attr.pinned)
1589 return &ctx->pinned_groups;
1590 else
1591 return &ctx->flexible_groups;
1592}
1593
1594
1595
1596
1597static void perf_event_groups_init(struct perf_event_groups *groups)
1598{
1599 groups->tree = RB_ROOT;
1600 groups->index = 0;
1601}
1602
/*
 * Compare two events for the groups RB-tree order: by CPU, then (with
 * CONFIG_CGROUP_PERF) by cgroup id, then by group_index, which reflects
 * insertion order.
 */
1609static bool
1610perf_event_groups_less(struct perf_event *left, struct perf_event *right)
1611{
1612 if (left->cpu < right->cpu)
1613 return true;
1614 if (left->cpu > right->cpu)
1615 return false;
1616
1617#ifdef CONFIG_CGROUP_PERF
1618 if (left->cgrp != right->cgrp) {
1619 if (!left->cgrp || !left->cgrp->css.cgroup) {
1620
1621
1622
1623
1624 return true;
1625 }
1626 if (!right->cgrp || !right->cgrp->css.cgroup) {
1627
1628
1629
1630
1631 return false;
1632 }
1633
1634 if (left->cgrp->css.cgroup->kn->id < right->cgrp->css.cgroup->kn->id)
1635 return true;
1636
1637 return false;
1638 }
1639#endif
1640
1641 if (left->group_index < right->group_index)
1642 return true;
1643 if (left->group_index > right->group_index)
1644 return false;
1645
1646 return false;
1647}
1648
/*
 * Insert @event into @groups' tree; it gets a fresh @groups->index so that
 * equal-key events keep their insertion order within the CPU/cgroup subtree.
 */
1654static void
1655perf_event_groups_insert(struct perf_event_groups *groups,
1656 struct perf_event *event)
1657{
1658 struct perf_event *node_event;
1659 struct rb_node *parent;
1660 struct rb_node **node;
1661
1662 event->group_index = ++groups->index;
1663
1664 node = &groups->tree.rb_node;
1665 parent = *node;
1666
1667 while (*node) {
1668 parent = *node;
1669 node_event = container_of(*node, struct perf_event, group_node);
1670
1671 if (perf_event_groups_less(event, node_event))
1672 node = &parent->rb_left;
1673 else
1674 node = &parent->rb_right;
1675 }
1676
1677 rb_link_node(&event->group_node, parent, node);
1678 rb_insert_color(&event->group_node, &groups->tree);
1679}
1680
1681
1682
1683
1684static void
1685add_event_to_groups(struct perf_event *event, struct perf_event_context *ctx)
1686{
1687 struct perf_event_groups *groups;
1688
1689 groups = get_event_groups(event, ctx);
1690 perf_event_groups_insert(groups, event);
1691}
1692
1693
1694
1695
1696static void
1697perf_event_groups_delete(struct perf_event_groups *groups,
1698 struct perf_event *event)
1699{
1700 WARN_ON_ONCE(RB_EMPTY_NODE(&event->group_node) ||
1701 RB_EMPTY_ROOT(&groups->tree));
1702
1703 rb_erase(&event->group_node, &groups->tree);
1704 init_event_group(event);
1705}
1706
1707
1708
1709
1710static void
1711del_event_from_groups(struct perf_event *event, struct perf_event_context *ctx)
1712{
1713 struct perf_event_groups *groups;
1714
1715 groups = get_event_groups(event, ctx);
1716 perf_event_groups_delete(groups, event);
1717}
1718
/*
 * Get the leftmost event in the @cpu/@cgrp subtree.
 */
1722static struct perf_event *
1723perf_event_groups_first(struct perf_event_groups *groups, int cpu,
1724 struct cgroup *cgrp)
1725{
1726 struct perf_event *node_event = NULL, *match = NULL;
1727 struct rb_node *node = groups->tree.rb_node;
1728#ifdef CONFIG_CGROUP_PERF
1729 u64 node_cgrp_id, cgrp_id = 0;
1730
1731 if (cgrp)
1732 cgrp_id = cgrp->kn->id;
1733#endif
1734
1735 while (node) {
1736 node_event = container_of(node, struct perf_event, group_node);
1737
1738 if (cpu < node_event->cpu) {
1739 node = node->rb_left;
1740 continue;
1741 }
1742 if (cpu > node_event->cpu) {
1743 node = node->rb_right;
1744 continue;
1745 }
1746#ifdef CONFIG_CGROUP_PERF
1747 node_cgrp_id = 0;
1748 if (node_event->cgrp && node_event->cgrp->css.cgroup)
1749 node_cgrp_id = node_event->cgrp->css.cgroup->kn->id;
1750
1751 if (cgrp_id < node_cgrp_id) {
1752 node = node->rb_left;
1753 continue;
1754 }
1755 if (cgrp_id > node_cgrp_id) {
1756 node = node->rb_right;
1757 continue;
1758 }
1759#endif
1760 match = node_event;
1761 node = node->rb_left;
1762 }
1763
1764 return match;
1765}
1766
/*
 * Return the next event in the same {cpu, cgroup} subtree, or NULL.
 */
1770static struct perf_event *
1771perf_event_groups_next(struct perf_event *event)
1772{
1773 struct perf_event *next;
1774#ifdef CONFIG_CGROUP_PERF
1775 u64 curr_cgrp_id = 0;
1776 u64 next_cgrp_id = 0;
1777#endif
1778
1779 next = rb_entry_safe(rb_next(&event->group_node), typeof(*event), group_node);
1780 if (next == NULL || next->cpu != event->cpu)
1781 return NULL;
1782
1783#ifdef CONFIG_CGROUP_PERF
1784 if (event->cgrp && event->cgrp->css.cgroup)
1785 curr_cgrp_id = event->cgrp->css.cgroup->kn->id;
1786
1787 if (next->cgrp && next->cgrp->css.cgroup)
1788 next_cgrp_id = next->cgrp->css.cgroup->kn->id;
1789
1790 if (curr_cgrp_id != next_cgrp_id)
1791 return NULL;
1792#endif
1793 return next;
1794}
1795
/*
 * Iterate through the whole groups tree.
 */
1799#define perf_event_groups_for_each(event, groups) \
1800 for (event = rb_entry_safe(rb_first(&((groups)->tree)), \
1801 typeof(*event), group_node); event; \
1802 event = rb_entry_safe(rb_next(&event->group_node), \
1803 typeof(*event), group_node))
1804
/*
 * Add an event to the lists for its context.
 * Must be called with ctx->mutex and ctx->lock held.
 */
1809static void
1810list_add_event(struct perf_event *event, struct perf_event_context *ctx)
1811{
1812 lockdep_assert_held(&ctx->lock);
1813
1814 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
1815 event->attach_state |= PERF_ATTACH_CONTEXT;
1816
1817 event->tstamp = perf_event_time(event);
1818
1819
1820
1821
1822
1823
1824 if (event->group_leader == event) {
1825 event->group_caps = event->event_caps;
1826 add_event_to_groups(event, ctx);
1827 }
1828
1829 list_add_rcu(&event->event_entry, &ctx->event_list);
1830 ctx->nr_events++;
1831 if (event->attr.inherit_stat)
1832 ctx->nr_stat++;
1833
1834 if (event->state > PERF_EVENT_STATE_OFF)
1835 perf_cgroup_event_enable(event, ctx);
1836
1837 ctx->generation++;
1838}
1839
1840
1841
1842
1843static inline void perf_event__state_init(struct perf_event *event)
1844{
1845 event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF :
1846 PERF_EVENT_STATE_INACTIVE;
1847}
1848
1849static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
1850{
1851 int entry = sizeof(u64);
1852 int size = 0;
1853 int nr = 1;
1854
1855 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1856 size += sizeof(u64);
1857
1858 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1859 size += sizeof(u64);
1860
1861 if (event->attr.read_format & PERF_FORMAT_ID)
1862 entry += sizeof(u64);
1863
1864 if (event->attr.read_format & PERF_FORMAT_GROUP) {
1865 nr += nr_siblings;
1866 size += sizeof(u64);
1867 }
1868
1869 size += entry * nr;
1870 event->read_size = size;
1871}
1872
1873static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
1874{
1875 struct perf_sample_data *data;
1876 u16 size = 0;
1877
1878 if (sample_type & PERF_SAMPLE_IP)
1879 size += sizeof(data->ip);
1880
1881 if (sample_type & PERF_SAMPLE_ADDR)
1882 size += sizeof(data->addr);
1883
1884 if (sample_type & PERF_SAMPLE_PERIOD)
1885 size += sizeof(data->period);
1886
1887 if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
1888 size += sizeof(data->weight.full);
1889
1890 if (sample_type & PERF_SAMPLE_READ)
1891 size += event->read_size;
1892
1893 if (sample_type & PERF_SAMPLE_DATA_SRC)
1894 size += sizeof(data->data_src.val);
1895
1896 if (sample_type & PERF_SAMPLE_TRANSACTION)
1897 size += sizeof(data->txn);
1898
1899 if (sample_type & PERF_SAMPLE_PHYS_ADDR)
1900 size += sizeof(data->phys_addr);
1901
1902 if (sample_type & PERF_SAMPLE_CGROUP)
1903 size += sizeof(data->cgroup);
1904
1905 if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
1906 size += sizeof(data->data_page_size);
1907
1908 if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
1909 size += sizeof(data->code_page_size);
1910
1911 event->header_size = size;
1912}
1913
1914
1915
1916
1917
1918static void perf_event__header_size(struct perf_event *event)
1919{
1920 __perf_event_read_size(event,
1921 event->group_leader->nr_siblings);
1922 __perf_event_header_size(event, event->attr.sample_type);
1923}
1924
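/*
 * Precompute the size of the sample-ID fields that accompany records for
 * this event, based on its sample_type.
 */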
1925static void perf_event__id_header_size(struct perf_event *event)
1926{
1927 struct perf_sample_data *data;
1928 u64 sample_type = event->attr.sample_type;
1929 u16 size = 0;
1930
1931 if (sample_type & PERF_SAMPLE_TID)
1932 size += sizeof(data->tid_entry);
1933
1934 if (sample_type & PERF_SAMPLE_TIME)
1935 size += sizeof(data->time);
1936
1937 if (sample_type & PERF_SAMPLE_IDENTIFIER)
1938 size += sizeof(data->id);
1939
1940 if (sample_type & PERF_SAMPLE_ID)
1941 size += sizeof(data->id);
1942
1943 if (sample_type & PERF_SAMPLE_STREAM_ID)
1944 size += sizeof(data->stream_id);
1945
1946 if (sample_type & PERF_SAMPLE_CPU)
1947 size += sizeof(data->cpu_entry);
1948
1949 event->id_header_size = size;
1950}
1951
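/*
 * Reject configurations whose combined read/header/ID sizes could overflow
 * the u16 perf_event_header::size (anything at or above 16k is refused).
 */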
1952static bool perf_event_validate_size(struct perf_event *event)
1953{
1954
1955
1956
1957
1958 __perf_event_read_size(event, event->group_leader->nr_siblings + 1);
1959 __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
1960 perf_event__id_header_size(event);
1961
1962
1963
1964
1965
1966 if (event->read_size + event->header_size +
1967 event->id_header_size + sizeof(struct perf_event_header) >= 16*1024)
1968 return false;
1969
1970 return true;
1971}
1972
1973static void perf_group_attach(struct perf_event *event)
1974{
1975 struct perf_event *group_leader = event->group_leader, *pos;
1976
1977 lockdep_assert_held(&event->ctx->lock);
1978
1979
1980
1981
1982 if (event->attach_state & PERF_ATTACH_GROUP)
1983 return;
1984
1985 event->attach_state |= PERF_ATTACH_GROUP;
1986
1987 if (group_leader == event)
1988 return;
1989
1990 WARN_ON_ONCE(group_leader->ctx != event->ctx);
1991
1992 group_leader->group_caps &= event->event_caps;
1993
1994 list_add_tail(&event->sibling_list, &group_leader->sibling_list);
1995 group_leader->nr_siblings++;
1996
1997 perf_event__header_size(group_leader);
1998
1999 for_each_sibling_event(pos, group_leader)
2000 perf_event__header_size(pos);
2001}
2002
/*
 * Remove an event from the lists for its context.
 * Must be called with ctx->mutex and ctx->lock held.
 */
2007static void
2008list_del_event(struct perf_event *event, struct perf_event_context *ctx)
2009{
2010 WARN_ON_ONCE(event->ctx != ctx);
2011 lockdep_assert_held(&ctx->lock);
2012
2013
2014
2015
2016 if (!(event->attach_state & PERF_ATTACH_CONTEXT))
2017 return;
2018
2019 event->attach_state &= ~PERF_ATTACH_CONTEXT;
2020
2021 ctx->nr_events--;
2022 if (event->attr.inherit_stat)
2023 ctx->nr_stat--;
2024
2025 list_del_rcu(&event->event_entry);
2026
2027 if (event->group_leader == event)
2028 del_event_from_groups(event, ctx);
2029
2030
2031
2032
2033
2034
2035
2036
2037 if (event->state > PERF_EVENT_STATE_OFF) {
2038 perf_cgroup_event_disable(event, ctx);
2039 perf_event_set_state(event, PERF_EVENT_STATE_OFF);
2040 }
2041
2042 ctx->generation++;
2043}
2044
2045static int
2046perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event)
2047{
2048 if (!has_aux(aux_event))
2049 return 0;
2050
2051 if (!event->pmu->aux_output_match)
2052 return 0;
2053
2054 return event->pmu->aux_output_match(aux_event);
2055}
2056
2057static void put_event(struct perf_event *event);
2058static void event_sched_out(struct perf_event *event,
2059 struct perf_cpu_context *cpuctx,
2060 struct perf_event_context *ctx);
2061
2062static void perf_put_aux_event(struct perf_event *event)
2063{
2064 struct perf_event_context *ctx = event->ctx;
2065 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2066 struct perf_event *iter;
2067
2068
2069
2070
2071 if (event->aux_event) {
2072 iter = event->aux_event;
2073 event->aux_event = NULL;
2074 put_event(iter);
2075 return;
2076 }
2077
2078
2079
2080
2081
2082 for_each_sibling_event(iter, event->group_leader) {
2083 if (iter->aux_event != event)
2084 continue;
2085
2086 iter->aux_event = NULL;
2087 put_event(event);
2088
2089
2090
2091
2092
2093
2094 event_sched_out(iter, cpuctx, ctx);
2095 perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
2096 }
2097}
2098
2099static bool perf_need_aux_event(struct perf_event *event)
2100{
2101 return !!event->attr.aux_output || !!event->attr.aux_sample_size;
2102}
2103
2104static int perf_get_aux_event(struct perf_event *event,
2105 struct perf_event *group_leader)
2106{
2107
2108
2109
2110
2111
2112
2113 if (!group_leader)
2114 return 0;
2115
2116
2117
2118
2119 if (event->attr.aux_output && event->attr.aux_sample_size)
2120 return 0;
2121
2122 if (event->attr.aux_output &&
2123 !perf_aux_output_match(event, group_leader))
2124 return 0;
2125
2126 if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux)
2127 return 0;
2128
2129 if (!atomic_long_inc_not_zero(&group_leader->refcount))
2130 return 0;
2131
2132
2133
2134
2135
2136
2137
2138 event->aux_event = group_leader;
2139
2140 return 1;
2141}
2142
2143static inline struct list_head *get_event_list(struct perf_event *event)
2144{
2145 struct perf_event_context *ctx = event->ctx;
2146 return event->attr.pinned ? &ctx->pinned_active : &ctx->flexible_active;
2147}
2148
/*
 * Events that have PERF_EV_CAP_SIBLING require being part of a group and
 * cannot exist on their own, schedule them out and move them into the
 * ERROR state.
 */
2155static inline void perf_remove_sibling_event(struct perf_event *event)
2156{
2157 struct perf_event_context *ctx = event->ctx;
2158 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2159
2160 event_sched_out(event, cpuctx, ctx);
2161 perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
2162}
2163
2164static void perf_group_detach(struct perf_event *event)
2165{
2166 struct perf_event *leader = event->group_leader;
2167 struct perf_event *sibling, *tmp;
2168 struct perf_event_context *ctx = event->ctx;
2169
2170 lockdep_assert_held(&ctx->lock);
2171
2172
2173
2174
2175 if (!(event->attach_state & PERF_ATTACH_GROUP))
2176 return;
2177
2178 event->attach_state &= ~PERF_ATTACH_GROUP;
2179
2180 perf_put_aux_event(event);
2181
2182
2183
2184
2185 if (leader != event) {
2186 list_del_init(&event->sibling_list);
2187 event->group_leader->nr_siblings--;
2188 goto out;
2189 }
2190
2191
2192
2193
2194
2195
2196 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) {
2197
2198 if (sibling->event_caps & PERF_EV_CAP_SIBLING)
2199 perf_remove_sibling_event(sibling);
2200
2201 sibling->group_leader = sibling;
2202 list_del_init(&sibling->sibling_list);
2203
2204
2205 sibling->group_caps = event->group_caps;
2206
2207 if (!RB_EMPTY_NODE(&event->group_node)) {
2208 add_event_to_groups(sibling, event->ctx);
2209
2210 if (sibling->state == PERF_EVENT_STATE_ACTIVE)
2211 list_add_tail(&sibling->active_list, get_event_list(sibling));
2212 }
2213
2214 WARN_ON_ONCE(sibling->ctx != event->ctx);
2215 }
2216
2217out:
2218 for_each_sibling_event(tmp, leader)
2219 perf_event__header_size(tmp);
2220
2221 perf_event__header_size(leader);
2222}
2223
2224static bool is_orphaned_event(struct perf_event *event)
2225{
2226 return event->state == PERF_EVENT_STATE_DEAD;
2227}
2228
2229static inline int __pmu_filter_match(struct perf_event *event)
2230{
2231 struct pmu *pmu = event->pmu;
2232 return pmu->filter_match ? pmu->filter_match(event) : 1;
2233}
2234
2235
2236
2237
2238
2239
2240
2241static inline int pmu_filter_match(struct perf_event *event)
2242{
2243 struct perf_event *sibling;
2244
2245 if (!__pmu_filter_match(event))
2246 return 0;
2247
2248 for_each_sibling_event(sibling, event) {
2249 if (!__pmu_filter_match(sibling))
2250 return 0;
2251 }
2252
2253 return 1;
2254}
2255
2256static inline int
2257event_filter_match(struct perf_event *event)
2258{
2259 return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
2260 perf_cgroup_match(event) && pmu_filter_match(event);
2261}
2262
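/*
 * Stop an active event: remove it from the PMU, account its time and move
 * it to INACTIVE (or OFF if a disable was pending).
 */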
2263static void
2264event_sched_out(struct perf_event *event,
2265 struct perf_cpu_context *cpuctx,
2266 struct perf_event_context *ctx)
2267{
2268 enum perf_event_state state = PERF_EVENT_STATE_INACTIVE;
2269
2270 WARN_ON_ONCE(event->ctx != ctx);
2271 lockdep_assert_held(&ctx->lock);
2272
2273 if (event->state != PERF_EVENT_STATE_ACTIVE)
2274 return;
2275
2276
2277
2278
2279
2280
2281 list_del_init(&event->active_list);
2282
2283 perf_pmu_disable(event->pmu);
2284
2285 event->pmu->del(event, 0);
2286 event->oncpu = -1;
2287
2288 if (READ_ONCE(event->pending_disable) >= 0) {
2289 WRITE_ONCE(event->pending_disable, -1);
2290 perf_cgroup_event_disable(event, ctx);
2291 state = PERF_EVENT_STATE_OFF;
2292 }
2293 perf_event_set_state(event, state);
2294
2295 if (!is_software_event(event))
2296 cpuctx->active_oncpu--;
2297 if (!--ctx->nr_active)
2298 perf_event_ctx_deactivate(ctx);
2299 if (event->attr.freq && event->attr.sample_freq)
2300 ctx->nr_freq--;
2301 if (event->attr.exclusive || !cpuctx->active_oncpu)
2302 cpuctx->exclusive = 0;
2303
2304 perf_pmu_enable(event->pmu);
2305}
2306
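/*
 * Schedule out the whole group: the leader first, then each sibling, with
 * the PMU disabled around the lot.
 */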
2307static void
2308group_sched_out(struct perf_event *group_event,
2309 struct perf_cpu_context *cpuctx,
2310 struct perf_event_context *ctx)
2311{
2312 struct perf_event *event;
2313
2314 if (group_event->state != PERF_EVENT_STATE_ACTIVE)
2315 return;
2316
2317 perf_pmu_disable(ctx->pmu);
2318
2319 event_sched_out(group_event, cpuctx, ctx);
2320
2321
2322
2323
2324 for_each_sibling_event(event, group_event)
2325 event_sched_out(event, cpuctx, ctx);
2326
2327 perf_pmu_enable(ctx->pmu);
2328}
2329
2330#define DETACH_GROUP 0x01UL
2331
2332
2333
2334
2335
2336
2337
2338static void
2339__perf_remove_from_context(struct perf_event *event,
2340 struct perf_cpu_context *cpuctx,
2341 struct perf_event_context *ctx,
2342 void *info)
2343{
2344 unsigned long flags = (unsigned long)info;
2345
2346 if (ctx->is_active & EVENT_TIME) {
2347 update_context_time(ctx);
2348 update_cgrp_time_from_cpuctx(cpuctx);
2349 }
2350
2351 event_sched_out(event, cpuctx, ctx);
2352 if (flags & DETACH_GROUP)
2353 perf_group_detach(event);
2354 list_del_event(event, ctx);
2355
2356 if (!ctx->nr_events && ctx->is_active) {
2357 ctx->is_active = 0;
2358 ctx->rotate_necessary = 0;
2359 if (ctx->task) {
2360 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
2361 cpuctx->task_ctx = NULL;
2362 }
2363 }
2364}
2365
/*
 * Remove the event from a task's (or a CPU's) list of events.
 *
 * If event->ctx is a cloned context, callers must make sure that
 * every task struct that event->ctx->task could possibly point to
 * remains valid.  This is OK when called from perf_release since
 * that only calls us on the top-level context, which can't be a clone.
 * When called from perf_event_exit_task, it's OK because the
 * context has been detached from its task.
 */
2376static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
2377{
2378 struct perf_event_context *ctx = event->ctx;
2379
2380 lockdep_assert_held(&ctx->mutex);
2381
2382 event_function_call(event, __perf_remove_from_context, (void *)flags);
2383
2384
2385
2386
2387
2388
2389
2390 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
2391 if ((flags & DETACH_GROUP) &&
2392 (event->attach_state & PERF_ATTACH_GROUP)) {
2393
2394
2395
2396
2397 raw_spin_lock_irq(&ctx->lock);
2398 perf_group_detach(event);
2399 raw_spin_unlock_irq(&ctx->lock);
2400 }
2401}
2402
2403
2404
2405
2406static void __perf_event_disable(struct perf_event *event,
2407 struct perf_cpu_context *cpuctx,
2408 struct perf_event_context *ctx,
2409 void *info)
2410{
2411 if (event->state < PERF_EVENT_STATE_INACTIVE)
2412 return;
2413
2414 if (ctx->is_active & EVENT_TIME) {
2415 update_context_time(ctx);
2416 update_cgrp_time_from_event(event);
2417 }
2418
2419 if (event == event->group_leader)
2420 group_sched_out(event, cpuctx, ctx);
2421 else
2422 event_sched_out(event, cpuctx, ctx);
2423
2424 perf_event_set_state(event, PERF_EVENT_STATE_OFF);
2425 perf_cgroup_event_disable(event, ctx);
2426}
2427
/*
 * Disable an event.
 *
 * If event->ctx is a cloned context, callers must make sure that
 * every task struct that event->ctx->task could possibly point to
 * remains valid.  This condition is satisfied when called through
 * perf_event_for_each_child or perf_event_for_each because they
 * hold the top-level event's child_mutex, so any descendant that
 * goes to exit will block in perf_event_exit_event().
 *
 * When called from perf_pending_event it's OK because event->ctx
 * is the current context on this CPU and preemption is disabled,
 * hence we can't get into perf_event_task_sched_out for this context.
 */
2442static void _perf_event_disable(struct perf_event *event)
2443{
2444 struct perf_event_context *ctx = event->ctx;
2445
2446 raw_spin_lock_irq(&ctx->lock);
2447 if (event->state <= PERF_EVENT_STATE_OFF) {
2448 raw_spin_unlock_irq(&ctx->lock);
2449 return;
2450 }
2451 raw_spin_unlock_irq(&ctx->lock);
2452
2453 event_function_call(event, __perf_event_disable, NULL);
2454}
2455
2456void perf_event_disable_local(struct perf_event *event)
2457{
2458 event_function_local(event, __perf_event_disable, NULL);
2459}
2460
2461
2462
2463
2464
2465void perf_event_disable(struct perf_event *event)
2466{
2467 struct perf_event_context *ctx;
2468
2469 ctx = perf_event_ctx_lock(event);
2470 _perf_event_disable(event);
2471 perf_event_ctx_unlock(event, ctx);
2472}
2473EXPORT_SYMBOL_GPL(perf_event_disable);
2474
2475void perf_event_disable_inatomic(struct perf_event *event)
2476{
2477 WRITE_ONCE(event->pending_disable, smp_processor_id());
2478
2479 irq_work_queue(&event->pending);
2480}
2481
2482static void perf_set_shadow_time(struct perf_event *event,
2483 struct perf_event_context *ctx)
2484{
	/*
	 * Use the correct time source for the snapshot: cgroup events take
	 * their shadow time against the cgroup's own clock rather than
	 * ctx->timestamp, so that later time calculations stay within a
	 * single time source.
	 */
2510 if (is_cgroup_event(event))
2511 perf_cgroup_set_shadow_time(event, event->tstamp);
2512 else
2513 event->shadow_ctx_time = event->tstamp - ctx->timestamp;
2514}
2515
2516#define MAX_INTERRUPTS (~0ULL)
2517
2518static void perf_log_throttle(struct perf_event *event, int enable);
2519static void perf_log_itrace_start(struct perf_event *event);
2520
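/*
 * Start an event on this CPU: mark it ACTIVE, add it to the PMU and update
 * the context's active/frequency/exclusive bookkeeping.
 */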
2521static int
2522event_sched_in(struct perf_event *event,
2523 struct perf_cpu_context *cpuctx,
2524 struct perf_event_context *ctx)
2525{
2526 int ret = 0;
2527
2528 WARN_ON_ONCE(event->ctx != ctx);
2529
2530 lockdep_assert_held(&ctx->lock);
2531
2532 if (event->state <= PERF_EVENT_STATE_OFF)
2533 return 0;
2534
2535 WRITE_ONCE(event->oncpu, smp_processor_id());
2536
2537
2538
2539
2540
2541 smp_wmb();
2542 perf_event_set_state(event, PERF_EVENT_STATE_ACTIVE);
2543
2544
2545
2546
2547
2548
2549 if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) {
2550 perf_log_throttle(event, 1);
2551 event->hw.interrupts = 0;
2552 }
2553
2554 perf_pmu_disable(event->pmu);
2555
2556 perf_set_shadow_time(event, ctx);
2557
2558 perf_log_itrace_start(event);
2559
2560 if (event->pmu->add(event, PERF_EF_START)) {
2561 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
2562 event->oncpu = -1;
2563 ret = -EAGAIN;
2564 goto out;
2565 }
2566
2567 if (!is_software_event(event))
2568 cpuctx->active_oncpu++;
2569 if (!ctx->nr_active++)
2570 perf_event_ctx_activate(ctx);
2571 if (event->attr.freq && event->attr.sample_freq)
2572 ctx->nr_freq++;
2573
2574 if (event->attr.exclusive)
2575 cpuctx->exclusive = 1;
2576
2577out:
2578 perf_pmu_enable(event->pmu);
2579
2580 return ret;
2581}
2582
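/*
 * Schedule in a whole group inside a PMU transaction; if any member fails,
 * roll back the members already added and cancel the transaction.
 */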
2583static int
2584group_sched_in(struct perf_event *group_event,
2585 struct perf_cpu_context *cpuctx,
2586 struct perf_event_context *ctx)
2587{
2588 struct perf_event *event, *partial_group = NULL;
2589 struct pmu *pmu = ctx->pmu;
2590
2591 if (group_event->state == PERF_EVENT_STATE_OFF)
2592 return 0;
2593
2594 pmu->start_txn(pmu, PERF_PMU_TXN_ADD);
2595
2596 if (event_sched_in(group_event, cpuctx, ctx))
2597 goto error;
2598
2599
2600
2601
2602 for_each_sibling_event(event, group_event) {
2603 if (event_sched_in(event, cpuctx, ctx)) {
2604 partial_group = event;
2605 goto group_error;
2606 }
2607 }
2608
2609 if (!pmu->commit_txn(pmu))
2610 return 0;
2611
2612group_error:
2613
2614
2615
2616
2617
2618 for_each_sibling_event(event, group_event) {
2619 if (event == partial_group)
2620 break;
2621
2622 event_sched_out(event, cpuctx, ctx);
2623 }
2624 event_sched_out(group_event, cpuctx, ctx);
2625
2626error:
2627 pmu->cancel_txn(pmu);
2628 return -EAGAIN;
2629}
2630
/*
 * Can this event group be scheduled onto the PMU right now?
 */
2634static int group_can_go_on(struct perf_event *event,
2635 struct perf_cpu_context *cpuctx,
2636 int can_add_hw)
2637{
2638
2639
2640
2641 if (event->group_caps & PERF_EV_CAP_SOFTWARE)
2642 return 1;
2643
2644
2645
2646
2647 if (cpuctx->exclusive)
2648 return 0;
2649
2650
2651
2652
2653 if (event->attr.exclusive && !list_empty(get_event_list(event)))
2654 return 0;
2655
2656
2657
2658
2659 return can_add_hw;
2660}
2661
2662static void add_event_to_ctx(struct perf_event *event,
2663 struct perf_event_context *ctx)
2664{
2665 list_add_event(event, ctx);
2666 perf_group_attach(event);
2667}
2668
2669static void ctx_sched_out(struct perf_event_context *ctx,
2670 struct perf_cpu_context *cpuctx,
2671 enum event_type_t event_type);
2672static void
2673ctx_sched_in(struct perf_event_context *ctx,
2674 struct perf_cpu_context *cpuctx,
2675 enum event_type_t event_type,
2676 struct task_struct *task);
2677
2678static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
2679 struct perf_event_context *ctx,
2680 enum event_type_t event_type)
2681{
2682 if (!cpuctx->task_ctx)
2683 return;
2684
2685 if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
2686 return;
2687
2688 ctx_sched_out(ctx, cpuctx, event_type);
2689}
2690
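/*
 * Schedule in the CPU and task contexts, pinned groups before flexible ones
 * so pinned events get first pick of the hardware.
 */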
2691static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
2692 struct perf_event_context *ctx,
2693 struct task_struct *task)
2694{
2695 cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task);
2696 if (ctx)
2697 ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
2698 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
2699 if (ctx)
2700 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
2701}
2702
/*
 * We want to maintain the following priority of scheduling:
 *  - CPU pinned (EVENT_CPU | EVENT_PINNED)
 *  - task pinned (EVENT_PINNED)
 *  - CPU flexible (EVENT_CPU | EVENT_FLEXIBLE)
 *  - task flexible (EVENT_FLEXIBLE).
 *
 * In order to avoid unscheduling and scheduling back in everything every
 * time an event is added, only do it for the groups of equal priority and
 * below.
 *
 * This can be called after a batch operation on task events, in which case
 * event_type is a bit mask of the types of events involved. For CPU events,
 * event_type is only either EVENT_PINNED or EVENT_FLEXIBLE.
 */
2718static void ctx_resched(struct perf_cpu_context *cpuctx,
2719 struct perf_event_context *task_ctx,
2720 enum event_type_t event_type)
2721{
2722 enum event_type_t ctx_event_type;
2723 bool cpu_event = !!(event_type & EVENT_CPU);
2724
2725
2726
2727
2728
2729 if (event_type & EVENT_PINNED)
2730 event_type |= EVENT_FLEXIBLE;
2731
2732 ctx_event_type = event_type & EVENT_ALL;
2733
2734 perf_pmu_disable(cpuctx->ctx.pmu);
2735 if (task_ctx)
2736 task_ctx_sched_out(cpuctx, task_ctx, event_type);
2737
2738
2739
2740
2741
2742
2743
2744
2745 if (cpu_event)
2746 cpu_ctx_sched_out(cpuctx, ctx_event_type);
2747 else if (ctx_event_type & EVENT_PINNED)
2748 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
2749
2750 perf_event_sched_in(cpuctx, task_ctx, current);
2751 perf_pmu_enable(cpuctx->ctx.pmu);
2752}
2753
2754void perf_pmu_resched(struct pmu *pmu)
2755{
2756 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
2757 struct perf_event_context *task_ctx = cpuctx->task_ctx;
2758
2759 perf_ctx_lock(cpuctx, task_ctx);
2760 ctx_resched(cpuctx, task_ctx, EVENT_ALL|EVENT_CPU);
2761 perf_ctx_unlock(cpuctx, task_ctx);
2762}
2763
/*
 * Cross CPU call to install and enable a performance event.
 *
 * Very similar to remote_function() + event_function() but cannot assume
 * that things like ctx->is_active and cpuctx->task_ctx are set.
 */
2770static int __perf_install_in_context(void *info)
2771{
2772 struct perf_event *event = info;
2773 struct perf_event_context *ctx = event->ctx;
2774 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
2775 struct perf_event_context *task_ctx = cpuctx->task_ctx;
2776 bool reprogram = true;
2777 int ret = 0;
2778
2779 raw_spin_lock(&cpuctx->ctx.lock);
2780 if (ctx->task) {
2781 raw_spin_lock(&ctx->lock);
2782 task_ctx = ctx;
2783
2784 reprogram = (ctx->task == current);
2785
 /*
  * If the task is running, it must be running on this CPU,
  * otherwise we cannot reprogram things.
  *
  * If it is not running, we don't care, ctx->lock will
  * serialize against it becoming runnable.
  */
2793 if (task_curr(ctx->task) && !reprogram) {
2794 ret = -ESRCH;
2795 goto unlock;
2796 }
2797
2798 WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx);
2799 } else if (task_ctx) {
2800 raw_spin_lock(&task_ctx->lock);
2801 }
2802
2803#ifdef CONFIG_CGROUP_PERF
2804 if (event->state > PERF_EVENT_STATE_OFF && is_cgroup_event(event)) {
2805
2806
2807
2808
2809 struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
2810 reprogram = cgroup_is_descendant(cgrp->css.cgroup,
2811 event->cgrp->css.cgroup);
2812 }
2813#endif
2814
2815 if (reprogram) {
2816 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
2817 add_event_to_ctx(event, ctx);
2818 ctx_resched(cpuctx, task_ctx, get_event_type(event));
2819 } else {
2820 add_event_to_ctx(event, ctx);
2821 }
2822
2823unlock:
2824 perf_ctx_unlock(cpuctx, task_ctx);
2825
2826 return ret;
2827}
2828
2829static bool exclusive_event_installable(struct perf_event *event,
2830 struct perf_event_context *ctx);
2831
/*
 * Attach a performance event to a context.
 *
 * Very similar to event_function_call(), see the comment there.
 */
2837static void
2838perf_install_in_context(struct perf_event_context *ctx,
2839 struct perf_event *event,
2840 int cpu)
2841{
2842 struct task_struct *task = READ_ONCE(ctx->task);
2843
2844 lockdep_assert_held(&ctx->mutex);
2845
2846 WARN_ON_ONCE(!exclusive_event_installable(event, ctx));
2847
2848 if (event->cpu != -1)
2849 event->cpu = cpu;
2850
 /*
  * Ensures that if we can observe event->ctx, both the event and ctx
  * will be 'complete'. See perf_iterate_sb_cpu().
  */
2855 smp_store_release(&event->ctx, ctx);
2856
 /*
  * perf_event_attr::disabled events will not run and can be initialized
  * without IPI. Except when this is the first event for the context, in
  * that case we need the magic of the IPI to set ctx->is_active.
  *
  * The IOC_ENABLE that is sure to follow the creation of a disabled
  * event will issue the IPI and reprogram the hardware.
  */
2865 if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) {
2866 raw_spin_lock_irq(&ctx->lock);
2867 if (ctx->task == TASK_TOMBSTONE) {
2868 raw_spin_unlock_irq(&ctx->lock);
2869 return;
2870 }
2871 add_event_to_ctx(event, ctx);
2872 raw_spin_unlock_irq(&ctx->lock);
2873 return;
2874 }
2875
2876 if (!task) {
2877 cpu_function_call(cpu, __perf_install_in_context, event);
2878 return;
2879 }
2880
2881
2882
2883
2884 if (WARN_ON_ONCE(task == TASK_TOMBSTONE))
2885 return;
2886
 /*
  * Installing events is tricky because we cannot rely on ctx->is_active
  * to be set in case this is the nr_events 0 -> 1 transition.
  *
  * Instead we use task_curr(), which tells us if the task is running.
  * However, since we use task_curr() outside of rq::lock, we can race
  * against the actual state. This means the result can be wrong.
  *
  * If we get a false positive, we retry; this is harmless.
  *
  * If we get a false negative, things are complicated. If we are after
  * perf_event_context_sched_in() then ctx::lock will serialize us, and
  * the value must be correct. If we're before, it doesn't matter since
  * perf_event_context_sched_in() will program the counter.
  *
  * However, this hinges on the remote context switch having observed
  * our task->perf_event_ctxp[] store, such that it will in fact take
  * ctx::lock in perf_event_context_sched_in().
  *
  * We do this by task_function_call(); if the IPI fails to hit the task
  * we know any future context switch of that task must see the
  * perf_event_ctxp[] store.
  *
  * The smp_mb() below orders the task->perf_event_ctxp[] store with the
  * task_cpu() load, such that if the IPI then does not find the task
  * running, a future context switch of that task must observe the
  * store.
  */
2917 smp_mb();
2918again:
2919 if (!task_function_call(task, __perf_install_in_context, event))
2920 return;
2921
2922 raw_spin_lock_irq(&ctx->lock);
2923 task = ctx->task;
2924 if (WARN_ON_ONCE(task == TASK_TOMBSTONE)) {
2925
2926
2927
2928
2929
2930 raw_spin_unlock_irq(&ctx->lock);
2931 return;
2932 }
2933
2934
2935
2936
2937 if (task_curr(task)) {
2938 raw_spin_unlock_irq(&ctx->lock);
2939 goto again;
2940 }
2941 add_event_to_ctx(event, ctx);
2942 raw_spin_unlock_irq(&ctx->lock);
2943}
2944
/*
 * Cross CPU call to enable a performance event.
 */
2948static void __perf_event_enable(struct perf_event *event,
2949 struct perf_cpu_context *cpuctx,
2950 struct perf_event_context *ctx,
2951 void *info)
2952{
2953 struct perf_event *leader = event->group_leader;
2954 struct perf_event_context *task_ctx;
2955
2956 if (event->state >= PERF_EVENT_STATE_INACTIVE ||
2957 event->state <= PERF_EVENT_STATE_ERROR)
2958 return;
2959
2960 if (ctx->is_active)
2961 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
2962
2963 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
2964 perf_cgroup_event_enable(event, ctx);
2965
2966 if (!ctx->is_active)
2967 return;
2968
2969 if (!event_filter_match(event)) {
2970 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
2971 return;
2972 }
2973
2974
2975
2976
2977
2978 if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
2979 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
2980 return;
2981 }
2982
2983 task_ctx = cpuctx->task_ctx;
2984 if (ctx->task)
2985 WARN_ON_ONCE(task_ctx != ctx);
2986
2987 ctx_resched(cpuctx, task_ctx, get_event_type(event));
2988}
2989
/*
 * Enable an event.
 *
 * If event->ctx is a cloned context, callers must make sure that
 * every task struct that event->ctx->task could possibly point to
 * remains valid.  This condition is satisfied when called through
 * perf_event_for_each_child or perf_event_for_each as described
 * for perf_event_disable.
 */
2999static void _perf_event_enable(struct perf_event *event)
3000{
3001 struct perf_event_context *ctx = event->ctx;
3002
3003 raw_spin_lock_irq(&ctx->lock);
3004 if (event->state >= PERF_EVENT_STATE_INACTIVE ||
3005 event->state < PERF_EVENT_STATE_ERROR) {
3006out:
3007 raw_spin_unlock_irq(&ctx->lock);
3008 return;
3009 }
3010
 /*
  * If the event is in error state, clear that first.
  *
  * That way, if we see the event in error state below, we know that it
  * has gone back into error state, as distinct from the task having
  * been scheduled away before the cross-call arrived.
  */
3018 if (event->state == PERF_EVENT_STATE_ERROR) {
 /*
  * Detached SIBLING events cannot leave ERROR state.
  */
3022 if (event->event_caps & PERF_EV_CAP_SIBLING &&
3023 event->group_leader == event)
3024 goto out;
3025
3026 event->state = PERF_EVENT_STATE_OFF;
3027 }
3028 raw_spin_unlock_irq(&ctx->lock);
3029
3030 event_function_call(event, __perf_event_enable, NULL);
3031}
3032
/*
 * See perf_event_disable();
 */
3036void perf_event_enable(struct perf_event *event)
3037{
3038 struct perf_event_context *ctx;
3039
3040 ctx = perf_event_ctx_lock(event);
3041 _perf_event_enable(event);
3042 perf_event_ctx_unlock(event, ctx);
3043}
3044EXPORT_SYMBOL_GPL(perf_event_enable);
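/*
 * Illustrative sketch (not part of the original file): a minimal in-kernel
 * user of the exported enable/disable API. The helper names and the choice
 * of counter are hypothetical; the sketch only assumes
 * perf_event_create_kernel_counter() and perf_event_release_kernel(), which
 * live elsewhere in this file, plus the perf_event_enable()/disable() pair.
 *
 *	#include <linux/perf_event.h>
 *	#include <linux/err.h>
 *
 *	static struct perf_event *example_counter;
 *
 *	static int example_setup(void)
 *	{
 *		struct perf_event_attr attr = {
 *			.type		= PERF_TYPE_HARDWARE,
 *			.config		= PERF_COUNT_HW_INSTRUCTIONS,
 *			.size		= sizeof(attr),
 *			.disabled	= 1,	// start stopped
 *		};
 *
 *		// count on CPU 0, not bound to a task, no overflow handler
 *		example_counter = perf_event_create_kernel_counter(&attr, 0,
 *						NULL, NULL, NULL);
 *		return PTR_ERR_OR_ZERO(example_counter);
 *	}
 *
 *	static void example_measure(void (*work)(void))
 *	{
 *		perf_event_enable(example_counter);
 *		work();
 *		perf_event_disable(example_counter);
 *	}
 */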
3045
3046struct stop_event_data {
3047 struct perf_event *event;
3048 unsigned int restart;
3049};
3050
3051static int __perf_event_stop(void *info)
3052{
3053 struct stop_event_data *sd = info;
3054 struct perf_event *event = sd->event;
3055
3056
3057 if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
3058 return 0;
3059
3060
3061 smp_rmb();
3062
 /*
  * There is a window with interrupts enabled before we get here,
  * so we need to check again lest we try to stop another CPU's event.
  */
3067 if (READ_ONCE(event->oncpu) != smp_processor_id())
3068 return -EAGAIN;
3069
3070 event->pmu->stop(event, PERF_EF_UPDATE);
3071
 /*
  * The caller can ask for the event to be restarted once it has been
  * stopped; this is used when the event's configuration (for example
  * its address filters) has changed and the PMU needs to pick up the
  * new state before counting continues.
  *
  * Restart without PERF_EF_RELOAD so that the current sample period
  * is preserved.
  */
3081 if (sd->restart)
3082 event->pmu->start(event, 0);
3083
3084 return 0;
3085}
3086
3087static int perf_event_stop(struct perf_event *event, int restart)
3088{
3089 struct stop_event_data sd = {
3090 .event = event,
3091 .restart = restart,
3092 };
3093 int ret = 0;
3094
3095 do {
3096 if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
3097 return 0;
3098
3099
3100 smp_rmb();
3101
3102
3103
3104
3105
3106
3107 ret = cpu_function_call(READ_ONCE(event->oncpu),
3108 __perf_event_stop, &sd);
3109 } while (ret == -EAGAIN);
3110
3111 return ret;
3112}
3113
/*
 * In order to contain the amount of racy and tricky in the address filter
 * configuration management, it is a two part process:
 *
 * (p1) when userspace mappings change as a result of (1), (2) or (3) below,
 *      we update the addresses of the corresponding vmas in the
 *      event::addr_filter_ranges array and bump event::addr_filters_gen;
 *
 * (p2) when an event is scheduled in (pmu::add), it calls
 *      perf_event_addr_filters_sync() which calls pmu::addr_filters_sync()
 *      if (p1) happened, i.e. if the generation numbers differ.
 *
 * The mapping changes that trigger (p1) are:
 *
 * (1) perf_addr_filters_apply(): adjusting filters' offsets based on
 *     pre-existing mappings, called once when new filters arrive via the
 *     SET_FILTER ioctl;
 * (2) perf_addr_filters_adjust(): adjusting filters' offsets based on newly
 *     registered mappings from perf_event_mmap();
 * (3) perf_event_addr_filters_exec(): clearing filters' offsets in the
 *     process of exec.
 */
3136void perf_event_addr_filters_sync(struct perf_event *event)
3137{
3138 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
3139
3140 if (!has_addr_filter(event))
3141 return;
3142
3143 raw_spin_lock(&ifh->lock);
3144 if (event->addr_filters_gen != event->hw.addr_filters_gen) {
3145 event->pmu->addr_filters_sync(event);
3146 event->hw.addr_filters_gen = event->addr_filters_gen;
3147 }
3148 raw_spin_unlock(&ifh->lock);
3149}
3150EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);
3151
3152static int _perf_event_refresh(struct perf_event *event, int refresh)
3153{
 /*
  * not supported on inherited events
  */
3157 if (event->attr.inherit || !is_sampling_event(event))
3158 return -EINVAL;
3159
3160 atomic_add(refresh, &event->event_limit);
3161 _perf_event_enable(event);
3162
3163 return 0;
3164}
3165
3166
3167
3168
3169int perf_event_refresh(struct perf_event *event, int refresh)
3170{
3171 struct perf_event_context *ctx;
3172 int ret;
3173
3174 ctx = perf_event_ctx_lock(event);
3175 ret = _perf_event_refresh(event, refresh);
3176 perf_event_ctx_unlock(event, ctx);
3177
3178 return ret;
3179}
3180EXPORT_SYMBOL_GPL(perf_event_refresh);
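/*
 * Illustrative sketch (not part of the original file): _perf_event_refresh()
 * above is what backs the PERF_EVENT_IOC_REFRESH ioctl. A user-space caller
 * can arm a bounded number of overflow notifications and have the event
 * disable itself afterwards. File descriptor setup (perf_event_open(),
 * signal or ring-buffer plumbing) is elided; only the refresh call is shown.
 *
 *	#include <linux/perf_event.h>
 *	#include <sys/ioctl.h>
 *
 *	// fd: a sampling perf event opened with attr.disabled = 1
 *	static int arm_one_overflow(int fd)
 *	{
 *		// enable the event until exactly one overflow has fired
 *		return ioctl(fd, PERF_EVENT_IOC_REFRESH, 1);
 *	}
 */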
3181
3182static int perf_event_modify_breakpoint(struct perf_event *bp,
3183 struct perf_event_attr *attr)
3184{
3185 int err;
3186
3187 _perf_event_disable(bp);
3188
3189 err = modify_user_hw_breakpoint_check(bp, attr, true);
3190
3191 if (!bp->attr.disabled)
3192 _perf_event_enable(bp);
3193
3194 return err;
3195}
3196
3197static int perf_event_modify_attr(struct perf_event *event,
3198 struct perf_event_attr *attr)
3199{
3200 if (event->attr.type != attr->type)
3201 return -EINVAL;
3202
3203 switch (event->attr.type) {
3204 case PERF_TYPE_BREAKPOINT:
3205 return perf_event_modify_breakpoint(event, attr);
3206 default:
3207
3208 return -EOPNOTSUPP;
3209 }
3210}
3211
3212static void ctx_sched_out(struct perf_event_context *ctx,
3213 struct perf_cpu_context *cpuctx,
3214 enum event_type_t event_type)
3215{
3216 struct perf_event *event, *tmp;
3217 int is_active = ctx->is_active;
3218
3219 lockdep_assert_held(&ctx->lock);
3220
3221 if (likely(!ctx->nr_events)) {
3222
3223
3224
3225 WARN_ON_ONCE(ctx->is_active);
3226 if (ctx->task)
3227 WARN_ON_ONCE(cpuctx->task_ctx);
3228 return;
3229 }
3230
3231 ctx->is_active &= ~event_type;
3232 if (!(ctx->is_active & EVENT_ALL))
3233 ctx->is_active = 0;
3234
3235 if (ctx->task) {
3236 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
3237 if (!ctx->is_active)
3238 cpuctx->task_ctx = NULL;
3239 }
3240
 /*
  * Always update time if it was set; not only when it changes.
  * Otherwise we can 'forget' to update time for any but the last
  * context we sched out. For example:
  *
  *   ctx_sched_out(.event_type = EVENT_FLEXIBLE)
  *   ctx_sched_out(.event_type = EVENT_PINNED)
  *
  * would only update time for the pinned events.
  */
3251 if (is_active & EVENT_TIME) {
3252
3253 update_context_time(ctx);
3254 update_cgrp_time_from_cpuctx(cpuctx);
3255 }
3256
3257 is_active ^= ctx->is_active;
3258
3259 if (!ctx->nr_active || !(is_active & EVENT_ALL))
3260 return;
3261
3262 perf_pmu_disable(ctx->pmu);
3263 if (is_active & EVENT_PINNED) {
3264 list_for_each_entry_safe(event, tmp, &ctx->pinned_active, active_list)
3265 group_sched_out(event, cpuctx, ctx);
3266 }
3267
3268 if (is_active & EVENT_FLEXIBLE) {
3269 list_for_each_entry_safe(event, tmp, &ctx->flexible_active, active_list)
3270 group_sched_out(event, cpuctx, ctx);
3271
3272
3273
3274
3275
3276
3277 ctx->rotate_necessary = 0;
3278 }
3279 perf_pmu_enable(ctx->pmu);
3280}
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290static int context_equiv(struct perf_event_context *ctx1,
3291 struct perf_event_context *ctx2)
3292{
3293 lockdep_assert_held(&ctx1->lock);
3294 lockdep_assert_held(&ctx2->lock);
3295
3296
3297 if (ctx1->pin_count || ctx2->pin_count)
3298 return 0;
3299
3300
3301 if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
3302 return 1;
3303
3304
3305 if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
3306 return 1;
3307
3308
3309
3310
3311
3312 if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
3313 ctx1->parent_gen == ctx2->parent_gen)
3314 return 1;
3315
3316
3317 return 0;
3318}
3319
3320static void __perf_event_sync_stat(struct perf_event *event,
3321 struct perf_event *next_event)
3322{
3323 u64 value;
3324
3325 if (!event->attr.inherit_stat)
3326 return;
3327
3328
3329
3330
3331
3332
3333
3334
3335 if (event->state == PERF_EVENT_STATE_ACTIVE)
3336 event->pmu->read(event);
3337
3338 perf_event_update_time(event);
3339
3340
3341
3342
3343
3344 value = local64_read(&next_event->count);
3345 value = local64_xchg(&event->count, value);
3346 local64_set(&next_event->count, value);
3347
3348 swap(event->total_time_enabled, next_event->total_time_enabled);
3349 swap(event->total_time_running, next_event->total_time_running);
3350
3351
3352
3353
3354 perf_event_update_userpage(event);
3355 perf_event_update_userpage(next_event);
3356}
3357
3358static void perf_event_sync_stat(struct perf_event_context *ctx,
3359 struct perf_event_context *next_ctx)
3360{
3361 struct perf_event *event, *next_event;
3362
3363 if (!ctx->nr_stat)
3364 return;
3365
3366 update_context_time(ctx);
3367
3368 event = list_first_entry(&ctx->event_list,
3369 struct perf_event, event_entry);
3370
3371 next_event = list_first_entry(&next_ctx->event_list,
3372 struct perf_event, event_entry);
3373
3374 while (&event->event_entry != &ctx->event_list &&
3375 &next_event->event_entry != &next_ctx->event_list) {
3376
3377 __perf_event_sync_stat(event, next_event);
3378
3379 event = list_next_entry(event, event_entry);
3380 next_event = list_next_entry(next_event, event_entry);
3381 }
3382}
3383
3384static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
3385 struct task_struct *next)
3386{
3387 struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
3388 struct perf_event_context *next_ctx;
3389 struct perf_event_context *parent, *next_parent;
3390 struct perf_cpu_context *cpuctx;
3391 int do_switch = 1;
3392 struct pmu *pmu;
3393
3394 if (likely(!ctx))
3395 return;
3396
3397 pmu = ctx->pmu;
3398 cpuctx = __get_cpu_context(ctx);
3399 if (!cpuctx->task_ctx)
3400 return;
3401
3402 rcu_read_lock();
3403 next_ctx = next->perf_event_ctxp[ctxn];
3404 if (!next_ctx)
3405 goto unlock;
3406
3407 parent = rcu_dereference(ctx->parent_ctx);
3408 next_parent = rcu_dereference(next_ctx->parent_ctx);
3409
3410
3411 if (!parent && !next_parent)
3412 goto unlock;
3413
3414 if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424 raw_spin_lock(&ctx->lock);
3425 raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
3426 if (context_equiv(ctx, next_ctx)) {
3427
3428 WRITE_ONCE(ctx->task, next);
3429 WRITE_ONCE(next_ctx->task, task);
3430
3431 perf_pmu_disable(pmu);
3432
3433 if (cpuctx->sched_cb_usage && pmu->sched_task)
3434 pmu->sched_task(ctx, false);
3435
3436
3437
3438
3439
3440
3441
3442 if (pmu->swap_task_ctx)
3443 pmu->swap_task_ctx(ctx, next_ctx);
3444 else
3445 swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
3446
3447 perf_pmu_enable(pmu);
3448
3449
3450
3451
3452
3453
3454
3455
3456 RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], next_ctx);
3457 RCU_INIT_POINTER(next->perf_event_ctxp[ctxn], ctx);
3458
3459 do_switch = 0;
3460
3461 perf_event_sync_stat(ctx, next_ctx);
3462 }
3463 raw_spin_unlock(&next_ctx->lock);
3464 raw_spin_unlock(&ctx->lock);
3465 }
3466unlock:
3467 rcu_read_unlock();
3468
3469 if (do_switch) {
3470 raw_spin_lock(&ctx->lock);
3471 perf_pmu_disable(pmu);
3472
3473 if (cpuctx->sched_cb_usage && pmu->sched_task)
3474 pmu->sched_task(ctx, false);
3475 task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
3476
3477 perf_pmu_enable(pmu);
3478 raw_spin_unlock(&ctx->lock);
3479 }
3480}
3481
3482static DEFINE_PER_CPU(struct list_head, sched_cb_list);
3483
3484void perf_sched_cb_dec(struct pmu *pmu)
3485{
3486 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
3487
3488 this_cpu_dec(perf_sched_cb_usages);
3489
3490 if (!--cpuctx->sched_cb_usage)
3491 list_del(&cpuctx->sched_cb_entry);
3492}
3493
3494
3495void perf_sched_cb_inc(struct pmu *pmu)
3496{
3497 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
3498
3499 if (!cpuctx->sched_cb_usage++)
3500 list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
3501
3502 this_cpu_inc(perf_sched_cb_usages);
3503}
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in)
3514{
3515 struct pmu *pmu;
3516
3517 pmu = cpuctx->ctx.pmu;
3518
3519 if (WARN_ON_ONCE(!pmu->sched_task))
3520 return;
3521
3522 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
3523 perf_pmu_disable(pmu);
3524
3525 pmu->sched_task(cpuctx->task_ctx, sched_in);
3526
3527 perf_pmu_enable(pmu);
3528 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
3529}
3530
3531static void perf_pmu_sched_task(struct task_struct *prev,
3532 struct task_struct *next,
3533 bool sched_in)
3534{
3535 struct perf_cpu_context *cpuctx;
3536
3537 if (prev == next)
3538 return;
3539
3540 list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
3541
3542 if (cpuctx->task_ctx)
3543 continue;
3544
3545 __perf_pmu_sched_task(cpuctx, sched_in);
3546 }
3547}
3548
3549static void perf_event_switch(struct task_struct *task,
3550 struct task_struct *next_prev, bool sched_in);
3551
3552#define for_each_task_context_nr(ctxn) \
3553 for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
3554
/*
 * Called from scheduler to remove the events of the current task,
 * with interrupts disabled.
 *
 * We stop each event and update the event value in event->count.
 *
 * This does not protect us against NMI, but disable()
 * sets the disabled bit in the control field of event _before_
 * accessing the event control register. If a NMI hits, then it will
 * not restart the event.
 */
3566void __perf_event_task_sched_out(struct task_struct *task,
3567 struct task_struct *next)
3568{
3569 int ctxn;
3570
3571 if (__this_cpu_read(perf_sched_cb_usages))
3572 perf_pmu_sched_task(task, next, false);
3573
3574 if (atomic_read(&nr_switch_events))
3575 perf_event_switch(task, next, false);
3576
3577 for_each_task_context_nr(ctxn)
3578 perf_event_context_sched_out(task, ctxn, next);
3579
3580
3581
3582
3583
3584
3585 if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
3586 perf_cgroup_sched_out(task, next);
3587}
3588
3589
3590
3591
3592static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
3593 enum event_type_t event_type)
3594{
3595 ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
3596}
3597
3598static bool perf_less_group_idx(const void *l, const void *r)
3599{
3600 const struct perf_event *le = *(const struct perf_event **)l;
3601 const struct perf_event *re = *(const struct perf_event **)r;
3602
3603 return le->group_index < re->group_index;
3604}
3605
3606static void swap_ptr(void *l, void *r)
3607{
3608 void **lp = l, **rp = r;
3609
3610 swap(*lp, *rp);
3611}
3612
3613static const struct min_heap_callbacks perf_min_heap = {
3614 .elem_size = sizeof(struct perf_event *),
3615 .less = perf_less_group_idx,
3616 .swp = swap_ptr,
3617};
3618
3619static void __heap_add(struct min_heap *heap, struct perf_event *event)
3620{
3621 struct perf_event **itrs = heap->data;
3622
3623 if (event) {
3624 itrs[heap->nr] = event;
3625 heap->nr++;
3626 }
3627}
3628
3629static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
3630 struct perf_event_groups *groups, int cpu,
3631 int (*func)(struct perf_event *, void *),
3632 void *data)
3633{
3634#ifdef CONFIG_CGROUP_PERF
3635 struct cgroup_subsys_state *css = NULL;
3636#endif
3637
3638 struct perf_event *itrs[2];
3639 struct min_heap event_heap;
3640 struct perf_event **evt;
3641 int ret;
3642
3643 if (cpuctx) {
3644 event_heap = (struct min_heap){
3645 .data = cpuctx->heap,
3646 .nr = 0,
3647 .size = cpuctx->heap_size,
3648 };
3649
3650 lockdep_assert_held(&cpuctx->ctx.lock);
3651
3652#ifdef CONFIG_CGROUP_PERF
3653 if (cpuctx->cgrp)
3654 css = &cpuctx->cgrp->css;
3655#endif
3656 } else {
3657 event_heap = (struct min_heap){
3658 .data = itrs,
3659 .nr = 0,
3660 .size = ARRAY_SIZE(itrs),
3661 };
3662
3663 __heap_add(&event_heap, perf_event_groups_first(groups, -1, NULL));
3664 }
3665 evt = event_heap.data;
3666
3667 __heap_add(&event_heap, perf_event_groups_first(groups, cpu, NULL));
3668
3669#ifdef CONFIG_CGROUP_PERF
3670 for (; css; css = css->parent)
3671 __heap_add(&event_heap, perf_event_groups_first(groups, cpu, css->cgroup));
3672#endif
3673
3674 min_heapify_all(&event_heap, &perf_min_heap);
3675
3676 while (event_heap.nr) {
3677 ret = func(*evt, data);
3678 if (ret)
3679 return ret;
3680
3681 *evt = perf_event_groups_next(*evt);
3682 if (*evt)
3683 min_heapify(&event_heap, 0, &perf_min_heap);
3684 else
3685 min_heap_pop(&event_heap, &perf_min_heap);
3686 }
3687
3688 return 0;
3689}
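/*
 * Illustrative sketch (not part of the original file): visit_groups_merge()
 * above is a k-way merge driven by the min_heap API, with up to one iterator
 * per {any-CPU, this-CPU, per-cgroup} group tree, visiting events in
 * increasing group_index order. The same pattern in miniature, assuming only
 * <linux/min_heap.h> and the kernel's swap() helper (names are hypothetical):
 *
 *	struct cursor { const int *pos, *end; };
 *
 *	static bool cursor_less(const void *l, const void *r)
 *	{
 *		const struct cursor *a = *(const struct cursor **)l;
 *		const struct cursor *b = *(const struct cursor **)r;
 *
 *		return *a->pos < *b->pos;
 *	}
 *
 *	static void cursor_swp(void *l, void *r)
 *	{
 *		swap(*(struct cursor **)l, *(struct cursor **)r);
 *	}
 *
 *	static const struct min_heap_callbacks cursor_heap = {
 *		.elem_size = sizeof(struct cursor *),
 *		.less = cursor_less,
 *		.swp = cursor_swp,
 *	};
 *
 *	// merge @nr sorted int streams, emitting values in ascending order
 *	static void merge(struct cursor **itrs, int nr, void (*emit)(int))
 *	{
 *		struct min_heap heap = { .data = itrs, .nr = nr, .size = nr };
 *
 *		min_heapify_all(&heap, &cursor_heap);
 *		while (heap.nr) {
 *			struct cursor *c = itrs[0];	// current minimum
 *
 *			emit(*c->pos++);
 *			if (c->pos < c->end)
 *				min_heapify(&heap, 0, &cursor_heap);
 *			else
 *				min_heap_pop(&heap, &cursor_heap);
 *		}
 *	}
 */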
3690
3691static int merge_sched_in(struct perf_event *event, void *data)
3692{
3693 struct perf_event_context *ctx = event->ctx;
3694 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
3695 int *can_add_hw = data;
3696
3697 if (event->state <= PERF_EVENT_STATE_OFF)
3698 return 0;
3699
3700 if (!event_filter_match(event))
3701 return 0;
3702
3703 if (group_can_go_on(event, cpuctx, *can_add_hw)) {
3704 if (!group_sched_in(event, cpuctx, ctx))
3705 list_add_tail(&event->active_list, get_event_list(event));
3706 }
3707
3708 if (event->state == PERF_EVENT_STATE_INACTIVE) {
3709 if (event->attr.pinned) {
3710 perf_cgroup_event_disable(event, ctx);
3711 perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
3712 }
3713
3714 *can_add_hw = 0;
3715 ctx->rotate_necessary = 1;
3716 perf_mux_hrtimer_restart(cpuctx);
3717 }
3718
3719 return 0;
3720}
3721
3722static void
3723ctx_pinned_sched_in(struct perf_event_context *ctx,
3724 struct perf_cpu_context *cpuctx)
3725{
3726 int can_add_hw = 1;
3727
3728 if (ctx != &cpuctx->ctx)
3729 cpuctx = NULL;
3730
3731 visit_groups_merge(cpuctx, &ctx->pinned_groups,
3732 smp_processor_id(),
3733 merge_sched_in, &can_add_hw);
3734}
3735
3736static void
3737ctx_flexible_sched_in(struct perf_event_context *ctx,
3738 struct perf_cpu_context *cpuctx)
3739{
3740 int can_add_hw = 1;
3741
3742 if (ctx != &cpuctx->ctx)
3743 cpuctx = NULL;
3744
3745 visit_groups_merge(cpuctx, &ctx->flexible_groups,
3746 smp_processor_id(),
3747 merge_sched_in, &can_add_hw);
3748}
3749
3750static void
3751ctx_sched_in(struct perf_event_context *ctx,
3752 struct perf_cpu_context *cpuctx,
3753 enum event_type_t event_type,
3754 struct task_struct *task)
3755{
3756 int is_active = ctx->is_active;
3757 u64 now;
3758
3759 lockdep_assert_held(&ctx->lock);
3760
3761 if (likely(!ctx->nr_events))
3762 return;
3763
3764 ctx->is_active |= (event_type | EVENT_TIME);
3765 if (ctx->task) {
3766 if (!is_active)
3767 cpuctx->task_ctx = ctx;
3768 else
3769 WARN_ON_ONCE(cpuctx->task_ctx != ctx);
3770 }
3771
3772 is_active ^= ctx->is_active;
3773
3774 if (is_active & EVENT_TIME) {
3775
3776 now = perf_clock();
3777 ctx->timestamp = now;
3778 perf_cgroup_set_timestamp(task, ctx);
3779 }
3780
3781
3782
3783
3784
3785 if (is_active & EVENT_PINNED)
3786 ctx_pinned_sched_in(ctx, cpuctx);
3787
3788
3789 if (is_active & EVENT_FLEXIBLE)
3790 ctx_flexible_sched_in(ctx, cpuctx);
3791}
3792
3793static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
3794 enum event_type_t event_type,
3795 struct task_struct *task)
3796{
3797 struct perf_event_context *ctx = &cpuctx->ctx;
3798
3799 ctx_sched_in(ctx, cpuctx, event_type, task);
3800}
3801
3802static void perf_event_context_sched_in(struct perf_event_context *ctx,
3803 struct task_struct *task)
3804{
3805 struct perf_cpu_context *cpuctx;
3806 struct pmu *pmu;
3807
3808 cpuctx = __get_cpu_context(ctx);
3809
3810
3811
3812
3813
3814 pmu = ctx->pmu = cpuctx->ctx.pmu;
3815
3816 if (cpuctx->task_ctx == ctx) {
3817 if (cpuctx->sched_cb_usage)
3818 __perf_pmu_sched_task(cpuctx, true);
3819 return;
3820 }
3821
3822 perf_ctx_lock(cpuctx, ctx);
3823
3824
3825
3826
3827 if (!ctx->nr_events)
3828 goto unlock;
3829
3830 perf_pmu_disable(pmu);
3831
3832
3833
3834
3835
3836
3837
3838
3839 if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
3840 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
3841 perf_event_sched_in(cpuctx, ctx, task);
3842
3843 if (cpuctx->sched_cb_usage && pmu->sched_task)
3844 pmu->sched_task(cpuctx->task_ctx, true);
3845
3846 perf_pmu_enable(pmu);
3847
3848unlock:
3849 perf_ctx_unlock(cpuctx, ctx);
3850}
3851
/*
 * Called from scheduler to add the events of the current task
 * with interrupts disabled.
 *
 * We restore the event value and then enable it.
 *
 * This does not protect us against NMI, but enable()
 * sets the enabled bit in the control field of event _before_
 * accessing the event control register. If a NMI hits, then it will
 * keep the event running.
 */
3863void __perf_event_task_sched_in(struct task_struct *prev,
3864 struct task_struct *task)
3865{
3866 struct perf_event_context *ctx;
3867 int ctxn;
3868
3869
3870
3871
3872
3873
3874
3875
3876 if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
3877 perf_cgroup_sched_in(prev, task);
3878
3879 for_each_task_context_nr(ctxn) {
3880 ctx = task->perf_event_ctxp[ctxn];
3881 if (likely(!ctx))
3882 continue;
3883
3884 perf_event_context_sched_in(ctx, task);
3885 }
3886
3887 if (atomic_read(&nr_switch_events))
3888 perf_event_switch(task, prev, true);
3889
3890 if (__this_cpu_read(perf_sched_cb_usages))
3891 perf_pmu_sched_task(prev, task, true);
3892}
3893
3894static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
3895{
3896 u64 frequency = event->attr.sample_freq;
3897 u64 sec = NSEC_PER_SEC;
3898 u64 divisor, dividend;
3899
3900 int count_fls, nsec_fls, frequency_fls, sec_fls;
3901
3902 count_fls = fls64(count);
3903 nsec_fls = fls64(nsec);
3904 frequency_fls = fls64(frequency);
3905 sec_fls = 30;
3906
 /*
  * We got @count in @nsec, with a target of sample_freq HZ
  * the target period becomes:
  *
  *             @count * 10^9
  * period = -------------------
  *          @nsec * sample_freq
  */

 /*
  * Reduce accuracy by one bit such that @a and @b converge
  * to a similar magnitude.
  */
3920
3921#define REDUCE_FLS(a, b) \
3922do { \
3923 if (a##_fls > b##_fls) { \
3924 a >>= 1; \
3925 a##_fls--; \
3926 } else { \
3927 b >>= 1; \
3928 b##_fls--; \
3929 } \
3930} while (0)
3931
 /*
  * Reduce accuracy until either term fits in a u64, then proceed with
  * the other, so that finally we can do a u64/u64 division.
  */
3936 while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
3937 REDUCE_FLS(nsec, frequency);
3938 REDUCE_FLS(sec, count);
3939 }
3940
3941 if (count_fls + sec_fls > 64) {
3942 divisor = nsec * frequency;
3943
3944 while (count_fls + sec_fls > 64) {
3945 REDUCE_FLS(count, sec);
3946 divisor >>= 1;
3947 }
3948
3949 dividend = count * sec;
3950 } else {
3951 dividend = count * sec;
3952
3953 while (nsec_fls + frequency_fls > 64) {
3954 REDUCE_FLS(nsec, frequency);
3955 dividend >>= 1;
3956 }
3957
3958 divisor = nsec * frequency;
3959 }
3960
3961 if (!divisor)
3962 return dividend;
3963
3964 return div64_u64(dividend, divisor);
3965}
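/*
 * Worked example (illustrative, not part of the original file): with
 * attr.sample_freq = 1000 Hz, suppose the event counted 2,000,000 events
 * during the last 4,000,000 ns. The target period is then
 *
 *	period = count * NSEC_PER_SEC / (nsec * sample_freq)
 *	       = 2,000,000 * 10^9 / (4,000,000 * 1,000)
 *	       = 500,000 events per sample,
 *
 * i.e. roughly one sample every millisecond at the observed event rate. The
 * REDUCE_FLS() dance above only drops low-order bits when the intermediate
 * products would overflow 64 bits; for numbers this small the division is
 * exact.
 */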
3966
3967static DEFINE_PER_CPU(int, perf_throttled_count);
3968static DEFINE_PER_CPU(u64, perf_throttled_seq);
3969
3970static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable)
3971{
3972 struct hw_perf_event *hwc = &event->hw;
3973 s64 period, sample_period;
3974 s64 delta;
3975
3976 period = perf_calculate_period(event, nsec, count);
3977
3978 delta = (s64)(period - hwc->sample_period);
3979 delta = (delta + 7) / 8;
3980
3981 sample_period = hwc->sample_period + delta;
3982
3983 if (!sample_period)
3984 sample_period = 1;
3985
3986 hwc->sample_period = sample_period;
3987
3988 if (local64_read(&hwc->period_left) > 8*sample_period) {
3989 if (disable)
3990 event->pmu->stop(event, PERF_EF_UPDATE);
3991
3992 local64_set(&hwc->period_left, 0);
3993
3994 if (disable)
3995 event->pmu->start(event, PERF_EF_RELOAD);
3996 }
3997}
3998
3999
4000
4001
4002
4003
4004static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
4005 int needs_unthr)
4006{
4007 struct perf_event *event;
4008 struct hw_perf_event *hwc;
4009 u64 now, period = TICK_NSEC;
4010 s64 delta;
4011
 /*
  * only need to iterate over all events iff:
  * - context have events in frequency mode (needs freq adjust)
  * - there are events to unthrottle on this cpu
  */
4017 if (!(ctx->nr_freq || needs_unthr))
4018 return;
4019
4020 raw_spin_lock(&ctx->lock);
4021 perf_pmu_disable(ctx->pmu);
4022
4023 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
4024 if (event->state != PERF_EVENT_STATE_ACTIVE)
4025 continue;
4026
4027 if (!event_filter_match(event))
4028 continue;
4029
4030 perf_pmu_disable(event->pmu);
4031
4032 hwc = &event->hw;
4033
4034 if (hwc->interrupts == MAX_INTERRUPTS) {
4035 hwc->interrupts = 0;
4036 perf_log_throttle(event, 1);
4037 event->pmu->start(event, 0);
4038 }
4039
4040 if (!event->attr.freq || !event->attr.sample_freq)
4041 goto next;
4042
4043
4044
4045
4046 event->pmu->stop(event, PERF_EF_UPDATE);
4047
4048 now = local64_read(&event->count);
4049 delta = now - hwc->freq_count_stamp;
4050 hwc->freq_count_stamp = now;
4051
4052
4053
4054
4055
4056
4057
4058
4059 if (delta > 0)
4060 perf_adjust_period(event, period, delta, false);
4061
4062 event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
4063 next:
4064 perf_pmu_enable(event->pmu);
4065 }
4066
4067 perf_pmu_enable(ctx->pmu);
4068 raw_spin_unlock(&ctx->lock);
4069}
4070
4071
4072
4073
4074static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event)
4075{
4076
4077
4078
4079
4080 if (ctx->rotate_disable)
4081 return;
4082
4083 perf_event_groups_delete(&ctx->flexible_groups, event);
4084 perf_event_groups_insert(&ctx->flexible_groups, event);
4085}
4086
4087
4088static inline struct perf_event *
4089ctx_event_to_rotate(struct perf_event_context *ctx)
4090{
4091 struct perf_event *event;
4092
4093
4094 event = list_first_entry_or_null(&ctx->flexible_active,
4095 struct perf_event, active_list);
4096
4097
4098 if (!event) {
4099 event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree),
4100 typeof(*event), group_node);
4101 }
4102
4103
4104
4105
4106
4107 ctx->rotate_necessary = 0;
4108
4109 return event;
4110}
4111
4112static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
4113{
4114 struct perf_event *cpu_event = NULL, *task_event = NULL;
4115 struct perf_event_context *task_ctx = NULL;
4116 int cpu_rotate, task_rotate;
4117
4118
4119
4120
4121
4122
4123 cpu_rotate = cpuctx->ctx.rotate_necessary;
4124 task_ctx = cpuctx->task_ctx;
4125 task_rotate = task_ctx ? task_ctx->rotate_necessary : 0;
4126
4127 if (!(cpu_rotate || task_rotate))
4128 return false;
4129
4130 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
4131 perf_pmu_disable(cpuctx->ctx.pmu);
4132
4133 if (task_rotate)
4134 task_event = ctx_event_to_rotate(task_ctx);
4135 if (cpu_rotate)
4136 cpu_event = ctx_event_to_rotate(&cpuctx->ctx);
4137
4138
4139
4140
4141
4142 if (task_event || (task_ctx && cpu_event))
4143 ctx_sched_out(task_ctx, cpuctx, EVENT_FLEXIBLE);
4144 if (cpu_event)
4145 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
4146
4147 if (task_event)
4148 rotate_ctx(task_ctx, task_event);
4149 if (cpu_event)
4150 rotate_ctx(&cpuctx->ctx, cpu_event);
4151
4152 perf_event_sched_in(cpuctx, task_ctx, current);
4153
4154 perf_pmu_enable(cpuctx->ctx.pmu);
4155 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
4156
4157 return true;
4158}
4159
4160void perf_event_task_tick(void)
4161{
4162 struct list_head *head = this_cpu_ptr(&active_ctx_list);
4163 struct perf_event_context *ctx, *tmp;
4164 int throttled;
4165
4166 lockdep_assert_irqs_disabled();
4167
4168 __this_cpu_inc(perf_throttled_seq);
4169 throttled = __this_cpu_xchg(perf_throttled_count, 0);
4170 tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
4171
4172 list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
4173 perf_adjust_freq_unthr_context(ctx, throttled);
4174}
4175
4176static int event_enable_on_exec(struct perf_event *event,
4177 struct perf_event_context *ctx)
4178{
4179 if (!event->attr.enable_on_exec)
4180 return 0;
4181
4182 event->attr.enable_on_exec = 0;
4183 if (event->state >= PERF_EVENT_STATE_INACTIVE)
4184 return 0;
4185
4186 perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
4187
4188 return 1;
4189}
4190
4191
4192
4193
4194
4195static void perf_event_enable_on_exec(int ctxn)
4196{
4197 struct perf_event_context *ctx, *clone_ctx = NULL;
4198 enum event_type_t event_type = 0;
4199 struct perf_cpu_context *cpuctx;
4200 struct perf_event *event;
4201 unsigned long flags;
4202 int enabled = 0;
4203
4204 local_irq_save(flags);
4205 ctx = current->perf_event_ctxp[ctxn];
4206 if (!ctx || !ctx->nr_events)
4207 goto out;
4208
4209 cpuctx = __get_cpu_context(ctx);
4210 perf_ctx_lock(cpuctx, ctx);
4211 ctx_sched_out(ctx, cpuctx, EVENT_TIME);
4212 list_for_each_entry(event, &ctx->event_list, event_entry) {
4213 enabled |= event_enable_on_exec(event, ctx);
4214 event_type |= get_event_type(event);
4215 }
4216
4217
4218
4219
4220 if (enabled) {
4221 clone_ctx = unclone_ctx(ctx);
4222 ctx_resched(cpuctx, ctx, event_type);
4223 } else {
4224 ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
4225 }
4226 perf_ctx_unlock(cpuctx, ctx);
4227
4228out:
4229 local_irq_restore(flags);
4230
4231 if (clone_ctx)
4232 put_ctx(clone_ctx);
4233}
4234
4235struct perf_read_data {
4236 struct perf_event *event;
4237 bool group;
4238 int ret;
4239};
4240
4241static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
4242{
4243 u16 local_pkg, event_pkg;
4244
4245 if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
4246 int local_cpu = smp_processor_id();
4247
4248 event_pkg = topology_physical_package_id(event_cpu);
4249 local_pkg = topology_physical_package_id(local_cpu);
4250
4251 if (event_pkg == local_pkg)
4252 return local_cpu;
4253 }
4254
4255 return event_cpu;
4256}
4257
4258
4259
4260
4261static void __perf_event_read(void *info)
4262{
4263 struct perf_read_data *data = info;
4264 struct perf_event *sub, *event = data->event;
4265 struct perf_event_context *ctx = event->ctx;
4266 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
4267 struct pmu *pmu = event->pmu;
4268
4269
4270
4271
4272
4273
4274
4275
4276 if (ctx->task && cpuctx->task_ctx != ctx)
4277 return;
4278
4279 raw_spin_lock(&ctx->lock);
4280 if (ctx->is_active & EVENT_TIME) {
4281 update_context_time(ctx);
4282 update_cgrp_time_from_event(event);
4283 }
4284
4285 perf_event_update_time(event);
4286 if (data->group)
4287 perf_event_update_sibling_time(event);
4288
4289 if (event->state != PERF_EVENT_STATE_ACTIVE)
4290 goto unlock;
4291
4292 if (!data->group) {
4293 pmu->read(event);
4294 data->ret = 0;
4295 goto unlock;
4296 }
4297
4298 pmu->start_txn(pmu, PERF_PMU_TXN_READ);
4299
4300 pmu->read(event);
4301
4302 for_each_sibling_event(sub, event) {
4303 if (sub->state == PERF_EVENT_STATE_ACTIVE) {
4304
4305
4306
4307
4308 sub->pmu->read(sub);
4309 }
4310 }
4311
4312 data->ret = pmu->commit_txn(pmu);
4313
4314unlock:
4315 raw_spin_unlock(&ctx->lock);
4316}
4317
4318static inline u64 perf_event_count(struct perf_event *event)
4319{
4320 return local64_read(&event->count) + atomic64_read(&event->child_count);
4321}
4322
/*
 * NMI-safe method to read a local event, that is an event that
 * is:
 *   - either for the current task, or for this CPU
 *   - does not have inherit set, for inherited task events
 *     will not be local and we cannot read them atomically
 *
 * The caller provides the value and, optionally, enabled/running times.
 */
4331int perf_event_read_local(struct perf_event *event, u64 *value,
4332 u64 *enabled, u64 *running)
4333{
4334 unsigned long flags;
4335 int ret = 0;
4336
 /*
  * Disabling interrupts avoids all counter scheduling (context
  * switches, timer based rotation and IPIs).
  */
4341 local_irq_save(flags);
4342
 /*
  * It must not be an event with inherit set, we cannot read
  * all child counters from atomic context.
  */
4347 if (event->attr.inherit) {
4348 ret = -EOPNOTSUPP;
4349 goto out;
4350 }
4351
 /* If this is a per-task event, it must be for current */
4353 if ((event->attach_state & PERF_ATTACH_TASK) &&
4354 event->hw.target != current) {
4355 ret = -EINVAL;
4356 goto out;
4357 }
4358
 /* If this is a per-CPU event, it must be for this CPU */
4360 if (!(event->attach_state & PERF_ATTACH_TASK) &&
4361 event->cpu != smp_processor_id()) {
4362 ret = -EINVAL;
4363 goto out;
4364 }
4365
 /* If this is a pinned event it must be running on this CPU */
4367 if (event->attr.pinned && event->oncpu != smp_processor_id()) {
4368 ret = -EBUSY;
4369 goto out;
4370 }
4377
 /*
  * If the event is currently on this CPU, its either a per-task event,
  * or local to this CPU. Furthermore it means its ACTIVE (otherwise
  * oncpu == -1).
  */
4383 if (event->oncpu == smp_processor_id())
4384 event->pmu->read(event);
4385
4386 *value = local64_read(&event->count);
4387 if (enabled || running) {
4388 u64 now = event->shadow_ctx_time + perf_clock();
4389 u64 __enabled, __running;
4390
4391 __perf_update_times(event, now, &__enabled, &__running);
4392 if (enabled)
4393 *enabled = __enabled;
4394 if (running)
4395 *running = __running;
4396 }
4397out:
4398 local_irq_restore(flags);
4399
4400 return ret;
4401}
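/*
 * Illustrative sketch (not part of the original file): perf_event_read_local()
 * is intended for NMI/IRQ-safe self-monitoring, e.g. from a callback running
 * on the CPU that owns the event. The helper name is hypothetical; the event
 * is assumed to have been created for the current task or this CPU and
 * without attr.inherit.
 *
 *	static u64 example_snapshot(struct perf_event *event)
 *	{
 *		u64 value, enabled, running;
 *
 *		if (perf_event_read_local(event, &value, &enabled, &running))
 *			return 0;	// wrong CPU/task or inherited event
 *
 *		// scale for multiplexing, as user space would
 *		// (ignoring 64-bit multiply overflow for the sketch)
 *		if (running && running < enabled)
 *			value = div64_u64(value * enabled, running);
 *
 *		return value;
 *	}
 */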
4402
4403static int perf_event_read(struct perf_event *event, bool group)
4404{
4405 enum perf_event_state state = READ_ONCE(event->state);
4406 int event_cpu, ret = 0;
4407
4408
4409
4410
4411
4412again:
4413 if (state == PERF_EVENT_STATE_ACTIVE) {
4414 struct perf_read_data data;
4415
 /*
  * Orders the ->state and ->oncpu loads such that if we see
  * ACTIVE we must also see the right ->oncpu.
  *
  * Matches the smp_wmb() from event_sched_in().
  */
4422 smp_rmb();
4423
4424 event_cpu = READ_ONCE(event->oncpu);
4425 if ((unsigned)event_cpu >= nr_cpu_ids)
4426 return 0;
4427
4428 data = (struct perf_read_data){
4429 .event = event,
4430 .group = group,
4431 .ret = 0,
4432 };
4433
4434 preempt_disable();
4435 event_cpu = __perf_event_read_cpu(event, event_cpu);
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447 (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
4448 preempt_enable();
4449 ret = data.ret;
4450
4451 } else if (state == PERF_EVENT_STATE_INACTIVE) {
4452 struct perf_event_context *ctx = event->ctx;
4453 unsigned long flags;
4454
4455 raw_spin_lock_irqsave(&ctx->lock, flags);
4456 state = event->state;
4457 if (state != PERF_EVENT_STATE_INACTIVE) {
4458 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4459 goto again;
4460 }
4461
4462
4463
4464
4465
4466 if (ctx->is_active & EVENT_TIME) {
4467 update_context_time(ctx);
4468 update_cgrp_time_from_event(event);
4469 }
4470
4471 perf_event_update_time(event);
4472 if (group)
4473 perf_event_update_sibling_time(event);
4474 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4475 }
4476
4477 return ret;
4478}
4479
4480
4481
4482
4483static void __perf_event_init_context(struct perf_event_context *ctx)
4484{
4485 raw_spin_lock_init(&ctx->lock);
4486 mutex_init(&ctx->mutex);
4487 INIT_LIST_HEAD(&ctx->active_ctx_list);
4488 perf_event_groups_init(&ctx->pinned_groups);
4489 perf_event_groups_init(&ctx->flexible_groups);
4490 INIT_LIST_HEAD(&ctx->event_list);
4491 INIT_LIST_HEAD(&ctx->pinned_active);
4492 INIT_LIST_HEAD(&ctx->flexible_active);
4493 refcount_set(&ctx->refcount, 1);
4494}
4495
4496static struct perf_event_context *
4497alloc_perf_context(struct pmu *pmu, struct task_struct *task)
4498{
4499 struct perf_event_context *ctx;
4500
4501 ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
4502 if (!ctx)
4503 return NULL;
4504
4505 __perf_event_init_context(ctx);
4506 if (task)
4507 ctx->task = get_task_struct(task);
4508 ctx->pmu = pmu;
4509
4510 return ctx;
4511}
4512
4513static struct task_struct *
4514find_lively_task_by_vpid(pid_t vpid)
4515{
4516 struct task_struct *task;
4517
4518 rcu_read_lock();
4519 if (!vpid)
4520 task = current;
4521 else
4522 task = find_task_by_vpid(vpid);
4523 if (task)
4524 get_task_struct(task);
4525 rcu_read_unlock();
4526
4527 if (!task)
4528 return ERR_PTR(-ESRCH);
4529
4530 return task;
4531}
4532
4533
4534
4535
4536static struct perf_event_context *
4537find_get_context(struct pmu *pmu, struct task_struct *task,
4538 struct perf_event *event)
4539{
4540 struct perf_event_context *ctx, *clone_ctx = NULL;
4541 struct perf_cpu_context *cpuctx;
4542 void *task_ctx_data = NULL;
4543 unsigned long flags;
4544 int ctxn, err;
4545 int cpu = event->cpu;
4546
4547 if (!task) {
4548
4549 err = perf_allow_cpu(&event->attr);
4550 if (err)
4551 return ERR_PTR(err);
4552
4553 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
4554 ctx = &cpuctx->ctx;
4555 get_ctx(ctx);
4556 ++ctx->pin_count;
4557
4558 return ctx;
4559 }
4560
4561 err = -EINVAL;
4562 ctxn = pmu->task_ctx_nr;
4563 if (ctxn < 0)
4564 goto errout;
4565
4566 if (event->attach_state & PERF_ATTACH_TASK_DATA) {
4567 task_ctx_data = alloc_task_ctx_data(pmu);
4568 if (!task_ctx_data) {
4569 err = -ENOMEM;
4570 goto errout;
4571 }
4572 }
4573
4574retry:
4575 ctx = perf_lock_task_context(task, ctxn, &flags);
4576 if (ctx) {
4577 clone_ctx = unclone_ctx(ctx);
4578 ++ctx->pin_count;
4579
4580 if (task_ctx_data && !ctx->task_ctx_data) {
4581 ctx->task_ctx_data = task_ctx_data;
4582 task_ctx_data = NULL;
4583 }
4584 raw_spin_unlock_irqrestore(&ctx->lock, flags);
4585
4586 if (clone_ctx)
4587 put_ctx(clone_ctx);
4588 } else {
4589 ctx = alloc_perf_context(pmu, task);
4590 err = -ENOMEM;
4591 if (!ctx)
4592 goto errout;
4593
4594 if (task_ctx_data) {
4595 ctx->task_ctx_data = task_ctx_data;
4596 task_ctx_data = NULL;
4597 }
4598
4599 err = 0;
4600 mutex_lock(&task->perf_event_mutex);
4601
4602
4603
4604
4605 if (task->flags & PF_EXITING)
4606 err = -ESRCH;
4607 else if (task->perf_event_ctxp[ctxn])
4608 err = -EAGAIN;
4609 else {
4610 get_ctx(ctx);
4611 ++ctx->pin_count;
4612 rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
4613 }
4614 mutex_unlock(&task->perf_event_mutex);
4615
4616 if (unlikely(err)) {
4617 put_ctx(ctx);
4618
4619 if (err == -EAGAIN)
4620 goto retry;
4621 goto errout;
4622 }
4623 }
4624
4625 free_task_ctx_data(pmu, task_ctx_data);
4626 return ctx;
4627
4628errout:
4629 free_task_ctx_data(pmu, task_ctx_data);
4630 return ERR_PTR(err);
4631}
4632
4633static void perf_event_free_filter(struct perf_event *event);
4634static void perf_event_free_bpf_prog(struct perf_event *event);
4635
4636static void free_event_rcu(struct rcu_head *head)
4637{
4638 struct perf_event *event;
4639
4640 event = container_of(head, struct perf_event, rcu_head);
4641 if (event->ns)
4642 put_pid_ns(event->ns);
4643 perf_event_free_filter(event);
4644 kfree(event);
4645}
4646
4647static void ring_buffer_attach(struct perf_event *event,
4648 struct perf_buffer *rb);
4649
4650static void detach_sb_event(struct perf_event *event)
4651{
4652 struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
4653
4654 raw_spin_lock(&pel->lock);
4655 list_del_rcu(&event->sb_list);
4656 raw_spin_unlock(&pel->lock);
4657}
4658
4659static bool is_sb_event(struct perf_event *event)
4660{
4661 struct perf_event_attr *attr = &event->attr;
4662
4663 if (event->parent)
4664 return false;
4665
4666 if (event->attach_state & PERF_ATTACH_TASK)
4667 return false;
4668
4669 if (attr->mmap || attr->mmap_data || attr->mmap2 ||
4670 attr->comm || attr->comm_exec ||
4671 attr->task || attr->ksymbol ||
4672 attr->context_switch || attr->text_poke ||
4673 attr->bpf_event)
4674 return true;
4675 return false;
4676}
4677
4678static void unaccount_pmu_sb_event(struct perf_event *event)
4679{
4680 if (is_sb_event(event))
4681 detach_sb_event(event);
4682}
4683
4684static void unaccount_event_cpu(struct perf_event *event, int cpu)
4685{
4686 if (event->parent)
4687 return;
4688
4689 if (is_cgroup_event(event))
4690 atomic_dec(&per_cpu(perf_cgroup_events, cpu));
4691}
4692
4693#ifdef CONFIG_NO_HZ_FULL
4694static DEFINE_SPINLOCK(nr_freq_lock);
4695#endif
4696
4697static void unaccount_freq_event_nohz(void)
4698{
4699#ifdef CONFIG_NO_HZ_FULL
4700 spin_lock(&nr_freq_lock);
4701 if (atomic_dec_and_test(&nr_freq_events))
4702 tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
4703 spin_unlock(&nr_freq_lock);
4704#endif
4705}
4706
4707static void unaccount_freq_event(void)
4708{
4709 if (tick_nohz_full_enabled())
4710 unaccount_freq_event_nohz();
4711 else
4712 atomic_dec(&nr_freq_events);
4713}
4714
4715static void unaccount_event(struct perf_event *event)
4716{
4717 bool dec = false;
4718
4719 if (event->parent)
4720 return;
4721
4722 if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
4723 dec = true;
4724 if (event->attr.mmap || event->attr.mmap_data)
4725 atomic_dec(&nr_mmap_events);
4726 if (event->attr.build_id)
4727 atomic_dec(&nr_build_id_events);
4728 if (event->attr.comm)
4729 atomic_dec(&nr_comm_events);
4730 if (event->attr.namespaces)
4731 atomic_dec(&nr_namespaces_events);
4732 if (event->attr.cgroup)
4733 atomic_dec(&nr_cgroup_events);
4734 if (event->attr.task)
4735 atomic_dec(&nr_task_events);
4736 if (event->attr.freq)
4737 unaccount_freq_event();
4738 if (event->attr.context_switch) {
4739 dec = true;
4740 atomic_dec(&nr_switch_events);
4741 }
4742 if (is_cgroup_event(event))
4743 dec = true;
4744 if (has_branch_stack(event))
4745 dec = true;
4746 if (event->attr.ksymbol)
4747 atomic_dec(&nr_ksymbol_events);
4748 if (event->attr.bpf_event)
4749 atomic_dec(&nr_bpf_events);
4750 if (event->attr.text_poke)
4751 atomic_dec(&nr_text_poke_events);
4752
4753 if (dec) {
4754 if (!atomic_add_unless(&perf_sched_count, -1, 1))
4755 schedule_delayed_work(&perf_sched_work, HZ);
4756 }
4757
4758 unaccount_event_cpu(event, event->cpu);
4759
4760 unaccount_pmu_sb_event(event);
4761}
4762
4763static void perf_sched_delayed(struct work_struct *work)
4764{
4765 mutex_lock(&perf_sched_mutex);
4766 if (atomic_dec_and_test(&perf_sched_count))
4767 static_branch_disable(&perf_sched_events);
4768 mutex_unlock(&perf_sched_mutex);
4769}
4770
/*
 * The following implement mutual exclusion of events on "exclusive" pmus
 * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
 * at a time, so we disallow creating events that might conflict, namely:
 *
 *  1) cpu-wide events in the presence of per-task events,
 *  2) per-task events in the presence of cpu-wide events,
 *  3) two matching events on the same perf_event_context.
 *
 * The former two cases are handled in the allocation path (perf_event_alloc(),
 * _free_event()), the latter -- before the first perf_install_in_context().
 */
4783static int exclusive_event_init(struct perf_event *event)
4784{
4785 struct pmu *pmu = event->pmu;
4786
4787 if (!is_exclusive_pmu(pmu))
4788 return 0;
4789
 /*
  * Prevent co-existence of per-task and cpu-wide events on the
  * same exclusive pmu.
  *
  * Negative pmu::exclusive_cnt means there are cpu-wide
  * events on this "exclusive" pmu, positive means there are
  * per-task events.
  *
  * Since this is called in perf_event_alloc() path, event::ctx
  * doesn't exist yet; it is, however, safe to use PERF_ATTACH_TASK
  * to mean "per-task event", because unlike other attach states it
  * never gets set in a detach path.
  */
4803 if (event->attach_state & PERF_ATTACH_TASK) {
4804 if (!atomic_inc_unless_negative(&pmu->exclusive_cnt))
4805 return -EBUSY;
4806 } else {
4807 if (!atomic_dec_unless_positive(&pmu->exclusive_cnt))
4808 return -EBUSY;
4809 }
4810
4811 return 0;
4812}
4813
4814static void exclusive_event_destroy(struct perf_event *event)
4815{
4816 struct pmu *pmu = event->pmu;
4817
4818 if (!is_exclusive_pmu(pmu))
4819 return;
4820
4821
4822 if (event->attach_state & PERF_ATTACH_TASK)
4823 atomic_dec(&pmu->exclusive_cnt);
4824 else
4825 atomic_inc(&pmu->exclusive_cnt);
4826}
4827
4828static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
4829{
4830 if ((e1->pmu == e2->pmu) &&
4831 (e1->cpu == e2->cpu ||
4832 e1->cpu == -1 ||
4833 e2->cpu == -1))
4834 return true;
4835 return false;
4836}
4837
4838static bool exclusive_event_installable(struct perf_event *event,
4839 struct perf_event_context *ctx)
4840{
4841 struct perf_event *iter_event;
4842 struct pmu *pmu = event->pmu;
4843
4844 lockdep_assert_held(&ctx->mutex);
4845
4846 if (!is_exclusive_pmu(pmu))
4847 return true;
4848
4849 list_for_each_entry(iter_event, &ctx->event_list, event_entry) {
4850 if (exclusive_event_match(iter_event, event))
4851 return false;
4852 }
4853
4854 return true;
4855}
4856
4857static void perf_addr_filters_splice(struct perf_event *event,
4858 struct list_head *head);
4859
4860static void _free_event(struct perf_event *event)
4861{
4862 irq_work_sync(&event->pending);
4863
4864 unaccount_event(event);
4865
4866 security_perf_event_free(event);
4867
4868 if (event->rb) {
4869
4870
4871
4872
4873
4874
4875 mutex_lock(&event->mmap_mutex);
4876 ring_buffer_attach(event, NULL);
4877 mutex_unlock(&event->mmap_mutex);
4878 }
4879
4880 if (is_cgroup_event(event))
4881 perf_detach_cgroup(event);
4882
4883 if (!event->parent) {
4884 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
4885 put_callchain_buffers();
4886 }
4887
4888 perf_event_free_bpf_prog(event);
4889 perf_addr_filters_splice(event, NULL);
4890 kfree(event->addr_filter_ranges);
4891
4892 if (event->destroy)
4893 event->destroy(event);
4894
4895
4896
4897
4898
4899 if (event->hw.target)
4900 put_task_struct(event->hw.target);
4901
4902
4903
4904
4905
4906 if (event->ctx)
4907 put_ctx(event->ctx);
4908
4909 exclusive_event_destroy(event);
4910 module_put(event->pmu->module);
4911
4912 call_rcu(&event->rcu_head, free_event_rcu);
4913}
4914
4915
4916
4917
4918
4919static void free_event(struct perf_event *event)
4920{
4921 if (WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1,
4922 "unexpected event refcount: %ld; ptr=%p\n",
4923 atomic_long_read(&event->refcount), event)) {
4924
4925 return;
4926 }
4927
4928 _free_event(event);
4929}
4930
4931
4932
4933
4934static void perf_remove_from_owner(struct perf_event *event)
4935{
4936 struct task_struct *owner;
4937
4938 rcu_read_lock();
4939
4940
4941
4942
4943
4944
4945 owner = READ_ONCE(event->owner);
4946 if (owner) {
4947
4948
4949
4950
4951
4952 get_task_struct(owner);
4953 }
4954 rcu_read_unlock();
4955
4956 if (owner) {
4957
4958
4959
4960
4961
4962
4963
4964
4965 mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING);
4966
4967
4968
4969
4970
4971
4972
4973 if (event->owner) {
4974 list_del_init(&event->owner_entry);
4975 smp_store_release(&event->owner, NULL);
4976 }
4977 mutex_unlock(&owner->perf_event_mutex);
4978 put_task_struct(owner);
4979 }
4980}
4981
4982static void put_event(struct perf_event *event)
4983{
4984 if (!atomic_long_dec_and_test(&event->refcount))
4985 return;
4986
4987 _free_event(event);
4988}
4989
4990
4991
4992
4993
4994
4995int perf_event_release_kernel(struct perf_event *event)
4996{
4997 struct perf_event_context *ctx = event->ctx;
4998 struct perf_event *child, *tmp;
4999 LIST_HEAD(free_list);
5000
5001
5002
5003
5004
5005 if (!ctx) {
5006 WARN_ON_ONCE(event->attach_state &
5007 (PERF_ATTACH_CONTEXT|PERF_ATTACH_GROUP));
5008 goto no_ctx;
5009 }
5010
5011 if (!is_kernel_event(event))
5012 perf_remove_from_owner(event);
5013
5014 ctx = perf_event_ctx_lock(event);
5015 WARN_ON_ONCE(ctx->parent_ctx);
5016 perf_remove_from_context(event, DETACH_GROUP);
5017
5018 raw_spin_lock_irq(&ctx->lock);
 /*
  * Mark this event 'DEAD' so that no further kernel code will use it.
  *
  * Anybody acquiring event->child_mutex after the loop below _must_
  * also observe this state; most importantly inherit_event(), which
  * will then refuse to place more children on the list.
  *
  * Thus this guarantees that the loop below really does observe and
  * kill _ALL_ child events.
  */
5030 event->state = PERF_EVENT_STATE_DEAD;
5031 raw_spin_unlock_irq(&ctx->lock);
5032
5033 perf_event_ctx_unlock(event, ctx);
5034
5035again:
5036 mutex_lock(&event->child_mutex);
5037 list_for_each_entry(child, &event->child_list, child_list) {
5038
5039
5040
5041
5042
5043 ctx = READ_ONCE(child->ctx);
5044
5045
5046
5047
5048
5049
5050
5051
5052 get_ctx(ctx);
5053
5054
5055
5056
5057
5058
5059 mutex_unlock(&event->child_mutex);
5060 mutex_lock(&ctx->mutex);
5061 mutex_lock(&event->child_mutex);
5062
5063
5064
5065
5066
5067
5068 tmp = list_first_entry_or_null(&event->child_list,
5069 struct perf_event, child_list);
5070 if (tmp == child) {
5071 perf_remove_from_context(child, DETACH_GROUP);
5072 list_move(&child->child_list, &free_list);
5073
5074
5075
5076
5077 put_event(event);
5078 }
5079
5080 mutex_unlock(&event->child_mutex);
5081 mutex_unlock(&ctx->mutex);
5082 put_ctx(ctx);
5083 goto again;
5084 }
5085 mutex_unlock(&event->child_mutex);
5086
5087 list_for_each_entry_safe(child, tmp, &free_list, child_list) {
5088 void *var = &child->ctx->refcount;
5089
5090 list_del(&child->child_list);
5091 free_event(child);
5092
5093
5094
5095
5096
5097 smp_mb();
5098 wake_up_var(var);
5099 }
5100
5101no_ctx:
5102 put_event(event);
5103 return 0;
5104}
5105EXPORT_SYMBOL_GPL(perf_event_release_kernel);
5106
5107
5108
5109
5110static int perf_release(struct inode *inode, struct file *file)
5111{
5112 perf_event_release_kernel(file->private_data);
5113 return 0;
5114}
5115
5116static u64 __perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
5117{
5118 struct perf_event *child;
5119 u64 total = 0;
5120
5121 *enabled = 0;
5122 *running = 0;
5123
5124 mutex_lock(&event->child_mutex);
5125
5126 (void)perf_event_read(event, false);
5127 total += perf_event_count(event);
5128
5129 *enabled += event->total_time_enabled +
5130 atomic64_read(&event->child_total_time_enabled);
5131 *running += event->total_time_running +
5132 atomic64_read(&event->child_total_time_running);
5133
5134 list_for_each_entry(child, &event->child_list, child_list) {
5135 (void)perf_event_read(child, false);
5136 total += perf_event_count(child);
5137 *enabled += child->total_time_enabled;
5138 *running += child->total_time_running;
5139 }
5140 mutex_unlock(&event->child_mutex);
5141
5142 return total;
5143}
5144
5145u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
5146{
5147 struct perf_event_context *ctx;
5148 u64 count;
5149
5150 ctx = perf_event_ctx_lock(event);
5151 count = __perf_event_read_value(event, enabled, running);
5152 perf_event_ctx_unlock(event, ctx);
5153
5154 return count;
5155}
5156EXPORT_SYMBOL_GPL(perf_event_read_value);
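/*
 * Illustrative sketch (not part of the original file): reading and tearing
 * down a kernel counter created elsewhere (see
 * perf_event_create_kernel_counter()). perf_event_read_value() returns the
 * aggregate of the event and all of its inherited children, together with the
 * enabled/running times needed to detect multiplexing. The helper name is
 * hypothetical.
 *
 *	static void example_report_and_free(struct perf_event *event)
 *	{
 *		u64 enabled, running;
 *		u64 count = perf_event_read_value(event, &enabled, &running);
 *
 *		pr_info("count=%llu (enabled=%llu ns, running=%llu ns)\n",
 *			count, enabled, running);
 *
 *		perf_event_release_kernel(event);
 *	}
 */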
5157
5158static int __perf_read_group_add(struct perf_event *leader,
5159 u64 read_format, u64 *values)
5160{
5161 struct perf_event_context *ctx = leader->ctx;
5162 struct perf_event *sub;
5163 unsigned long flags;
5164 int n = 1;
5165 int ret;
5166
5167 ret = perf_event_read(leader, true);
5168 if (ret)
5169 return ret;
5170
5171 raw_spin_lock_irqsave(&ctx->lock, flags);
5172
5173
5174
5175
5176
5177
5178 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
5179 values[n++] += leader->total_time_enabled +
5180 atomic64_read(&leader->child_total_time_enabled);
5181 }
5182
5183 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
5184 values[n++] += leader->total_time_running +
5185 atomic64_read(&leader->child_total_time_running);
5186 }
5187
5188
5189
5190
5191 values[n++] += perf_event_count(leader);
5192 if (read_format & PERF_FORMAT_ID)
5193 values[n++] = primary_event_id(leader);
5194
5195 for_each_sibling_event(sub, leader) {
5196 values[n++] += perf_event_count(sub);
5197 if (read_format & PERF_FORMAT_ID)
5198 values[n++] = primary_event_id(sub);
5199 }
5200
5201 raw_spin_unlock_irqrestore(&ctx->lock, flags);
5202 return 0;
5203}
5204
5205static int perf_read_group(struct perf_event *event,
5206 u64 read_format, char __user *buf)
5207{
5208 struct perf_event *leader = event->group_leader, *child;
5209 struct perf_event_context *ctx = leader->ctx;
5210 int ret;
5211 u64 *values;
5212
5213 lockdep_assert_held(&ctx->mutex);
5214
5215 values = kzalloc(event->read_size, GFP_KERNEL);
5216 if (!values)
5217 return -ENOMEM;
5218
5219 values[0] = 1 + leader->nr_siblings;
5220
5221
5222
5223
5224
5225 mutex_lock(&leader->child_mutex);
5226
5227 ret = __perf_read_group_add(leader, read_format, values);
5228 if (ret)
5229 goto unlock;
5230
5231 list_for_each_entry(child, &leader->child_list, child_list) {
5232 ret = __perf_read_group_add(child, read_format, values);
5233 if (ret)
5234 goto unlock;
5235 }
5236
5237 mutex_unlock(&leader->child_mutex);
5238
5239 ret = event->read_size;
5240 if (copy_to_user(buf, values, event->read_size))
5241 ret = -EFAULT;
5242 goto out;
5243
5244unlock:
5245 mutex_unlock(&leader->child_mutex);
5246out:
5247 kfree(values);
5248 return ret;
5249}
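/*
 * Illustrative layout (not part of the original file): what the buffer filled
 * in above looks like to user space when PERF_FORMAT_GROUP is combined with
 * the TOTAL_TIME_* and ID flags, matching the values[] indices used by
 * __perf_read_group_add():
 *
 *	struct read_group_format {
 *		__u64 nr;		// values[0]: 1 + nr_siblings
 *		__u64 time_enabled;	// if PERF_FORMAT_TOTAL_TIME_ENABLED
 *		__u64 time_running;	// if PERF_FORMAT_TOTAL_TIME_RUNNING
 *		struct {
 *			__u64 value;
 *			__u64 id;	// if PERF_FORMAT_ID
 *		} cnt[];		// leader first, then each sibling
 *	};
 */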
5250
5251static int perf_read_one(struct perf_event *event,
5252 u64 read_format, char __user *buf)
5253{
5254 u64 enabled, running;
5255 u64 values[4];
5256 int n = 0;
5257
5258 values[n++] = __perf_event_read_value(event, &enabled, &running);
5259 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
5260 values[n++] = enabled;
5261 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
5262 values[n++] = running;
5263 if (read_format & PERF_FORMAT_ID)
5264 values[n++] = primary_event_id(event);
5265
5266 if (copy_to_user(buf, values, n * sizeof(u64)))
5267 return -EFAULT;
5268
5269 return n * sizeof(u64);
5270}
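/*
 * Illustrative sketch (not part of the original file): the user-space view of
 * perf_read_one() with all three optional fields requested in
 * attr.read_format.
 *
 *	#include <linux/perf_event.h>
 *	#include <unistd.h>
 *
 *	struct read_format {
 *		__u64 value;
 *		__u64 time_enabled;	// PERF_FORMAT_TOTAL_TIME_ENABLED
 *		__u64 time_running;	// PERF_FORMAT_TOTAL_TIME_RUNNING
 *		__u64 id;		// PERF_FORMAT_ID
 *	};
 *
 *	static int read_counter(int fd, struct read_format *rf)
 *	{
 *		return read(fd, rf, sizeof(*rf)) == sizeof(*rf) ? 0 : -1;
 *	}
 */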
5271
5272static bool is_event_hup(struct perf_event *event)
5273{
5274 bool no_children;
5275
5276 if (event->state > PERF_EVENT_STATE_EXIT)
5277 return false;
5278
5279 mutex_lock(&event->child_mutex);
5280 no_children = list_empty(&event->child_list);
5281 mutex_unlock(&event->child_mutex);
5282 return no_children;
5283}
5284
5285
5286
5287
5288static ssize_t
5289__perf_read(struct perf_event *event, char __user *buf, size_t count)
5290{
5291 u64 read_format = event->attr.read_format;
5292 int ret;
5293
 /*
  * Return end-of-file for a read on an event that is in
  * error state (i.e. because it was pinned but it couldn't
  * be scheduled on to the PMU).
  */
5299 if (event->state == PERF_EVENT_STATE_ERROR)
5300 return 0;
5301
5302 if (count < event->read_size)
5303 return -ENOSPC;
5304
5305 WARN_ON_ONCE(event->ctx->parent_ctx);
5306 if (read_format & PERF_FORMAT_GROUP)
5307 ret = perf_read_group(event, read_format, buf);
5308 else
5309 ret = perf_read_one(event, read_format, buf);
5310
5311 return ret;
5312}
5313
5314static ssize_t
5315perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
5316{
5317 struct perf_event *event = file->private_data;
5318 struct perf_event_context *ctx;
5319 int ret;
5320
5321 ret = security_perf_event_read(event);
5322 if (ret)
5323 return ret;
5324
5325 ctx = perf_event_ctx_lock(event);
5326 ret = __perf_read(event, buf, count);
5327 perf_event_ctx_unlock(event, ctx);
5328
5329 return ret;
5330}
5331
5332static __poll_t perf_poll(struct file *file, poll_table *wait)
5333{
5334 struct perf_event *event = file->private_data;
5335 struct perf_buffer *rb;
5336 __poll_t events = EPOLLHUP;
5337
5338 poll_wait(file, &event->waitq, wait);
5339
5340 if (is_event_hup(event))
5341 return events;
5342
 /*
  * Pin the event->rb by taking event->mmap_mutex; otherwise
  * perf_event_set_output() can swizzle our rb and make us miss wakeups.
  */
5347 mutex_lock(&event->mmap_mutex);
5348 rb = event->rb;
5349 if (rb)
5350 events = atomic_xchg(&rb->poll, 0);
5351 mutex_unlock(&event->mmap_mutex);
5352 return events;
5353}
5354
5355static void _perf_event_reset(struct perf_event *event)
5356{
5357 (void)perf_event_read(event, false);
5358 local64_set(&event->count, 0);
5359 perf_event_update_userpage(event);
5360}
5361
5362
5363u64 perf_event_pause(struct perf_event *event, bool reset)
5364{
5365 struct perf_event_context *ctx;
5366 u64 count;
5367
5368 ctx = perf_event_ctx_lock(event);
5369 WARN_ON_ONCE(event->attr.inherit);
5370 _perf_event_disable(event);
5371 count = local64_read(&event->count);
5372 if (reset)
5373 local64_set(&event->count, 0);
5374 perf_event_ctx_unlock(event, ctx);
5375
5376 return count;
5377}
5378EXPORT_SYMBOL_GPL(perf_event_pause);
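/*
 * Illustrative sketch (not part of the original file): perf_event_pause() is
 * useful for in-kernel users that want "stop, read, optionally reset" in one
 * call; re-enabling is a separate step. The helper name is hypothetical.
 *
 *	static u64 example_drain(struct perf_event *event)
 *	{
 *		// stop counting and read-and-clear the current value
 *		u64 delta = perf_event_pause(event, true);
 *
 *		// ... consume delta ...
 *
 *		perf_event_enable(event);	// resume counting
 *		return delta;
 *	}
 */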
5379
/*
 * Holding the top-level event's child_mutex means that any
 * descendant process that has inherited this event will block
 * in perf_event_exit_event() if it goes to exit, thus satisfying the
 * task existence requirements of perf_event_enable/disable.
 */
5386static void perf_event_for_each_child(struct perf_event *event,
5387 void (*func)(struct perf_event *))
5388{
5389 struct perf_event *child;
5390
5391 WARN_ON_ONCE(event->ctx->parent_ctx);
5392
5393 mutex_lock(&event->child_mutex);
5394 func(event);
5395 list_for_each_entry(child, &event->child_list, child_list)
5396 func(child);
5397 mutex_unlock(&event->child_mutex);
5398}
5399
5400static void perf_event_for_each(struct perf_event *event,
5401 void (*func)(struct perf_event *))
5402{
5403 struct perf_event_context *ctx = event->ctx;
5404 struct perf_event *sibling;
5405
5406 lockdep_assert_held(&ctx->mutex);
5407
5408 event = event->group_leader;
5409
5410 perf_event_for_each_child(event, func);
5411 for_each_sibling_event(sibling, event)
5412 perf_event_for_each_child(sibling, func);
5413}
5414
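/*
 * Update the sample period (or frequency) of an event.  If the event is
 * currently active it is stopped (and unthrottled if necessary) around
 * the update so the PMU reprograms itself with the new period on
 * restart.
 */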
5415static void __perf_event_period(struct perf_event *event,
5416 struct perf_cpu_context *cpuctx,
5417 struct perf_event_context *ctx,
5418 void *info)
5419{
5420 u64 value = *((u64 *)info);
5421 bool active;
5422
5423 if (event->attr.freq) {
5424 event->attr.sample_freq = value;
5425 } else {
5426 event->attr.sample_period = value;
5427 event->hw.sample_period = value;
5428 }
5429
5430 active = (event->state == PERF_EVENT_STATE_ACTIVE);
5431 if (active) {
5432 perf_pmu_disable(ctx->pmu);
5433
5434
5435
5436
5437 if (event->hw.interrupts == MAX_INTERRUPTS) {
5438 event->hw.interrupts = 0;
5439 perf_log_throttle(event, 1);
5440 }
5441 event->pmu->stop(event, PERF_EF_UPDATE);
5442 }
5443
5444 local64_set(&event->hw.period_left, 0);
5445
5446 if (active) {
5447 event->pmu->start(event, PERF_EF_RELOAD);
5448 perf_pmu_enable(ctx->pmu);
5449 }
5450}
5451
5452static int perf_event_check_period(struct perf_event *event, u64 value)
5453{
5454 return event->pmu->check_period(event, value);
5455}
5456
5457static int _perf_event_period(struct perf_event *event, u64 value)
5458{
5459 if (!is_sampling_event(event))
5460 return -EINVAL;
5461
5462 if (!value)
5463 return -EINVAL;
5464
5465 if (event->attr.freq && value > sysctl_perf_event_sample_rate)
5466 return -EINVAL;
5467
5468 if (perf_event_check_period(event, value))
5469 return -EINVAL;
5470
5471 if (!event->attr.freq && (value & (1ULL << 63)))
5472 return -EINVAL;
5473
5474 event_function_call(event, __perf_event_period, &value);
5475
5476 return 0;
5477}
5478
5479int perf_event_period(struct perf_event *event, u64 value)
5480{
5481 struct perf_event_context *ctx;
5482 int ret;
5483
5484 ctx = perf_event_ctx_lock(event);
5485 ret = _perf_event_period(event, value);
5486 perf_event_ctx_unlock(event, ctx);
5487
5488 return ret;
5489}
5490EXPORT_SYMBOL_GPL(perf_event_period);
5491
5492static const struct file_operations perf_fops;
5493
5494static inline int perf_fget_light(int fd, struct fd *p)
5495{
5496 struct fd f = fdget(fd);
5497 if (!f.file)
5498 return -EBADF;
5499
5500 if (f.file->f_op != &perf_fops) {
5501 fdput(f);
5502 return -EBADF;
5503 }
5504 *p = f;
5505 return 0;
5506}
5507
5508static int perf_event_set_output(struct perf_event *event,
5509 struct perf_event *output_event);
5510static int perf_event_set_filter(struct perf_event *event, void __user *arg);
5511static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
5512static int perf_copy_attr(struct perf_event_attr __user *uattr,
5513 struct perf_event_attr *attr);
5514
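/*
 * ioctl dispatcher; called with the event context lock held.  Commands
 * that reduce to a simple enable/disable/reset fall through to the loop
 * at the bottom, which applies the operation either to the event and
 * its children or, with PERF_IOC_FLAG_GROUP, to the whole group.
 */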
5515static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
5516{
5517 void (*func)(struct perf_event *);
5518 u32 flags = arg;
5519
5520 switch (cmd) {
5521 case PERF_EVENT_IOC_ENABLE:
5522 func = _perf_event_enable;
5523 break;
5524 case PERF_EVENT_IOC_DISABLE:
5525 func = _perf_event_disable;
5526 break;
5527 case PERF_EVENT_IOC_RESET:
5528 func = _perf_event_reset;
5529 break;
5530
5531 case PERF_EVENT_IOC_REFRESH:
5532 return _perf_event_refresh(event, arg);
5533
5534 case PERF_EVENT_IOC_PERIOD:
5535 {
5536 u64 value;
5537
5538 if (copy_from_user(&value, (u64 __user *)arg, sizeof(value)))
5539 return -EFAULT;
5540
5541 return _perf_event_period(event, value);
5542 }
5543 case PERF_EVENT_IOC_ID:
5544 {
5545 u64 id = primary_event_id(event);
5546
5547 if (copy_to_user((void __user *)arg, &id, sizeof(id)))
5548 return -EFAULT;
5549 return 0;
5550 }
5551
5552 case PERF_EVENT_IOC_SET_OUTPUT:
5553 {
5554 int ret;
5555 if (arg != -1) {
5556 struct perf_event *output_event;
5557 struct fd output;
5558 ret = perf_fget_light(arg, &output);
5559 if (ret)
5560 return ret;
5561 output_event = output.file->private_data;
5562 ret = perf_event_set_output(event, output_event);
5563 fdput(output);
5564 } else {
5565 ret = perf_event_set_output(event, NULL);
5566 }
5567 return ret;
5568 }
5569
5570 case PERF_EVENT_IOC_SET_FILTER:
5571 return perf_event_set_filter(event, (void __user *)arg);
5572
5573 case PERF_EVENT_IOC_SET_BPF:
5574 return perf_event_set_bpf_prog(event, arg);
5575
5576 case PERF_EVENT_IOC_PAUSE_OUTPUT: {
5577 struct perf_buffer *rb;
5578
5579 rcu_read_lock();
5580 rb = rcu_dereference(event->rb);
5581 if (!rb || !rb->nr_pages) {
5582 rcu_read_unlock();
5583 return -EINVAL;
5584 }
5585 rb_toggle_paused(rb, !!arg);
5586 rcu_read_unlock();
5587 return 0;
5588 }
5589
5590 case PERF_EVENT_IOC_QUERY_BPF:
5591 return perf_event_query_prog_array(event, (void __user *)arg);
5592
5593 case PERF_EVENT_IOC_MODIFY_ATTRIBUTES: {
5594 struct perf_event_attr new_attr;
5595 int err = perf_copy_attr((struct perf_event_attr __user *)arg,
5596 &new_attr);
5597
5598 if (err)
5599 return err;
5600
5601 return perf_event_modify_attr(event, &new_attr);
5602 }
5603 default:
5604 return -ENOTTY;
5605 }
5606
5607 if (flags & PERF_IOC_FLAG_GROUP)
5608 perf_event_for_each(event, func);
5609 else
5610 perf_event_for_each_child(event, func);
5611
5612 return 0;
5613}
5614
5615static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
5616{
5617 struct perf_event *event = file->private_data;
5618 struct perf_event_context *ctx;
5619 long ret;
5620
5621
5622 ret = security_perf_event_write(event);
5623 if (ret)
5624 return ret;
5625
5626 ctx = perf_event_ctx_lock(event);
5627 ret = _perf_ioctl(event, cmd, arg);
5628 perf_event_ctx_unlock(event, ctx);
5629
5630 return ret;
5631}
5632
5633#ifdef CONFIG_COMPAT
5634static long perf_compat_ioctl(struct file *file, unsigned int cmd,
5635 unsigned long arg)
5636{
5637 switch (_IOC_NR(cmd)) {
5638 case _IOC_NR(PERF_EVENT_IOC_SET_FILTER):
5639 case _IOC_NR(PERF_EVENT_IOC_ID):
5640 case _IOC_NR(PERF_EVENT_IOC_QUERY_BPF):
5641 case _IOC_NR(PERF_EVENT_IOC_MODIFY_ATTRIBUTES):
5642
5643 if (_IOC_SIZE(cmd) == sizeof(compat_uptr_t)) {
5644 cmd &= ~IOCSIZE_MASK;
5645 cmd |= sizeof(void *) << IOCSIZE_SHIFT;
5646 }
5647 break;
5648 }
5649 return perf_ioctl(file, cmd, arg);
5650}
5651#else
5652# define perf_compat_ioctl NULL
5653#endif
5654
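/*
 * Enable all events on the calling task's owner list, including their
 * inherited children.
 */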
5655int perf_event_task_enable(void)
5656{
5657 struct perf_event_context *ctx;
5658 struct perf_event *event;
5659
	mutex_lock(&current->perf_event_mutex);
	list_for_each_entry(event, &current->perf_event_list, owner_entry) {
		ctx = perf_event_ctx_lock(event);
		perf_event_for_each_child(event, _perf_event_enable);
		perf_event_ctx_unlock(event, ctx);
	}
	mutex_unlock(&current->perf_event_mutex);
5667
5668 return 0;
5669}
5670
5671int perf_event_task_disable(void)
5672{
5673 struct perf_event_context *ctx;
5674 struct perf_event *event;
5675
	mutex_lock(&current->perf_event_mutex);
	list_for_each_entry(event, &current->perf_event_list, owner_entry) {
		ctx = perf_event_ctx_lock(event);
		perf_event_for_each_child(event, _perf_event_disable);
		perf_event_ctx_unlock(event, ctx);
	}
	mutex_unlock(&current->perf_event_mutex);
5683
5684 return 0;
5685}
5686
5687static int perf_event_index(struct perf_event *event)
5688{
5689 if (event->hw.state & PERF_HES_STOPPED)
5690 return 0;
5691
5692 if (event->state != PERF_EVENT_STATE_ACTIVE)
5693 return 0;
5694
5695 return event->pmu->event_idx(event);
5696}
5697
5698static void calc_timer_values(struct perf_event *event,
5699 u64 *now,
5700 u64 *enabled,
5701 u64 *running)
5702{
5703 u64 ctx_time;
5704
5705 *now = perf_clock();
5706 ctx_time = event->shadow_ctx_time + *now;
5707 __perf_update_times(event, ctx_time, enabled, running);
5708}
5709
5710static void perf_event_init_userpage(struct perf_event *event)
5711{
5712 struct perf_event_mmap_page *userpg;
5713 struct perf_buffer *rb;
5714
5715 rcu_read_lock();
5716 rb = rcu_dereference(event->rb);
5717 if (!rb)
5718 goto unlock;
5719
5720 userpg = rb->user_page;
5721
5722
5723 userpg->cap_bit0_is_deprecated = 1;
5724 userpg->size = offsetof(struct perf_event_mmap_page, __reserved);
5725 userpg->data_offset = PAGE_SIZE;
5726 userpg->data_size = perf_data_size(rb);
5727
5728unlock:
5729 rcu_read_unlock();
5730}
5731
5732void __weak arch_perf_update_userpage(
5733 struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now)
5734{
5735}
5736
5737
5738
5739
5740
5741
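/*
 * Update the user-visible mmap control page for an event.  Callers must
 * ensure this cannot nest, since the ->lock sequence count below is what
 * lets userspace detect a concurrent update.
 */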
5742void perf_event_update_userpage(struct perf_event *event)
5743{
5744 struct perf_event_mmap_page *userpg;
5745 struct perf_buffer *rb;
5746 u64 enabled, running, now;
5747
5748 rcu_read_lock();
5749 rb = rcu_dereference(event->rb);
5750 if (!rb)
5751 goto unlock;
5752
5753
5754
5755
5756
5757
5758
5759
5760
5761
5762 calc_timer_values(event, &now, &enabled, &running);
5763
5764 userpg = rb->user_page;
5765
5766
5767
5768
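	/*
	 * Disable preemption to guarantee consistent time stamps are
	 * stored to the user page; the ->lock increments before and
	 * after the update let userspace detect that it raced with us.
	 */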
5769 preempt_disable();
5770 ++userpg->lock;
5771 barrier();
5772 userpg->index = perf_event_index(event);
5773 userpg->offset = perf_event_count(event);
5774 if (userpg->index)
5775 userpg->offset -= local64_read(&event->hw.prev_count);
5776
5777 userpg->time_enabled = enabled +
5778 atomic64_read(&event->child_total_time_enabled);
5779
5780 userpg->time_running = running +
5781 atomic64_read(&event->child_total_time_running);
5782
5783 arch_perf_update_userpage(event, userpg, now);
5784
5785 barrier();
5786 ++userpg->lock;
5787 preempt_enable();
5788unlock:
5789 rcu_read_unlock();
5790}
5791EXPORT_SYMBOL_GPL(perf_event_update_userpage);
5792
5793static vm_fault_t perf_mmap_fault(struct vm_fault *vmf)
5794{
5795 struct perf_event *event = vmf->vma->vm_file->private_data;
5796 struct perf_buffer *rb;
5797 vm_fault_t ret = VM_FAULT_SIGBUS;
5798
5799 if (vmf->flags & FAULT_FLAG_MKWRITE) {
5800 if (vmf->pgoff == 0)
5801 ret = 0;
5802 return ret;
5803 }
5804
5805 rcu_read_lock();
5806 rb = rcu_dereference(event->rb);
5807 if (!rb)
5808 goto unlock;
5809
5810 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
5811 goto unlock;
5812
5813 vmf->page = perf_mmap_to_page(rb, vmf->pgoff);
5814 if (!vmf->page)
5815 goto unlock;
5816
5817 get_page(vmf->page);
5818 vmf->page->mapping = vmf->vma->vm_file->f_mapping;
5819 vmf->page->index = vmf->pgoff;
5820
5821 ret = 0;
5822unlock:
5823 rcu_read_unlock();
5824
5825 return ret;
5826}
5827
5828static void ring_buffer_attach(struct perf_event *event,
5829 struct perf_buffer *rb)
5830{
5831 struct perf_buffer *old_rb = NULL;
5832 unsigned long flags;
5833
5834 if (event->rb) {
5835
5836
5837
5838
5839 WARN_ON_ONCE(event->rcu_pending);
5840
5841 old_rb = event->rb;
5842 spin_lock_irqsave(&old_rb->event_lock, flags);
5843 list_del_rcu(&event->rb_entry);
5844 spin_unlock_irqrestore(&old_rb->event_lock, flags);
5845
5846 event->rcu_batches = get_state_synchronize_rcu();
5847 event->rcu_pending = 1;
5848 }
5849
5850 if (rb) {
5851 if (event->rcu_pending) {
5852 cond_synchronize_rcu(event->rcu_batches);
5853 event->rcu_pending = 0;
5854 }
5855
5856 spin_lock_irqsave(&rb->event_lock, flags);
5857 list_add_rcu(&event->rb_entry, &rb->event_list);
5858 spin_unlock_irqrestore(&rb->event_lock, flags);
5859 }
5860
5861
5862
5863
5864
5865
5866
5867
5868
5869
5870
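	/*
	 * Avoid racing with perf_mmap_close(AUX): stop the event before
	 * swizzling rb, as parts of the old buffer may be getting freed.
	 */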
5871 if (has_aux(event))
5872 perf_event_stop(event, 0);
5873
5874 rcu_assign_pointer(event->rb, rb);
5875
5876 if (old_rb) {
5877 ring_buffer_put(old_rb);
5878
5879
5880
5881
5882
5883 wake_up_all(&event->waitq);
5884 }
5885}
5886
5887static void ring_buffer_wakeup(struct perf_event *event)
5888{
5889 struct perf_buffer *rb;
5890
5891 rcu_read_lock();
5892 rb = rcu_dereference(event->rb);
5893 if (rb) {
5894 list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
5895 wake_up_all(&event->waitq);
5896 }
5897 rcu_read_unlock();
5898}
5899
5900struct perf_buffer *ring_buffer_get(struct perf_event *event)
5901{
5902 struct perf_buffer *rb;
5903
5904 rcu_read_lock();
5905 rb = rcu_dereference(event->rb);
5906 if (rb) {
5907 if (!refcount_inc_not_zero(&rb->refcount))
5908 rb = NULL;
5909 }
5910 rcu_read_unlock();
5911
5912 return rb;
5913}
5914
5915void ring_buffer_put(struct perf_buffer *rb)
5916{
5917 if (!refcount_dec_and_test(&rb->refcount))
5918 return;
5919
5920 WARN_ON_ONCE(!list_empty(&rb->event_list));
5921
5922 call_rcu(&rb->rcu_head, rb_free_rcu);
5923}
5924
5925static void perf_mmap_open(struct vm_area_struct *vma)
5926{
5927 struct perf_event *event = vma->vm_file->private_data;
5928
5929 atomic_inc(&event->mmap_count);
5930 atomic_inc(&event->rb->mmap_count);
5931
5932 if (vma->vm_pgoff)
5933 atomic_inc(&event->rb->aux_mmap_count);
5934
5935 if (event->pmu->event_mapped)
5936 event->pmu->event_mapped(event, vma->vm_mm);
5937}
5938
5939static void perf_pmu_output_stop(struct perf_event *event);
5940
5941
5942
5943
5944
5945
5946
5947
5948
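/*
 * A buffer can be mmap()ed multiple times; either directly through the
 * same event, or through other events by use of perf_event_set_output().
 * Undo the VM accounting done in perf_mmap() and, once the last mapping
 * of the buffer goes away, detach it from every event still using it.
 */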
5949static void perf_mmap_close(struct vm_area_struct *vma)
5950{
5951 struct perf_event *event = vma->vm_file->private_data;
5952 struct perf_buffer *rb = ring_buffer_get(event);
5953 struct user_struct *mmap_user = rb->mmap_user;
5954 int mmap_locked = rb->mmap_locked;
5955 unsigned long size = perf_data_size(rb);
5956 bool detach_rest = false;
5957
5958 if (event->pmu->event_unmapped)
5959 event->pmu->event_unmapped(event, vma->vm_mm);
5960
5961
5962
5963
5964
5965
5966 if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
5967 atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
5968
5969
5970
5971
5972
5973
5974 perf_pmu_output_stop(event);
5975
5976
5977 atomic_long_sub(rb->aux_nr_pages - rb->aux_mmap_locked, &mmap_user->locked_vm);
5978 atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
5979
5980
5981 rb_free_aux(rb);
5982 WARN_ON_ONCE(refcount_read(&rb->aux_refcount));
5983
5984 mutex_unlock(&event->mmap_mutex);
5985 }
5986
5987 if (atomic_dec_and_test(&rb->mmap_count))
5988 detach_rest = true;
5989
5990 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
5991 goto out_put;
5992
5993 ring_buffer_attach(event, NULL);
5994 mutex_unlock(&event->mmap_mutex);
5995
5996
5997 if (!detach_rest)
5998 goto out_put;
5999
6000
6001
6002
6003
6004
6005again:
6006 rcu_read_lock();
6007 list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
6008 if (!atomic_long_inc_not_zero(&event->refcount)) {
6009
6010
6011
6012
6013 continue;
6014 }
6015 rcu_read_unlock();
6016
6017 mutex_lock(&event->mmap_mutex);
6018
6019
6020
6021
6022
6023
6024
6025
6026
6027
6028 if (event->rb == rb)
6029 ring_buffer_attach(event, NULL);
6030
6031 mutex_unlock(&event->mmap_mutex);
6032 put_event(event);
6033
6034
6035
6036
6037
6038 goto again;
6039 }
6040 rcu_read_unlock();
6041
6042
6043
6044
6045
6046
6047
6048
6049
6050
6051 atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
6052 &mmap_user->locked_vm);
6053 atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
6054 free_uid(mmap_user);
6055
6056out_put:
6057 ring_buffer_put(rb);
6058}
6059
6060static const struct vm_operations_struct perf_mmap_vmops = {
6061 .open = perf_mmap_open,
6062 .close = perf_mmap_close,
6063 .fault = perf_mmap_fault,
6064 .page_mkwrite = perf_mmap_fault,
6065};
6066
6067static int perf_mmap(struct file *file, struct vm_area_struct *vma)
6068{
6069 struct perf_event *event = file->private_data;
6070 unsigned long user_locked, user_lock_limit;
6071 struct user_struct *user = current_user();
6072 struct perf_buffer *rb = NULL;
6073 unsigned long locked, lock_limit;
6074 unsigned long vma_size;
6075 unsigned long nr_pages;
6076 long user_extra = 0, extra = 0;
6077 int ret = 0, flags = 0;
6078
6079
6080
6081
6082
6083
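	/*
	 * Don't allow mmap() of inherited per-task counters.  This
	 * would create a performance issue due to all children writing
	 * to the same buffer.
	 */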
6084 if (event->cpu == -1 && event->attr.inherit)
6085 return -EINVAL;
6086
6087 if (!(vma->vm_flags & VM_SHARED))
6088 return -EINVAL;
6089
6090 ret = security_perf_event_read(event);
6091 if (ret)
6092 return ret;
6093
6094 vma_size = vma->vm_end - vma->vm_start;
6095
6096 if (vma->vm_pgoff == 0) {
6097 nr_pages = (vma_size / PAGE_SIZE) - 1;
6098 } else {
6099
6100
6101
6102
6103
6104 u64 aux_offset, aux_size;
6105
6106 if (!event->rb)
6107 return -EINVAL;
6108
6109 nr_pages = vma_size / PAGE_SIZE;
6110
6111 mutex_lock(&event->mmap_mutex);
6112 ret = -EINVAL;
6113
6114 rb = event->rb;
6115 if (!rb)
6116 goto aux_unlock;
6117
6118 aux_offset = READ_ONCE(rb->user_page->aux_offset);
6119 aux_size = READ_ONCE(rb->user_page->aux_size);
6120
6121 if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
6122 goto aux_unlock;
6123
6124 if (aux_offset != vma->vm_pgoff << PAGE_SHIFT)
6125 goto aux_unlock;
6126
6127
6128 if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff)
6129 goto aux_unlock;
6130
6131 if (aux_size != vma_size || aux_size != nr_pages * PAGE_SIZE)
6132 goto aux_unlock;
6133
6134
6135 if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages)
6136 goto aux_unlock;
6137
6138 if (!is_power_of_2(nr_pages))
6139 goto aux_unlock;
6140
6141 if (!atomic_inc_not_zero(&rb->mmap_count))
6142 goto aux_unlock;
6143
6144 if (rb_has_aux(rb)) {
6145 atomic_inc(&rb->aux_mmap_count);
6146 ret = 0;
6147 goto unlock;
6148 }
6149
6150 atomic_set(&rb->aux_mmap_count, 1);
6151 user_extra = nr_pages;
6152
6153 goto accounting;
6154 }
6155
6156
6157
6158
6159
6160 if (nr_pages != 0 && !is_power_of_2(nr_pages))
6161 return -EINVAL;
6162
6163 if (vma_size != PAGE_SIZE * (1 + nr_pages))
6164 return -EINVAL;
6165
6166 WARN_ON_ONCE(event->ctx->parent_ctx);
6167again:
6168 mutex_lock(&event->mmap_mutex);
6169 if (event->rb) {
6170 if (event->rb->nr_pages != nr_pages) {
6171 ret = -EINVAL;
6172 goto unlock;
6173 }
6174
6175 if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
6176
6177
6178
6179
6180
6181 mutex_unlock(&event->mmap_mutex);
6182 goto again;
6183 }
6184
6185 goto unlock;
6186 }
6187
6188 user_extra = nr_pages + 1;
6189
6190accounting:
6191 user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
6192
6193
6194
6195
6196 user_lock_limit *= num_online_cpus();
6197
6198 user_locked = atomic_long_read(&user->locked_vm);
6199
6200
6201
6202
6203
6204 if (user_locked > user_lock_limit)
6205 user_locked = user_lock_limit;
6206 user_locked += user_extra;
6207
6208 if (user_locked > user_lock_limit) {
6209
6210
6211
6212
6213 extra = user_locked - user_lock_limit;
6214 user_extra -= extra;
6215 }
6216
6217 lock_limit = rlimit(RLIMIT_MEMLOCK);
6218 lock_limit >>= PAGE_SHIFT;
6219 locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
6220
6221 if ((locked > lock_limit) && perf_is_paranoid() &&
6222 !capable(CAP_IPC_LOCK)) {
6223 ret = -EPERM;
6224 goto unlock;
6225 }
6226
6227 WARN_ON(!rb && event->rb);
6228
6229 if (vma->vm_flags & VM_WRITE)
6230 flags |= RING_BUFFER_WRITABLE;
6231
6232 if (!rb) {
6233 rb = rb_alloc(nr_pages,
6234 event->attr.watermark ? event->attr.wakeup_watermark : 0,
6235 event->cpu, flags);
6236
6237 if (!rb) {
6238 ret = -ENOMEM;
6239 goto unlock;
6240 }
6241
6242 atomic_set(&rb->mmap_count, 1);
6243 rb->mmap_user = get_current_user();
6244 rb->mmap_locked = extra;
6245
6246 ring_buffer_attach(event, rb);
6247
6248 perf_event_init_userpage(event);
6249 perf_event_update_userpage(event);
6250 } else {
6251 ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
6252 event->attr.aux_watermark, flags);
6253 if (!ret)
6254 rb->aux_mmap_locked = extra;
6255 }
6256
6257unlock:
6258 if (!ret) {
6259 atomic_long_add(user_extra, &user->locked_vm);
6260 atomic64_add(extra, &vma->vm_mm->pinned_vm);
6261
6262 atomic_inc(&event->mmap_count);
6263 } else if (rb) {
6264 atomic_dec(&rb->mmap_count);
6265 }
6266aux_unlock:
6267 mutex_unlock(&event->mmap_mutex);
6268
6269
6270
6271
6272
6273 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
6274 vma->vm_ops = &perf_mmap_vmops;
6275
6276 if (event->pmu->event_mapped)
6277 event->pmu->event_mapped(event, vma->vm_mm);
6278
6279 return ret;
6280}
6281
6282static int perf_fasync(int fd, struct file *filp, int on)
6283{
6284 struct inode *inode = file_inode(filp);
6285 struct perf_event *event = filp->private_data;
6286 int retval;
6287
6288 inode_lock(inode);
6289 retval = fasync_helper(fd, filp, on, &event->fasync);
6290 inode_unlock(inode);
6291
6292 if (retval < 0)
6293 return retval;
6294
6295 return 0;
6296}
6297
6298static const struct file_operations perf_fops = {
6299 .llseek = no_llseek,
6300 .release = perf_release,
6301 .read = perf_read,
6302 .poll = perf_poll,
6303 .unlocked_ioctl = perf_ioctl,
6304 .compat_ioctl = perf_compat_ioctl,
6305 .mmap = perf_mmap,
6306 .fasync = perf_fasync,
6307};
6308
6309
6310
6311
6312
6313
6314
6315
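/*
 * Perf event wakeup.  The fasync state lives on the parent for inherited
 * events; perf_event_wakeup() wakes all ring-buffer waiters and, if a
 * pending_kill signal was requested, delivers SIGIO through that state.
 */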
6316static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
6317{
6318
6319 if (event->parent)
6320 event = event->parent;
6321 return &event->fasync;
6322}
6323
6324void perf_event_wakeup(struct perf_event *event)
6325{
6326 ring_buffer_wakeup(event);
6327
6328 if (event->pending_kill) {
6329 kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill);
6330 event->pending_kill = 0;
6331 }
6332}
6333
6334static void perf_pending_event_disable(struct perf_event *event)
6335{
6336 int cpu = READ_ONCE(event->pending_disable);
6337
6338 if (cpu < 0)
6339 return;
6340
6341 if (cpu == smp_processor_id()) {
6342 WRITE_ONCE(event->pending_disable, -1);
6343 perf_event_disable_local(event);
6344 return;
6345 }
6346
6347
6348
6349
6350
6351
6352
6353
6354
6355
6356
6357
6358
6359
6360
6361
6362
6363
6364
6365
6366
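	/*
	 * The event is active on another CPU; queue an irq_work there so
	 * the disable runs on the CPU that owns the event.
	 */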
6367 irq_work_queue_on(&event->pending, cpu);
6368}
6369
6370static void perf_pending_event(struct irq_work *entry)
6371{
6372 struct perf_event *event = container_of(entry, struct perf_event, pending);
6373 int rctx;
6374
6375 rctx = perf_swevent_get_recursion_context();
6376
6377
6378
6379
6380
6381 perf_pending_event_disable(event);
6382
6383 if (event->pending_wakeup) {
6384 event->pending_wakeup = 0;
6385 perf_event_wakeup(event);
6386 }
6387
6388 if (rctx >= 0)
6389 perf_swevent_put_recursion_context(rctx);
6390}
6391
6392
6393
6394
6395
6396
6397struct perf_guest_info_callbacks *perf_guest_cbs;
6398
6399int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
6400{
6401 perf_guest_cbs = cbs;
6402 return 0;
6403}
6404EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
6405
6406int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
6407{
6408 perf_guest_cbs = NULL;
6409 return 0;
6410}
6411EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
6412
6413static void
6414perf_output_sample_regs(struct perf_output_handle *handle,
6415 struct pt_regs *regs, u64 mask)
6416{
6417 int bit;
6418 DECLARE_BITMAP(_mask, 64);
6419
6420 bitmap_from_u64(_mask, mask);
6421 for_each_set_bit(bit, _mask, sizeof(mask) * BITS_PER_BYTE) {
6422 u64 val;
6423
6424 val = perf_reg_value(regs, bit);
6425 perf_output_put(handle, val);
6426 }
6427}
6428
6429static void perf_sample_regs_user(struct perf_regs *regs_user,
6430 struct pt_regs *regs)
6431{
6432 if (user_mode(regs)) {
6433 regs_user->abi = perf_reg_abi(current);
6434 regs_user->regs = regs;
6435 } else if (!(current->flags & PF_KTHREAD)) {
6436 perf_get_regs_user(regs_user, regs);
6437 } else {
6438 regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
6439 regs_user->regs = NULL;
6440 }
6441}
6442
6443static void perf_sample_regs_intr(struct perf_regs *regs_intr,
6444 struct pt_regs *regs)
6445{
6446 regs_intr->regs = regs;
6447 regs_intr->abi = perf_reg_abi(current);
6448}
6449
6450
6451
6452
6453
6454
6455
6456
6457
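/*
 * Return the amount of user stack between the user stack pointer and
 * TASK_SIZE, or 0 if the stack pointer is unavailable or out of range.
 */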
6458static u64 perf_ustack_task_size(struct pt_regs *regs)
6459{
6460 unsigned long addr = perf_user_stack_pointer(regs);
6461
6462 if (!addr || addr >= TASK_SIZE)
6463 return 0;
6464
6465 return TASK_SIZE - addr;
6466}
6467
6468static u16
6469perf_sample_ustack_size(u16 stack_size, u16 header_size,
6470 struct pt_regs *regs)
6471{
6472 u64 task_size;
6473
6474
6475 if (!regs)
6476 return 0;
6477
6478
6479
6480
6481
6482
6483
6484
6485
6486
6487
6488 task_size = min((u64) USHRT_MAX, perf_ustack_task_size(regs));
6489 stack_size = min(stack_size, (u16) task_size);
6490
6491
6492 header_size += 2 * sizeof(u64);
6493
6494
6495 if ((u16) (header_size + stack_size) < header_size) {
6496
6497
6498
6499
6500 stack_size = USHRT_MAX - header_size - sizeof(u64);
6501 stack_size = round_up(stack_size, sizeof(u64));
6502 }
6503
6504 return stack_size;
6505}
6506
6507static void
6508perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
6509 struct pt_regs *regs)
6510{
6511
6512 if (!regs) {
6513 u64 size = 0;
6514 perf_output_put(handle, size);
6515 } else {
6516 unsigned long sp;
6517 unsigned int rem;
6518 u64 dyn_size;
6519 mm_segment_t fs;
6520
6521
6522
6523
6524
6525
6526
6527
6528
6529
6530
6531
6532
6533 perf_output_put(handle, dump_size);
6534
6535
6536 sp = perf_user_stack_pointer(regs);
6537 fs = get_fs();
6538 set_fs(USER_DS);
6539 rem = __output_copy_user(handle, (void *) sp, dump_size);
6540 set_fs(fs);
6541 dyn_size = dump_size - rem;
6542
6543 perf_output_skip(handle, rem);
6544
6545
6546 perf_output_put(handle, dyn_size);
6547 }
6548}
6549
6550static unsigned long perf_prepare_sample_aux(struct perf_event *event,
6551 struct perf_sample_data *data,
6552 size_t size)
6553{
6554 struct perf_event *sampler = event->aux_event;
6555 struct perf_buffer *rb;
6556
6557 data->aux_size = 0;
6558
6559 if (!sampler)
6560 goto out;
6561
6562 if (WARN_ON_ONCE(READ_ONCE(sampler->state) != PERF_EVENT_STATE_ACTIVE))
6563 goto out;
6564
6565 if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id()))
6566 goto out;
6567
6568 rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
6569 if (!rb)
6570 goto out;
6571
6572
6573
6574
6575
6576 if (READ_ONCE(rb->aux_in_sampling)) {
6577 data->aux_size = 0;
6578 } else {
6579 size = min_t(size_t, size, perf_aux_size(rb));
6580 data->aux_size = ALIGN(size, sizeof(u64));
6581 }
6582 ring_buffer_put(rb);
6583
6584out:
6585 return data->aux_size;
6586}
6587
6588long perf_pmu_snapshot_aux(struct perf_buffer *rb,
6589 struct perf_event *event,
6590 struct perf_output_handle *handle,
6591 unsigned long size)
6592{
6593 unsigned long flags;
6594 long ret;
6595
6596
6597
6598
6599
6600
6601
6602
6603
6604
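	/*
	 * Disable interrupts and mark the buffer as being sampled, so
	 * that perf_prepare_sample_aux() skips AUX data for any nested
	 * sample (e.g. from an NMI) taken while ->snapshot_aux() runs.
	 */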
6605 local_irq_save(flags);
6606
6607
6608
6609
6610 WRITE_ONCE(rb->aux_in_sampling, 1);
6611 barrier();
6612
6613 ret = event->pmu->snapshot_aux(event, handle, size);
6614
6615 barrier();
6616 WRITE_ONCE(rb->aux_in_sampling, 0);
6617 local_irq_restore(flags);
6618
6619 return ret;
6620}
6621
6622static void perf_aux_sample_output(struct perf_event *event,
6623 struct perf_output_handle *handle,
6624 struct perf_sample_data *data)
6625{
6626 struct perf_event *sampler = event->aux_event;
6627 struct perf_buffer *rb;
6628 unsigned long pad;
6629 long size;
6630
6631 if (WARN_ON_ONCE(!sampler || !data->aux_size))
6632 return;
6633
6634 rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
6635 if (!rb)
6636 return;
6637
6638 size = perf_pmu_snapshot_aux(rb, sampler, handle, data->aux_size);
6639
6640
6641
6642
6643
6644
6645
6646 if (WARN_ON_ONCE(size < 0))
6647 goto out_put;
6648
6649
6650
6651
6652
6653 pad = data->aux_size - size;
6654 if (WARN_ON_ONCE(pad >= sizeof(u64)))
6655 pad = 8;
6656
6657 if (pad) {
6658 u64 zero = 0;
6659 perf_output_copy(handle, &zero, pad);
6660 }
6661
6662out_put:
6663 ring_buffer_put(rb);
6664}
6665
6666static void __perf_event_header__init_id(struct perf_event_header *header,
6667 struct perf_sample_data *data,
6668 struct perf_event *event)
6669{
6670 u64 sample_type = event->attr.sample_type;
6671
6672 data->type = sample_type;
6673 header->size += event->id_header_size;
6674
6675 if (sample_type & PERF_SAMPLE_TID) {
6676
6677 data->tid_entry.pid = perf_event_pid(event, current);
6678 data->tid_entry.tid = perf_event_tid(event, current);
6679 }
6680
6681 if (sample_type & PERF_SAMPLE_TIME)
6682 data->time = perf_event_clock(event);
6683
6684 if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
6685 data->id = primary_event_id(event);
6686
6687 if (sample_type & PERF_SAMPLE_STREAM_ID)
6688 data->stream_id = event->id;
6689
6690 if (sample_type & PERF_SAMPLE_CPU) {
6691 data->cpu_entry.cpu = raw_smp_processor_id();
6692 data->cpu_entry.reserved = 0;
6693 }
6694}
6695
6696void perf_event_header__init_id(struct perf_event_header *header,
6697 struct perf_sample_data *data,
6698 struct perf_event *event)
6699{
6700 if (event->attr.sample_id_all)
6701 __perf_event_header__init_id(header, data, event);
6702}
6703
6704static void __perf_event__output_id_sample(struct perf_output_handle *handle,
6705 struct perf_sample_data *data)
6706{
6707 u64 sample_type = data->type;
6708
6709 if (sample_type & PERF_SAMPLE_TID)
6710 perf_output_put(handle, data->tid_entry);
6711
6712 if (sample_type & PERF_SAMPLE_TIME)
6713 perf_output_put(handle, data->time);
6714
6715 if (sample_type & PERF_SAMPLE_ID)
6716 perf_output_put(handle, data->id);
6717
6718 if (sample_type & PERF_SAMPLE_STREAM_ID)
6719 perf_output_put(handle, data->stream_id);
6720
6721 if (sample_type & PERF_SAMPLE_CPU)
6722 perf_output_put(handle, data->cpu_entry);
6723
6724 if (sample_type & PERF_SAMPLE_IDENTIFIER)
6725 perf_output_put(handle, data->id);
6726}
6727
6728void perf_event__output_id_sample(struct perf_event *event,
6729 struct perf_output_handle *handle,
6730 struct perf_sample_data *sample)
6731{
6732 if (event->attr.sample_id_all)
6733 __perf_event__output_id_sample(handle, sample);
6734}
6735
6736static void perf_output_read_one(struct perf_output_handle *handle,
6737 struct perf_event *event,
6738 u64 enabled, u64 running)
6739{
6740 u64 read_format = event->attr.read_format;
6741 u64 values[4];
6742 int n = 0;
6743
6744 values[n++] = perf_event_count(event);
6745 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
6746 values[n++] = enabled +
6747 atomic64_read(&event->child_total_time_enabled);
6748 }
6749 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
6750 values[n++] = running +
6751 atomic64_read(&event->child_total_time_running);
6752 }
6753 if (read_format & PERF_FORMAT_ID)
6754 values[n++] = primary_event_id(event);
6755
6756 __output_copy(handle, values, n * sizeof(u64));
6757}
6758
6759static void perf_output_read_group(struct perf_output_handle *handle,
6760 struct perf_event *event,
6761 u64 enabled, u64 running)
6762{
6763 struct perf_event *leader = event->group_leader, *sub;
6764 u64 read_format = event->attr.read_format;
6765 u64 values[5];
6766 int n = 0;
6767
6768 values[n++] = 1 + leader->nr_siblings;
6769
6770 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
6771 values[n++] = enabled;
6772
6773 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
6774 values[n++] = running;
6775
6776 if ((leader != event) &&
6777 (leader->state == PERF_EVENT_STATE_ACTIVE))
6778 leader->pmu->read(leader);
6779
6780 values[n++] = perf_event_count(leader);
6781 if (read_format & PERF_FORMAT_ID)
6782 values[n++] = primary_event_id(leader);
6783
6784 __output_copy(handle, values, n * sizeof(u64));
6785
6786 for_each_sibling_event(sub, leader) {
6787 n = 0;
6788
6789 if ((sub != event) &&
6790 (sub->state == PERF_EVENT_STATE_ACTIVE))
6791 sub->pmu->read(sub);
6792
6793 values[n++] = perf_event_count(sub);
6794 if (read_format & PERF_FORMAT_ID)
6795 values[n++] = primary_event_id(sub);
6796
6797 __output_copy(handle, values, n * sizeof(u64));
6798 }
6799}
6800
6801#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
6802 PERF_FORMAT_TOTAL_TIME_RUNNING)
6803
6804
6805
6806
6807
6808
6809
6810
6811static void perf_output_read(struct perf_output_handle *handle,
6812 struct perf_event *event)
6813{
6814 u64 enabled = 0, running = 0, now;
6815 u64 read_format = event->attr.read_format;
6816
6817
6818
6819
6820
6821
6822
6823
6824
6825
6826 if (read_format & PERF_FORMAT_TOTAL_TIMES)
6827 calc_timer_values(event, &now, &enabled, &running);
6828
6829 if (event->attr.read_format & PERF_FORMAT_GROUP)
6830 perf_output_read_group(handle, event, enabled, running);
6831 else
6832 perf_output_read_one(handle, event, enabled, running);
6833}
6834
6835static inline bool perf_sample_save_hw_index(struct perf_event *event)
6836{
6837 return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
6838}
6839
6840void perf_output_sample(struct perf_output_handle *handle,
6841 struct perf_event_header *header,
6842 struct perf_sample_data *data,
6843 struct perf_event *event)
6844{
6845 u64 sample_type = data->type;
6846
6847 perf_output_put(handle, *header);
6848
6849 if (sample_type & PERF_SAMPLE_IDENTIFIER)
6850 perf_output_put(handle, data->id);
6851
6852 if (sample_type & PERF_SAMPLE_IP)
6853 perf_output_put(handle, data->ip);
6854
6855 if (sample_type & PERF_SAMPLE_TID)
6856 perf_output_put(handle, data->tid_entry);
6857
6858 if (sample_type & PERF_SAMPLE_TIME)
6859 perf_output_put(handle, data->time);
6860
6861 if (sample_type & PERF_SAMPLE_ADDR)
6862 perf_output_put(handle, data->addr);
6863
6864 if (sample_type & PERF_SAMPLE_ID)
6865 perf_output_put(handle, data->id);
6866
6867 if (sample_type & PERF_SAMPLE_STREAM_ID)
6868 perf_output_put(handle, data->stream_id);
6869
6870 if (sample_type & PERF_SAMPLE_CPU)
6871 perf_output_put(handle, data->cpu_entry);
6872
6873 if (sample_type & PERF_SAMPLE_PERIOD)
6874 perf_output_put(handle, data->period);
6875
6876 if (sample_type & PERF_SAMPLE_READ)
6877 perf_output_read(handle, event);
6878
6879 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
6880 int size = 1;
6881
6882 size += data->callchain->nr;
6883 size *= sizeof(u64);
6884 __output_copy(handle, data->callchain, size);
6885 }
6886
6887 if (sample_type & PERF_SAMPLE_RAW) {
6888 struct perf_raw_record *raw = data->raw;
6889
6890 if (raw) {
6891 struct perf_raw_frag *frag = &raw->frag;
6892
6893 perf_output_put(handle, raw->size);
6894 do {
6895 if (frag->copy) {
6896 __output_custom(handle, frag->copy,
6897 frag->data, frag->size);
6898 } else {
6899 __output_copy(handle, frag->data,
6900 frag->size);
6901 }
6902 if (perf_raw_frag_last(frag))
6903 break;
6904 frag = frag->next;
6905 } while (1);
6906 if (frag->pad)
6907 __output_skip(handle, NULL, frag->pad);
6908 } else {
6909 struct {
6910 u32 size;
6911 u32 data;
6912 } raw = {
6913 .size = sizeof(u32),
6914 .data = 0,
6915 };
6916 perf_output_put(handle, raw);
6917 }
6918 }
6919
6920 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
6921 if (data->br_stack) {
6922 size_t size;
6923
6924 size = data->br_stack->nr
6925 * sizeof(struct perf_branch_entry);
6926
6927 perf_output_put(handle, data->br_stack->nr);
6928 if (perf_sample_save_hw_index(event))
6929 perf_output_put(handle, data->br_stack->hw_idx);
6930 perf_output_copy(handle, data->br_stack->entries, size);
6931 } else {
6932
6933
6934
6935 u64 nr = 0;
6936 perf_output_put(handle, nr);
6937 }
6938 }
6939
6940 if (sample_type & PERF_SAMPLE_REGS_USER) {
6941 u64 abi = data->regs_user.abi;
6942
6943
6944
6945
6946
6947 perf_output_put(handle, abi);
6948
6949 if (abi) {
6950 u64 mask = event->attr.sample_regs_user;
6951 perf_output_sample_regs(handle,
6952 data->regs_user.regs,
6953 mask);
6954 }
6955 }
6956
6957 if (sample_type & PERF_SAMPLE_STACK_USER) {
6958 perf_output_sample_ustack(handle,
6959 data->stack_user_size,
6960 data->regs_user.regs);
6961 }
6962
6963 if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
6964 perf_output_put(handle, data->weight.full);
6965
6966 if (sample_type & PERF_SAMPLE_DATA_SRC)
6967 perf_output_put(handle, data->data_src.val);
6968
6969 if (sample_type & PERF_SAMPLE_TRANSACTION)
6970 perf_output_put(handle, data->txn);
6971
6972 if (sample_type & PERF_SAMPLE_REGS_INTR) {
6973 u64 abi = data->regs_intr.abi;
6974
6975
6976
6977
6978 perf_output_put(handle, abi);
6979
6980 if (abi) {
6981 u64 mask = event->attr.sample_regs_intr;
6982
6983 perf_output_sample_regs(handle,
6984 data->regs_intr.regs,
6985 mask);
6986 }
6987 }
6988
6989 if (sample_type & PERF_SAMPLE_PHYS_ADDR)
6990 perf_output_put(handle, data->phys_addr);
6991
6992 if (sample_type & PERF_SAMPLE_CGROUP)
6993 perf_output_put(handle, data->cgroup);
6994
6995 if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
6996 perf_output_put(handle, data->data_page_size);
6997
6998 if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
6999 perf_output_put(handle, data->code_page_size);
7000
7001 if (sample_type & PERF_SAMPLE_AUX) {
7002 perf_output_put(handle, data->aux_size);
7003
7004 if (data->aux_size)
7005 perf_aux_sample_output(event, handle, data);
7006 }
7007
7008 if (!event->attr.watermark) {
7009 int wakeup_events = event->attr.wakeup_events;
7010
7011 if (wakeup_events) {
7012 struct perf_buffer *rb = handle->rb;
7013 int events = local_inc_return(&rb->events);
7014
7015 if (events >= wakeup_events) {
7016 local_sub(wakeup_events, &rb->events);
7017 local_inc(&rb->wakeup);
7018 }
7019 }
7020 }
7021}
7022
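/*
 * Translate a sampled virtual address to a physical address.  Valid
 * kernel addresses are translated directly (vmalloc addresses are
 * excluded since virt_to_phys() is not valid for them); user addresses
 * are resolved with a non-faulting page pin, returning 0 if the page is
 * not present.
 */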
7023static u64 perf_virt_to_phys(u64 virt)
7024{
7025 u64 phys_addr = 0;
7026 struct page *p = NULL;
7027
7028 if (!virt)
7029 return 0;
7030
7031 if (virt >= TASK_SIZE) {
7032
7033 if (virt_addr_valid((void *)(uintptr_t)virt) &&
7034 !(virt >= VMALLOC_START && virt < VMALLOC_END))
7035 phys_addr = (u64)virt_to_phys((void *)(uintptr_t)virt);
7036 } else {
7037
7038
7039
7040
7041
7042
7043
7044 if (current->mm != NULL) {
7045 pagefault_disable();
7046 if (__get_user_pages_fast(virt, 1, 0, &p) == 1)
7047 phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
7048 pagefault_enable();
7049 }
7050
7051 if (p)
7052 put_page(p);
7053 }
7054
7055 return phys_addr;
7056}
7057
7058#ifdef CONFIG_MMU
7059
7060
7061
7062
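/*
 * Walk the page tables and return the MMU page size backing a given
 * virtual address, or 0 if the address is not mapped.
 */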
7063static u64 __perf_get_page_size(struct mm_struct *mm, unsigned long addr)
7064{
7065 pgd_t *pgd;
7066 p4d_t *p4d;
7067 pud_t *pud;
7068 pmd_t *pmd;
7069 pte_t *pte;
7070
7071 pgd = pgd_offset(mm, addr);
7072 if (pgd_none(*pgd))
7073 return 0;
7074
7075 p4d = p4d_offset(pgd, addr);
7076 if (!p4d_present(*p4d))
7077 return 0;
7078
7079 if (p4d_leaf(*p4d))
7080 return 1ULL << P4D_SHIFT;
7081
7082 pud = pud_offset(p4d, addr);
7083 if (!pud_present(*pud))
7084 return 0;
7085
7086 if (pud_leaf(*pud))
7087 return 1ULL << PUD_SHIFT;
7088
7089 pmd = pmd_offset(pud, addr);
7090 if (!pmd_present(*pmd))
7091 return 0;
7092
7093 if (pmd_leaf(*pmd))
7094 return 1ULL << PMD_SHIFT;
7095
7096 pte = pte_offset_map(pmd, addr);
7097 if (!pte_present(*pte)) {
7098 pte_unmap(pte);
7099 return 0;
7100 }
7101
7102 pte_unmap(pte);
7103 return PAGE_SIZE;
7104}
7105
7106#else
7107
7108static u64 __perf_get_page_size(struct mm_struct *mm, unsigned long addr)
7109{
7110 return 0;
7111}
7112
7113#endif
7114
7115static u64 perf_get_page_size(unsigned long addr)
7116{
7117 struct mm_struct *mm;
7118 unsigned long flags;
7119 u64 size;
7120
7121 if (!addr)
7122 return 0;
7123
7124
7125
7126
7127
7128 local_irq_save(flags);
7129
7130 mm = current->mm;
7131 if (!mm) {
7132
7133
7134
7135
7136 mm = &init_mm;
7137 }
7138
7139 size = __perf_get_page_size(mm, addr);
7140
7141 local_irq_restore(flags);
7142
7143 return size;
7144}
7145
7146static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
7147
7148struct perf_callchain_entry *
7149perf_callchain(struct perf_event *event, struct pt_regs *regs)
7150{
7151 bool kernel = !event->attr.exclude_callchain_kernel;
7152 bool user = !event->attr.exclude_callchain_user;
7153
7154 bool crosstask = event->ctx->task && event->ctx->task != current;
7155 const u32 max_stack = event->attr.sample_max_stack;
7156 struct perf_callchain_entry *callchain;
7157
7158 if (!kernel && !user)
7159 return &__empty_callchain;
7160
7161 callchain = get_perf_callchain(regs, 0, kernel, user,
7162 max_stack, crosstask, true);
7163 return callchain ?: &__empty_callchain;
7164}
7165
7166void perf_prepare_sample(struct perf_event_header *header,
7167 struct perf_sample_data *data,
7168 struct perf_event *event,
7169 struct pt_regs *regs)
7170{
7171 u64 sample_type = event->attr.sample_type;
7172
7173 header->type = PERF_RECORD_SAMPLE;
7174 header->size = sizeof(*header) + event->header_size;
7175
7176 header->misc = 0;
7177 header->misc |= perf_misc_flags(regs);
7178
7179 __perf_event_header__init_id(header, data, event);
7180
7181 if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
7182 data->ip = perf_instruction_pointer(regs);
7183
7184 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
7185 int size = 1;
7186
7187 if (!(sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
7188 data->callchain = perf_callchain(event, regs);
7189
7190 size += data->callchain->nr;
7191
7192 header->size += size * sizeof(u64);
7193 }
7194
7195 if (sample_type & PERF_SAMPLE_RAW) {
7196 struct perf_raw_record *raw = data->raw;
7197 int size;
7198
7199 if (raw) {
7200 struct perf_raw_frag *frag = &raw->frag;
7201 u32 sum = 0;
7202
7203 do {
7204 sum += frag->size;
7205 if (perf_raw_frag_last(frag))
7206 break;
7207 frag = frag->next;
7208 } while (1);
7209
7210 size = round_up(sum + sizeof(u32), sizeof(u64));
7211 raw->size = size - sizeof(u32);
7212 frag->pad = raw->size - sum;
7213 } else {
7214 size = sizeof(u64);
7215 }
7216
7217 header->size += size;
7218 }
7219
7220 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
7221 int size = sizeof(u64);
7222 if (data->br_stack) {
7223 if (perf_sample_save_hw_index(event))
7224 size += sizeof(u64);
7225
7226 size += data->br_stack->nr
7227 * sizeof(struct perf_branch_entry);
7228 }
7229 header->size += size;
7230 }
7231
7232 if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
7233 perf_sample_regs_user(&data->regs_user, regs);
7234
7235 if (sample_type & PERF_SAMPLE_REGS_USER) {
7236
7237 int size = sizeof(u64);
7238
7239 if (data->regs_user.regs) {
7240 u64 mask = event->attr.sample_regs_user;
7241 size += hweight64(mask) * sizeof(u64);
7242 }
7243
7244 header->size += size;
7245 }
7246
7247 if (sample_type & PERF_SAMPLE_STACK_USER) {
7248
7249
7250
7251
7252
7253
7254 u16 stack_size = event->attr.sample_stack_user;
7255 u16 size = sizeof(u64);
7256
7257 stack_size = perf_sample_ustack_size(stack_size, header->size,
7258 data->regs_user.regs);
7259
7260
7261
7262
7263
7264
7265 if (stack_size)
7266 size += sizeof(u64) + stack_size;
7267
7268 data->stack_user_size = stack_size;
7269 header->size += size;
7270 }
7271
7272 if (sample_type & PERF_SAMPLE_REGS_INTR) {
7273
7274 int size = sizeof(u64);
7275
7276 perf_sample_regs_intr(&data->regs_intr, regs);
7277
7278 if (data->regs_intr.regs) {
7279 u64 mask = event->attr.sample_regs_intr;
7280
7281 size += hweight64(mask) * sizeof(u64);
7282 }
7283
7284 header->size += size;
7285 }
7286
7287 if (sample_type & PERF_SAMPLE_PHYS_ADDR)
7288 data->phys_addr = perf_virt_to_phys(data->addr);
7289
7290#ifdef CONFIG_CGROUP_PERF
7291 if (sample_type & PERF_SAMPLE_CGROUP) {
7292 struct cgroup *cgrp;
7293
7294
7295 cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup;
7296 data->cgroup = cgrp->kn->id;
7297 }
7298#endif
7299
7300
7301
7302
7303
7304
7305 if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
7306 data->data_page_size = perf_get_page_size(data->addr);
7307
7308 if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
7309 data->code_page_size = perf_get_page_size(data->ip);
7310
7311 if (sample_type & PERF_SAMPLE_AUX) {
7312 u64 size;
7313
7314 header->size += sizeof(u64);
7315
7316
7317
7318
7319
7320
7321
7322 size = min_t(size_t, U16_MAX - header->size,
7323 event->attr.aux_sample_size);
7324 size = rounddown(size, 8);
7325 size = perf_prepare_sample_aux(event, data, size);
7326
7327 WARN_ON_ONCE(size + header->size > U16_MAX);
7328 header->size += size;
7329 }
7330
7331
7332
7333
7334
7335
7336
7337
7338 WARN_ON_ONCE(header->size & 7);
7339}
7340
7341static __always_inline int
7342__perf_event_output(struct perf_event *event,
7343 struct perf_sample_data *data,
7344 struct pt_regs *regs,
7345 int (*output_begin)(struct perf_output_handle *,
7346 struct perf_sample_data *,
7347 struct perf_event *,
7348 unsigned int))
7349{
7350 struct perf_output_handle handle;
7351 struct perf_event_header header;
7352 int err;
7353
7354
7355 rcu_read_lock();
7356
7357 perf_prepare_sample(&header, data, event, regs);
7358
7359 err = output_begin(&handle, data, event, header.size);
7360 if (err)
7361 goto exit;
7362
7363 perf_output_sample(&handle, &header, data, event);
7364
7365 perf_output_end(&handle);
7366
7367exit:
7368 rcu_read_unlock();
7369 return err;
7370}
7371
7372void
7373perf_event_output_forward(struct perf_event *event,
7374 struct perf_sample_data *data,
7375 struct pt_regs *regs)
7376{
7377 __perf_event_output(event, data, regs, perf_output_begin_forward);
7378}
7379
7380void
7381perf_event_output_backward(struct perf_event *event,
7382 struct perf_sample_data *data,
7383 struct pt_regs *regs)
7384{
7385 __perf_event_output(event, data, regs, perf_output_begin_backward);
7386}
7387
7388int
7389perf_event_output(struct perf_event *event,
7390 struct perf_sample_data *data,
7391 struct pt_regs *regs)
7392{
7393 return __perf_event_output(event, data, regs, perf_output_begin);
7394}
7395
7396
7397
7398
7399
7400struct perf_read_event {
7401 struct perf_event_header header;
7402
7403 u32 pid;
7404 u32 tid;
7405};
7406
7407static void
7408perf_event_read_event(struct perf_event *event,
7409 struct task_struct *task)
7410{
7411 struct perf_output_handle handle;
7412 struct perf_sample_data sample;
7413 struct perf_read_event read_event = {
7414 .header = {
7415 .type = PERF_RECORD_READ,
7416 .misc = 0,
7417 .size = sizeof(read_event) + event->read_size,
7418 },
7419 .pid = perf_event_pid(event, task),
7420 .tid = perf_event_tid(event, task),
7421 };
7422 int ret;
7423
7424 perf_event_header__init_id(&read_event.header, &sample, event);
7425 ret = perf_output_begin(&handle, &sample, event, read_event.header.size);
7426 if (ret)
7427 return;
7428
7429 perf_output_put(&handle, read_event);
7430 perf_output_read(&handle, event);
7431 perf_event__output_id_sample(event, &handle, &sample);
7432
7433 perf_output_end(&handle);
7434}
7435
7436typedef void (perf_iterate_f)(struct perf_event *event, void *data);
7437
7438static void
7439perf_iterate_ctx(struct perf_event_context *ctx,
7440 perf_iterate_f output,
7441 void *data, bool all)
7442{
7443 struct perf_event *event;
7444
7445 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
7446 if (!all) {
7447 if (event->state < PERF_EVENT_STATE_INACTIVE)
7448 continue;
7449 if (!event_filter_match(event))
7450 continue;
7451 }
7452
7453 output(event, data);
7454 }
7455}
7456
7457static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
7458{
7459 struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events);
7460 struct perf_event *event;
7461
7462 list_for_each_entry_rcu(event, &pel->list, sb_list) {
7463
7464
7465
7466
7467
7468 if (!smp_load_acquire(&event->ctx))
7469 continue;
7470
7471 if (event->state < PERF_EVENT_STATE_INACTIVE)
7472 continue;
7473 if (!event_filter_match(event))
7474 continue;
7475 output(event, data);
7476 }
7477}
7478
7479
7480
7481
7482
7483
7484
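/*
 * Iterate all events that need to receive side-band records: the
 * per-CPU list of system-wide side-band events plus the current task's
 * contexts, or only @task_ctx when one is given.
 */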
7485static void
7486perf_iterate_sb(perf_iterate_f output, void *data,
7487 struct perf_event_context *task_ctx)
7488{
7489 struct perf_event_context *ctx;
7490 int ctxn;
7491
7492 rcu_read_lock();
7493 preempt_disable();
7494
7495
7496
7497
7498
7499
7500 if (task_ctx) {
7501 perf_iterate_ctx(task_ctx, output, data, false);
7502 goto done;
7503 }
7504
7505 perf_iterate_sb_cpu(output, data);
7506
7507 for_each_task_context_nr(ctxn) {
7508 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
7509 if (ctx)
7510 perf_iterate_ctx(ctx, output, data, false);
7511 }
7512done:
7513 preempt_enable();
7514 rcu_read_unlock();
7515}
7516
7517
7518
7519
7520
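/*
 * On exec, clear all file-based address filters; their ranges will be
 * re-established if and when the objects are mmap()ed again.
 */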
7521static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
7522{
7523 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
7524 struct perf_addr_filter *filter;
7525 unsigned int restart = 0, count = 0;
7526 unsigned long flags;
7527
7528 if (!has_addr_filter(event))
7529 return;
7530
7531 raw_spin_lock_irqsave(&ifh->lock, flags);
7532 list_for_each_entry(filter, &ifh->list, entry) {
7533 if (filter->path.dentry) {
7534 event->addr_filter_ranges[count].start = 0;
7535 event->addr_filter_ranges[count].size = 0;
7536 restart++;
7537 }
7538
7539 count++;
7540 }
7541
7542 if (restart)
7543 event->addr_filters_gen++;
7544 raw_spin_unlock_irqrestore(&ifh->lock, flags);
7545
7546 if (restart)
7547 perf_event_stop(event, 1);
7548}
7549
7550void perf_event_exec(void)
7551{
7552 struct perf_event_context *ctx;
7553 int ctxn;
7554
7555 rcu_read_lock();
7556 for_each_task_context_nr(ctxn) {
7557 ctx = current->perf_event_ctxp[ctxn];
7558 if (!ctx)
7559 continue;
7560
7561 perf_event_enable_on_exec(ctxn);
7562
7563 perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL,
7564 true);
7565 }
7566 rcu_read_unlock();
7567}
7568
7569struct remote_output {
7570 struct perf_buffer *rb;
7571 int err;
7572};
7573
7574static void __perf_event_output_stop(struct perf_event *event, void *data)
7575{
7576 struct perf_event *parent = event->parent;
7577 struct remote_output *ro = data;
7578 struct perf_buffer *rb = ro->rb;
7579 struct stop_event_data sd = {
7580 .event = event,
7581 };
7582
7583 if (!has_aux(event))
7584 return;
7585
7586 if (!parent)
7587 parent = event;
7588
7589
7590
7591
7592
7593
7594
7595
7596
7597
7598
7599 if (rcu_dereference(parent->rb) == rb)
7600 ro->err = __perf_event_stop(&sd);
7601}
7602
7603static int __perf_pmu_output_stop(void *info)
7604{
7605 struct perf_event *event = info;
7606 struct pmu *pmu = event->ctx->pmu;
7607 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
7608 struct remote_output ro = {
7609 .rb = event->rb,
7610 };
7611
7612 rcu_read_lock();
7613 perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
7614 if (cpuctx->task_ctx)
7615 perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop,
7616 &ro, false);
7617 rcu_read_unlock();
7618
7619 return ro.err;
7620}
7621
7622static void perf_pmu_output_stop(struct perf_event *event)
7623{
7624 struct perf_event *iter;
7625 int err, cpu;
7626
7627restart:
7628 rcu_read_lock();
7629 list_for_each_entry_rcu(iter, &event->rb->event_list, rb_entry) {
7630
7631
7632
7633
7634
7635
7636 cpu = iter->cpu;
7637 if (cpu == -1)
7638 cpu = READ_ONCE(iter->oncpu);
7639
7640 if (cpu == -1)
7641 continue;
7642
7643 err = cpu_function_call(cpu, __perf_pmu_output_stop, event);
7644 if (err == -EAGAIN) {
7645 rcu_read_unlock();
7646 goto restart;
7647 }
7648 }
7649 rcu_read_unlock();
7650}
7651
7652
7653
7654
7655
7656
7657
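/*
 * task tracking -- fork/exit
 *
 * enabled by: attr.comm | attr.mmap | attr.mmap2 | attr.mmap_data | attr.task
 */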
7658struct perf_task_event {
7659 struct task_struct *task;
7660 struct perf_event_context *task_ctx;
7661
7662 struct {
7663 struct perf_event_header header;
7664
7665 u32 pid;
7666 u32 ppid;
7667 u32 tid;
7668 u32 ptid;
7669 u64 time;
7670 } event_id;
7671};
7672
7673static int perf_event_task_match(struct perf_event *event)
7674{
7675 return event->attr.comm || event->attr.mmap ||
7676 event->attr.mmap2 || event->attr.mmap_data ||
7677 event->attr.task;
7678}
7679
7680static void perf_event_task_output(struct perf_event *event,
7681 void *data)
7682{
7683 struct perf_task_event *task_event = data;
7684 struct perf_output_handle handle;
7685 struct perf_sample_data sample;
7686 struct task_struct *task = task_event->task;
7687 int ret, size = task_event->event_id.header.size;
7688
7689 if (!perf_event_task_match(event))
7690 return;
7691
7692 perf_event_header__init_id(&task_event->event_id.header, &sample, event);
7693
7694 ret = perf_output_begin(&handle, &sample, event,
7695 task_event->event_id.header.size);
7696 if (ret)
7697 goto out;
7698
7699 task_event->event_id.pid = perf_event_pid(event, task);
7700 task_event->event_id.tid = perf_event_tid(event, task);
7701
7702 if (task_event->event_id.header.type == PERF_RECORD_EXIT) {
7703 task_event->event_id.ppid = perf_event_pid(event,
7704 task->real_parent);
7705 task_event->event_id.ptid = perf_event_pid(event,
7706 task->real_parent);
7707 } else {
7708 task_event->event_id.ppid = perf_event_pid(event, current);
7709 task_event->event_id.ptid = perf_event_tid(event, current);
7710 }
7711
7712 task_event->event_id.time = perf_event_clock(event);
7713
7714 perf_output_put(&handle, task_event->event_id);
7715
7716 perf_event__output_id_sample(event, &handle, &sample);
7717
7718 perf_output_end(&handle);
7719out:
7720 task_event->event_id.header.size = size;
7721}
7722
7723static void perf_event_task(struct task_struct *task,
7724 struct perf_event_context *task_ctx,
7725 int new)
7726{
7727 struct perf_task_event task_event;
7728
7729 if (!atomic_read(&nr_comm_events) &&
7730 !atomic_read(&nr_mmap_events) &&
7731 !atomic_read(&nr_task_events))
7732 return;
7733
7734 task_event = (struct perf_task_event){
7735 .task = task,
7736 .task_ctx = task_ctx,
7737 .event_id = {
7738 .header = {
7739 .type = new ? PERF_RECORD_FORK : PERF_RECORD_EXIT,
7740 .misc = 0,
7741 .size = sizeof(task_event.event_id),
7742 },
7743
7744
7745
7746
7747
7748 },
7749 };
7750
7751 perf_iterate_sb(perf_event_task_output,
7752 &task_event,
7753 task_ctx);
7754}
7755
7756void perf_event_fork(struct task_struct *task)
7757{
7758 perf_event_task(task, NULL, 1);
7759 perf_event_namespaces(task);
7760}
7761
7762
7763
7764
7765
7766struct perf_comm_event {
7767 struct task_struct *task;
7768 char *comm;
7769 int comm_size;
7770
7771 struct {
7772 struct perf_event_header header;
7773
7774 u32 pid;
7775 u32 tid;
7776 } event_id;
7777};
7778
7779static int perf_event_comm_match(struct perf_event *event)
7780{
7781 return event->attr.comm;
7782}
7783
7784static void perf_event_comm_output(struct perf_event *event,
7785 void *data)
7786{
7787 struct perf_comm_event *comm_event = data;
7788 struct perf_output_handle handle;
7789 struct perf_sample_data sample;
7790 int size = comm_event->event_id.header.size;
7791 int ret;
7792
7793 if (!perf_event_comm_match(event))
7794 return;
7795
7796 perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
7797 ret = perf_output_begin(&handle, &sample, event,
7798 comm_event->event_id.header.size);
7799
7800 if (ret)
7801 goto out;
7802
7803 comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
7804 comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
7805
7806 perf_output_put(&handle, comm_event->event_id);
7807 __output_copy(&handle, comm_event->comm,
7808 comm_event->comm_size);
7809
7810 perf_event__output_id_sample(event, &handle, &sample);
7811
7812 perf_output_end(&handle);
7813out:
7814 comm_event->event_id.header.size = size;
7815}
7816
7817static void perf_event_comm_event(struct perf_comm_event *comm_event)
7818{
7819 char comm[TASK_COMM_LEN];
7820 unsigned int size;
7821
7822 memset(comm, 0, sizeof(comm));
7823 strlcpy(comm, comm_event->task->comm, sizeof(comm));
7824 size = ALIGN(strlen(comm)+1, sizeof(u64));
7825
7826 comm_event->comm = comm;
7827 comm_event->comm_size = size;
7828
7829 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
7830
7831 perf_iterate_sb(perf_event_comm_output,
7832 comm_event,
7833 NULL);
7834}
7835
7836void perf_event_comm(struct task_struct *task, bool exec)
7837{
7838 struct perf_comm_event comm_event;
7839
7840 if (!atomic_read(&nr_comm_events))
7841 return;
7842
7843 comm_event = (struct perf_comm_event){
7844 .task = task,
7845
7846
7847 .event_id = {
7848 .header = {
7849 .type = PERF_RECORD_COMM,
7850 .misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
7851
7852 },
7853
7854
7855 },
7856 };
7857
7858 perf_event_comm_event(&comm_event);
7859}
7860
7861
7862
7863
7864
7865struct perf_namespaces_event {
7866 struct task_struct *task;
7867
7868 struct {
7869 struct perf_event_header header;
7870
7871 u32 pid;
7872 u32 tid;
7873 u64 nr_namespaces;
7874 struct perf_ns_link_info link_info[NR_NAMESPACES];
7875 } event_id;
7876};
7877
7878static int perf_event_namespaces_match(struct perf_event *event)
7879{
7880 return event->attr.namespaces;
7881}
7882
7883static void perf_event_namespaces_output(struct perf_event *event,
7884 void *data)
7885{
7886 struct perf_namespaces_event *namespaces_event = data;
7887 struct perf_output_handle handle;
7888 struct perf_sample_data sample;
7889 u16 header_size = namespaces_event->event_id.header.size;
7890 int ret;
7891
7892 if (!perf_event_namespaces_match(event))
7893 return;
7894
7895 perf_event_header__init_id(&namespaces_event->event_id.header,
7896 &sample, event);
7897 ret = perf_output_begin(&handle, &sample, event,
7898 namespaces_event->event_id.header.size);
7899 if (ret)
7900 goto out;
7901
7902 namespaces_event->event_id.pid = perf_event_pid(event,
7903 namespaces_event->task);
7904 namespaces_event->event_id.tid = perf_event_tid(event,
7905 namespaces_event->task);
7906
7907 perf_output_put(&handle, namespaces_event->event_id);
7908
7909 perf_event__output_id_sample(event, &handle, &sample);
7910
7911 perf_output_end(&handle);
7912out:
7913 namespaces_event->event_id.header.size = header_size;
7914}
7915
7916static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
7917 struct task_struct *task,
7918 const struct proc_ns_operations *ns_ops)
7919{
7920 struct path ns_path;
7921 struct inode *ns_inode;
7922 void *error;
7923
7924 error = ns_get_path(&ns_path, task, ns_ops);
7925 if (!error) {
7926 ns_inode = ns_path.dentry->d_inode;
7927 ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
7928 ns_link_info->ino = ns_inode->i_ino;
7929 path_put(&ns_path);
7930 }
7931}
7932
7933void perf_event_namespaces(struct task_struct *task)
7934{
7935 struct perf_namespaces_event namespaces_event;
7936 struct perf_ns_link_info *ns_link_info;
7937
7938 if (!atomic_read(&nr_namespaces_events))
7939 return;
7940
7941 namespaces_event = (struct perf_namespaces_event){
7942 .task = task,
7943 .event_id = {
7944 .header = {
7945 .type = PERF_RECORD_NAMESPACES,
7946 .misc = 0,
7947 .size = sizeof(namespaces_event.event_id),
7948 },
7949
7950
7951 .nr_namespaces = NR_NAMESPACES,
7952
7953 },
7954 };
7955
7956 ns_link_info = namespaces_event.event_id.link_info;
7957
7958 perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
7959 task, &mntns_operations);
7960
7961#ifdef CONFIG_USER_NS
7962 perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
7963 task, &userns_operations);
7964#endif
7965#ifdef CONFIG_NET_NS
7966 perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
7967 task, &netns_operations);
7968#endif
7969#ifdef CONFIG_UTS_NS
7970 perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
7971 task, &utsns_operations);
7972#endif
7973#ifdef CONFIG_IPC_NS
7974 perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
7975 task, &ipcns_operations);
7976#endif
7977#ifdef CONFIG_PID_NS
7978 perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
7979 task, &pidns_operations);
7980#endif
7981#ifdef CONFIG_CGROUPS
7982 perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
7983 task, &cgroupns_operations);
7984#endif
7985
7986 perf_iterate_sb(perf_event_namespaces_output,
7987 &namespaces_event,
7988 NULL);
7989}
7990
7991
7992
7993
7994#ifdef CONFIG_CGROUP_PERF
7995
7996struct perf_cgroup_event {
7997 char *path;
7998 int path_size;
7999 struct {
8000 struct perf_event_header header;
8001 u64 id;
8002 char path[];
8003 } event_id;
8004};
8005
8006static int perf_event_cgroup_match(struct perf_event *event)
8007{
8008 return event->attr.cgroup;
8009}
8010
8011static void perf_event_cgroup_output(struct perf_event *event, void *data)
8012{
8013 struct perf_cgroup_event *cgroup_event = data;
8014 struct perf_output_handle handle;
8015 struct perf_sample_data sample;
8016 u16 header_size = cgroup_event->event_id.header.size;
8017 int ret;
8018
8019 if (!perf_event_cgroup_match(event))
8020 return;
8021
8022 perf_event_header__init_id(&cgroup_event->event_id.header,
8023 &sample, event);
8024 ret = perf_output_begin(&handle, &sample, event,
8025 cgroup_event->event_id.header.size);
8026 if (ret)
8027 goto out;
8028
8029 perf_output_put(&handle, cgroup_event->event_id);
8030 __output_copy(&handle, cgroup_event->path, cgroup_event->path_size);
8031
8032 perf_event__output_id_sample(event, &handle, &sample);
8033
8034 perf_output_end(&handle);
8035out:
8036 cgroup_event->event_id.header.size = header_size;
8037}
8038
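/*
 * Emit a PERF_RECORD_CGROUP for @cgrp: the record carries the kernfs
 * node id plus the cgroup path, zero-padded to a u64 boundary. If the
 * PATH_MAX buffer cannot be allocated, "//enomem" is reported instead,
 * mirroring what perf_event_mmap_event() does for file names.
 */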
8039static void perf_event_cgroup(struct cgroup *cgrp)
8040{
8041 struct perf_cgroup_event cgroup_event;
8042 char path_enomem[16] = "//enomem";
8043 char *pathname;
8044 size_t size;
8045
8046 if (!atomic_read(&nr_cgroup_events))
8047 return;
8048
8049 cgroup_event = (struct perf_cgroup_event){
8050 .event_id = {
8051 .header = {
8052 .type = PERF_RECORD_CGROUP,
8053 .misc = 0,
8054 .size = sizeof(cgroup_event.event_id),
8055 },
8056 .id = cgrp->kn->id,
8057 },
8058 };
8059
8060 pathname = kmalloc(PATH_MAX, GFP_KERNEL);
8061 if (pathname == NULL) {
8062 cgroup_event.path = path_enomem;
8063 } else {
8064
8065 cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64));
8066 cgroup_event.path = pathname;
8067 }
8068
8069
8070
8071
8072
8073
8074 size = strlen(cgroup_event.path) + 1;
8075 while (!IS_ALIGNED(size, sizeof(u64)))
8076 cgroup_event.path[size++] = '\0';
8077
8078 cgroup_event.event_id.header.size += size;
8079 cgroup_event.path_size = size;
8080
8081 perf_iterate_sb(perf_event_cgroup_output,
8082 &cgroup_event,
8083 NULL);
8084
8085 kfree(pathname);
8086}
8087
8088#endif
8089
/*
 * mmap tracking
 */

8094struct perf_mmap_event {
8095 struct vm_area_struct *vma;
8096
8097 const char *file_name;
8098 int file_size;
8099 int maj, min;
8100 u64 ino;
8101 u64 ino_generation;
8102 u32 prot, flags;
8103 u8 build_id[BUILD_ID_SIZE_MAX];
8104 u32 build_id_size;
8105
8106 struct {
8107 struct perf_event_header header;
8108
8109 u32 pid;
8110 u32 tid;
8111 u64 start;
8112 u64 len;
8113 u64 pgoff;
8114 } event_id;
8115};
8116
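/*
 * An mmap record is delivered to an event if either:
 *  - the mapping is not executable and the event asked for data
 *    mappings (attr.mmap_data), or
 *  - the mapping is executable and the event asked for code mappings
 *    (attr.mmap or attr.mmap2).
 */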
8117static int perf_event_mmap_match(struct perf_event *event,
8118 void *data)
8119{
8120 struct perf_mmap_event *mmap_event = data;
8121 struct vm_area_struct *vma = mmap_event->vma;
8122 int executable = vma->vm_flags & VM_EXEC;
8123
8124 return (!executable && event->attr.mmap_data) ||
8125 (executable && (event->attr.mmap || event->attr.mmap2));
8126}
8127
8128static void perf_event_mmap_output(struct perf_event *event,
8129 void *data)
8130{
8131 struct perf_mmap_event *mmap_event = data;
8132 struct perf_output_handle handle;
8133 struct perf_sample_data sample;
8134 int size = mmap_event->event_id.header.size;
8135 u32 type = mmap_event->event_id.header.type;
8136 bool use_build_id;
8137 int ret;
8138
8139 if (!perf_event_mmap_match(event, data))
8140 return;
8141
8142 if (event->attr.mmap2) {
8143 mmap_event->event_id.header.type = PERF_RECORD_MMAP2;
8144 mmap_event->event_id.header.size += sizeof(mmap_event->maj);
8145 mmap_event->event_id.header.size += sizeof(mmap_event->min);
8146 mmap_event->event_id.header.size += sizeof(mmap_event->ino);
8147 mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
8148 mmap_event->event_id.header.size += sizeof(mmap_event->prot);
8149 mmap_event->event_id.header.size += sizeof(mmap_event->flags);
8150 }
8151
8152 perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
8153 ret = perf_output_begin(&handle, &sample, event,
8154 mmap_event->event_id.header.size);
8155 if (ret)
8156 goto out;
8157
8158 mmap_event->event_id.pid = perf_event_pid(event, current);
8159 mmap_event->event_id.tid = perf_event_tid(event, current);
8160
8161 use_build_id = event->attr.build_id && mmap_event->build_id_size;
8162
8163 if (event->attr.mmap2 && use_build_id)
8164 mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID;
8165
8166 perf_output_put(&handle, mmap_event->event_id);
8167
8168 if (event->attr.mmap2) {
8169 if (use_build_id) {
8170 u8 size[4] = { (u8) mmap_event->build_id_size, 0, 0, 0 };
8171
8172 __output_copy(&handle, size, 4);
8173 __output_copy(&handle, mmap_event->build_id, BUILD_ID_SIZE_MAX);
8174 } else {
8175 perf_output_put(&handle, mmap_event->maj);
8176 perf_output_put(&handle, mmap_event->min);
8177 perf_output_put(&handle, mmap_event->ino);
8178 perf_output_put(&handle, mmap_event->ino_generation);
8179 }
8180 perf_output_put(&handle, mmap_event->prot);
8181 perf_output_put(&handle, mmap_event->flags);
8182 }
8183
8184 __output_copy(&handle, mmap_event->file_name,
8185 mmap_event->file_size);
8186
8187 perf_event__output_id_sample(event, &handle, &sample);
8188
8189 perf_output_end(&handle);
8190out:
8191 mmap_event->event_id.header.size = size;
8192 mmap_event->event_id.header.type = type;
8193}
8194
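/*
 * Fill in @mmap_event from the vma: protection/flags are derived from
 * vm_flags, and the name is resolved with a chain of fallbacks --
 * file path, vm_ops->name(), arch_vma_name(), "[heap]"/"[stack]",
 * and finally "//anon". The name is zero-padded to a u64 multiple
 * before the record is broadcast via perf_iterate_sb().
 */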
8195static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
8196{
8197 struct vm_area_struct *vma = mmap_event->vma;
8198 struct file *file = vma->vm_file;
8199 int maj = 0, min = 0;
8200 u64 ino = 0, gen = 0;
8201 u32 prot = 0, flags = 0;
8202 unsigned int size;
8203 char tmp[16];
8204 char *buf = NULL;
8205 char *name;
8206
8207 if (vma->vm_flags & VM_READ)
8208 prot |= PROT_READ;
8209 if (vma->vm_flags & VM_WRITE)
8210 prot |= PROT_WRITE;
8211 if (vma->vm_flags & VM_EXEC)
8212 prot |= PROT_EXEC;
8213
8214 if (vma->vm_flags & VM_MAYSHARE)
8215 flags = MAP_SHARED;
8216 else
8217 flags = MAP_PRIVATE;
8218
8219 if (vma->vm_flags & VM_DENYWRITE)
8220 flags |= MAP_DENYWRITE;
8221 if (vma->vm_flags & VM_MAYEXEC)
8222 flags |= MAP_EXECUTABLE;
8223 if (vma->vm_flags & VM_LOCKED)
8224 flags |= MAP_LOCKED;
8225 if (vma->vm_flags & VM_HUGETLB)
8226 flags |= MAP_HUGETLB;
8227
8228 if (file) {
8229 struct inode *inode;
8230 dev_t dev;
8231
8232 buf = kmalloc(PATH_MAX, GFP_KERNEL);
8233 if (!buf) {
8234 name = "//enomem";
8235 goto cpy_name;
8236 }
8237
8238
8239
8240
8241
8242 name = file_path(file, buf, PATH_MAX - sizeof(u64));
8243 if (IS_ERR(name)) {
8244 name = "//toolong";
8245 goto cpy_name;
8246 }
8247 inode = file_inode(vma->vm_file);
8248 dev = inode->i_sb->s_dev;
8249 ino = inode->i_ino;
8250 gen = inode->i_generation;
8251 maj = MAJOR(dev);
8252 min = MINOR(dev);
8253
8254 goto got_name;
8255 } else {
8256 if (vma->vm_ops && vma->vm_ops->name) {
8257 name = (char *) vma->vm_ops->name(vma);
8258 if (name)
8259 goto cpy_name;
8260 }
8261
8262 name = (char *)arch_vma_name(vma);
8263 if (name)
8264 goto cpy_name;
8265
8266 if (vma->vm_start <= vma->vm_mm->start_brk &&
8267 vma->vm_end >= vma->vm_mm->brk) {
8268 name = "[heap]";
8269 goto cpy_name;
8270 }
8271 if (vma->vm_start <= vma->vm_mm->start_stack &&
8272 vma->vm_end >= vma->vm_mm->start_stack) {
8273 name = "[stack]";
8274 goto cpy_name;
8275 }
8276
8277 name = "//anon";
8278 goto cpy_name;
8279 }
8280
8281cpy_name:
8282 strlcpy(tmp, name, sizeof(tmp));
8283 name = tmp;
8284got_name:
8285
8286
8287
8288
8289
8290 size = strlen(name)+1;
8291 while (!IS_ALIGNED(size, sizeof(u64)))
8292 name[size++] = '\0';
8293
8294 mmap_event->file_name = name;
8295 mmap_event->file_size = size;
8296 mmap_event->maj = maj;
8297 mmap_event->min = min;
8298 mmap_event->ino = ino;
8299 mmap_event->ino_generation = gen;
8300 mmap_event->prot = prot;
8301 mmap_event->flags = flags;
8302
8303 if (!(vma->vm_flags & VM_EXEC))
8304 mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
8305
8306 mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
8307
8308 if (atomic_read(&nr_build_id_events))
8309 build_id_parse(vma, mmap_event->build_id, &mmap_event->build_id_size);
8310
8311 perf_iterate_sb(perf_event_mmap_output,
8312 mmap_event,
8313 NULL);
8314
8315 kfree(buf);
8316}
8317
/*
 * Check whether inode and address range match filter criteria.
 */
8321static bool perf_addr_filter_match(struct perf_addr_filter *filter,
8322 struct file *file, unsigned long offset,
8323 unsigned long size)
8324{
8325
8326 if (!filter->path.dentry)
8327 return false;
8328
8329 if (d_inode(filter->path.dentry) != file_inode(file))
8330 return false;
8331
8332 if (filter->offset > offset + size)
8333 return false;
8334
8335 if (filter->offset + filter->size < offset)
8336 return false;
8337
8338 return true;
8339}
8340
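/*
 * If @filter matches @vma's backing file, compute the virtual address
 * range (start/size) where the filter and the mapping overlap and
 * store it in @fr. Returns true when the range was updated.
 */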
8341static bool perf_addr_filter_vma_adjust(struct perf_addr_filter *filter,
8342 struct vm_area_struct *vma,
8343 struct perf_addr_filter_range *fr)
8344{
8345 unsigned long vma_size = vma->vm_end - vma->vm_start;
8346 unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
8347 struct file *file = vma->vm_file;
8348
8349 if (!perf_addr_filter_match(filter, file, off, vma_size))
8350 return false;
8351
8352 if (filter->offset < off) {
8353 fr->start = vma->vm_start;
8354 fr->size = min(vma_size, filter->size - (off - filter->offset));
8355 } else {
8356 fr->start = vma->vm_start + filter->offset - off;
8357 fr->size = min(vma->vm_end - fr->start, filter->size);
8358 }
8359
8360 return true;
8361}
8362
8363static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
8364{
8365 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
8366 struct vm_area_struct *vma = data;
8367 struct perf_addr_filter *filter;
8368 unsigned int restart = 0, count = 0;
8369 unsigned long flags;
8370
8371 if (!has_addr_filter(event))
8372 return;
8373
8374 if (!vma->vm_file)
8375 return;
8376
8377 raw_spin_lock_irqsave(&ifh->lock, flags);
8378 list_for_each_entry(filter, &ifh->list, entry) {
8379 if (perf_addr_filter_vma_adjust(filter, vma,
8380 &event->addr_filter_ranges[count]))
8381 restart++;
8382
8383 count++;
8384 }
8385
8386 if (restart)
8387 event->addr_filters_gen++;
8388 raw_spin_unlock_irqrestore(&ifh->lock, flags);
8389
8390 if (restart)
8391 perf_event_stop(event, 1);
8392}
8393
/*
 * Adjust all task's events' filters to the new vma
 */
8397static void perf_addr_filters_adjust(struct vm_area_struct *vma)
8398{
8399 struct perf_event_context *ctx;
8400 int ctxn;
8401
	/*
	 * Data tracing isn't supported yet and as such there is no need
	 * to keep track of anything that isn't related to executable code:
	 */
8406 if (!(vma->vm_flags & VM_EXEC))
8407 return;
8408
8409 rcu_read_lock();
8410 for_each_task_context_nr(ctxn) {
8411 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
8412 if (!ctx)
8413 continue;
8414
8415 perf_iterate_ctx(ctx, __perf_addr_filters_adjust, vma, true);
8416 }
8417 rcu_read_unlock();
8418}
8419
8420void perf_event_mmap(struct vm_area_struct *vma)
8421{
8422 struct perf_mmap_event mmap_event;
8423
8424 if (!atomic_read(&nr_mmap_events))
8425 return;
8426
8427 mmap_event = (struct perf_mmap_event){
8428 .vma = vma,
8429
8430
8431 .event_id = {
8432 .header = {
8433 .type = PERF_RECORD_MMAP,
8434 .misc = PERF_RECORD_MISC_USER,
8435
8436 },
8437
8438
8439 .start = vma->vm_start,
8440 .len = vma->vm_end - vma->vm_start,
8441 .pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT,
8442 },
8443
8444
8445
8446
8447
8448
8449 };
8450
8451 perf_addr_filters_adjust(vma);
8452 perf_event_mmap_event(&mmap_event);
8453}
8454
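/*
 * Emit a PERF_RECORD_AUX describing newly written AUX area data:
 * @head is the offset of the data within the AUX buffer, @size its
 * length, and @flags carries PERF_AUX_FLAG_* bits such as truncation
 * or overwrite status.
 */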
8455void perf_event_aux_event(struct perf_event *event, unsigned long head,
8456 unsigned long size, u64 flags)
8457{
8458 struct perf_output_handle handle;
8459 struct perf_sample_data sample;
8460 struct perf_aux_event {
8461 struct perf_event_header header;
8462 u64 offset;
8463 u64 size;
8464 u64 flags;
8465 } rec = {
8466 .header = {
8467 .type = PERF_RECORD_AUX,
8468 .misc = 0,
8469 .size = sizeof(rec),
8470 },
8471 .offset = head,
8472 .size = size,
8473 .flags = flags,
8474 };
8475 int ret;
8476
8477 perf_event_header__init_id(&rec.header, &sample, event);
8478 ret = perf_output_begin(&handle, &sample, event, rec.header.size);
8479
8480 if (ret)
8481 return;
8482
8483 perf_output_put(&handle, rec);
8484 perf_event__output_id_sample(event, &handle, &sample);
8485
8486 perf_output_end(&handle);
8487}
8488
/*
 * Lost/dropped samples logging
 */
8492void perf_log_lost_samples(struct perf_event *event, u64 lost)
8493{
8494 struct perf_output_handle handle;
8495 struct perf_sample_data sample;
8496 int ret;
8497
8498 struct {
8499 struct perf_event_header header;
8500 u64 lost;
8501 } lost_samples_event = {
8502 .header = {
8503 .type = PERF_RECORD_LOST_SAMPLES,
8504 .misc = 0,
8505 .size = sizeof(lost_samples_event),
8506 },
8507 .lost = lost,
8508 };
8509
8510 perf_event_header__init_id(&lost_samples_event.header, &sample, event);
8511
8512 ret = perf_output_begin(&handle, &sample, event,
8513 lost_samples_event.header.size);
8514 if (ret)
8515 return;
8516
8517 perf_output_put(&handle, lost_samples_event);
8518 perf_event__output_id_sample(event, &handle, &sample);
8519 perf_output_end(&handle);
8520}
8521
/*
 * context_switch tracking
 */

8526struct perf_switch_event {
8527 struct task_struct *task;
8528 struct task_struct *next_prev;
8529
8530 struct {
8531 struct perf_event_header header;
8532 u32 next_prev_pid;
8533 u32 next_prev_tid;
8534 } event_id;
8535};
8536
8537static int perf_event_switch_match(struct perf_event *event)
8538{
8539 return event->attr.context_switch;
8540}
8541
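/*
 * Per-task events get the compact PERF_RECORD_SWITCH (header only),
 * while CPU-wide events get PERF_RECORD_SWITCH_CPU_WIDE, which also
 * carries the pid/tid of the task being switched in or out.
 */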
8542static void perf_event_switch_output(struct perf_event *event, void *data)
8543{
8544 struct perf_switch_event *se = data;
8545 struct perf_output_handle handle;
8546 struct perf_sample_data sample;
8547 int ret;
8548
8549 if (!perf_event_switch_match(event))
8550 return;
8551
8552
8553 if (event->ctx->task) {
8554 se->event_id.header.type = PERF_RECORD_SWITCH;
8555 se->event_id.header.size = sizeof(se->event_id.header);
8556 } else {
8557 se->event_id.header.type = PERF_RECORD_SWITCH_CPU_WIDE;
8558 se->event_id.header.size = sizeof(se->event_id);
8559 se->event_id.next_prev_pid =
8560 perf_event_pid(event, se->next_prev);
8561 se->event_id.next_prev_tid =
8562 perf_event_tid(event, se->next_prev);
8563 }
8564
8565 perf_event_header__init_id(&se->event_id.header, &sample, event);
8566
8567 ret = perf_output_begin(&handle, &sample, event, se->event_id.header.size);
8568 if (ret)
8569 return;
8570
8571 if (event->ctx->task)
8572 perf_output_put(&handle, se->event_id.header);
8573 else
8574 perf_output_put(&handle, se->event_id);
8575
8576 perf_event__output_id_sample(event, &handle, &sample);
8577
8578 perf_output_end(&handle);
8579}
8580
8581static void perf_event_switch(struct task_struct *task,
8582 struct task_struct *next_prev, bool sched_in)
8583{
8584 struct perf_switch_event switch_event;
8585
8586
8587
8588 switch_event = (struct perf_switch_event){
8589 .task = task,
8590 .next_prev = next_prev,
8591 .event_id = {
8592 .header = {
8593
8594 .misc = sched_in ? 0 : PERF_RECORD_MISC_SWITCH_OUT,
8595
8596 },
8597
8598
8599 },
8600 };
8601
8602 if (!sched_in && task->state == TASK_RUNNING)
8603 switch_event.event_id.header.misc |=
8604 PERF_RECORD_MISC_SWITCH_OUT_PREEMPT;
8605
8606 perf_iterate_sb(perf_event_switch_output,
8607 &switch_event,
8608 NULL);
8609}
8610
/*
 * IRQ throttle logging
 */

8615static void perf_log_throttle(struct perf_event *event, int enable)
8616{
8617 struct perf_output_handle handle;
8618 struct perf_sample_data sample;
8619 int ret;
8620
8621 struct {
8622 struct perf_event_header header;
8623 u64 time;
8624 u64 id;
8625 u64 stream_id;
8626 } throttle_event = {
8627 .header = {
8628 .type = PERF_RECORD_THROTTLE,
8629 .misc = 0,
8630 .size = sizeof(throttle_event),
8631 },
8632 .time = perf_event_clock(event),
8633 .id = primary_event_id(event),
8634 .stream_id = event->id,
8635 };
8636
8637 if (enable)
8638 throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
8639
8640 perf_event_header__init_id(&throttle_event.header, &sample, event);
8641
8642 ret = perf_output_begin(&handle, &sample, event,
8643 throttle_event.header.size);
8644 if (ret)
8645 return;
8646
8647 perf_output_put(&handle, throttle_event);
8648 perf_event__output_id_sample(event, &handle, &sample);
8649 perf_output_end(&handle);
8650}
8651
/*
 * ksymbol register/unregister tracking
 */

8656struct perf_ksymbol_event {
8657 const char *name;
8658 int name_len;
8659 struct {
8660 struct perf_event_header header;
8661 u64 addr;
8662 u32 len;
8663 u16 ksym_type;
8664 u16 flags;
8665 } event_id;
8666};
8667
8668static int perf_event_ksymbol_match(struct perf_event *event)
8669{
8670 return event->attr.ksymbol;
8671}
8672
8673static void perf_event_ksymbol_output(struct perf_event *event, void *data)
8674{
8675 struct perf_ksymbol_event *ksymbol_event = data;
8676 struct perf_output_handle handle;
8677 struct perf_sample_data sample;
8678 int ret;
8679
8680 if (!perf_event_ksymbol_match(event))
8681 return;
8682
8683 perf_event_header__init_id(&ksymbol_event->event_id.header,
8684 &sample, event);
8685 ret = perf_output_begin(&handle, &sample, event,
8686 ksymbol_event->event_id.header.size);
8687 if (ret)
8688 return;
8689
8690 perf_output_put(&handle, ksymbol_event->event_id);
8691 __output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len);
8692 perf_event__output_id_sample(event, &handle, &sample);
8693
8694 perf_output_end(&handle);
8695}
8696
8697void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
8698 const char *sym)
8699{
8700 struct perf_ksymbol_event ksymbol_event;
8701 char name[KSYM_NAME_LEN];
8702 u16 flags = 0;
8703 int name_len;
8704
8705 if (!atomic_read(&nr_ksymbol_events))
8706 return;
8707
8708 if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX ||
8709 ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN)
8710 goto err;
8711
8712 strlcpy(name, sym, KSYM_NAME_LEN);
8713 name_len = strlen(name) + 1;
8714 while (!IS_ALIGNED(name_len, sizeof(u64)))
8715 name[name_len++] = '\0';
8716 BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64));
8717
8718 if (unregister)
8719 flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER;
8720
8721 ksymbol_event = (struct perf_ksymbol_event){
8722 .name = name,
8723 .name_len = name_len,
8724 .event_id = {
8725 .header = {
8726 .type = PERF_RECORD_KSYMBOL,
8727 .size = sizeof(ksymbol_event.event_id) +
8728 name_len,
8729 },
8730 .addr = addr,
8731 .len = len,
8732 .ksym_type = ksym_type,
8733 .flags = flags,
8734 },
8735 };
8736
8737 perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL);
8738 return;
8739err:
8740 WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
8741}
8742
/*
 * bpf program load/unload tracking
 */

8747struct perf_bpf_event {
8748 struct bpf_prog *prog;
8749 struct {
8750 struct perf_event_header header;
8751 u16 type;
8752 u16 flags;
8753 u32 id;
8754 u8 tag[BPF_TAG_SIZE];
8755 } event_id;
8756};
8757
8758static int perf_event_bpf_match(struct perf_event *event)
8759{
8760 return event->attr.bpf_event;
8761}
8762
8763static void perf_event_bpf_output(struct perf_event *event, void *data)
8764{
8765 struct perf_bpf_event *bpf_event = data;
8766 struct perf_output_handle handle;
8767 struct perf_sample_data sample;
8768 int ret;
8769
8770 if (!perf_event_bpf_match(event))
8771 return;
8772
8773 perf_event_header__init_id(&bpf_event->event_id.header,
8774 &sample, event);
	ret = perf_output_begin(&handle, &sample, event,
8776 bpf_event->event_id.header.size);
8777 if (ret)
8778 return;
8779
8780 perf_output_put(&handle, bpf_event->event_id);
8781 perf_event__output_id_sample(event, &handle, &sample);
8782
8783 perf_output_end(&handle);
8784}
8785
8786static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
8787 enum perf_bpf_event_type type)
8788{
8789 bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
8790 int i;
8791
8792 if (prog->aux->func_cnt == 0) {
8793 perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
8794 (u64)(unsigned long)prog->bpf_func,
8795 prog->jited_len, unregister,
8796 prog->aux->ksym.name);
8797 } else {
8798 for (i = 0; i < prog->aux->func_cnt; i++) {
8799 struct bpf_prog *subprog = prog->aux->func[i];
8800
8801 perf_event_ksymbol(
8802 PERF_RECORD_KSYMBOL_TYPE_BPF,
8803 (u64)(unsigned long)subprog->bpf_func,
8804 subprog->jited_len, unregister,
8805 prog->aux->ksym.name);
8806 }
8807 }
8808}
8809
8810void perf_event_bpf_event(struct bpf_prog *prog,
8811 enum perf_bpf_event_type type,
8812 u16 flags)
8813{
8814 struct perf_bpf_event bpf_event;
8815
8816 if (type <= PERF_BPF_EVENT_UNKNOWN ||
8817 type >= PERF_BPF_EVENT_MAX)
8818 return;
8819
8820 switch (type) {
8821 case PERF_BPF_EVENT_PROG_LOAD:
8822 case PERF_BPF_EVENT_PROG_UNLOAD:
8823 if (atomic_read(&nr_ksymbol_events))
8824 perf_event_bpf_emit_ksymbols(prog, type);
8825 break;
8826 default:
8827 break;
8828 }
8829
8830 if (!atomic_read(&nr_bpf_events))
8831 return;
8832
8833 bpf_event = (struct perf_bpf_event){
8834 .prog = prog,
8835 .event_id = {
8836 .header = {
8837 .type = PERF_RECORD_BPF_EVENT,
8838 .size = sizeof(bpf_event.event_id),
8839 },
8840 .type = type,
8841 .flags = flags,
8842 .id = prog->aux->id,
8843 },
8844 };
8845
8846 BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64));
8847
8848 memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
8849 perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
8850}
8851
8852struct perf_text_poke_event {
8853 const void *old_bytes;
8854 const void *new_bytes;
8855 size_t pad;
8856 u16 old_len;
8857 u16 new_len;
8858
8859 struct {
8860 struct perf_event_header header;
8861
8862 u64 addr;
8863 } event_id;
8864};
8865
8866static int perf_event_text_poke_match(struct perf_event *event)
8867{
8868 return event->attr.text_poke;
8869}
8870
8871static void perf_event_text_poke_output(struct perf_event *event, void *data)
8872{
8873 struct perf_text_poke_event *text_poke_event = data;
8874 struct perf_output_handle handle;
8875 struct perf_sample_data sample;
8876 u64 padding = 0;
8877 int ret;
8878
8879 if (!perf_event_text_poke_match(event))
8880 return;
8881
8882 perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event);
8883
8884 ret = perf_output_begin(&handle, &sample, event,
8885 text_poke_event->event_id.header.size);
8886 if (ret)
8887 return;
8888
8889 perf_output_put(&handle, text_poke_event->event_id);
8890 perf_output_put(&handle, text_poke_event->old_len);
8891 perf_output_put(&handle, text_poke_event->new_len);
8892
8893 __output_copy(&handle, text_poke_event->old_bytes, text_poke_event->old_len);
8894 __output_copy(&handle, text_poke_event->new_bytes, text_poke_event->new_len);
8895
8896 if (text_poke_event->pad)
8897 __output_copy(&handle, &padding, text_poke_event->pad);
8898
8899 perf_event__output_id_sample(event, &handle, &sample);
8900
8901 perf_output_end(&handle);
8902}
8903
8904void perf_event_text_poke(const void *addr, const void *old_bytes,
8905 size_t old_len, const void *new_bytes, size_t new_len)
8906{
8907 struct perf_text_poke_event text_poke_event;
8908 size_t tot, pad;
8909
8910 if (!atomic_read(&nr_text_poke_events))
8911 return;
8912
8913 tot = sizeof(text_poke_event.old_len) + old_len;
8914 tot += sizeof(text_poke_event.new_len) + new_len;
8915 pad = ALIGN(tot, sizeof(u64)) - tot;
8916
8917 text_poke_event = (struct perf_text_poke_event){
8918 .old_bytes = old_bytes,
8919 .new_bytes = new_bytes,
8920 .pad = pad,
8921 .old_len = old_len,
8922 .new_len = new_len,
8923 .event_id = {
8924 .header = {
8925 .type = PERF_RECORD_TEXT_POKE,
8926 .misc = PERF_RECORD_MISC_KERNEL,
8927 .size = sizeof(text_poke_event.event_id) + tot + pad,
8928 },
8929 .addr = (unsigned long)addr,
8930 },
8931 };
8932
8933 perf_iterate_sb(perf_event_text_poke_output, &text_poke_event, NULL);
8934}
8935
8936void perf_event_itrace_started(struct perf_event *event)
8937{
8938 event->attach_state |= PERF_ATTACH_ITRACE;
8939}
8940
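/*
 * Emit PERF_RECORD_ITRACE_START the first time an instruction-tracing
 * PMU (PERF_PMU_CAP_ITRACE) starts for this event; PERF_ATTACH_ITRACE
 * makes sure it is only logged once per event.
 */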
8941static void perf_log_itrace_start(struct perf_event *event)
8942{
8943 struct perf_output_handle handle;
8944 struct perf_sample_data sample;
8945 struct perf_aux_event {
8946 struct perf_event_header header;
8947 u32 pid;
8948 u32 tid;
8949 } rec;
8950 int ret;
8951
8952 if (event->parent)
8953 event = event->parent;
8954
8955 if (!(event->pmu->capabilities & PERF_PMU_CAP_ITRACE) ||
8956 event->attach_state & PERF_ATTACH_ITRACE)
8957 return;
8958
8959 rec.header.type = PERF_RECORD_ITRACE_START;
8960 rec.header.misc = 0;
8961 rec.header.size = sizeof(rec);
8962 rec.pid = perf_event_pid(event, current);
8963 rec.tid = perf_event_tid(event, current);
8964
8965 perf_event_header__init_id(&rec.header, &sample, event);
8966 ret = perf_output_begin(&handle, &sample, event, rec.header.size);
8967
8968 if (ret)
8969 return;
8970
8971 perf_output_put(&handle, rec);
8972 perf_event__output_id_sample(event, &handle, &sample);
8973
8974 perf_output_end(&handle);
8975}
8976
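/*
 * Account one PMU interrupt against the per-tick budget. When
 * @throttle is set and the event exceeds max_samples_per_tick, it is
 * throttled (PERF_RECORD_THROTTLE) until the next timer tick. For
 * freq-based events the sampling period is also re-adjusted here.
 */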
8977static int
8978__perf_event_account_interrupt(struct perf_event *event, int throttle)
8979{
8980 struct hw_perf_event *hwc = &event->hw;
8981 int ret = 0;
8982 u64 seq;
8983
8984 seq = __this_cpu_read(perf_throttled_seq);
8985 if (seq != hwc->interrupts_seq) {
8986 hwc->interrupts_seq = seq;
8987 hwc->interrupts = 1;
8988 } else {
8989 hwc->interrupts++;
8990 if (unlikely(throttle
8991 && hwc->interrupts >= max_samples_per_tick)) {
8992 __this_cpu_inc(perf_throttled_count);
8993 tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
8994 hwc->interrupts = MAX_INTERRUPTS;
8995 perf_log_throttle(event, 0);
8996 ret = 1;
8997 }
8998 }
8999
9000 if (event->attr.freq) {
9001 u64 now = perf_clock();
9002 s64 delta = now - hwc->freq_time_stamp;
9003
9004 hwc->freq_time_stamp = now;
9005
9006 if (delta > 0 && delta < 2*TICK_NSEC)
9007 perf_adjust_period(event, delta, hwc->last_period, true);
9008 }
9009
9010 return ret;
9011}
9012
9013int perf_event_account_interrupt(struct perf_event *event)
9014{
9015 return __perf_event_account_interrupt(event, 1);
9016}
9017
/*
 * Generic event overflow handling, sampling.
 */
9021
9022static int __perf_event_overflow(struct perf_event *event,
9023 int throttle, struct perf_sample_data *data,
9024 struct pt_regs *regs)
9025{
9026 int events = atomic_read(&event->event_limit);
9027 int ret = 0;
9028
9029
9030
9031
9032
9033 if (unlikely(!is_sampling_event(event)))
9034 return 0;
9035
9036 ret = __perf_event_account_interrupt(event, throttle);
9037
9038
9039
9040
9041
9042
9043 event->pending_kill = POLL_IN;
9044 if (events && atomic_dec_and_test(&event->event_limit)) {
9045 ret = 1;
9046 event->pending_kill = POLL_HUP;
9047
9048 perf_event_disable_inatomic(event);
9049 }
9050
9051 READ_ONCE(event->overflow_handler)(event, data, regs);
9052
9053 if (*perf_event_fasync(event) && event->pending_kill) {
9054 event->pending_wakeup = 1;
9055 irq_work_queue(&event->pending);
9056 }
9057
9058 return ret;
9059}
9060
9061int perf_event_overflow(struct perf_event *event,
9062 struct perf_sample_data *data,
9063 struct pt_regs *regs)
9064{
9065 return __perf_event_overflow(event, 1, data, regs);
9066}
9067
/*
 * Generic software event infrastructure
 */
9071
9072struct swevent_htable {
9073 struct swevent_hlist *swevent_hlist;
9074 struct mutex hlist_mutex;
9075 int hlist_refcount;
9076
9077
9078 int recursion[PERF_NR_CONTEXTS];
9079};
9080
9081static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
9082
/*
 * We directly increment event->count and keep a second value in
 * event->hw.period_left to count intervals. This period event
 * is kept in the range [-sample_period, 0] so that we can use the
 * sign as trigger.
 */
9089
9090u64 perf_swevent_set_period(struct perf_event *event)
9091{
9092 struct hw_perf_event *hwc = &event->hw;
9093 u64 period = hwc->last_period;
9094 u64 nr, offset;
9095 s64 old, val;
9096
9097 hwc->last_period = hwc->sample_period;
9098
9099again:
9100 old = val = local64_read(&hwc->period_left);
9101 if (val < 0)
9102 return 0;
9103
9104 nr = div64_u64(period + val, period);
9105 offset = nr * period;
9106 val -= offset;
9107 if (local64_cmpxchg(&hwc->period_left, old, val) != old)
9108 goto again;
9109
9110 return nr;
9111}
9112
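/*
 * Deliver @overflow period expirations to the generic overflow
 * handler. Only the first iteration runs unthrottled; if
 * __perf_event_overflow() asks us to stop (throttling or event
 * limit), the remaining overflows are dropped.
 */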
9113static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
9114 struct perf_sample_data *data,
9115 struct pt_regs *regs)
9116{
9117 struct hw_perf_event *hwc = &event->hw;
9118 int throttle = 0;
9119
9120 if (!overflow)
9121 overflow = perf_swevent_set_period(event);
9122
9123 if (hwc->interrupts == MAX_INTERRUPTS)
9124 return;
9125
9126 for (; overflow; overflow--) {
9127 if (__perf_event_overflow(event, throttle,
9128 data, regs)) {
9129
9130
9131
9132
9133 break;
9134 }
9135 throttle = 1;
9136 }
9137}
9138
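/*
 * Count a software event occurrence: add @nr to the event count and,
 * for sampling events with registers available, check whether the
 * sample period has elapsed and overflow handling should run.
 */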
9139static void perf_swevent_event(struct perf_event *event, u64 nr,
9140 struct perf_sample_data *data,
9141 struct pt_regs *regs)
9142{
9143 struct hw_perf_event *hwc = &event->hw;
9144
9145 local64_add(nr, &event->count);
9146
9147 if (!regs)
9148 return;
9149
9150 if (!is_sampling_event(event))
9151 return;
9152
9153 if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
9154 data->period = nr;
9155 return perf_swevent_overflow(event, 1, data, regs);
9156 } else
9157 data->period = event->hw.last_period;
9158
9159 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
9160 return perf_swevent_overflow(event, 1, data, regs);
9161
9162 if (local64_add_negative(nr, &hwc->period_left))
9163 return;
9164
9165 perf_swevent_overflow(event, 0, data, regs);
9166}
9167
9168static int perf_exclude_event(struct perf_event *event,
9169 struct pt_regs *regs)
9170{
9171 if (event->hw.state & PERF_HES_STOPPED)
9172 return 1;
9173
9174 if (regs) {
9175 if (event->attr.exclude_user && user_mode(regs))
9176 return 1;
9177
9178 if (event->attr.exclude_kernel && !user_mode(regs))
9179 return 1;
9180 }
9181
9182 return 0;
9183}
9184
9185static int perf_swevent_match(struct perf_event *event,
9186 enum perf_type_id type,
9187 u32 event_id,
9188 struct perf_sample_data *data,
9189 struct pt_regs *regs)
9190{
9191 if (event->attr.type != type)
9192 return 0;
9193
9194 if (event->attr.config != event_id)
9195 return 0;
9196
9197 if (perf_exclude_event(event, regs))
9198 return 0;
9199
9200 return 1;
9201}
9202
9203static inline u64 swevent_hash(u64 type, u32 event_id)
9204{
9205 u64 val = event_id | (type << 32);
9206
9207 return hash_64(val, SWEVENT_HLIST_BITS);
9208}
9209
9210static inline struct hlist_head *
9211__find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id)
9212{
9213 u64 hash = swevent_hash(type, event_id);
9214
9215 return &hlist->heads[hash];
9216}
9217
/* For the read side: events when they trigger */
9219static inline struct hlist_head *
9220find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id)
9221{
9222 struct swevent_hlist *hlist;
9223
9224 hlist = rcu_dereference(swhash->swevent_hlist);
9225 if (!hlist)
9226 return NULL;
9227
9228 return __find_swevent_head(hlist, type, event_id);
9229}
9230
/* For the event head insertion and removal in the hlist */
9232static inline struct hlist_head *
9233find_swevent_head(struct swevent_htable *swhash, struct perf_event *event)
9234{
9235 struct swevent_hlist *hlist;
9236 u32 event_id = event->attr.config;
9237 u64 type = event->attr.type;
9238
	/*
	 * Event scheduling is always serialized against hlist allocation
	 * and release. Which makes the protected version suitable here.
	 * The context lock guarantees that.
	 */
9244 hlist = rcu_dereference_protected(swhash->swevent_hlist,
9245 lockdep_is_held(&event->ctx->lock));
9246 if (!hlist)
9247 return NULL;
9248
9249 return __find_swevent_head(hlist, type, event_id);
9250}
9251
9252static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
9253 u64 nr,
9254 struct perf_sample_data *data,
9255 struct pt_regs *regs)
9256{
9257 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
9258 struct perf_event *event;
9259 struct hlist_head *head;
9260
9261 rcu_read_lock();
9262 head = find_swevent_head_rcu(swhash, type, event_id);
9263 if (!head)
9264 goto end;
9265
9266 hlist_for_each_entry_rcu(event, head, hlist_entry) {
9267 if (perf_swevent_match(event, type, event_id, data, regs))
9268 perf_swevent_event(event, nr, data, regs);
9269 }
9270end:
9271 rcu_read_unlock();
9272}
9273
9274DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);
9275
9276int perf_swevent_get_recursion_context(void)
9277{
9278 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
9279
9280 return get_recursion_context(swhash->recursion);
9281}
9282EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
9283
9284void perf_swevent_put_recursion_context(int rctx)
9285{
9286 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
9287
9288 put_recursion_context(swhash->recursion, rctx);
9289}
9290
9291void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
9292{
9293 struct perf_sample_data data;
9294
9295 if (WARN_ON_ONCE(!regs))
9296 return;
9297
9298 perf_sample_data_init(&data, addr, 0);
9299 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
9300}
9301
9302void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
9303{
9304 int rctx;
9305
9306 preempt_disable_notrace();
9307 rctx = perf_swevent_get_recursion_context();
9308 if (unlikely(rctx < 0))
9309 goto fail;
9310
9311 ___perf_sw_event(event_id, nr, regs, addr);
9312
9313 perf_swevent_put_recursion_context(rctx);
9314fail:
9315 preempt_enable_notrace();
9316}
9317
9318static void perf_swevent_read(struct perf_event *event)
9319{
9320}
9321
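/*
 * pmu::add callback for software events: prime the sample period and
 * hook the event into this CPU's software event hash list so that
 * do_perf_sw_event() can find it.
 */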
9322static int perf_swevent_add(struct perf_event *event, int flags)
9323{
9324 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
9325 struct hw_perf_event *hwc = &event->hw;
9326 struct hlist_head *head;
9327
9328 if (is_sampling_event(event)) {
9329 hwc->last_period = hwc->sample_period;
9330 perf_swevent_set_period(event);
9331 }
9332
9333 hwc->state = !(flags & PERF_EF_START);
9334
9335 head = find_swevent_head(swhash, event);
9336 if (WARN_ON_ONCE(!head))
9337 return -EINVAL;
9338
9339 hlist_add_head_rcu(&event->hlist_entry, head);
9340 perf_event_update_userpage(event);
9341
9342 return 0;
9343}
9344
9345static void perf_swevent_del(struct perf_event *event, int flags)
9346{
9347 hlist_del_rcu(&event->hlist_entry);
9348}
9349
9350static void perf_swevent_start(struct perf_event *event, int flags)
9351{
9352 event->hw.state = 0;
9353}
9354
9355static void perf_swevent_stop(struct perf_event *event, int flags)
9356{
9357 event->hw.state = PERF_HES_STOPPED;
9358}
9359
/* Deref the hlist from the update side */
9361static inline struct swevent_hlist *
9362swevent_hlist_deref(struct swevent_htable *swhash)
9363{
9364 return rcu_dereference_protected(swhash->swevent_hlist,
9365 lockdep_is_held(&swhash->hlist_mutex));
9366}
9367
9368static void swevent_hlist_release(struct swevent_htable *swhash)
9369{
9370 struct swevent_hlist *hlist = swevent_hlist_deref(swhash);
9371
9372 if (!hlist)
9373 return;
9374
9375 RCU_INIT_POINTER(swhash->swevent_hlist, NULL);
9376 kfree_rcu(hlist, rcu_head);
9377}
9378
9379static void swevent_hlist_put_cpu(int cpu)
9380{
9381 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
9382
9383 mutex_lock(&swhash->hlist_mutex);
9384
9385 if (!--swhash->hlist_refcount)
9386 swevent_hlist_release(swhash);
9387
9388 mutex_unlock(&swhash->hlist_mutex);
9389}
9390
9391static void swevent_hlist_put(void)
9392{
9393 int cpu;
9394
9395 for_each_possible_cpu(cpu)
9396 swevent_hlist_put_cpu(cpu);
9397}
9398
9399static int swevent_hlist_get_cpu(int cpu)
9400{
9401 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
9402 int err = 0;
9403
9404 mutex_lock(&swhash->hlist_mutex);
9405 if (!swevent_hlist_deref(swhash) &&
9406 cpumask_test_cpu(cpu, perf_online_mask)) {
9407 struct swevent_hlist *hlist;
9408
9409 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
9410 if (!hlist) {
9411 err = -ENOMEM;
9412 goto exit;
9413 }
9414 rcu_assign_pointer(swhash->swevent_hlist, hlist);
9415 }
9416 swhash->hlist_refcount++;
9417exit:
9418 mutex_unlock(&swhash->hlist_mutex);
9419
9420 return err;
9421}
9422
9423static int swevent_hlist_get(void)
9424{
9425 int err, cpu, failed_cpu;
9426
9427 mutex_lock(&pmus_lock);
9428 for_each_possible_cpu(cpu) {
9429 err = swevent_hlist_get_cpu(cpu);
9430 if (err) {
9431 failed_cpu = cpu;
9432 goto fail;
9433 }
9434 }
9435 mutex_unlock(&pmus_lock);
9436 return 0;
9437fail:
9438 for_each_possible_cpu(cpu) {
9439 if (cpu == failed_cpu)
9440 break;
9441 swevent_hlist_put_cpu(cpu);
9442 }
9443 mutex_unlock(&pmus_lock);
9444 return err;
9445}
9446
9447struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
9448
9449static void sw_perf_event_destroy(struct perf_event *event)
9450{
9451 u64 event_id = event->attr.config;
9452
9453 WARN_ON(event->parent);
9454
9455 static_key_slow_dec(&perf_swevent_enabled[event_id]);
9456 swevent_hlist_put();
9457}
9458
9459static int perf_swevent_init(struct perf_event *event)
9460{
9461 u64 event_id = event->attr.config;
9462
9463 if (event->attr.type != PERF_TYPE_SOFTWARE)
9464 return -ENOENT;
9465
9466
9467
9468
9469 if (has_branch_stack(event))
9470 return -EOPNOTSUPP;
9471
9472 switch (event_id) {
9473 case PERF_COUNT_SW_CPU_CLOCK:
9474 case PERF_COUNT_SW_TASK_CLOCK:
9475 return -ENOENT;
9476
9477 default:
9478 break;
9479 }
9480
9481 if (event_id >= PERF_COUNT_SW_MAX)
9482 return -ENOENT;
9483
9484 if (!event->parent) {
9485 int err;
9486
9487 err = swevent_hlist_get();
9488 if (err)
9489 return err;
9490
9491 static_key_slow_inc(&perf_swevent_enabled[event_id]);
9492 event->destroy = sw_perf_event_destroy;
9493 }
9494
9495 return 0;
9496}
9497
9498static struct pmu perf_swevent = {
9499 .task_ctx_nr = perf_sw_context,
9500
9501 .capabilities = PERF_PMU_CAP_NO_NMI,
9502
9503 .event_init = perf_swevent_init,
9504 .add = perf_swevent_add,
9505 .del = perf_swevent_del,
9506 .start = perf_swevent_start,
9507 .stop = perf_swevent_stop,
9508 .read = perf_swevent_read,
9509};
9510
9511#ifdef CONFIG_EVENT_TRACING
9512
9513static int perf_tp_filter_match(struct perf_event *event,
9514 struct perf_sample_data *data)
9515{
9516 void *record = data->raw->frag.data;
9517
9518
9519 if (event->parent)
9520 event = event->parent;
9521
9522 if (likely(!event->filter) || filter_match_preds(event->filter, record))
9523 return 1;
9524 return 0;
9525}
9526
9527static int perf_tp_event_match(struct perf_event *event,
9528 struct perf_sample_data *data,
9529 struct pt_regs *regs)
9530{
9531 if (event->hw.state & PERF_HES_STOPPED)
9532 return 0;
9533
9534
9535
9536 if (event->attr.exclude_kernel && !user_mode(regs))
9537 return 0;
9538
9539 if (!perf_tp_filter_match(event, data))
9540 return 0;
9541
9542 return 1;
9543}
9544
9545void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
9546 struct trace_event_call *call, u64 count,
9547 struct pt_regs *regs, struct hlist_head *head,
9548 struct task_struct *task)
9549{
9550 if (bpf_prog_array_valid(call)) {
9551 *(struct pt_regs **)raw_data = regs;
9552 if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) {
9553 perf_swevent_put_recursion_context(rctx);
9554 return;
9555 }
9556 }
9557 perf_tp_event(call->event.type, count, raw_data, size, regs, head,
9558 rctx, task);
9559}
9560EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit);
9561
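/*
 * Called from the tracepoint glue with a fully formed raw record.
 * The record is delivered to every event on @head and, if a target
 * @task other than current was given, also to matching tracepoint
 * events in that task's software context on this CPU.
 */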
9562void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
9563 struct pt_regs *regs, struct hlist_head *head, int rctx,
9564 struct task_struct *task)
9565{
9566 struct perf_sample_data data;
9567 struct perf_event *event;
9568
9569 struct perf_raw_record raw = {
9570 .frag = {
9571 .size = entry_size,
9572 .data = record,
9573 },
9574 };
9575
9576 perf_sample_data_init(&data, 0, 0);
9577 data.raw = &raw;
9578
9579 perf_trace_buf_update(record, event_type);
9580
9581 hlist_for_each_entry_rcu(event, head, hlist_entry) {
9582 if (perf_tp_event_match(event, &data, regs))
9583 perf_swevent_event(event, count, &data, regs);
9584 }
9585
9586
9587
9588
9589
9590 if (task && task != current) {
9591 struct perf_event_context *ctx;
9592 struct trace_entry *entry = record;
9593
9594 rcu_read_lock();
9595 ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]);
9596 if (!ctx)
9597 goto unlock;
9598
9599 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
9600 if (event->cpu != smp_processor_id())
9601 continue;
9602 if (event->attr.type != PERF_TYPE_TRACEPOINT)
9603 continue;
9604 if (event->attr.config != entry->type)
9605 continue;
9606 if (perf_tp_event_match(event, &data, regs))
9607 perf_swevent_event(event, count, &data, regs);
9608 }
9609unlock:
9610 rcu_read_unlock();
9611 }
9612
9613 perf_swevent_put_recursion_context(rctx);
9614}
9615EXPORT_SYMBOL_GPL(perf_tp_event);
9616
9617static void tp_perf_event_destroy(struct perf_event *event)
9618{
9619 perf_trace_destroy(event);
9620}
9621
9622static int perf_tp_event_init(struct perf_event *event)
9623{
9624 int err;
9625
9626 if (event->attr.type != PERF_TYPE_TRACEPOINT)
9627 return -ENOENT;
9628
9629
9630
9631
9632 if (has_branch_stack(event))
9633 return -EOPNOTSUPP;
9634
9635 err = perf_trace_init(event);
9636 if (err)
9637 return err;
9638
9639 event->destroy = tp_perf_event_destroy;
9640
9641 return 0;
9642}
9643
9644static struct pmu perf_tracepoint = {
9645 .task_ctx_nr = perf_sw_context,
9646
9647 .event_init = perf_tp_event_init,
9648 .add = perf_trace_add,
9649 .del = perf_trace_del,
9650 .start = perf_swevent_start,
9651 .stop = perf_swevent_stop,
9652 .read = perf_swevent_read,
9653};
9654
9655#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
/*
 * Flags in config, used by dynamic PMU kprobe and uprobe
 * The flags should match following PMU_FORMAT_ATTR().
 *
 * PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe
 *                               if not set, create kprobe/uprobe
 */
9663enum perf_probe_config {
9664 PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0,
9665};
9666
9667PMU_FORMAT_ATTR(retprobe, "config:0");
9668
9669static struct attribute *probe_attrs[] = {
9670 &format_attr_retprobe.attr,
9671 NULL,
9672};
9673
9674static struct attribute_group probe_format_group = {
9675 .name = "format",
9676 .attrs = probe_attrs,
9677};
9678
9679static const struct attribute_group *probe_attr_groups[] = {
9680 &probe_format_group,
9681 NULL,
9682};
9683#endif
9684
9685#ifdef CONFIG_KPROBE_EVENTS
9686static int perf_kprobe_event_init(struct perf_event *event);
9687static struct pmu perf_kprobe = {
9688 .task_ctx_nr = perf_sw_context,
9689 .event_init = perf_kprobe_event_init,
9690 .add = perf_trace_add,
9691 .del = perf_trace_del,
9692 .start = perf_swevent_start,
9693 .stop = perf_swevent_stop,
9694 .read = perf_swevent_read,
9695 .attr_groups = probe_attr_groups,
9696};
9697
9698static int perf_kprobe_event_init(struct perf_event *event)
9699{
9700 int err;
9701 bool is_retprobe;
9702
9703 if (event->attr.type != perf_kprobe.type)
9704 return -ENOENT;
9705
9706 if (!perfmon_capable())
9707 return -EACCES;
9708
9709
9710
9711
9712 if (has_branch_stack(event))
9713 return -EOPNOTSUPP;
9714
9715 is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
9716 err = perf_kprobe_init(event, is_retprobe);
9717 if (err)
9718 return err;
9719
9720 event->destroy = perf_kprobe_destroy;
9721
9722 return 0;
9723}
9724#endif
9725
9726#ifdef CONFIG_UPROBE_EVENTS
9727static int perf_uprobe_event_init(struct perf_event *event);
9728static struct pmu perf_uprobe = {
9729 .task_ctx_nr = perf_sw_context,
9730 .event_init = perf_uprobe_event_init,
9731 .add = perf_trace_add,
9732 .del = perf_trace_del,
9733 .start = perf_swevent_start,
9734 .stop = perf_swevent_stop,
9735 .read = perf_swevent_read,
9736 .attr_groups = probe_attr_groups,
9737};
9738
9739static int perf_uprobe_event_init(struct perf_event *event)
9740{
9741 int err;
9742 bool is_retprobe;
9743
9744 if (event->attr.type != perf_uprobe.type)
9745 return -ENOENT;
9746
9747 if (!perfmon_capable())
9748 return -EACCES;
9749
9750
9751
9752
9753 if (has_branch_stack(event))
9754 return -EOPNOTSUPP;
9755
9756 is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
9757 err = perf_uprobe_init(event, is_retprobe);
9758 if (err)
9759 return err;
9760
9761 event->destroy = perf_uprobe_destroy;
9762
9763 return 0;
9764}
9765#endif
9766
9767static inline void perf_tp_register(void)
9768{
9769 perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
9770#ifdef CONFIG_KPROBE_EVENTS
9771 perf_pmu_register(&perf_kprobe, "kprobe", -1);
9772#endif
9773#ifdef CONFIG_UPROBE_EVENTS
9774 perf_pmu_register(&perf_uprobe, "uprobe", -1);
9775#endif
9776}
9777
9778static void perf_event_free_filter(struct perf_event *event)
9779{
9780 ftrace_profile_free_filter(event);
9781}
9782
9783#ifdef CONFIG_BPF_SYSCALL
9784static void bpf_overflow_handler(struct perf_event *event,
9785 struct perf_sample_data *data,
9786 struct pt_regs *regs)
9787{
9788 struct bpf_perf_event_data_kern ctx = {
9789 .data = data,
9790 .event = event,
9791 };
9792 int ret = 0;
9793
9794 ctx.regs = perf_arch_bpf_user_pt_regs(regs);
9795 if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
9796 goto out;
9797 rcu_read_lock();
9798 ret = BPF_PROG_RUN(event->prog, &ctx);
9799 rcu_read_unlock();
9800out:
9801 __this_cpu_dec(bpf_prog_active);
9802 if (!ret)
9803 return;
9804
9805 event->orig_overflow_handler(event, data, regs);
9806}
9807
9808static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
9809{
9810 struct bpf_prog *prog;
9811
9812 if (event->overflow_handler_context)
9813
9814 return -EINVAL;
9815
9816 if (event->prog)
9817 return -EEXIST;
9818
9819 prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
9820 if (IS_ERR(prog))
9821 return PTR_ERR(prog);
9822
9823 if (event->attr.precise_ip &&
9824 prog->call_get_stack &&
9825 (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY) ||
9826 event->attr.exclude_callchain_kernel ||
9827 event->attr.exclude_callchain_user)) {
9828
9829
9830
9831
9832
9833
9834
9835
9836
9837 bpf_prog_put(prog);
9838 return -EPROTO;
9839 }
9840
9841 event->prog = prog;
9842 event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
9843 WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
9844 return 0;
9845}
9846
9847static void perf_event_free_bpf_handler(struct perf_event *event)
9848{
9849 struct bpf_prog *prog = event->prog;
9850
9851 if (!prog)
9852 return;
9853
9854 WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
9855 event->prog = NULL;
9856 bpf_prog_put(prog);
9857}
9858#else
9859static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
9860{
9861 return -EOPNOTSUPP;
9862}
9863static void perf_event_free_bpf_handler(struct perf_event *event)
9864{
9865}
9866#endif
9867
9868
9869
9870
9871
9872static inline bool perf_event_is_tracing(struct perf_event *event)
9873{
9874 if (event->pmu == &perf_tracepoint)
9875 return true;
9876#ifdef CONFIG_KPROBE_EVENTS
9877 if (event->pmu == &perf_kprobe)
9878 return true;
9879#endif
9880#ifdef CONFIG_UPROBE_EVENTS
9881 if (event->pmu == &perf_uprobe)
9882 return true;
9883#endif
9884 return false;
9885}
9886
9887static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
9888{
9889 bool is_kprobe, is_tracepoint, is_syscall_tp;
9890 struct bpf_prog *prog;
9891 int ret;
9892
9893 rh_mark_used_feature("eBPF/event");
9894
9895 if (!perf_event_is_tracing(event))
9896 return perf_event_set_bpf_handler(event, prog_fd);
9897
9898 is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
9899 is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
9900 is_syscall_tp = is_syscall_trace_event(event->tp_event);
9901 if (!is_kprobe && !is_tracepoint && !is_syscall_tp)
9902
9903 return -EINVAL;
9904
9905 prog = bpf_prog_get(prog_fd);
9906 if (IS_ERR(prog))
9907 return PTR_ERR(prog);
9908
9909 if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
9910 (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) ||
9911 (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
9912
9913 bpf_prog_put(prog);
9914 return -EINVAL;
9915 }
9916
9917
9918 if (prog->kprobe_override &&
9919 !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) {
9920 bpf_prog_put(prog);
9921 return -EINVAL;
9922 }
9923
9924 if (is_tracepoint || is_syscall_tp) {
9925 int off = trace_event_get_offsets(event->tp_event);
9926
9927 if (prog->aux->max_ctx_offset > off) {
9928 bpf_prog_put(prog);
9929 return -EACCES;
9930 }
9931 }
9932
9933 ret = perf_event_attach_bpf_prog(event, prog);
9934 if (ret)
9935 bpf_prog_put(prog);
9936 return ret;
9937}
9938
9939static void perf_event_free_bpf_prog(struct perf_event *event)
9940{
9941 if (!perf_event_is_tracing(event)) {
9942 perf_event_free_bpf_handler(event);
9943 return;
9944 }
9945 perf_event_detach_bpf_prog(event);
9946}
9947
9948#else
9949
9950static inline void perf_tp_register(void)
9951{
9952}
9953
9954static void perf_event_free_filter(struct perf_event *event)
9955{
9956}
9957
9958static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
9959{
9960 return -ENOENT;
9961}
9962
9963static void perf_event_free_bpf_prog(struct perf_event *event)
9964{
9965}
9966#endif
9967
9968#ifdef CONFIG_HAVE_HW_BREAKPOINT
9969void perf_bp_event(struct perf_event *bp, void *data)
9970{
9971 struct perf_sample_data sample;
9972 struct pt_regs *regs = data;
9973
9974 perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
9975
9976 if (!bp->hw.state && !perf_exclude_event(bp, regs))
9977 perf_swevent_event(bp, 1, &sample, regs);
9978}
9979#endif
9980
/*
 * Allocate a new address filter
 */
9984static struct perf_addr_filter *
9985perf_addr_filter_new(struct perf_event *event, struct list_head *filters)
9986{
9987 int node = cpu_to_node(event->cpu == -1 ? 0 : event->cpu);
9988 struct perf_addr_filter *filter;
9989
9990 filter = kzalloc_node(sizeof(*filter), GFP_KERNEL, node);
9991 if (!filter)
9992 return NULL;
9993
9994 INIT_LIST_HEAD(&filter->entry);
9995 list_add_tail(&filter->entry, filters);
9996
9997 return filter;
9998}
9999
10000static void free_filters_list(struct list_head *filters)
10001{
10002 struct perf_addr_filter *filter, *iter;
10003
10004 list_for_each_entry_safe(filter, iter, filters, entry) {
10005 path_put(&filter->path);
10006 list_del(&filter->entry);
10007 kfree(filter);
10008 }
10009}
10010
/*
 * Free existing address filters and optionally install new ones
 */
10014static void perf_addr_filters_splice(struct perf_event *event,
10015 struct list_head *head)
10016{
10017 unsigned long flags;
10018 LIST_HEAD(list);
10019
10020 if (!has_addr_filter(event))
10021 return;
10022
10023
10024 if (event->parent)
10025 return;
10026
10027 raw_spin_lock_irqsave(&event->addr_filters.lock, flags);
10028
10029 list_splice_init(&event->addr_filters.list, &list);
10030 if (head)
10031 list_splice(head, &event->addr_filters.list);
10032
10033 raw_spin_unlock_irqrestore(&event->addr_filters.lock, flags);
10034
10035 free_filters_list(&list);
10036}
10037
/*
 * Scan through mm's vmas and see if one of them matches the
 * @filter; if so, adjust filter's address range.
 * Called with mm::mmap_lock down for reading.
 */
10043static void perf_addr_filter_apply(struct perf_addr_filter *filter,
10044 struct mm_struct *mm,
10045 struct perf_addr_filter_range *fr)
10046{
10047 struct vm_area_struct *vma;
10048
10049 for (vma = mm->mmap; vma; vma = vma->vm_next) {
10050 if (!vma->vm_file)
10051 continue;
10052
10053 if (perf_addr_filter_vma_adjust(filter, vma, fr))
10054 return;
10055 }
10056}
10057
/*
 * Update event's address range filters based on the
 * task's existing mappings, if any.
 */
10062static void perf_event_addr_filters_apply(struct perf_event *event)
10063{
10064 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
10065 struct task_struct *task = READ_ONCE(event->ctx->task);
10066 struct perf_addr_filter *filter;
10067 struct mm_struct *mm = NULL;
10068 unsigned int count = 0;
10069 unsigned long flags;
10070
10071
10072
10073
10074
10075 if (task == TASK_TOMBSTONE)
10076 return;
10077
10078 if (ifh->nr_file_filters) {
10079 mm = get_task_mm(event->ctx->task);
10080 if (!mm)
10081 goto restart;
10082
10083 mmap_read_lock(mm);
10084 }
10085
10086 raw_spin_lock_irqsave(&ifh->lock, flags);
10087 list_for_each_entry(filter, &ifh->list, entry) {
10088 if (filter->path.dentry) {
10089
10090
10091
10092
10093 event->addr_filter_ranges[count].start = 0;
10094 event->addr_filter_ranges[count].size = 0;
10095
10096 perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]);
10097 } else {
10098 event->addr_filter_ranges[count].start = filter->offset;
10099 event->addr_filter_ranges[count].size = filter->size;
10100 }
10101
10102 count++;
10103 }
10104
10105 event->addr_filters_gen++;
10106 raw_spin_unlock_irqrestore(&ifh->lock, flags);
10107
10108 if (ifh->nr_file_filters) {
10109 mmap_read_unlock(mm);
10110
10111 mmput(mm);
10112 }
10113
10114restart:
10115 perf_event_stop(event, 1);
10116}
10117
/*
 * Address range filtering: limiting the data to certain
 * instruction address ranges. Filters are ioctl()ed to us from
 * userspace as ascii strings.
 *
 * Filter string format:
 *
 * ACTION RANGE_SPEC
 * where ACTION is one of the
 *  * "filter": limit the trace to this region
 *  * "start": start tracing from this address
 *  * "stop": stop tracing at this address/region;
 * RANGE_SPEC is
 *  * for kernel addresses: <start address>[/<size>]
 *  * for object files:     <start address>[/<size>]@</path/to/object/file>
 *
 * if <size> is not specified or is zero, the range is treated as a single
 * address; not valid for ACTION=="filter".
 */
10137enum {
10138 IF_ACT_NONE = -1,
10139 IF_ACT_FILTER,
10140 IF_ACT_START,
10141 IF_ACT_STOP,
10142 IF_SRC_FILE,
10143 IF_SRC_KERNEL,
10144 IF_SRC_FILEADDR,
10145 IF_SRC_KERNELADDR,
10146};
10147
10148enum {
10149 IF_STATE_ACTION = 0,
10150 IF_STATE_SOURCE,
10151 IF_STATE_END,
10152};
10153
10154static const match_table_t if_tokens = {
10155 { IF_ACT_FILTER, "filter" },
10156 { IF_ACT_START, "start" },
10157 { IF_ACT_STOP, "stop" },
10158 { IF_SRC_FILE, "%u/%u@%s" },
10159 { IF_SRC_KERNEL, "%u/%u" },
10160 { IF_SRC_FILEADDR, "%u@%s" },
10161 { IF_SRC_KERNELADDR, "%u" },
10162 { IF_ACT_NONE, NULL },
10163};
10164
10165
10166
10167
10168static int
10169perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
10170 struct list_head *filters)
10171{
10172 struct perf_addr_filter *filter = NULL;
10173 char *start, *orig, *filename = NULL;
10174 substring_t args[MAX_OPT_ARGS];
10175 int state = IF_STATE_ACTION, token;
10176 unsigned int kernel = 0;
10177 int ret = -EINVAL;
10178
10179 orig = fstr = kstrdup(fstr, GFP_KERNEL);
10180 if (!fstr)
10181 return -ENOMEM;
10182
10183 while ((start = strsep(&fstr, " ,\n")) != NULL) {
10184 static const enum perf_addr_filter_action_t actions[] = {
10185 [IF_ACT_FILTER] = PERF_ADDR_FILTER_ACTION_FILTER,
10186 [IF_ACT_START] = PERF_ADDR_FILTER_ACTION_START,
10187 [IF_ACT_STOP] = PERF_ADDR_FILTER_ACTION_STOP,
10188 };
10189 ret = -EINVAL;
10190
10191 if (!*start)
10192 continue;
10193
10194
10195 if (state == IF_STATE_ACTION) {
10196 filter = perf_addr_filter_new(event, filters);
10197 if (!filter)
10198 goto fail;
10199 }
10200
10201 token = match_token(start, if_tokens, args);
10202 switch (token) {
10203 case IF_ACT_FILTER:
10204 case IF_ACT_START:
10205 case IF_ACT_STOP:
10206 if (state != IF_STATE_ACTION)
10207 goto fail;
10208
10209 filter->action = actions[token];
10210 state = IF_STATE_SOURCE;
10211 break;
10212
10213 case IF_SRC_KERNELADDR:
10214 case IF_SRC_KERNEL:
10215 kernel = 1;
10216
10217
10218 case IF_SRC_FILEADDR:
10219 case IF_SRC_FILE:
10220 if (state != IF_STATE_SOURCE)
10221 goto fail;
10222
10223 *args[0].to = 0;
10224 ret = kstrtoul(args[0].from, 0, &filter->offset);
10225 if (ret)
10226 goto fail;
10227
10228 if (token == IF_SRC_KERNEL || token == IF_SRC_FILE) {
10229 *args[1].to = 0;
10230 ret = kstrtoul(args[1].from, 0, &filter->size);
10231 if (ret)
10232 goto fail;
10233 }
10234
10235 if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
10236 int fpos = token == IF_SRC_FILE ? 2 : 1;
10237
10238 kfree(filename);
10239 filename = match_strdup(&args[fpos]);
10240 if (!filename) {
10241 ret = -ENOMEM;
10242 goto fail;
10243 }
10244 }
10245
10246 state = IF_STATE_END;
10247 break;
10248
10249 default:
10250 goto fail;
10251 }
10252
10253
10254
10255
10256
10257
10258 if (state == IF_STATE_END) {
10259 ret = -EINVAL;
10260 if (kernel && event->attr.exclude_kernel)
10261 goto fail;
10262
10263
10264
10265
10266
10267 if (filter->action == PERF_ADDR_FILTER_ACTION_FILTER &&
10268 !filter->size)
10269 goto fail;
10270
10271 if (!kernel) {
10272 if (!filename)
10273 goto fail;
10274
10275
10276
10277
10278
10279
10280
10281
10282
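				/*
				 * Object-file filters are currently only
				 * supported for per-task events.
				 */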
10283 ret = -EOPNOTSUPP;
10284 if (!event->ctx->task)
10285 goto fail;
10286
10287
10288 ret = kern_path(filename, LOOKUP_FOLLOW,
10289 &filter->path);
10290 if (ret)
10291 goto fail;
10292
10293 ret = -EINVAL;
10294 if (!filter->path.dentry ||
10295 !S_ISREG(d_inode(filter->path.dentry)
10296 ->i_mode))
10297 goto fail;
10298
10299 event->addr_filters.nr_file_filters++;
10300 }
10301
10302
10303 state = IF_STATE_ACTION;
10304 filter = NULL;
10305 }
10306 }
10307
10308 if (state != IF_STATE_ACTION)
10309 goto fail;
10310
10311 kfree(filename);
10312 kfree(orig);
10313
10314 return 0;
10315
10316fail:
10317 kfree(filename);
10318 free_filters_list(filters);
10319 kfree(orig);
10320
10321 return ret;
10322}
10323
10324static int
10325perf_event_set_addr_filter(struct perf_event *event, char *filter_str)
10326{
10327 LIST_HEAD(filters);
10328 int ret;
10329
10330
10331
10332
10333
10334 lockdep_assert_held(&event->ctx->mutex);
10335
10336 if (WARN_ON_ONCE(event->parent))
10337 return -EINVAL;
10338
10339 ret = perf_event_parse_addr_filter(event, filter_str, &filters);
10340 if (ret)
10341 goto fail_clear_files;
10342
10343 ret = event->pmu->addr_filters_validate(&filters);
10344 if (ret)
10345 goto fail_free_filters;
10346
10347
10348 perf_addr_filters_splice(event, &filters);
10349
10350
10351 perf_event_for_each_child(event, perf_event_addr_filters_apply);
10352
10353 return ret;
10354
10355fail_free_filters:
10356 free_filters_list(&filters);
10357
10358fail_clear_files:
10359 event->addr_filters.nr_file_filters = 0;
10360
10361 return ret;
10362}
10363
10364static int perf_event_set_filter(struct perf_event *event, void __user *arg)
10365{
10366 int ret = -EINVAL;
10367 char *filter_str;
10368
10369 filter_str = strndup_user(arg, PAGE_SIZE);
10370 if (IS_ERR(filter_str))
10371 return PTR_ERR(filter_str);
10372
10373#ifdef CONFIG_EVENT_TRACING
10374 if (perf_event_is_tracing(event)) {
10375 struct perf_event_context *ctx = event->ctx;
10376
10377
10378
10379
10380
10381
10382
10383
10384
10385
10386
10387
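		/*
		 * Temporarily drop ctx->mutex around the call: the tracing
		 * code takes its own locks that must not nest inside it, and
		 * the caller still holds a reference on the context.
		 */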
10388 mutex_unlock(&ctx->mutex);
10389 ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
10390 mutex_lock(&ctx->mutex);
10391 } else
10392#endif
10393 if (has_addr_filter(event))
10394 ret = perf_event_set_addr_filter(event, filter_str);
10395
10396 kfree(filter_str);
10397 return ret;
10398}
10399
10400
10401
10402
10403
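/*
 * hrtimer-based software event: periodically read the event, emit a sample
 * and re-arm the timer.
 */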
10404static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
10405{
10406 enum hrtimer_restart ret = HRTIMER_RESTART;
10407 struct perf_sample_data data;
10408 struct pt_regs *regs;
10409 struct perf_event *event;
10410 u64 period;
10411
10412 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
10413
10414 if (event->state != PERF_EVENT_STATE_ACTIVE)
10415 return HRTIMER_NORESTART;
10416
10417 event->pmu->read(event);
10418
10419 perf_sample_data_init(&data, 0, event->hw.last_period);
10420 regs = get_irq_regs();
10421
10422 if (regs && !perf_exclude_event(event, regs)) {
10423 if (!(event->attr.exclude_idle && is_idle_task(current)))
10424 if (__perf_event_overflow(event, 1, &data, regs))
10425 ret = HRTIMER_NORESTART;
10426 }
10427
10428 period = max_t(u64, 10000, event->hw.sample_period);
10429 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
10430
10431 return ret;
10432}
10433
10434static void perf_swevent_start_hrtimer(struct perf_event *event)
10435{
10436 struct hw_perf_event *hwc = &event->hw;
10437 s64 period;
10438
10439 if (!is_sampling_event(event))
10440 return;
10441
10442 period = local64_read(&hwc->period_left);
10443 if (period) {
10444 if (period < 0)
10445 period = 10000;
10446
10447 local64_set(&hwc->period_left, 0);
10448 } else {
10449 period = max_t(u64, 10000, hwc->sample_period);
10450 }
10451 hrtimer_start(&hwc->hrtimer, ns_to_ktime(period),
10452 HRTIMER_MODE_REL_PINNED_HARD);
10453}
10454
10455static void perf_swevent_cancel_hrtimer(struct perf_event *event)
10456{
10457 struct hw_perf_event *hwc = &event->hw;
10458
10459 if (is_sampling_event(event)) {
10460 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
10461 local64_set(&hwc->period_left, ktime_to_ns(remaining));
10462
10463 hrtimer_cancel(&hwc->hrtimer);
10464 }
10465}
10466
10467static void perf_swevent_init_hrtimer(struct perf_event *event)
10468{
10469 struct hw_perf_event *hwc = &event->hw;
10470
10471 if (!is_sampling_event(event))
10472 return;
10473
10474 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
10475 hwc->hrtimer.function = perf_swevent_hrtimer;
10476
10477
10478
10479
10480
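	/*
	 * hrtimers have a known, fixed resolution, so a frequency request can
	 * be converted to a static period up front instead of relying on the
	 * adaptive period adjustment.
	 */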
10481 if (event->attr.freq) {
10482 long freq = event->attr.sample_freq;
10483
10484 event->attr.sample_period = NSEC_PER_SEC / freq;
10485 hwc->sample_period = event->attr.sample_period;
10486 local64_set(&hwc->period_left, hwc->sample_period);
10487 hwc->last_period = hwc->sample_period;
10488 event->attr.freq = 0;
10489 }
10490}
10491
10492
10493
10494
10495
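/*
 * Software event: CPU wall-clock timer (PERF_COUNT_SW_CPU_CLOCK).
 */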
10496static void cpu_clock_event_update(struct perf_event *event)
10497{
10498 s64 prev;
10499 u64 now;
10500
10501 now = local_clock();
10502 prev = local64_xchg(&event->hw.prev_count, now);
10503 local64_add(now - prev, &event->count);
10504}
10505
10506static void cpu_clock_event_start(struct perf_event *event, int flags)
10507{
10508 local64_set(&event->hw.prev_count, local_clock());
10509 perf_swevent_start_hrtimer(event);
10510}
10511
10512static void cpu_clock_event_stop(struct perf_event *event, int flags)
10513{
10514 perf_swevent_cancel_hrtimer(event);
10515 cpu_clock_event_update(event);
10516}
10517
10518static int cpu_clock_event_add(struct perf_event *event, int flags)
10519{
10520 if (flags & PERF_EF_START)
10521 cpu_clock_event_start(event, flags);
10522 perf_event_update_userpage(event);
10523
10524 return 0;
10525}
10526
10527static void cpu_clock_event_del(struct perf_event *event, int flags)
10528{
10529 cpu_clock_event_stop(event, flags);
10530}
10531
10532static void cpu_clock_event_read(struct perf_event *event)
10533{
10534 cpu_clock_event_update(event);
10535}
10536
10537static int cpu_clock_event_init(struct perf_event *event)
10538{
10539 if (event->attr.type != PERF_TYPE_SOFTWARE)
10540 return -ENOENT;
10541
10542 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
10543 return -ENOENT;
10544
10545
10546
10547
10548 if (has_branch_stack(event))
10549 return -EOPNOTSUPP;
10550
10551 perf_swevent_init_hrtimer(event);
10552
10553 return 0;
10554}
10555
10556static struct pmu perf_cpu_clock = {
10557 .task_ctx_nr = perf_sw_context,
10558
10559 .capabilities = PERF_PMU_CAP_NO_NMI,
10560
10561 .event_init = cpu_clock_event_init,
10562 .add = cpu_clock_event_add,
10563 .del = cpu_clock_event_del,
10564 .start = cpu_clock_event_start,
10565 .stop = cpu_clock_event_stop,
10566 .read = cpu_clock_event_read,
10567};
10568
10569
10570
10571
10572
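/*
 * Software event: task time clock (PERF_COUNT_SW_TASK_CLOCK), based on the
 * context's time.
 */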
10573static void task_clock_event_update(struct perf_event *event, u64 now)
10574{
10575 u64 prev;
10576 s64 delta;
10577
10578 prev = local64_xchg(&event->hw.prev_count, now);
10579 delta = now - prev;
10580 local64_add(delta, &event->count);
10581}
10582
10583static void task_clock_event_start(struct perf_event *event, int flags)
10584{
10585 local64_set(&event->hw.prev_count, event->ctx->time);
10586 perf_swevent_start_hrtimer(event);
10587}
10588
10589static void task_clock_event_stop(struct perf_event *event, int flags)
10590{
10591 perf_swevent_cancel_hrtimer(event);
10592 task_clock_event_update(event, event->ctx->time);
10593}
10594
10595static int task_clock_event_add(struct perf_event *event, int flags)
10596{
10597 if (flags & PERF_EF_START)
10598 task_clock_event_start(event, flags);
10599 perf_event_update_userpage(event);
10600
10601 return 0;
10602}
10603
10604static void task_clock_event_del(struct perf_event *event, int flags)
10605{
10606 task_clock_event_stop(event, PERF_EF_UPDATE);
10607}
10608
10609static void task_clock_event_read(struct perf_event *event)
10610{
10611 u64 now = perf_clock();
10612 u64 delta = now - event->ctx->timestamp;
10613 u64 time = event->ctx->time + delta;
10614
10615 task_clock_event_update(event, time);
10616}
10617
10618static int task_clock_event_init(struct perf_event *event)
10619{
10620 if (event->attr.type != PERF_TYPE_SOFTWARE)
10621 return -ENOENT;
10622
10623 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
10624 return -ENOENT;
10625
10626
10627
10628
10629 if (has_branch_stack(event))
10630 return -EOPNOTSUPP;
10631
10632 perf_swevent_init_hrtimer(event);
10633
10634 return 0;
10635}
10636
10637static struct pmu perf_task_clock = {
10638 .task_ctx_nr = perf_sw_context,
10639
10640 .capabilities = PERF_PMU_CAP_NO_NMI,
10641
10642 .event_init = task_clock_event_init,
10643 .add = task_clock_event_add,
10644 .del = task_clock_event_del,
10645 .start = task_clock_event_start,
10646 .stop = task_clock_event_stop,
10647 .read = task_clock_event_read,
10648};
10649
10650static void perf_pmu_nop_void(struct pmu *pmu)
10651{
10652}
10653
10654static void perf_pmu_nop_txn(struct pmu *pmu, unsigned int flags)
10655{
10656}
10657
10658static int perf_pmu_nop_int(struct pmu *pmu)
10659{
10660 return 0;
10661}
10662
10663static int perf_event_nop_int(struct perf_event *event, u64 value)
10664{
10665 return 0;
10666}
10667
10668static DEFINE_PER_CPU(unsigned int, nop_txn_flags);
10669
10670static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags)
10671{
10672 __this_cpu_write(nop_txn_flags, flags);
10673
10674 if (flags & ~PERF_PMU_TXN_ADD)
10675 return;
10676
10677 perf_pmu_disable(pmu);
10678}
10679
10680static int perf_pmu_commit_txn(struct pmu *pmu)
10681{
10682 unsigned int flags = __this_cpu_read(nop_txn_flags);
10683
10684 __this_cpu_write(nop_txn_flags, 0);
10685
10686 if (flags & ~PERF_PMU_TXN_ADD)
10687 return 0;
10688
10689 perf_pmu_enable(pmu);
10690 return 0;
10691}
10692
10693static void perf_pmu_cancel_txn(struct pmu *pmu)
10694{
10695 unsigned int flags = __this_cpu_read(nop_txn_flags);
10696
10697 __this_cpu_write(nop_txn_flags, 0);
10698
10699 if (flags & ~PERF_PMU_TXN_ADD)
10700 return;
10701
10702 perf_pmu_enable(pmu);
10703}
10704
10705static int perf_event_idx_default(struct perf_event *event)
10706{
10707 return 0;
10708}
10709
10710
10711
10712
10713
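/*
 * PMUs that share a task context number also share one perf_cpu_context
 * allocation; look it up among the already-registered PMUs.
 */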
10714static struct perf_cpu_context __percpu *find_pmu_context(int ctxn)
10715{
10716 struct pmu *pmu;
10717
10718 if (ctxn < 0)
10719 return NULL;
10720
10721 list_for_each_entry(pmu, &pmus, entry) {
10722 if (pmu->task_ctx_nr == ctxn)
10723 return pmu->pmu_cpu_context;
10724 }
10725
10726 return NULL;
10727}
10728
10729static void free_pmu_context(struct pmu *pmu)
10730{
10731
10732
10733
10734
10735
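	/*
	 * Shared contexts (task_ctx_nr above perf_invalid_context) are reused
	 * by several PMUs and must not be freed here; only privately
	 * allocated ones are.
	 */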
10736 if (pmu->task_ctx_nr > perf_invalid_context)
10737 return;
10738
10739 free_percpu(pmu->pmu_cpu_context);
10740}
10741
10742
10743
10744
10745static ssize_t nr_addr_filters_show(struct device *dev,
10746 struct device_attribute *attr,
10747 char *page)
10748{
10749 struct pmu *pmu = dev_get_drvdata(dev);
10750
10751 return snprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters);
10752}
10753DEVICE_ATTR_RO(nr_addr_filters);
10754
10755static struct idr pmu_idr;
10756
10757static ssize_t
10758type_show(struct device *dev, struct device_attribute *attr, char *page)
10759{
10760 struct pmu *pmu = dev_get_drvdata(dev);
10761
10762 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
10763}
10764static DEVICE_ATTR_RO(type);
10765
10766static ssize_t
10767perf_event_mux_interval_ms_show(struct device *dev,
10768 struct device_attribute *attr,
10769 char *page)
10770{
10771 struct pmu *pmu = dev_get_drvdata(dev);
10772
10773 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms);
10774}
10775
10776static DEFINE_MUTEX(mux_interval_mutex);
10777
10778static ssize_t
10779perf_event_mux_interval_ms_store(struct device *dev,
10780 struct device_attribute *attr,
10781 const char *buf, size_t count)
10782{
10783 struct pmu *pmu = dev_get_drvdata(dev);
10784 int timer, cpu, ret;
10785
10786 ret = kstrtoint(buf, 0, &timer);
10787 if (ret)
10788 return ret;
10789
10790 if (timer < 1)
10791 return -EINVAL;
10792
10793
10794 if (timer == pmu->hrtimer_interval_ms)
10795 return count;
10796
10797 mutex_lock(&mux_interval_mutex);
10798 pmu->hrtimer_interval_ms = timer;
10799
10800
10801 cpus_read_lock();
10802 for_each_online_cpu(cpu) {
10803 struct perf_cpu_context *cpuctx;
10804 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
10805 cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
10806
10807 cpu_function_call(cpu,
10808 (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
10809 }
10810 cpus_read_unlock();
10811 mutex_unlock(&mux_interval_mutex);
10812
10813 return count;
10814}
10815static DEVICE_ATTR_RW(perf_event_mux_interval_ms);
10816
10817static struct attribute *pmu_dev_attrs[] = {
10818 &dev_attr_type.attr,
10819 &dev_attr_perf_event_mux_interval_ms.attr,
10820 NULL,
10821};
10822ATTRIBUTE_GROUPS(pmu_dev);
10823
10824static int pmu_bus_running;
10825static struct bus_type pmu_bus = {
10826 .name = "event_source",
10827 .dev_groups = pmu_dev_groups,
10828};
10829
10830static void pmu_dev_release(struct device *dev)
10831{
10832 kfree(dev);
10833}
10834
10835static int pmu_dev_alloc(struct pmu *pmu)
10836{
10837 int ret = -ENOMEM;
10838
10839 pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
10840 if (!pmu->dev)
10841 goto out;
10842
10843 pmu->dev->groups = pmu->attr_groups;
10844 device_initialize(pmu->dev);
10845 ret = dev_set_name(pmu->dev, "%s", pmu->name);
10846 if (ret)
10847 goto free_dev;
10848
10849 dev_set_drvdata(pmu->dev, pmu);
10850 pmu->dev->bus = &pmu_bus;
10851 pmu->dev->release = pmu_dev_release;
10852 ret = device_add(pmu->dev);
10853 if (ret)
10854 goto free_dev;
10855
10856
10857 if (pmu->nr_addr_filters)
10858 ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters);
10859
10860 if (ret)
10861 goto del_dev;
10862
10863 if (pmu->attr_update)
10864 ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update);
10865
10866 if (ret)
10867 goto del_dev;
10868
10869out:
10870 return ret;
10871
10872del_dev:
10873 device_del(pmu->dev);
10874
10875free_dev:
10876 put_device(pmu->dev);
10877 goto out;
10878}
10879
10880static struct lock_class_key cpuctx_mutex;
10881static struct lock_class_key cpuctx_lock;
10882
10883int perf_pmu_register(struct pmu *pmu, const char *name, int type)
10884{
10885 int cpu, ret, max = PERF_TYPE_MAX;
10886
10887 mutex_lock(&pmus_lock);
10888 ret = -ENOMEM;
10889 pmu->pmu_disable_count = alloc_percpu(int);
10890 if (!pmu->pmu_disable_count)
10891 goto unlock;
10892
10893 pmu->type = -1;
10894 if (!name)
10895 goto skip_type;
10896 pmu->name = name;
10897
10898 if (type != PERF_TYPE_SOFTWARE) {
10899 if (type >= 0)
10900 max = type;
10901
10902 ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL);
10903 if (ret < 0)
10904 goto free_pdc;
10905
10906 WARN_ON(type >= 0 && ret != type);
10907
10908 type = ret;
10909 }
10910 pmu->type = type;
10911
10912 if (pmu_bus_running) {
10913 ret = pmu_dev_alloc(pmu);
10914 if (ret)
10915 goto free_idr;
10916 }
10917
10918skip_type:
10919 if (pmu->task_ctx_nr == perf_hw_context) {
10920 static int hw_context_taken = 0;
10921
10922
10923
10924
10925
10926
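		/*
		 * Only one PMU may claim the hardware task context unless the
		 * system has heterogeneous CPU PMUs; any further claimant is
		 * demoted to a per-CPU only (invalid) task context.
		 */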
10927 if (WARN_ON_ONCE(hw_context_taken &&
10928 !(pmu->capabilities & PERF_PMU_CAP_HETEROGENEOUS_CPUS)))
10929 pmu->task_ctx_nr = perf_invalid_context;
10930
10931 hw_context_taken = 1;
10932 }
10933
10934 pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
10935 if (pmu->pmu_cpu_context)
10936 goto got_cpu_context;
10937
10938 ret = -ENOMEM;
10939 pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
10940 if (!pmu->pmu_cpu_context)
10941 goto free_dev;
10942
10943 for_each_possible_cpu(cpu) {
10944 struct perf_cpu_context *cpuctx;
10945
10946 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
10947 __perf_event_init_context(&cpuctx->ctx);
10948 lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
10949 lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
10950 cpuctx->ctx.pmu = pmu;
10951 cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
10952
10953 __perf_mux_hrtimer_init(cpuctx, cpu);
10954
10955 cpuctx->heap_size = ARRAY_SIZE(cpuctx->heap_default);
10956 cpuctx->heap = cpuctx->heap_default;
10957 }
10958
10959got_cpu_context:
10960 if (!pmu->start_txn) {
10961 if (pmu->pmu_enable) {
10962
10963
10964
10965
10966
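			/*
			 * The PMU provides pmu_enable/pmu_disable, so build
			 * the default PERF_PMU_TXN_ADD transaction handlers
			 * on top of them to batch the add operations.
			 */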
10967 pmu->start_txn = perf_pmu_start_txn;
10968 pmu->commit_txn = perf_pmu_commit_txn;
10969 pmu->cancel_txn = perf_pmu_cancel_txn;
10970 } else {
10971 pmu->start_txn = perf_pmu_nop_txn;
10972 pmu->commit_txn = perf_pmu_nop_int;
10973 pmu->cancel_txn = perf_pmu_nop_void;
10974 }
10975 }
10976
10977 if (!pmu->pmu_enable) {
10978 pmu->pmu_enable = perf_pmu_nop_void;
10979 pmu->pmu_disable = perf_pmu_nop_void;
10980 }
10981
10982 if (!pmu->check_period)
10983 pmu->check_period = perf_event_nop_int;
10984
10985 if (!pmu->event_idx)
10986 pmu->event_idx = perf_event_idx_default;
10987
10988
10989
10990
10991
10992
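	/*
	 * Software and unnamed PMUs have no IDR entry; keep them at the head
	 * of the list so the fallback walk in perf_init_event() reaches them
	 * first, and append everything else.
	 */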
10993 if (type == PERF_TYPE_SOFTWARE || !name)
10994 list_add_rcu(&pmu->entry, &pmus);
10995 else
10996 list_add_tail_rcu(&pmu->entry, &pmus);
10997
10998 atomic_set(&pmu->exclusive_cnt, 0);
10999 ret = 0;
11000unlock:
11001 mutex_unlock(&pmus_lock);
11002
11003 return ret;
11004
11005free_dev:
11006 device_del(pmu->dev);
11007 put_device(pmu->dev);
11008
11009free_idr:
11010 if (pmu->type != PERF_TYPE_SOFTWARE)
11011 idr_remove(&pmu_idr, pmu->type);
11012
11013free_pdc:
11014 free_percpu(pmu->pmu_disable_count);
11015 goto unlock;
11016}
11017EXPORT_SYMBOL_GPL(perf_pmu_register);
11018
11019void perf_pmu_unregister(struct pmu *pmu)
11020{
11021 mutex_lock(&pmus_lock);
11022 list_del_rcu(&pmu->entry);
11023
11024
11025
11026
11027
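	/*
	 * The pmus list is walked under both SRCU and plain RCU, so wait for
	 * both grace periods before tearing the PMU down.
	 */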
11028 synchronize_srcu(&pmus_srcu);
11029 synchronize_rcu();
11030
11031 free_percpu(pmu->pmu_disable_count);
11032 if (pmu->type != PERF_TYPE_SOFTWARE)
11033 idr_remove(&pmu_idr, pmu->type);
11034 if (pmu_bus_running) {
11035 if (pmu->nr_addr_filters)
11036 device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
11037 device_del(pmu->dev);
11038 put_device(pmu->dev);
11039 }
11040 free_pmu_context(pmu);
11041 mutex_unlock(&pmus_lock);
11042}
11043EXPORT_SYMBOL_GPL(perf_pmu_unregister);
11044
11045static inline bool has_extended_regs(struct perf_event *event)
11046{
11047 return (event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK) ||
11048 (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK);
11049}
11050
11051static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
11052{
11053 struct perf_event_context *ctx = NULL;
11054 int ret;
11055
11056 if (!try_module_get(pmu->module))
11057 return -ENODEV;
11058
11059
11060
11061
11062
11063
11064
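	/*
	 * pmu->event_init() may walk the group's sibling list (e.g. to
	 * validate the group), so for non-software sibling events take the
	 * group leader's context mutex first.
	 */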
11065 if (event->group_leader != event && pmu->task_ctx_nr != perf_sw_context) {
11066
11067
11068
11069
11070 ctx = perf_event_ctx_lock_nested(event->group_leader,
11071 SINGLE_DEPTH_NESTING);
11072 BUG_ON(!ctx);
11073 }
11074
11075 event->pmu = pmu;
11076 ret = pmu->event_init(event);
11077
11078 if (ctx)
11079 perf_event_ctx_unlock(event->group_leader, ctx);
11080
11081 if (!ret) {
11082 if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) &&
11083 has_extended_regs(event))
11084 ret = -EOPNOTSUPP;
11085
11086 if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE &&
11087 event_has_any_exclude_flag(event))
11088 ret = -EINVAL;
11089
11090 if (ret && event->destroy)
11091 event->destroy(event);
11092 }
11093
11094 if (ret)
11095 module_put(pmu->module);
11096
11097 return ret;
11098}
11099
11100static struct pmu *perf_init_event(struct perf_event *event)
11101{
11102 bool extended_type = false;
11103 int idx, type, ret;
11104 struct pmu *pmu;
11105
11106 idx = srcu_read_lock(&pmus_srcu);
11107
11108
11109 if (event->parent && event->parent->pmu) {
11110 pmu = event->parent->pmu;
11111 ret = perf_try_init_event(pmu, event);
11112 if (!ret)
11113 goto unlock;
11114 }
11115
11116
11117
11118
11119
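	/*
	 * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE events may encode a
	 * specific PMU in the upper bits of attr.config (extended hardware
	 * types); if none is given they go to the PMU registered as
	 * PERF_TYPE_RAW.
	 */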
11120 type = event->attr.type;
11121 if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) {
11122 type = event->attr.config >> PERF_PMU_TYPE_SHIFT;
11123 if (!type) {
11124 type = PERF_TYPE_RAW;
11125 } else {
11126 extended_type = true;
11127 event->attr.config &= PERF_HW_EVENT_MASK;
11128 }
11129 }
11130
11131again:
11132 rcu_read_lock();
11133 pmu = idr_find(&pmu_idr, type);
11134 rcu_read_unlock();
11135 if (pmu) {
11136 if (event->attr.type != type && type != PERF_TYPE_RAW &&
11137 !(pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE))
11138 goto fail;
11139
11140 ret = perf_try_init_event(pmu, event);
11141 if (ret == -ENOENT && event->attr.type != type && !extended_type) {
11142 type = event->attr.type;
11143 goto again;
11144 }
11145
11146 if (ret)
11147 pmu = ERR_PTR(ret);
11148
11149 goto unlock;
11150 }
11151
11152 list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) {
11153 ret = perf_try_init_event(pmu, event);
11154 if (!ret)
11155 goto unlock;
11156
11157 if (ret != -ENOENT) {
11158 pmu = ERR_PTR(ret);
11159 goto unlock;
11160 }
11161 }
11162fail:
11163 pmu = ERR_PTR(-ENOENT);
11164unlock:
11165 srcu_read_unlock(&pmus_srcu, idx);
11166
11167 return pmu;
11168}
11169
11170static void attach_sb_event(struct perf_event *event)
11171{
11172 struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
11173
11174 raw_spin_lock(&pel->lock);
11175 list_add_rcu(&event->sb_list, &pel->list);
11176 raw_spin_unlock(&pel->lock);
11177}
11178
11179
11180
11181
11182
11183
11184
11185
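/*
 * Events that want side-band records (mmap, comm, ...) and have no task
 * context are kept on per-CPU lists so they can be found without scanning
 * every PMU context.
 */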
11186static void account_pmu_sb_event(struct perf_event *event)
11187{
11188 if (is_sb_event(event))
11189 attach_sb_event(event);
11190}
11191
11192static void account_event_cpu(struct perf_event *event, int cpu)
11193{
11194 if (event->parent)
11195 return;
11196
11197 if (is_cgroup_event(event))
11198 atomic_inc(&per_cpu(perf_cgroup_events, cpu));
11199}
11200
11201
11202static void account_freq_event_nohz(void)
11203{
11204#ifdef CONFIG_NO_HZ_FULL
11205
11206 spin_lock(&nr_freq_lock);
11207 if (atomic_inc_return(&nr_freq_events) == 1)
11208 tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
11209 spin_unlock(&nr_freq_lock);
11210#endif
11211}
11212
11213static void account_freq_event(void)
11214{
11215 if (tick_nohz_full_enabled())
11216 account_freq_event_nohz();
11217 else
11218 atomic_inc(&nr_freq_events);
11219}
11220
11221
11222static void account_event(struct perf_event *event)
11223{
11224 bool inc = false;
11225
11226 if (event->parent)
11227 return;
11228
11229 if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
11230 inc = true;
11231 if (event->attr.mmap || event->attr.mmap_data)
11232 atomic_inc(&nr_mmap_events);
11233 if (event->attr.build_id)
11234 atomic_inc(&nr_build_id_events);
11235 if (event->attr.comm)
11236 atomic_inc(&nr_comm_events);
11237 if (event->attr.namespaces)
11238 atomic_inc(&nr_namespaces_events);
11239 if (event->attr.cgroup)
11240 atomic_inc(&nr_cgroup_events);
11241 if (event->attr.task)
11242 atomic_inc(&nr_task_events);
11243 if (event->attr.freq)
11244 account_freq_event();
11245 if (event->attr.context_switch) {
11246 atomic_inc(&nr_switch_events);
11247 inc = true;
11248 }
11249 if (has_branch_stack(event))
11250 inc = true;
11251 if (is_cgroup_event(event))
11252 inc = true;
11253 if (event->attr.ksymbol)
11254 atomic_inc(&nr_ksymbol_events);
11255 if (event->attr.bpf_event)
11256 atomic_inc(&nr_bpf_events);
11257 if (event->attr.text_poke)
11258 atomic_inc(&nr_text_poke_events);
11259
11260 if (inc) {
11261
11262
11263
11264
11265
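		/*
		 * First user: enable the perf_sched_events static key under
		 * perf_sched_mutex so it is flipped exactly once, and make
		 * sure the key change is visible everywhere before the
		 * perf_sched_count increment is published.
		 */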
11266 if (atomic_inc_not_zero(&perf_sched_count))
11267 goto enabled;
11268
11269 mutex_lock(&perf_sched_mutex);
11270 if (!atomic_read(&perf_sched_count)) {
11271 static_branch_enable(&perf_sched_events);
11272
11273
11274
11275
11276
11277 synchronize_rcu();
11278 }
11279
11280
11281
11282
11283 atomic_inc(&perf_sched_count);
11284 mutex_unlock(&perf_sched_mutex);
11285 }
11286enabled:
11287
11288 account_event_cpu(event, event->cpu);
11289
11290 account_pmu_sb_event(event);
11291}
11292
11293
11294
11295
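/*
 * Allocate and initialize an event structure; the event is not yet attached
 * to any context.
 */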
11296static struct perf_event *
11297perf_event_alloc(struct perf_event_attr *attr, int cpu,
11298 struct task_struct *task,
11299 struct perf_event *group_leader,
11300 struct perf_event *parent_event,
11301 perf_overflow_handler_t overflow_handler,
11302 void *context, int cgroup_fd)
11303{
11304 struct pmu *pmu;
11305 struct perf_event *event;
11306 struct hw_perf_event *hwc;
11307 long err = -EINVAL;
11308
11309 if ((unsigned)cpu >= nr_cpu_ids) {
11310 if (!task || cpu != -1)
11311 return ERR_PTR(-EINVAL);
11312 }
11313
11314 event = kzalloc(sizeof(*event), GFP_KERNEL);
11315 if (!event)
11316 return ERR_PTR(-ENOMEM);
11317
11318
11319
11320
11321
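	/* An event without an explicit group leader is its own group leader. */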
11322 if (!group_leader)
11323 group_leader = event;
11324
11325 mutex_init(&event->child_mutex);
11326 INIT_LIST_HEAD(&event->child_list);
11327
11328 INIT_LIST_HEAD(&event->event_entry);
11329 INIT_LIST_HEAD(&event->sibling_list);
11330 INIT_LIST_HEAD(&event->active_list);
11331 init_event_group(event);
11332 INIT_LIST_HEAD(&event->rb_entry);
11333 INIT_LIST_HEAD(&event->active_entry);
11334 INIT_LIST_HEAD(&event->addr_filters.list);
11335 INIT_HLIST_NODE(&event->hlist_entry);
11336
11337
11338 init_waitqueue_head(&event->waitq);
11339 event->pending_disable = -1;
11340 init_irq_work(&event->pending, perf_pending_event);
11341
11342 mutex_init(&event->mmap_mutex);
11343 raw_spin_lock_init(&event->addr_filters.lock);
11344
11345 atomic_long_set(&event->refcount, 1);
11346 event->cpu = cpu;
11347 event->attr = *attr;
11348 event->group_leader = group_leader;
11349 event->pmu = NULL;
11350 event->oncpu = -1;
11351
11352 event->parent = parent_event;
11353
11354 event->ns = get_pid_ns(task_active_pid_ns(current));
11355 event->id = atomic64_inc_return(&perf_event_id);
11356
11357 event->state = PERF_EVENT_STATE_INACTIVE;
11358
11359 if (task) {
11360 event->attach_state = PERF_ATTACH_TASK;
11361
11362
11363
11364
11365
11366 event->hw.target = get_task_struct(task);
11367 }
11368
11369 event->clock = &local_clock;
11370 if (parent_event)
11371 event->clock = parent_event->clock;
11372
11373 if (!overflow_handler && parent_event) {
11374 overflow_handler = parent_event->overflow_handler;
11375 context = parent_event->overflow_handler_context;
11376#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
11377 if (overflow_handler == bpf_overflow_handler) {
11378 struct bpf_prog *prog = parent_event->prog;
11379
11380 bpf_prog_inc(prog);
11381 event->prog = prog;
11382 event->orig_overflow_handler =
11383 parent_event->orig_overflow_handler;
11384 }
11385#endif
11386 }
11387
11388 if (overflow_handler) {
11389 event->overflow_handler = overflow_handler;
11390 event->overflow_handler_context = context;
11391 } else if (is_write_backward(event)){
11392 event->overflow_handler = perf_event_output_backward;
11393 event->overflow_handler_context = NULL;
11394 } else {
11395 event->overflow_handler = perf_event_output_forward;
11396 event->overflow_handler_context = NULL;
11397 }
11398
11399 perf_event__state_init(event);
11400
11401 pmu = NULL;
11402
11403 hwc = &event->hw;
11404 hwc->sample_period = attr->sample_period;
11405 if (attr->freq && attr->sample_freq)
11406 hwc->sample_period = 1;
11407 hwc->last_period = hwc->sample_period;
11408
11409 local64_set(&hwc->period_left, hwc->sample_period);
11410
11411
11412
11413
11414
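	/* PERF_SAMPLE_READ is not supported on inherited events. */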
11415 if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ))
11416 goto err_ns;
11417
11418 if (!has_branch_stack(event))
11419 event->attr.branch_sample_type = 0;
11420
11421 pmu = perf_init_event(event);
11422 if (IS_ERR(pmu)) {
11423 err = PTR_ERR(pmu);
11424 goto err_ns;
11425 }
11426
11427
11428
11429
11430
11431 if (pmu->task_ctx_nr == perf_invalid_context && cgroup_fd != -1) {
11432 err = -EINVAL;
11433 goto err_pmu;
11434 }
11435
11436 if (event->attr.aux_output &&
11437 !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
11438 err = -EOPNOTSUPP;
11439 goto err_pmu;
11440 }
11441
11442 if (cgroup_fd != -1) {
11443 err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader);
11444 if (err)
11445 goto err_pmu;
11446 }
11447
11448 err = exclusive_event_init(event);
11449 if (err)
11450 goto err_pmu;
11451
11452 if (has_addr_filter(event)) {
11453 event->addr_filter_ranges = kcalloc(pmu->nr_addr_filters,
11454 sizeof(struct perf_addr_filter_range),
11455 GFP_KERNEL);
11456 if (!event->addr_filter_ranges) {
11457 err = -ENOMEM;
11458 goto err_per_task;
11459 }
11460
11461
11462
11463
11464
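		/*
		 * Inherited events start out with a copy of the parent's
		 * address filter ranges.
		 */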
11465 if (event->parent) {
11466 struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
11467
11468 raw_spin_lock_irq(&ifh->lock);
11469 memcpy(event->addr_filter_ranges,
11470 event->parent->addr_filter_ranges,
11471 pmu->nr_addr_filters * sizeof(struct perf_addr_filter_range));
11472 raw_spin_unlock_irq(&ifh->lock);
11473 }
11474
11475
11476 event->addr_filters_gen = 1;
11477 }
11478
11479 if (!event->parent) {
11480 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
11481 err = get_callchain_buffers(attr->sample_max_stack);
11482 if (err)
11483 goto err_addr_filters;
11484 }
11485 }
11486
11487 err = security_perf_event_alloc(event);
11488 if (err)
11489 goto err_callchain_buffer;
11490
11491
11492 account_event(event);
11493
11494 return event;
11495
11496err_callchain_buffer:
11497 if (!event->parent) {
11498 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
11499 put_callchain_buffers();
11500 }
11501err_addr_filters:
11502 kfree(event->addr_filter_ranges);
11503
11504err_per_task:
11505 exclusive_event_destroy(event);
11506
11507err_pmu:
11508 if (is_cgroup_event(event))
11509 perf_detach_cgroup(event);
11510 if (event->destroy)
11511 event->destroy(event);
11512 module_put(pmu->module);
11513err_ns:
11514 if (event->ns)
11515 put_pid_ns(event->ns);
11516 if (event->hw.target)
11517 put_task_struct(event->hw.target);
11518 kfree(event);
11519
11520 return ERR_PTR(err);
11521}
11522
11523static int perf_copy_attr(struct perf_event_attr __user *uattr,
11524 struct perf_event_attr *attr)
11525{
11526 u32 size;
11527 int ret;
11528
11529 if (!access_ok(uattr, PERF_ATTR_SIZE_VER0))
11530 return -EFAULT;
11531
11532
11533
11534
11535 memset(attr, 0, sizeof(*attr));
11536
11537 ret = get_user(size, &uattr->size);
11538 if (ret)
11539 return ret;
11540
11541 if (size > PAGE_SIZE)
11542 goto err_size;
11543
11544 if (!size)
11545 size = PERF_ATTR_SIZE_VER0;
11546
11547 if (size < PERF_ATTR_SIZE_VER0)
11548 goto err_size;
11549
11550
11551
11552
11553
11554
11555
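	/*
	 * If userspace passed a larger struct than this kernel knows about,
	 * every byte beyond the known size must be zero; otherwise userspace
	 * is requesting features we do not have.
	 */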
11556 if (size > sizeof(*attr)) {
11557 unsigned char __user *addr;
11558 unsigned char __user *end;
11559 unsigned char val;
11560
11561 addr = (void __user *)uattr + sizeof(*attr);
11562 end = (void __user *)uattr + size;
11563
11564 for (; addr < end; addr++) {
11565 ret = get_user(val, addr);
11566 if (ret)
11567 return ret;
11568 if (val)
11569 goto err_size;
11570 }
11571 size = sizeof(*attr);
11572 }
11573
11574 ret = copy_from_user(attr, uattr, size);
11575 if (ret)
11576 return -EFAULT;
11577
11578 attr->size = size;
11579
11580 if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
11581 return -EINVAL;
11582
11583 if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
11584 return -EINVAL;
11585
11586 if (attr->read_format & ~(PERF_FORMAT_MAX-1))
11587 return -EINVAL;
11588
11589 if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
11590 u64 mask = attr->branch_sample_type;
11591
11592
11593 if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1))
11594 return -EINVAL;
11595
11596
11597 if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
11598 return -EINVAL;
11599
11600
11601 if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
11602
11603
11604 if (!attr->exclude_kernel)
11605 mask |= PERF_SAMPLE_BRANCH_KERNEL;
11606
11607 if (!attr->exclude_user)
11608 mask |= PERF_SAMPLE_BRANCH_USER;
11609
11610 if (!attr->exclude_hv)
11611 mask |= PERF_SAMPLE_BRANCH_HV;
11612
11613
11614
11615 attr->branch_sample_type = mask;
11616 }
11617
11618 if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) {
11619 ret = perf_allow_kernel(attr);
11620 if (ret)
11621 return ret;
11622 }
11623 }
11624
11625 if (attr->sample_type & PERF_SAMPLE_REGS_USER) {
11626 ret = perf_reg_validate(attr->sample_regs_user);
11627 if (ret)
11628 return ret;
11629 }
11630
11631 if (attr->sample_type & PERF_SAMPLE_STACK_USER) {
11632 if (!arch_perf_have_user_stack_dump())
11633 return -ENOSYS;
11634
11635
11636
11637
11638
11639
11640 if (attr->sample_stack_user >= USHRT_MAX)
11641 return -EINVAL;
11642 else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64)))
11643 return -EINVAL;
11644 }
11645
11646 if (!attr->sample_max_stack)
11647 attr->sample_max_stack = sysctl_perf_event_max_stack;
11648
11649 if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
11650 ret = perf_reg_validate(attr->sample_regs_intr);
11651
11652#ifndef CONFIG_CGROUP_PERF
11653 if (attr->sample_type & PERF_SAMPLE_CGROUP)
11654 return -EINVAL;
11655#endif
11656 if ((attr->sample_type & PERF_SAMPLE_WEIGHT) &&
11657 (attr->sample_type & PERF_SAMPLE_WEIGHT_STRUCT))
11658 return -EINVAL;
11659
11660out:
11661 return ret;
11662
11663err_size:
11664 put_user(sizeof(*attr), &uattr->size);
11665 ret = -E2BIG;
11666 goto out;
11667}
11668
11669static int
11670perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
11671{
11672 struct perf_buffer *rb = NULL;
11673 int ret = -EINVAL;
11674
11675 if (!output_event)
11676 goto set;
11677
11678
11679 if (event == output_event)
11680 goto out;
11681
11682
11683
11684
11685 if (output_event->cpu != event->cpu)
11686 goto out;
11687
11688
11689
11690
11691 if (output_event->cpu == -1 && output_event->ctx != event->ctx)
11692 goto out;
11693
11694
11695
11696
11697 if (output_event->clock != event->clock)
11698 goto out;
11699
11700
11701
11702
11703
11704 if (is_write_backward(output_event) != is_write_backward(event))
11705 goto out;
11706
11707
11708
11709
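	/* Mixing AUX data from different PMUs in one buffer is not allowed. */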
11710 if (has_aux(event) && has_aux(output_event) &&
11711 event->pmu != output_event->pmu)
11712 goto out;
11713
11714set:
11715 mutex_lock(&event->mmap_mutex);
11716
11717 if (atomic_read(&event->mmap_count))
11718 goto unlock;
11719
11720 if (output_event) {
11721
11722 rb = ring_buffer_get(output_event);
11723 if (!rb)
11724 goto unlock;
11725 }
11726
11727 ring_buffer_attach(event, rb);
11728
11729 ret = 0;
11730unlock:
11731 mutex_unlock(&event->mmap_mutex);
11732
11733out:
11734 return ret;
11735}
11736
11737static void mutex_lock_double(struct mutex *a, struct mutex *b)
11738{
11739 if (b < a)
11740 swap(a, b);
11741
11742 mutex_lock(a);
11743 mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
11744}
11745
11746static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
11747{
11748 bool nmi_safe = false;
11749
11750 switch (clk_id) {
11751 case CLOCK_MONOTONIC:
11752 event->clock = &ktime_get_mono_fast_ns;
11753 nmi_safe = true;
11754 break;
11755
11756 case CLOCK_MONOTONIC_RAW:
11757 event->clock = &ktime_get_raw_fast_ns;
11758 nmi_safe = true;
11759 break;
11760
11761 case CLOCK_REALTIME:
11762 event->clock = &ktime_get_real_ns;
11763 break;
11764
11765 case CLOCK_BOOTTIME:
11766 event->clock = &ktime_get_boot_ns;
11767 break;
11768
11769 case CLOCK_TAI:
11770 event->clock = &ktime_get_tai_ns;
11771 break;
11772
11773 default:
11774 return -EINVAL;
11775 }
11776
11777 if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI))
11778 return -EINVAL;
11779
11780 return 0;
11781}
11782
11783
11784
11785
11786
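/*
 * Lock both the group leader's context and @ctx, re-reading the leader's
 * context until it is stable under both mutexes.
 */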
11787static struct perf_event_context *
11788__perf_event_ctx_lock_double(struct perf_event *group_leader,
11789 struct perf_event_context *ctx)
11790{
11791 struct perf_event_context *gctx;
11792
11793again:
11794 rcu_read_lock();
11795 gctx = READ_ONCE(group_leader->ctx);
11796 if (!refcount_inc_not_zero(&gctx->refcount)) {
11797 rcu_read_unlock();
11798 goto again;
11799 }
11800 rcu_read_unlock();
11801
11802 mutex_lock_double(&gctx->mutex, &ctx->mutex);
11803
11804 if (group_leader->ctx != gctx) {
11805 mutex_unlock(&ctx->mutex);
11806 mutex_unlock(&gctx->mutex);
11807 put_ctx(gctx);
11808 goto again;
11809 }
11810
11811 return gctx;
11812}
11813
11814
11815
11816
11817
11818
11819
11820
11821
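/**
 * sys_perf_event_open - open a performance event, associate it to a task/cpu
 *
 * @attr_uptr:	event attributes from userspace
 * @pid:	target pid (or cgroup fd with PERF_FLAG_PID_CGROUP)
 * @cpu:	target cpu (-1 for any)
 * @group_fd:	fd of the group leader event, or -1 to create a new group
 * @flags:	PERF_FLAG_* modifiers
 */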
11822SYSCALL_DEFINE5(perf_event_open,
11823 struct perf_event_attr __user *, attr_uptr,
11824 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
11825{
11826 struct perf_event *group_leader = NULL, *output_event = NULL;
11827 struct perf_event *event, *sibling;
11828 struct perf_event_attr attr;
11829 struct perf_event_context *ctx, *uninitialized_var(gctx);
11830 struct file *event_file = NULL;
11831 struct fd group = {NULL, 0};
11832 struct task_struct *task = NULL;
11833 struct pmu *pmu;
11834 int event_fd;
11835 int move_group = 0;
11836 int err;
11837 int f_flags = O_RDWR;
11838 int cgroup_fd = -1;
11839
11840
11841 if (flags & ~PERF_FLAG_ALL)
11842 return -EINVAL;
11843
11844
11845 err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
11846 if (err)
11847 return err;
11848
11849 err = perf_copy_attr(attr_uptr, &attr);
11850 if (err)
11851 return err;
11852
11853 if (!attr.exclude_kernel) {
11854 err = perf_allow_kernel(&attr);
11855 if (err)
11856 return err;
11857 }
11858
11859 if (attr.namespaces) {
11860 if (!perfmon_capable())
11861 return -EACCES;
11862 }
11863
11864 if (attr.freq) {
11865 if (attr.sample_freq > sysctl_perf_event_sample_rate)
11866 return -EINVAL;
11867 } else {
11868 if (attr.sample_period & (1ULL << 63))
11869 return -EINVAL;
11870 }
11871
11872
11873 if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) {
11874 err = perf_allow_kernel(&attr);
11875 if (err)
11876 return err;
11877 }
11878
11879
11880
11881
11882
11883
11884
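	/*
	 * In cgroup mode the pid argument carries the cgroup fd, and a
	 * specific CPU must be given.
	 */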
11885 if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1))
11886 return -EINVAL;
11887
11888 if (flags & PERF_FLAG_FD_CLOEXEC)
11889 f_flags |= O_CLOEXEC;
11890
11891 event_fd = get_unused_fd_flags(f_flags);
11892 if (event_fd < 0)
11893 return event_fd;
11894
11895 if (group_fd != -1) {
11896 err = perf_fget_light(group_fd, &group);
11897 if (err)
11898 goto err_fd;
11899 group_leader = group.file->private_data;
11900 if (flags & PERF_FLAG_FD_OUTPUT)
11901 output_event = group_leader;
11902 if (flags & PERF_FLAG_FD_NO_GROUP)
11903 group_leader = NULL;
11904 }
11905
11906 if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) {
11907 task = find_lively_task_by_vpid(pid);
11908 if (IS_ERR(task)) {
11909 err = PTR_ERR(task);
11910 goto err_group_fd;
11911 }
11912 }
11913
11914 if (task && group_leader &&
11915 group_leader->attr.inherit != attr.inherit) {
11916 err = -EINVAL;
11917 goto err_task;
11918 }
11919
11920 if (flags & PERF_FLAG_PID_CGROUP)
11921 cgroup_fd = pid;
11922
11923 event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
11924 NULL, NULL, cgroup_fd);
11925 if (IS_ERR(event)) {
11926 err = PTR_ERR(event);
11927 goto err_task;
11928 }
11929
11930 if (is_sampling_event(event)) {
11931 if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
11932 err = -EOPNOTSUPP;
11933 goto err_alloc;
11934 }
11935 }
11936
11937
11938
11939
11940
11941 pmu = event->pmu;
11942
11943 if (attr.use_clockid) {
11944 err = perf_event_set_clock(event, attr.clockid);
11945 if (err)
11946 goto err_alloc;
11947 }
11948
11949 if (pmu->task_ctx_nr == perf_sw_context)
11950 event->event_caps |= PERF_EV_CAP_SOFTWARE;
11951
11952 if (group_leader) {
11953 if (is_software_event(event) &&
11954 !in_software_context(group_leader)) {
11955
11956
11957
11958
11959
11960
11961
11962
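			/*
			 * A software event never fails to schedule, so it may
			 * join a hardware group; charge it to the leader's
			 * context and PMU.
			 */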
11963 pmu = group_leader->ctx->pmu;
11964 } else if (!is_software_event(event) &&
11965 is_software_event(group_leader) &&
11966 (group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
11967
11968
11969
11970
11971
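			/*
			 * Adding a hardware event to a pure software group
			 * moves the whole group over to the hardware context.
			 */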
11972 move_group = 1;
11973 }
11974 }
11975
11976
11977
11978
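	/*
	 * Get the target context (task or per-CPU):
	 */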
11979 ctx = find_get_context(pmu, task, event);
11980 if (IS_ERR(ctx)) {
11981 err = PTR_ERR(ctx);
11982 goto err_alloc;
11983 }
11984
11985
11986
11987
11988 if (group_leader) {
11989 err = -EINVAL;
11990
11991
11992
11993
11994
11995 if (group_leader->group_leader != group_leader)
11996 goto err_context;
11997
11998
11999 if (group_leader->clock != event->clock)
12000 goto err_context;
12001
12002
12003
12004
12005
12006
12007 if (group_leader->cpu != event->cpu)
12008 goto err_context;
12009
12010
12011
12012
12013
12014 if (group_leader->ctx->task != ctx->task)
12015 goto err_context;
12016
12017
12018
12019
12020
12021
12022 if (!move_group && group_leader->ctx != ctx)
12023 goto err_context;
12024
12025
12026
12027
12028 if (attr.exclusive || attr.pinned)
12029 goto err_context;
12030 }
12031
12032 if (output_event) {
12033 err = perf_event_set_output(event, output_event);
12034 if (err)
12035 goto err_context;
12036 }
12037
12038 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event,
12039 f_flags);
12040 if (IS_ERR(event_file)) {
12041 err = PTR_ERR(event_file);
12042 event_file = NULL;
12043 goto err_context;
12044 }
12045
12046 if (task) {
12047 err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
12048 if (err)
12049 goto err_file;
12050
12051
12052
12053
12054
12055
12056
12057
12058
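		/*
		 * Attaching to another task requires ptrace read access (or
		 * the perfmon capability); the check, and the install later
		 * on, happen under cred_guard_mutex so that a concurrent
		 * exec() cannot change the task's credentials in between.
		 */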
12059 err = -EACCES;
12060 if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
12061 goto err_cred;
12062 }
12063
12064 if (move_group) {
12065 gctx = __perf_event_ctx_lock_double(group_leader, ctx);
12066
12067 if (gctx->task == TASK_TOMBSTONE) {
12068 err = -ESRCH;
12069 goto err_locked;
12070 }
12071
12072
12073
12074
12075
12076 if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
12077
12078
12079
12080
12081
12082 if (gctx != ctx) {
12083 err = -EINVAL;
12084 goto err_locked;
12085 } else {
12086 perf_event_ctx_unlock(group_leader, gctx);
12087 move_group = 0;
12088 }
12089 }
12090
12091
12092
12093
12094 err = -EBUSY;
12095 if (!exclusive_event_installable(group_leader, ctx))
12096 goto err_locked;
12097
12098 for_each_sibling_event(sibling, group_leader) {
12099 if (!exclusive_event_installable(sibling, ctx))
12100 goto err_locked;
12101 }
12102 } else {
12103 mutex_lock(&ctx->mutex);
12104 }
12105
12106 if (ctx->task == TASK_TOMBSTONE) {
12107 err = -ESRCH;
12108 goto err_locked;
12109 }
12110
12111 if (!perf_event_validate_size(event)) {
12112 err = -E2BIG;
12113 goto err_locked;
12114 }
12115
12116 if (!task) {
12117
12118
12119
12120
12121
12122
12123 struct perf_cpu_context *cpuctx =
12124 container_of(ctx, struct perf_cpu_context, ctx);
12125
12126 if (!cpuctx->online) {
12127 err = -ENODEV;
12128 goto err_locked;
12129 }
12130 }
12131
12132 if (perf_need_aux_event(event) && !perf_get_aux_event(event, group_leader)) {
12133 err = -EINVAL;
12134 goto err_locked;
12135 }
12136
12137
12138
12139
12140
12141 if (!exclusive_event_installable(event, ctx)) {
12142 err = -EBUSY;
12143 goto err_locked;
12144 }
12145
12146 WARN_ON_ONCE(ctx->parent_ctx);
12147
12148
12149
12150
12151
12152
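	/*
	 * This is the point of no return: from here on we start modifying
	 * the existing contexts and must not fail.
	 */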
12153 if (move_group) {
12154
12155
12156
12157
12158 perf_remove_from_context(group_leader, 0);
12159 put_ctx(gctx);
12160
12161 for_each_sibling_event(sibling, group_leader) {
12162 perf_remove_from_context(sibling, 0);
12163 put_ctx(gctx);
12164 }
12165
12166
12167
12168
12169
12170 synchronize_rcu();
12171
12172
12173
12174
12175
12176
12177
12178
12179
12180
12181
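		/*
		 * Install the siblings before the group leader: a leader
		 * being installed would try to schedule its entire group, so
		 * the siblings must already be in the new context by then.
		 */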
12182 for_each_sibling_event(sibling, group_leader) {
12183 perf_event__state_init(sibling);
12184 perf_install_in_context(ctx, sibling, sibling->cpu);
12185 get_ctx(ctx);
12186 }
12187
12188
12189
12190
12191
12192
12193 perf_event__state_init(group_leader);
12194 perf_install_in_context(ctx, group_leader, group_leader->cpu);
12195 get_ctx(ctx);
12196 }
12197
12198
12199
12200
12201
12202
12203
12204 perf_event__header_size(event);
12205 perf_event__id_header_size(event);
12206
12207 event->owner = current;
12208
12209 perf_install_in_context(ctx, event, event->cpu);
12210 perf_unpin_context(ctx);
12211
12212 if (move_group)
12213 perf_event_ctx_unlock(group_leader, gctx);
12214 mutex_unlock(&ctx->mutex);
12215
12216 if (task) {
12217 mutex_unlock(&task->signal->cred_guard_mutex);
12218 put_task_struct(task);
12219 }
12220
12221	mutex_lock(&current->perf_event_mutex);
12222	list_add_tail(&event->owner_entry, &current->perf_event_list);
12223 mutex_unlock(¤t->perf_event_mutex);
12224
12225
12226
12227
12228
12229
12230
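	/*
	 * Only now that the new event is linked onto the leader's sibling
	 * list can the reference on the group fd be dropped.
	 */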
12231 fdput(group);
12232 fd_install(event_fd, event_file);
12233 return event_fd;
12234
12235err_locked:
12236 if (move_group)
12237 perf_event_ctx_unlock(group_leader, gctx);
12238 mutex_unlock(&ctx->mutex);
12239err_cred:
12240 if (task)
12241 mutex_unlock(&task->signal->cred_guard_mutex);
12242err_file:
12243 fput(event_file);
12244err_context:
12245 perf_unpin_context(ctx);
12246 put_ctx(ctx);
12247err_alloc:
12248
12249
12250
12251
12252 if (!event_file)
12253 free_event(event);
12254err_task:
12255 if (task)
12256 put_task_struct(task);
12257err_group_fd:
12258 fdput(group);
12259err_fd:
12260 put_unused_fd(event_fd);
12261 return err;
12262}
12263
12264
12265
12266
12267
12268
12269
12270
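/**
 * perf_event_create_kernel_counter - create an in-kernel performance event
 *
 * @attr:	attributes of the counter to create
 * @cpu:	cpu on which the counter is bound
 * @task:	task to profile (NULL for per-cpu)
 * @overflow_handler: callback invoked on overflow
 * @context:	opaque data passed to the overflow handler
 */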
12271struct perf_event *
12272perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
12273 struct task_struct *task,
12274 perf_overflow_handler_t overflow_handler,
12275 void *context)
12276{
12277 struct perf_event_context *ctx;
12278 struct perf_event *event;
12279 int err;
12280
12281
12282
12283
12284
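	/*
	 * In-kernel events are not grouped, so aux_output (which requires a
	 * group sibling providing the AUX area) cannot be honoured.
	 */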
12285 if (attr->aux_output)
12286 return ERR_PTR(-EINVAL);
12287
12288 event = perf_event_alloc(attr, cpu, task, NULL, NULL,
12289 overflow_handler, context, -1);
12290 if (IS_ERR(event)) {
12291 err = PTR_ERR(event);
12292 goto err;
12293 }
12294
12295
12296 event->owner = TASK_TOMBSTONE;
12297
12298
12299
12300
12301 ctx = find_get_context(event->pmu, task, event);
12302 if (IS_ERR(ctx)) {
12303 err = PTR_ERR(ctx);
12304 goto err_free;
12305 }
12306
12307 WARN_ON_ONCE(ctx->parent_ctx);
12308 mutex_lock(&ctx->mutex);
12309 if (ctx->task == TASK_TOMBSTONE) {
12310 err = -ESRCH;
12311 goto err_unlock;
12312 }
12313
12314 if (!task) {
12315
12316
12317
12318
12319
12320
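		/*
		 * A per-CPU event needs its CPU to be online; ctx::mutex is
		 * also taken by the hotplug code, which keeps this check
		 * stable.
		 */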
12321 struct perf_cpu_context *cpuctx =
12322 container_of(ctx, struct perf_cpu_context, ctx);
12323 if (!cpuctx->online) {
12324 err = -ENODEV;
12325 goto err_unlock;
12326 }
12327 }
12328
12329 if (!exclusive_event_installable(event, ctx)) {
12330 err = -EBUSY;
12331 goto err_unlock;
12332 }
12333
12334 perf_install_in_context(ctx, event, event->cpu);
12335 perf_unpin_context(ctx);
12336 mutex_unlock(&ctx->mutex);
12337
12338 return event;
12339
12340err_unlock:
12341 mutex_unlock(&ctx->mutex);
12342 perf_unpin_context(ctx);
12343 put_ctx(ctx);
12344err_free:
12345 free_event(event);
12346err:
12347 return ERR_PTR(err);
12348}
12349EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
12350
12351void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
12352{
12353 struct perf_event_context *src_ctx;
12354 struct perf_event_context *dst_ctx;
12355 struct perf_event *event, *tmp;
12356 LIST_HEAD(events);
12357
12358 src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
12359 dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
12360
12361
12362
12363
12364
12365 mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
12366 list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
12367 event_entry) {
12368 perf_remove_from_context(event, 0);
12369 unaccount_event_cpu(event, src_cpu);
12370 put_ctx(src_ctx);
12371 list_add(&event->migrate_entry, &events);
12372 }
12373
12374
12375
12376
12377 synchronize_rcu();
12378
12379
12380
12381
12382
12383
12384
12385
12386
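	/*
	 * Re-install the events in two passes: siblings first (they stay
	 * inert without their leader), then the group leaders, which bring
	 * their groups back to life.
	 */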
12387 list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
12388 if (event->group_leader == event)
12389 continue;
12390
12391 list_del(&event->migrate_entry);
12392 if (event->state >= PERF_EVENT_STATE_OFF)
12393 event->state = PERF_EVENT_STATE_INACTIVE;
12394 account_event_cpu(event, dst_cpu);
12395 perf_install_in_context(dst_ctx, event, dst_cpu);
12396 get_ctx(dst_ctx);
12397 }
12398
12399
12400
12401
12402
12403 list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
12404 list_del(&event->migrate_entry);
12405 if (event->state >= PERF_EVENT_STATE_OFF)
12406 event->state = PERF_EVENT_STATE_INACTIVE;
12407 account_event_cpu(event, dst_cpu);
12408 perf_install_in_context(dst_ctx, event, dst_cpu);
12409 get_ctx(dst_ctx);
12410 }
12411 mutex_unlock(&dst_ctx->mutex);
12412 mutex_unlock(&src_ctx->mutex);
12413}
12414EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
12415
12416static void sync_child_event(struct perf_event *child_event,
12417 struct task_struct *child)
12418{
12419 struct perf_event *parent_event = child_event->parent;
12420 u64 child_val;
12421
12422 if (child_event->attr.inherit_stat)
12423 perf_event_read_event(child_event, child);
12424
12425 child_val = perf_event_count(child_event);
12426
12427
12428
12429
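	/* Fold the child's count and times into the parent event. */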
12430 atomic64_add(child_val, &parent_event->child_count);
12431 atomic64_add(child_event->total_time_enabled,
12432 &parent_event->child_total_time_enabled);
12433 atomic64_add(child_event->total_time_running,
12434 &parent_event->child_total_time_running);
12435}
12436
12437static void
12438perf_event_exit_event(struct perf_event *child_event,
12439 struct perf_event_context *child_ctx,
12440 struct task_struct *child)
12441{
12442 struct perf_event *parent_event = child_event->parent;
12443
12444
12445
12446
12447
12448
12449
12450
12451
12452
12453
12454
12455
12456 raw_spin_lock_irq(&child_ctx->lock);
12457 WARN_ON_ONCE(child_ctx->is_active);
12458
12459 if (parent_event)
12460 perf_group_detach(child_event);
12461 list_del_event(child_event, child_ctx);
12462 perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT);
12463 raw_spin_unlock_irq(&child_ctx->lock);
12464
12465
12466
12467
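	/*
	 * Events without a parent are owned by a file descriptor and are not
	 * freed here; just wake up anyone waiting on them.
	 */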
12468 if (!parent_event) {
12469 perf_event_wakeup(child_event);
12470 return;
12471 }
12472
12473
12474
12475
12476 sync_child_event(child_event, child);
12477
12478
12479
12480
12481 WARN_ON_ONCE(parent_event->ctx->parent_ctx);
12482 mutex_lock(&parent_event->child_mutex);
12483 list_del_init(&child_event->child_list);
12484 mutex_unlock(&parent_event->child_mutex);
12485
12486
12487
12488
12489 perf_event_wakeup(parent_event);
12490 free_event(child_event);
12491 put_event(parent_event);
12492}
12493
12494static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
12495{
12496 struct perf_event_context *child_ctx, *clone_ctx = NULL;
12497 struct perf_event *child_event, *next;
12498
12499 WARN_ON_ONCE(child != current);
12500
12501 child_ctx = perf_pin_task_context(child, ctxn);
12502 if (!child_ctx)
12503 return;
12504
12505
12506
12507
12508
12509
12510
12511
12512
12513
12514
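	/*
	 * Hold ctx::mutex across the whole teardown so nothing else can
	 * modify the context while the child exits.
	 */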
12515 mutex_lock(&child_ctx->mutex);
12516
12517
12518
12519
12520
12521
12522 raw_spin_lock_irq(&child_ctx->lock);
12523 task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx, EVENT_ALL);
12524
12525
12526
12527
12528
12529 RCU_INIT_POINTER(child->perf_event_ctxp[ctxn], NULL);
12530 put_ctx(child_ctx);
12531 WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
12532 put_task_struct(current);
12533
12534 clone_ctx = unclone_ctx(child_ctx);
12535 raw_spin_unlock_irq(&child_ctx->lock);
12536
12537 if (clone_ctx)
12538 put_ctx(clone_ctx);
12539
12540
12541
12542
12543
12544
12545 perf_event_task(child, child_ctx, 0);
12546
12547 list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
12548 perf_event_exit_event(child_event, child_ctx, child);
12549
12550 mutex_unlock(&child_ctx->mutex);
12551
12552 put_ctx(child_ctx);
12553}
12554
12555
12556
12557
12558
12559
12560
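/*
 * Called from the exit path: detach the task's events, fold inherited
 * counts back into the parent events and emit the task's EXIT record.
 */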
12561void perf_event_exit_task(struct task_struct *child)
12562{
12563 struct perf_event *event, *tmp;
12564 int ctxn;
12565
12566 mutex_lock(&child->perf_event_mutex);
12567 list_for_each_entry_safe(event, tmp, &child->perf_event_list,
12568 owner_entry) {
12569 list_del_init(&event->owner_entry);
12570
12571
12572
12573
12574
12575
12576 smp_store_release(&event->owner, NULL);
12577 }
12578 mutex_unlock(&child->perf_event_mutex);
12579
12580 for_each_task_context_nr(ctxn)
12581 perf_event_exit_task_context(child, ctxn);
12582
12583
12584
12585
12586
12587
12588
12589 perf_event_task(child, NULL, 0);
12590}
12591
12592static void perf_free_event(struct perf_event *event,
12593 struct perf_event_context *ctx)
12594{
12595 struct perf_event *parent = event->parent;
12596
12597 if (WARN_ON_ONCE(!parent))
12598 return;
12599
12600 mutex_lock(&parent->child_mutex);
12601 list_del_init(&event->child_list);
12602 mutex_unlock(&parent->child_mutex);
12603
12604 put_event(parent);
12605
12606 raw_spin_lock_irq(&ctx->lock);
12607 perf_group_detach(event);
12608 list_del_event(event, ctx);
12609 raw_spin_unlock_irq(&ctx->lock);
12610 free_event(event);
12611}
12612
12613
12614
12615
12616
12617
12618
12619
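/*
 * Undo perf_event_init_task(): free inherited events and contexts of a task
 * that never ran, e.g. on the fork() failure path.
 */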
12620void perf_event_free_task(struct task_struct *task)
12621{
12622 struct perf_event_context *ctx;
12623 struct perf_event *event, *tmp;
12624 int ctxn;
12625
12626 for_each_task_context_nr(ctxn) {
12627 ctx = task->perf_event_ctxp[ctxn];
12628 if (!ctx)
12629 continue;
12630
12631 mutex_lock(&ctx->mutex);
12632 raw_spin_lock_irq(&ctx->lock);
12633
12634
12635
12636
12637
12638
12639 RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL);
12640 WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
12641 put_task_struct(task);
12642 raw_spin_unlock_irq(&ctx->lock);
12643
12644 list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry)
12645 perf_free_event(event, ctx);
12646
12647 mutex_unlock(&ctx->mutex);
12648
12649
12650
12651
12652
12653
12654
12655
12656
12657
12658
12659
12660
12661
12662
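		/*
		 * Other CPUs may still hold references on this context (and
		 * through its events, on the task); wait for all of them to
		 * drop before the task itself can be freed.
		 */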
12663 wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1);
12664 put_ctx(ctx);
12665 }
12666}
12667
12668void perf_event_delayed_put(struct task_struct *task)
12669{
12670 int ctxn;
12671
12672 for_each_task_context_nr(ctxn)
12673 WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
12674}
12675
12676struct file *perf_event_get(unsigned int fd)
12677{
12678 struct file *file = fget(fd);
12679 if (!file)
12680 return ERR_PTR(-EBADF);
12681
12682 if (file->f_op != &perf_fops) {
12683 fput(file);
12684 return ERR_PTR(-EBADF);
12685 }
12686
12687 return file;
12688}
12689
12690const struct perf_event *perf_get_event(struct file *file)
12691{
12692 if (file->f_op != &perf_fops)
12693 return ERR_PTR(-EINVAL);
12694
12695 return file->private_data;
12696}
12697
12698const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
12699{
12700 if (!event)
12701 return ERR_PTR(-EINVAL);
12702
12703 return &event->attr;
12704}
12705
12706
12707
12708
12709
12710
12711
12712
12713
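/*
 * Inherit one event from the parent task into the child.
 *
 * Returns the new child event, NULL if the event is not to be inherited
 * (e.g. the parent event is orphaned), or an ERR_PTR() on error.
 */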
12714static struct perf_event *
12715inherit_event(struct perf_event *parent_event,
12716 struct task_struct *parent,
12717 struct perf_event_context *parent_ctx,
12718 struct task_struct *child,
12719 struct perf_event *group_leader,
12720 struct perf_event_context *child_ctx)
12721{
12722 enum perf_event_state parent_state = parent_event->state;
12723 struct perf_event *child_event;
12724 unsigned long flags;
12725
12726
12727
12728
12729
12730
12731
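	/*
	 * Always inherit from the topmost parent so the parent/child
	 * relationship stays a single level deep.
	 */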
12732 if (parent_event->parent)
12733 parent_event = parent_event->parent;
12734
12735 child_event = perf_event_alloc(&parent_event->attr,
12736 parent_event->cpu,
12737 child,
12738 group_leader, parent_event,
12739 NULL, NULL, -1);
12740 if (IS_ERR(child_event))
12741 return child_event;
12742
12743
12744 if ((child_event->attach_state & PERF_ATTACH_TASK_DATA) &&
12745 !child_ctx->task_ctx_data) {
12746 struct pmu *pmu = child_event->pmu;
12747
12748 child_ctx->task_ctx_data = alloc_task_ctx_data(pmu);
12749 if (!child_ctx->task_ctx_data) {
12750 free_event(child_event);
12751 return ERR_PTR(-ENOMEM);
12752 }
12753 }
12754
12755
12756
12757
12758
12759
12760
12761 mutex_lock(&parent_event->child_mutex);
12762 if (is_orphaned_event(parent_event) ||
12763 !atomic_long_inc_not_zero(&parent_event->refcount)) {
12764 mutex_unlock(&parent_event->child_mutex);
12765
12766 free_event(child_event);
12767 return NULL;
12768 }
12769
12770 get_ctx(child_ctx);
12771
12772
12773
12774
12775
12776
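	/*
	 * The child starts out in the parent's current state (inactive vs.
	 * off) rather than following attr.disabled.
	 */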
12777 if (parent_state >= PERF_EVENT_STATE_INACTIVE)
12778 child_event->state = PERF_EVENT_STATE_INACTIVE;
12779 else
12780 child_event->state = PERF_EVENT_STATE_OFF;
12781
12782 if (parent_event->attr.freq) {
12783 u64 sample_period = parent_event->hw.sample_period;
12784 struct hw_perf_event *hwc = &child_event->hw;
12785
12786 hwc->sample_period = sample_period;
12787 hwc->last_period = sample_period;
12788
12789 local64_set(&hwc->period_left, sample_period);
12790 }
12791
12792 child_event->ctx = child_ctx;
12793 child_event->overflow_handler = parent_event->overflow_handler;
12794 child_event->overflow_handler_context
12795 = parent_event->overflow_handler_context;
12796
12797
12798
12799
12800 perf_event__header_size(child_event);
12801 perf_event__id_header_size(child_event);
12802
12803
12804
12805
12806 raw_spin_lock_irqsave(&child_ctx->lock, flags);
12807 add_event_to_ctx(child_event, child_ctx);
12808 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
12809
12810
12811
12812
12813 list_add_tail(&child_event->child_list, &parent_event->child_list);
12814 mutex_unlock(&parent_event->child_mutex);
12815
12816 return child_event;
12817}
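
/*
 * Background sketch (illustrative, not part of this file): the inheritance
 * path above is driven by events whose creator set attr.inherit.  From
 * userspace that looks roughly like the following; once such an event is
 * attached to a task, fork() clones it into each child via inherit_event().
 *
 *	struct perf_event_attr attr = {
 *		.type		= PERF_TYPE_HARDWARE,
 *		.config		= PERF_COUNT_HW_INSTRUCTIONS,
 *		.size		= sizeof(attr),
 *		.disabled	= 1,
 *		.inherit	= 1,	// follow children across fork()
 *	};
 *	int fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
 */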

/*
 * Inherits an event group.
 *
 * This will quietly suppress orphaned events; !inherit_event() is not an error.
 * This matches with perf_event_release_kernel() removing all child events.
 *
 * Returns:
 *  - 0 on success
 *  - <0 on error
 */
static int inherit_group(struct perf_event *parent_event,
	      struct task_struct *parent,
	      struct perf_event_context *parent_ctx,
	      struct task_struct *child,
	      struct perf_event_context *child_ctx)
{
	struct perf_event *leader;
	struct perf_event *sub;
	struct perf_event *child_ctr;

	leader = inherit_event(parent_event, parent, parent_ctx,
				 child, NULL, child_ctx);
	if (IS_ERR(leader))
		return PTR_ERR(leader);
	/*
	 * Inherit the group's siblings as well; an inherited AUX sibling
	 * (sub->aux_event == parent_event) must additionally be re-linked
	 * to the new leader via perf_get_aux_event().
	 */
	for_each_sibling_event(sub, parent_event) {
		child_ctr = inherit_event(sub, parent, parent_ctx,
					    child, leader, child_ctx);
		if (IS_ERR(child_ctr))
			return PTR_ERR(child_ctr);

		if (sub->aux_event == parent_event && child_ctr &&
		    !perf_get_aux_event(child_ctr, leader))
			return -EINVAL;
	}
	return 0;
}

/*
 * Creates the child task context and tries to inherit the event-group.
 *
 * Clears @inherited_all on !attr.inherited or error. Note that we'll
 * inherit an orphaned event as a 'success'; this matches with
 * perf_event_release_kernel() removing all child events.
 *
 * Returns:
 *  - 0 on success
 *  - <0 on error
 */
static int
inherit_task_group(struct perf_event *event, struct task_struct *parent,
		   struct perf_event_context *parent_ctx,
		   struct task_struct *child, int ctxn,
		   int *inherited_all)
{
	int ret;
	struct perf_event_context *child_ctx;

	if (!event->attr.inherit) {
		*inherited_all = 0;
		return 0;
	}

	child_ctx = child->perf_event_ctxp[ctxn];
	if (!child_ctx) {
		/*
		 * This is executed from the parent task context, so
		 * inherit events that have been marked for cloning.
		 * First allocate and initialize a context for the
		 * child.
		 */
		child_ctx = alloc_perf_context(parent_ctx->pmu, child);
		if (!child_ctx)
			return -ENOMEM;

		child->perf_event_ctxp[ctxn] = child_ctx;
	}

	ret = inherit_group(event, parent, parent_ctx,
			    child, child_ctx);

	if (ret)
		*inherited_all = 0;

	return ret;
}

/*
 * Initialize the perf_event context in task_struct
 */
static int perf_event_init_context(struct task_struct *child, int ctxn)
{
	struct perf_event_context *child_ctx, *parent_ctx;
	struct perf_event_context *cloned_ctx;
	struct perf_event *event;
	struct task_struct *parent = current;
	int inherited_all = 1;
	unsigned long flags;
	int ret = 0;

	if (likely(!parent->perf_event_ctxp[ctxn]))
		return 0;

	/*
	 * If the parent's context is a clone, pin it so it won't get
	 * swapped under us.
	 */
	parent_ctx = perf_pin_task_context(parent, ctxn);
	if (!parent_ctx)
		return 0;

	/*
	 * No need to check if parent_ctx != NULL here; since we saw
	 * it non-NULL earlier, the only reason for it to become NULL
	 * is if we exit, and since we're currently in the middle of
	 * a fork we can't be exiting at the same time.
	 */

	/*
	 * Lock the parent list. No need to lock the child - not PID
	 * hashed yet and not running, so nobody can access it.
	 */
	mutex_lock(&parent_ctx->mutex);

	/*
	 * We don't have to disable NMIs - we are only looking at
	 * the list, not manipulating it:
	 */
	perf_event_groups_for_each(event, &parent_ctx->pinned_groups) {
		ret = inherit_task_group(event, parent, parent_ctx,
					 child, ctxn, &inherited_all);
		if (ret)
			goto out_unlock;
	}

	/*
	 * We can't hold ctx->lock when iterating the ->flexible_group list due
	 * to allocations, but we need to prevent rotation because
	 * rotate_ctx() will change the list from interrupt context.
	 */
	raw_spin_lock_irqsave(&parent_ctx->lock, flags);
	parent_ctx->rotate_disable = 1;
	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);

	perf_event_groups_for_each(event, &parent_ctx->flexible_groups) {
		ret = inherit_task_group(event, parent, parent_ctx,
					 child, ctxn, &inherited_all);
		if (ret)
			goto out_unlock;
	}

	raw_spin_lock_irqsave(&parent_ctx->lock, flags);
	parent_ctx->rotate_disable = 0;

	child_ctx = child->perf_event_ctxp[ctxn];

	if (child_ctx && inherited_all) {
		/*
		 * Mark the child context as a clone of the parent
		 * context, or of whatever the parent is a clone of.
		 *
		 * Note that if the parent is a clone, the holding of
		 * parent_ctx->lock avoids it from being uncloned.
		 */
		cloned_ctx = parent_ctx->parent_ctx;
		if (cloned_ctx) {
			child_ctx->parent_ctx = cloned_ctx;
			child_ctx->parent_gen = parent_ctx->parent_gen;
		} else {
			child_ctx->parent_ctx = parent_ctx;
			child_ctx->parent_gen = parent_ctx->generation;
		}
		get_ctx(child_ctx->parent_ctx);
	}

	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
out_unlock:
	mutex_unlock(&parent_ctx->mutex);

	perf_unpin_context(parent_ctx);
	put_ctx(parent_ctx);

	return ret;
}

/*
 * Initialize the perf_event context in task_struct
 */
int perf_event_init_task(struct task_struct *child)
{
	int ctxn, ret;

	memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp));
	mutex_init(&child->perf_event_mutex);
	INIT_LIST_HEAD(&child->perf_event_list);

	for_each_task_context_nr(ctxn) {
		ret = perf_event_init_context(child, ctxn);
		if (ret) {
			perf_event_free_task(child);
			return ret;
		}
	}

	return 0;
}
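
/*
 * Call-site sketch (illustrative): perf_event_init_task() is invoked from
 * copy_process() in kernel/fork.c while the child is being set up, before it
 * becomes runnable, roughly:
 *
 *	retval = perf_event_init_task(p);
 *	if (retval)
 *		goto bad_fork_cleanup_policy;
 *
 * (The exact error label and surrounding code differ between kernel
 * versions; this is only meant to show where the hook sits.)
 */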

static void __init perf_event_init_all_cpus(void)
{
	struct swevent_htable *swhash;
	int cpu;

	zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);

	for_each_possible_cpu(cpu) {
		swhash = &per_cpu(swevent_htable, cpu);
		mutex_init(&swhash->hlist_mutex);
		INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));

		INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
		raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));

#ifdef CONFIG_CGROUP_PERF
		INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
#endif
		INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
	}
}

static void perf_swevent_init_cpu(unsigned int cpu)
{
	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);

	mutex_lock(&swhash->hlist_mutex);
	if (swhash->hlist_refcount > 0 && !swevent_hlist_deref(swhash)) {
		struct swevent_hlist *hlist;

		hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
		WARN_ON(!hlist);
		rcu_assign_pointer(swhash->swevent_hlist, hlist);
	}
	mutex_unlock(&swhash->hlist_mutex);
}

#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
static void __perf_event_exit_context(void *__info)
{
	struct perf_event_context *ctx = __info;
	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
	struct perf_event *event;

	raw_spin_lock(&ctx->lock);
	ctx_sched_out(ctx, cpuctx, EVENT_TIME);
	list_for_each_entry(event, &ctx->event_list, event_entry)
		__perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
	raw_spin_unlock(&ctx->lock);
}

static void perf_event_exit_cpu_context(int cpu)
{
	struct perf_cpu_context *cpuctx;
	struct perf_event_context *ctx;
	struct pmu *pmu;

	mutex_lock(&pmus_lock);
	list_for_each_entry(pmu, &pmus, entry) {
		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
		ctx = &cpuctx->ctx;

		mutex_lock(&ctx->mutex);
		smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
		cpuctx->online = 0;
		mutex_unlock(&ctx->mutex);
	}
	cpumask_clear_cpu(cpu, perf_online_mask);
	mutex_unlock(&pmus_lock);
}
#else

static void perf_event_exit_cpu_context(int cpu) { }

#endif

int perf_event_init_cpu(unsigned int cpu)
{
	struct perf_cpu_context *cpuctx;
	struct perf_event_context *ctx;
	struct pmu *pmu;

	perf_swevent_init_cpu(cpu);

	mutex_lock(&pmus_lock);
	cpumask_set_cpu(cpu, perf_online_mask);
	list_for_each_entry(pmu, &pmus, entry) {
		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
		ctx = &cpuctx->ctx;

		mutex_lock(&ctx->mutex);
		cpuctx->online = 1;
		mutex_unlock(&ctx->mutex);
	}
	mutex_unlock(&pmus_lock);

	return 0;
}

int perf_event_exit_cpu(unsigned int cpu)
{
	perf_event_exit_cpu_context(cpu);
	return 0;
}
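
/*
 * Hotplug wiring sketch (illustrative): perf_event_init_cpu() and
 * perf_event_exit_cpu() are hooked into the CPU hotplug state machine via
 * the static cpuhp state table in kernel/cpu.c, roughly:
 *
 *	[CPUHP_AP_PERF_ONLINE] = {
 *		.name			= "perf:online",
 *		.startup.single		= perf_event_init_cpu,
 *		.teardown.single	= perf_event_exit_cpu,
 *	},
 */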

static int
perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
{
	int cpu;

	for_each_online_cpu(cpu)
		perf_event_exit_cpu(cpu);

	return NOTIFY_OK;
}

/*
 * Run the perf reboot notifier at the very last possible moment so that
 * the generic watchdog code runs as long as possible.
 */
static struct notifier_block perf_reboot_notifier = {
	.notifier_call = perf_reboot,
	.priority = INT_MIN,
};

void __init perf_event_init(void)
{
	int ret;

	idr_init(&pmu_idr);

	perf_event_init_all_cpus();
	init_srcu_struct(&pmus_srcu);
	perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
	perf_pmu_register(&perf_cpu_clock, NULL, -1);
	perf_pmu_register(&perf_task_clock, NULL, -1);
	perf_tp_register();
	perf_event_init_cpu(smp_processor_id());
	register_reboot_notifier(&perf_reboot_notifier);

	ret = init_hw_breakpoint();
	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);

	/*
	 * Build time assertion that we keep the data_head at the intended
	 * location.  IOW, validation we got the __reserved[] size right.
	 */
	BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head))
		     != 1024);
}
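
/*
 * Userspace view sketch (illustrative): the data_head offset asserted above
 * is part of the mmap() ABI.  A ring-buffer consumer maps the event fd and
 * polls data_head roughly like this (sizes and barriers simplified):
 *
 *	struct perf_event_mmap_page *pc;
 *
 *	pc = mmap(NULL, (1 + 8) * page_size, PROT_READ | PROT_WRITE,
 *		  MAP_SHARED, fd, 0);
 *	head = pc->data_head;
 *	// read barrier here, then consume records in [data_tail, head)
 *	pc->data_tail = head;
 */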

ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
			      char *page)
{
	struct perf_pmu_events_attr *pmu_attr =
		container_of(attr, struct perf_pmu_events_attr, attr);

	if (pmu_attr->event_str)
		return sprintf(page, "%s\n", pmu_attr->event_str);

	return 0;
}
EXPORT_SYMBOL_GPL(perf_event_sysfs_show);

static int __init perf_event_sysfs_init(void)
{
	struct pmu *pmu;
	int ret;

	mutex_lock(&pmus_lock);

	ret = bus_register(&pmu_bus);
	if (ret)
		goto unlock;

	list_for_each_entry(pmu, &pmus, entry) {
		if (!pmu->name || pmu->type < 0)
			continue;

		ret = pmu_dev_alloc(pmu);
		WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
	}
	pmu_bus_running = 1;
	ret = 0;

unlock:
	mutex_unlock(&pmus_lock);

	return ret;
}
device_initcall(perf_event_sysfs_init);
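
/*
 * Driver-side sketch (illustrative): perf_event_sysfs_show() above is the
 * ->show() routine behind helpers such as PMU_EVENT_ATTR_STRING(), which a
 * PMU driver might use roughly like this (all names here are hypothetical):
 *
 *	PMU_EVENT_ATTR_STRING(example-cycles, example_attr_cycles, "event=0x3c");
 *
 *	static struct attribute *example_events_attrs[] = {
 *		&example_attr_cycles.attr.attr,
 *		NULL,
 *	};
 */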

#ifdef CONFIG_CGROUP_PERF
static struct cgroup_subsys_state *
perf_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct perf_cgroup *jc;

	jc = kzalloc(sizeof(*jc), GFP_KERNEL);
	if (!jc)
		return ERR_PTR(-ENOMEM);

	jc->info = alloc_percpu(struct perf_cgroup_info);
	if (!jc->info) {
		kfree(jc);
		return ERR_PTR(-ENOMEM);
	}

	return &jc->css;
}

static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
{
	struct perf_cgroup *jc = container_of(css, struct perf_cgroup, css);

	free_percpu(jc->info);
	kfree(jc);
}

static int perf_cgroup_css_online(struct cgroup_subsys_state *css)
{
	perf_event_cgroup(css->cgroup);
	return 0;
}

static int __perf_cgroup_move(void *info)
{
	struct task_struct *task = info;
	rcu_read_lock();
	perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
	rcu_read_unlock();
	return 0;
}

static void perf_cgroup_attach(struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct cgroup_subsys_state *css;

	cgroup_taskset_for_each(task, css, tset)
		task_function_call(task, __perf_cgroup_move, task);
}

struct cgroup_subsys perf_event_cgrp_subsys = {
	.css_alloc	= perf_cgroup_css_alloc,
	.css_free	= perf_cgroup_css_free,
	.css_online	= perf_cgroup_css_online,
	.attach		= perf_cgroup_attach,
	/*
	 * Implicitly enable on dfl hierarchy so that perf events can
	 * always be filtered by cgroup2 path as long as perf_event
	 * controller is not mounted on a legacy hierarchy.
	 */
	.implicit_on_dfl = true,
	.threaded	= true,
};
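
/*
 * Userspace sketch (illustrative): with CONFIG_CGROUP_PERF an event can be
 * scoped to a cgroup by passing an open cgroup directory fd as the "pid"
 * argument together with PERF_FLAG_PID_CGROUP, roughly:
 *
 *	int cgrp_fd = open("/sys/fs/cgroup/mygroup", O_RDONLY);
 *	int fd = syscall(__NR_perf_event_open, &attr, cgrp_fd, cpu,
 *			 -1, PERF_FLAG_PID_CGROUP);
 */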
#endif
