#include <assert.h>
#include <limits.h>
#include <unistd.h>
#include <sys/file.h>
#include <sys/time.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>
#include <linux/err.h>
#include <linux/zalloc.h>
#include <api/fs/fs.h>
#include <perf/bpf_perf.h>

#include "bpf_counter.h"
#include "counts.h"
#include "debug.h"
#include "evsel.h"
#include "evlist.h"
#include "target.h"
#include "cgroup.h"
#include "cpumap.h"
#include "thread_map.h"

#include "bpf_skel/bpf_prog_profiler.skel.h"
#include "bpf_skel/bperf_u.h"
#include "bpf_skel/bperf_leader.skel.h"
#include "bpf_skel/bperf_follower.skel.h"

#define ATTR_MAP_SIZE 16

static inline void *u64_to_ptr(__u64 ptr)
{
        return (void *)(unsigned long)ptr;
}

static struct bpf_counter *bpf_counter_alloc(void)
{
        struct bpf_counter *counter;

        counter = zalloc(sizeof(*counter));
        if (counter)
                INIT_LIST_HEAD(&counter->list);
        return counter;
}

static int bpf_program_profiler__destroy(struct evsel *evsel)
{
        struct bpf_counter *counter, *tmp;

        list_for_each_entry_safe(counter, tmp,
                                 &evsel->bpf_counter_list, list) {
                list_del_init(&counter->list);
                bpf_prog_profiler_bpf__destroy(counter->skel);
                free(counter);
        }
        assert(list_empty(&evsel->bpf_counter_list));

        return 0;
}

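/*
 * Look up the name of the target BPF program's first function through its
 * BTF func_info. Returns a strdup()'d string that the caller must free,
 * or NULL on failure.
 */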
static char *bpf_target_prog_name(int tgt_fd)
{
        struct bpf_prog_info_linear *info_linear;
        struct bpf_func_info *func_info;
        const struct btf_type *t;
        char *name = NULL;
        struct btf *btf;

        info_linear = bpf_program__get_prog_info_linear(
                tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
        if (IS_ERR_OR_NULL(info_linear)) {
                pr_debug("failed to get info_linear for prog FD %d\n", tgt_fd);
                return NULL;
        }

        if (info_linear->info.btf_id == 0 ||
            btf__get_from_id(info_linear->info.btf_id, &btf)) {
                pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd);
                goto out;
        }

        func_info = u64_to_ptr(info_linear->info.func_info);
        t = btf__type_by_id(btf, func_info[0].type_id);
        if (!t) {
                pr_debug("btf %d doesn't have type %d\n",
                         info_linear->info.btf_id, func_info[0].type_id);
                goto out;
        }
        name = strdup(btf__name_by_offset(btf, t->name_off));
out:
        free(info_linear);
        return name;
}

static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id)
{
        struct bpf_prog_profiler_bpf *skel;
        struct bpf_counter *counter;
        struct bpf_program *prog;
        char *prog_name = NULL;
        int prog_fd;
        int err;

        prog_fd = bpf_prog_get_fd_by_id(prog_id);
        if (prog_fd < 0) {
                pr_err("Failed to open fd for bpf prog %u\n", prog_id);
                return -1;
        }
        counter = bpf_counter_alloc();
        if (!counter) {
                close(prog_fd);
                return -1;
        }

        skel = bpf_prog_profiler_bpf__open();
        if (!skel) {
                pr_err("Failed to open bpf skeleton\n");
                goto err_out;
        }

        skel->rodata->num_cpu = evsel__nr_cpus(evsel);

        bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel));
        bpf_map__resize(skel->maps.fentry_readings, 1);
        bpf_map__resize(skel->maps.accum_readings, 1);

        prog_name = bpf_target_prog_name(prog_fd);
        if (!prog_name) {
                pr_err("Failed to get program name for bpf prog %u. Does it have BTF?\n", prog_id);
                goto err_out;
        }

        bpf_object__for_each_program(prog, skel->obj) {
                err = bpf_program__set_attach_target(prog, prog_fd, prog_name);
                if (err) {
                        pr_err("bpf_program__set_attach_target failed.\n"
                               "Does bpf prog %u have BTF?\n", prog_id);
                        goto err_out;
                }
        }
        set_max_rlimit();
        err = bpf_prog_profiler_bpf__load(skel);
        if (err) {
                pr_err("bpf_prog_profiler_bpf__load failed\n");
                goto err_out;
        }

        assert(skel != NULL);
        counter->skel = skel;
        list_add(&counter->list, &evsel->bpf_counter_list);
        free(prog_name);
        close(prog_fd);
        return 0;
err_out:
        bpf_prog_profiler_bpf__destroy(skel);
        free(prog_name);
        free(counter);
        close(prog_fd);
        return -1;
}

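/*
 * target->bpf_str holds a comma separated list of BPF program IDs; load
 * one profiler skeleton for each target program.
 */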
static int bpf_program_profiler__load(struct evsel *evsel, struct target *target)
{
        char *bpf_str, *bpf_str_, *tok, *saveptr = NULL, *p;
        u32 prog_id;
        int ret;

        bpf_str_ = bpf_str = strdup(target->bpf_str);
        if (!bpf_str)
                return -1;

        while ((tok = strtok_r(bpf_str, ",", &saveptr)) != NULL) {
                prog_id = strtoul(tok, &p, 10);
                if (prog_id == 0 || prog_id == UINT_MAX ||
                    (*p != '\0' && *p != ',')) {
                        pr_err("Failed to parse bpf prog ids %s\n",
                               target->bpf_str);
                        free(bpf_str_);
                        return -1;
                }

                ret = bpf_program_profiler_load_one(evsel, prog_id);
                if (ret) {
                        bpf_program_profiler__destroy(evsel);
                        free(bpf_str_);
                        return -1;
                }
                bpf_str = NULL;
        }
        free(bpf_str_);
        return 0;
}

static int bpf_program_profiler__enable(struct evsel *evsel)
{
        struct bpf_counter *counter;
        int ret;

        list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
                assert(counter->skel != NULL);
                ret = bpf_prog_profiler_bpf__attach(counter->skel);
                if (ret) {
                        bpf_program_profiler__destroy(evsel);
                        return ret;
                }
        }
        return 0;
}

static int bpf_program_profiler__disable(struct evsel *evsel)
{
        struct bpf_counter *counter;

        list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
                assert(counter->skel != NULL);
                bpf_prog_profiler_bpf__detach(counter->skel);
        }
        return 0;
}

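/*
 * Sum the accumulated per-cpu readings of every profiled program into the
 * evsel's counts.
 */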
static int bpf_program_profiler__read(struct evsel *evsel)
{
        /* evsel__nr_cpus() follows the online CPUs mapped by the evsel */
        int num_cpu = evsel__nr_cpus(evsel);
        /*
         * BPF_MAP_TYPE_PERCPU_ARRAY values are sized by the number of
         * possible CPUs, which can be larger than the number of online
         * CPUs, so size the local buffer accordingly.
         */
        int num_cpu_bpf = libbpf_num_possible_cpus();
        struct bpf_perf_event_value values[num_cpu_bpf];
        struct bpf_counter *counter;
        int reading_map_fd;
        __u32 key = 0;
        int err, cpu;

        if (list_empty(&evsel->bpf_counter_list))
                return -EAGAIN;

        for (cpu = 0; cpu < num_cpu; cpu++) {
                perf_counts(evsel->counts, cpu, 0)->val = 0;
                perf_counts(evsel->counts, cpu, 0)->ena = 0;
                perf_counts(evsel->counts, cpu, 0)->run = 0;
        }
        list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
                struct bpf_prog_profiler_bpf *skel = counter->skel;

                assert(skel != NULL);
                reading_map_fd = bpf_map__fd(skel->maps.accum_readings);

                err = bpf_map_lookup_elem(reading_map_fd, &key, values);
                if (err) {
                        pr_err("failed to read value\n");
                        return err;
                }

                for (cpu = 0; cpu < num_cpu; cpu++) {
                        perf_counts(evsel->counts, cpu, 0)->val += values[cpu].counter;
                        perf_counts(evsel->counts, cpu, 0)->ena += values[cpu].enabled;
                        perf_counts(evsel->counts, cpu, 0)->run += values[cpu].running;
                }
        }
        return 0;
}

static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu,
                                            int fd)
{
        struct bpf_prog_profiler_bpf *skel;
        struct bpf_counter *counter;
        int ret;

        list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
                skel = counter->skel;
                assert(skel != NULL);

                ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events),
                                          &cpu, &fd, BPF_ANY);
                if (ret)
                        return ret;
        }
        return 0;
}

struct bpf_counter_ops bpf_program_profiler_ops = {
        .load = bpf_program_profiler__load,
        .enable = bpf_program_profiler__enable,
        .disable = bpf_program_profiler__disable,
        .read = bpf_program_profiler__read,
        .destroy = bpf_program_profiler__destroy,
        .install_pe = bpf_program_profiler__install_pe,
};

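/*
 * The perf_event_attr map is pinned in bpffs and shared by all bperf
 * sessions. Reject a pinned map whose key/value sizes do not match the
 * current layout, e.g. one created by a different perf build.
 */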
static bool bperf_attr_map_compatible(int attr_map_fd)
{
        struct bpf_map_info map_info = {0};
        __u32 map_info_len = sizeof(map_info);
        int err;

        err = bpf_obj_get_info_by_fd(attr_map_fd, &map_info, &map_info_len);

        if (err)
                return false;
        return (map_info.key_size == sizeof(struct perf_event_attr)) &&
               (map_info.value_size == sizeof(struct perf_event_attr_map_entry));
}

static int bperf_lock_attr_map(struct target *target)
{
        char path[PATH_MAX];
        int map_fd, err;

        if (target->attr_map) {
                scnprintf(path, PATH_MAX, "%s", target->attr_map);
        } else {
                scnprintf(path, PATH_MAX, "%s/fs/bpf/%s", sysfs__mountpoint(),
                          BPF_PERF_DEFAULT_ATTR_MAP_PATH);
        }

        if (access(path, F_OK)) {
                map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
                                        sizeof(struct perf_event_attr),
                                        sizeof(struct perf_event_attr_map_entry),
                                        ATTR_MAP_SIZE, 0);
                if (map_fd < 0)
                        return -1;

                err = bpf_obj_pin(map_fd, path);
                if (err) {
                        /* someone pinned the map in parallel? */
                        close(map_fd);
                        map_fd = bpf_obj_get(path);
                        if (map_fd < 0)
                                return -1;
                }
        } else {
                map_fd = bpf_obj_get(path);
                if (map_fd < 0)
                        return -1;
        }

        if (!bperf_attr_map_compatible(map_fd)) {
                close(map_fd);
                return -1;
        }
        err = flock(map_fd, LOCK_EX);
        if (err) {
                close(map_fd);
                return -1;
        }
        return map_fd;
}

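/*
 * Map the perf target onto a bperf filter type and the number of filter
 * entries (CPUs or threads) the follower has to track.
 */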
static int bperf_check_target(struct evsel *evsel,
                              struct target *target,
                              enum bperf_filter_type *filter_type,
                              __u32 *filter_entry_cnt)
{
        if (evsel->core.leader->nr_members > 1) {
                pr_err("bpf managed perf events do not yet support groups.\n");
                return -1;
        }

        if (target->system_wide) {
                *filter_type = BPERF_FILTER_GLOBAL;
                *filter_entry_cnt = 1;
        } else if (target->cpu_list) {
                *filter_type = BPERF_FILTER_CPU;
                *filter_entry_cnt = perf_cpu_map__nr(evsel__cpus(evsel));
        } else if (target->tid) {
                *filter_type = BPERF_FILTER_PID;
                *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads);
        } else if (target->pid || evsel->evlist->workload.pid != -1) {
                *filter_type = BPERF_FILTER_TGID;
                *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads);
        } else {
                pr_err("bpf managed perf events do not yet support these targets.\n");
                return -1;
        }

        return 0;
}

static struct perf_cpu_map *all_cpu_map;

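/*
 * No usable leader exists for this perf_event_attr yet: load a fresh
 * leader skeleton, attach its on_switch program, open the events on all
 * CPUs, and publish the new link and diff_readings map IDs in the pinned
 * attr map so that other bperf sessions can reuse them.
 */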
static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
                                       struct perf_event_attr_map_entry *entry)
{
        struct bperf_leader_bpf *skel = bperf_leader_bpf__open();
        int link_fd, diff_map_fd, err;
        struct bpf_link *link = NULL;

        if (!skel) {
                pr_err("Failed to open leader skeleton\n");
                return -1;
        }

        bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus());
        err = bperf_leader_bpf__load(skel);
        if (err) {
                pr_err("Failed to load leader skeleton\n");
                goto out;
        }

        link = bpf_program__attach(skel->progs.on_switch);
        if (IS_ERR(link)) {
                pr_err("Failed to attach leader program\n");
                err = PTR_ERR(link);
                goto out;
        }

        link_fd = bpf_link__fd(link);
        diff_map_fd = bpf_map__fd(skel->maps.diff_readings);
        entry->link_id = bpf_link_get_id(link_fd);
        entry->diff_map_id = bpf_map_get_id(diff_map_fd);
        err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, BPF_ANY);
        assert(err == 0);

        evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id);
        assert(evsel->bperf_leader_link_fd >= 0);

        /*
         * save leader_skel for install_pe, which is called within
         * following evsel__open_per_cpu call
         */
        evsel->leader_skel = skel;
        evsel__open_per_cpu(evsel, all_cpu_map, -1);

        /*
         * On success, the fd taken via bpf_link_get_fd_by_id() above keeps
         * the link, and thus the leader program, alive after the local
         * skeleton and link are destroyed below.
         */
out:
        bperf_leader_bpf__destroy(skel);
        bpf_link__destroy(link);
        return err;
}

static int bperf__load(struct evsel *evsel, struct target *target)
{
        struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff};
        int attr_map_fd, diff_map_fd = -1, err;
        enum bperf_filter_type filter_type;
        __u32 filter_entry_cnt, i;

        if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt))
                return -1;

        if (!all_cpu_map) {
                all_cpu_map = perf_cpu_map__new(NULL);
                if (!all_cpu_map)
                        return -1;
        }

        evsel->bperf_leader_prog_fd = -1;
        evsel->bperf_leader_link_fd = -1;

        /*
         * Step 1: hold a fd on the leader program and the bpf_link. The
         * perf_event_attr map is pinned in bpffs and shared by all bperf
         * sessions; take an exclusive flock() on it while looking up (or
         * creating) the leader for this event attr.
         */
        attr_map_fd = bperf_lock_attr_map(target);
        if (attr_map_fd < 0) {
                pr_err("Failed to lock perf_event_attr map\n");
                return -1;
        }

        err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry);
        if (err) {
                err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, &entry, BPF_ANY);
                if (err)
                        goto out;
        }

        evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id);
        if (evsel->bperf_leader_link_fd < 0 &&
            bperf_reload_leader_program(evsel, attr_map_fd, &entry)) {
                err = -1;
                goto out;
        }

        /*
         * The bpf_link holds a reference to the leader program, and the
         * leader program holds references to its maps. Therefore, if
         * link_id is valid, diff_map_id should also be valid.
         */
        evsel->bperf_leader_prog_fd = bpf_prog_get_fd_by_id(
                bpf_link_get_prog_id(evsel->bperf_leader_link_fd));
        assert(evsel->bperf_leader_prog_fd >= 0);

        diff_map_fd = bpf_map_get_fd_by_id(entry.diff_map_id);
        assert(diff_map_fd >= 0);

        /*
         * bperf uses BPF_PROG_TEST_RUN to get accurate readings. Check
         * whether the kernel supports it.
         */
        err = bperf_trigger_reading(evsel->bperf_leader_prog_fd, 0);
        if (err) {
                pr_err("The kernel does not support test_run for raw_tp BPF programs.\n"
                       "Therefore, --use-bpf might show inaccurate readings\n");
                goto out;
        }

        /* Step 2: load the follower skeleton */
        evsel->follower_skel = bperf_follower_bpf__open();
        if (!evsel->follower_skel) {
                err = -1;
                pr_err("Failed to open follower skeleton\n");
                goto out;
        }

        /* attach fexit program to the leader program */
        bpf_program__set_attach_target(evsel->follower_skel->progs.fexit_XXX,
                                       evsel->bperf_leader_prog_fd, "on_switch");

        /* connect to the leader's diff_readings map */
        bpf_map__reuse_fd(evsel->follower_skel->maps.diff_readings, diff_map_fd);

        /* set up reading map */
        bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings,
                                 filter_entry_cnt);
        /* set up follower filter based on target */
        bpf_map__set_max_entries(evsel->follower_skel->maps.filter,
                                 filter_entry_cnt);
        err = bperf_follower_bpf__load(evsel->follower_skel);
        if (err) {
                pr_err("Failed to load follower skeleton\n");
                bperf_follower_bpf__destroy(evsel->follower_skel);
                evsel->follower_skel = NULL;
                goto out;
        }

        /* fill the filter map with the target pids or cpus */
        for (i = 0; i < filter_entry_cnt; i++) {
                int filter_map_fd;
                __u32 key;

                if (filter_type == BPERF_FILTER_PID ||
                    filter_type == BPERF_FILTER_TGID)
                        key = evsel->core.threads->map[i].pid;
                else if (filter_type == BPERF_FILTER_CPU)
                        key = evsel->core.cpus->map[i];
                else
                        break;

                filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter);
                bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY);
        }

        evsel->follower_skel->bss->type = filter_type;

        err = bperf_follower_bpf__attach(evsel->follower_skel);

out:
        if (err && evsel->bperf_leader_link_fd >= 0)
                close(evsel->bperf_leader_link_fd);
        if (err && evsel->bperf_leader_prog_fd >= 0)
                close(evsel->bperf_leader_prog_fd);
        if (diff_map_fd >= 0)
                close(diff_map_fd);

        flock(attr_map_fd, LOCK_UN);
        close(attr_map_fd);

        return err;
}

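/*
 * Called while the events are being opened on each cpu: register the new
 * perf event fd in the leader's events map so the BPF program can read it.
 */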
static int bperf__install_pe(struct evsel *evsel, int cpu, int fd)
{
        struct bperf_leader_bpf *skel = evsel->leader_skel;

        return bpf_map_update_elem(bpf_map__fd(skel->maps.events),
                                   &cpu, &fd, BPF_ANY);
}

/*
 * Trigger the leader program on each cpu, so that diff_readings (and the
 * follower's accum_readings) hold the latest readings before they are
 * read back.
 */
static int bperf_sync_counters(struct evsel *evsel)
{
        int num_cpu, i, cpu;

        num_cpu = all_cpu_map->nr;
        for (i = 0; i < num_cpu; i++) {
                cpu = all_cpu_map->map[i];
                bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu);
        }
        return 0;
}

static int bperf__enable(struct evsel *evsel)
{
        evsel->follower_skel->bss->enabled = 1;
        return 0;
}

static int bperf__disable(struct evsel *evsel)
{
        evsel->follower_skel->bss->enabled = 0;
        return 0;
}

static int bperf__read(struct evsel *evsel)
{
        struct bperf_follower_bpf *skel = evsel->follower_skel;
        __u32 num_cpu_bpf = cpu__max_cpu();
        struct bpf_perf_event_value values[num_cpu_bpf];
        int reading_map_fd, err = 0;
        __u32 i, j, num_cpu;

        bperf_sync_counters(evsel);
        reading_map_fd = bpf_map__fd(skel->maps.accum_readings);

        for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
                __u32 cpu;

                err = bpf_map_lookup_elem(reading_map_fd, &i, values);
                if (err)
                        goto out;
                switch (evsel->follower_skel->bss->type) {
                case BPERF_FILTER_GLOBAL:
                        assert(i == 0);

                        num_cpu = all_cpu_map->nr;
                        for (j = 0; j < num_cpu; j++) {
                                cpu = all_cpu_map->map[j];
                                perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter;
                                perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled;
                                perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running;
                        }
                        break;
                case BPERF_FILTER_CPU:
                        cpu = evsel->core.cpus->map[i];
                        perf_counts(evsel->counts, i, 0)->val = values[cpu].counter;
                        perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled;
                        perf_counts(evsel->counts, i, 0)->run = values[cpu].running;
                        break;
                case BPERF_FILTER_PID:
                case BPERF_FILTER_TGID:
                        perf_counts(evsel->counts, 0, i)->val = 0;
                        perf_counts(evsel->counts, 0, i)->ena = 0;
                        perf_counts(evsel->counts, 0, i)->run = 0;

                        /* per-thread readings are summed across all CPUs */
                        for (cpu = 0; cpu < num_cpu_bpf; cpu++) {
                                perf_counts(evsel->counts, 0, i)->val += values[cpu].counter;
                                perf_counts(evsel->counts, 0, i)->ena += values[cpu].enabled;
                                perf_counts(evsel->counts, 0, i)->run += values[cpu].running;
                        }
                        break;
                default:
                        break;
                }
        }
out:
        return err;
}

static int bperf__destroy(struct evsel *evsel)
{
        bperf_follower_bpf__destroy(evsel->follower_skel);
        close(evsel->bperf_leader_prog_fd);
        close(evsel->bperf_leader_link_fd);
        return 0;
}
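
/*
 * bperf: share hardware PMCs between perf sessions with BPF.
 *
 * A pinned perf_event_attr map in bpffs maps each perf_event_attr to the
 * IDs of a leader BPF link and its diff_readings map. The first session
 * that needs a given attr loads the leader skeleton, opens the events on
 * all CPUs and publishes the IDs in the attr map; later sessions simply
 * look the entry up and take an extra fd on the existing leader link.
 * Each session then loads a follower skeleton whose fexit program hooks
 * the leader's on_switch program and accumulates deltas from diff_readings
 * into accum_readings, filtered by cpu, pid, or tgid. Readings are
 * refreshed on demand by triggering the leader with BPF_PROG_TEST_RUN
 * (bperf_trigger_reading()) and then read back in bperf__read().
 */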
struct bpf_counter_ops bperf_ops = {
        .load = bperf__load,
        .enable = bperf__enable,
        .disable = bperf__disable,
        .read = bperf__read,
        .install_pe = bperf__install_pe,
        .destroy = bperf__destroy,
};

extern struct bpf_counter_ops bperf_cgrp_ops;

static inline bool bpf_counter_skip(struct evsel *evsel)
{
        return list_empty(&evsel->bpf_counter_list) &&
               evsel->follower_skel == NULL;
}

int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd)
{
        if (bpf_counter_skip(evsel))
                return 0;
        return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd);
}

int bpf_counter__load(struct evsel *evsel, struct target *target)
{
        if (target->bpf_str)
                evsel->bpf_counter_ops = &bpf_program_profiler_ops;
        else if (cgrp_event_expanded && target->use_bpf)
                evsel->bpf_counter_ops = &bperf_cgrp_ops;
        else if (target->use_bpf || evsel->bpf_counter ||
                 evsel__match_bpf_counter_events(evsel->name))
                evsel->bpf_counter_ops = &bperf_ops;

        if (evsel->bpf_counter_ops)
                return evsel->bpf_counter_ops->load(evsel, target);
        return 0;
}

int bpf_counter__enable(struct evsel *evsel)
{
        if (bpf_counter_skip(evsel))
                return 0;
        return evsel->bpf_counter_ops->enable(evsel);
}

int bpf_counter__disable(struct evsel *evsel)
{
        if (bpf_counter_skip(evsel))
                return 0;
        return evsel->bpf_counter_ops->disable(evsel);
}

int bpf_counter__read(struct evsel *evsel)
{
        if (bpf_counter_skip(evsel))
                return -EAGAIN;
        return evsel->bpf_counter_ops->read(evsel);
}

void bpf_counter__destroy(struct evsel *evsel)
{
        if (bpf_counter_skip(evsel))
                return;
        evsel->bpf_counter_ops->destroy(evsel);
        evsel->bpf_counter_ops = NULL;
}