1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include "util/record.h"
18#include <traceevent/event-parse.h>
19#include <api/fs/tracing_path.h>
20#include <bpf/bpf.h>
21#include "util/bpf_map.h"
22#include "util/rlimit.h"
23#include "builtin.h"
24#include "util/cgroup.h"
25#include "util/color.h"
26#include "util/config.h"
27#include "util/debug.h"
28#include "util/dso.h"
29#include "util/env.h"
30#include "util/event.h"
31#include "util/evsel.h"
32#include "util/evsel_fprintf.h"
33#include "util/synthetic-events.h"
34#include "util/evlist.h"
35#include "util/evswitch.h"
36#include "util/mmap.h"
37#include <subcmd/pager.h>
38#include <subcmd/exec-cmd.h>
39#include "util/machine.h"
40#include "util/map.h"
41#include "util/symbol.h"
42#include "util/path.h"
43#include "util/session.h"
44#include "util/thread.h"
45#include <subcmd/parse-options.h>
46#include "util/strlist.h"
47#include "util/intlist.h"
48#include "util/thread_map.h"
49#include "util/stat.h"
50#include "util/tool.h"
51#include "util/util.h"
52#include "trace/beauty/beauty.h"
53#include "trace-event.h"
54#include "util/parse-events.h"
55#include "util/bpf-loader.h"
56#include "callchain.h"
57#include "print_binary.h"
58#include "string2.h"
59#include "syscalltbl.h"
60#include "rb_resort.h"
61#include "../perf.h"
62
63#include <errno.h>
64#include <inttypes.h>
65#include <poll.h>
66#include <signal.h>
67#include <stdlib.h>
68#include <string.h>
69#include <linux/err.h>
70#include <linux/filter.h>
71#include <linux/kernel.h>
72#include <linux/random.h>
73#include <linux/stringify.h>
74#include <linux/time64.h>
75#include <linux/zalloc.h>
76#include <fcntl.h>
77#include <sys/sysmacros.h>
78
79#include <linux/ctype.h>
80#include <perf/mmap.h>
81
/* Fallback definition, used only when the included headers do not provide O_CLOEXEC. */
#ifndef O_CLOEXEC
# define O_CLOEXEC 02000000
#endif

/*
 * Fallback definition, used only when the included headers do not provide it.
 * It is the offset of the fcntl_linux_specific_cmds string table below.
 */
#ifndef F_LINUX_SPECIFIC_BASE
# define F_LINUX_SPECIFIC_BASE 1024
#endif
89
90
91
92
/*
 * Describes how one argument is formatted and parsed.  Instances are filled in
 * by the syscall_fmts[] table and allocated per tracepoint field by
 * evsel__syscall_arg_fmt().
 */
struct syscall_arg_fmt {
	/* Formatter: writes a textual form of the argument into bf (size bytes). */
	size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* Parser: converts the text in bf (size bytes) into *val; returns false on failure. */
	bool (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val);
	/* NOTE(review): presumably masks bits out of val before it is printed -- confirm against callers. */
	unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
	/* Formatter-specific data, e.g. the strarray/strarrays set by the tables in this file. */
	void *parm;
	/* Argument name (set explicitly for e.g. the "clone" and "kcmp" entries). */
	const char *name;
	/* Read by syscall_arg__scnprintf_char_array() as the string precision when non-zero. */
	u16 nr_entries;
	/* NOTE(review): presumably forces printing of zero-valued arguments -- confirm. */
	bool show_zero;
};
102
103struct syscall_fmt {
104 const char *name;
105 const char *alias;
106 struct {
107 const char *sys_enter,
108 *sys_exit;
109 } bpf_prog_name;
110 struct syscall_arg_fmt arg[6];
111 u8 nr_args;
112 bool errpid;
113 bool timeout;
114 bool hexret;
115};
116
117struct trace {
118 struct perf_tool tool;
119 struct syscalltbl *sctbl;
120 struct {
121 struct syscall *table;
122 struct bpf_map *map;
123 struct {
124 struct bpf_map *sys_enter,
125 *sys_exit;
126 } prog_array;
127 struct {
128 struct evsel *sys_enter,
129 *sys_exit,
130 *augmented;
131 } events;
132 struct bpf_program *unaugmented_prog;
133 } syscalls;
134 struct {
135 struct bpf_map *map;
136 } dump;
137 struct record_opts opts;
138 struct evlist *evlist;
139 struct machine *host;
140 struct thread *current;
141 struct bpf_object *bpf_obj;
142 struct cgroup *cgroup;
143 u64 base_time;
144 FILE *output;
145 unsigned long nr_events;
146 unsigned long nr_events_printed;
147 unsigned long max_events;
148 struct evswitch evswitch;
149 struct strlist *ev_qualifier;
150 struct {
151 size_t nr;
152 int *entries;
153 } ev_qualifier_ids;
154 struct {
155 size_t nr;
156 pid_t *entries;
157 struct bpf_map *map;
158 } filter_pids;
159 double duration_filter;
160 double runtime_ms;
161 struct {
162 u64 vfs_getname,
163 proc_getname;
164 } stats;
165 unsigned int max_stack;
166 unsigned int min_stack;
167 int raw_augmented_syscalls_args_size;
168 bool raw_augmented_syscalls;
169 bool fd_path_disabled;
170 bool sort_events;
171 bool not_ev_qualifier;
172 bool live;
173 bool full_time;
174 bool sched;
175 bool multiple_threads;
176 bool summary;
177 bool summary_only;
178 bool errno_summary;
179 bool failure_only;
180 bool show_comm;
181 bool print_sample;
182 bool show_tool_stats;
183 bool trace_syscalls;
184 bool libtraceevent_print;
185 bool kernel_syscallchains;
186 s16 args_alignment;
187 bool show_tstamp;
188 bool show_duration;
189 bool show_zeros;
190 bool show_arg_names;
191 bool show_string_prefix;
192 bool force;
193 bool vfs_getname;
194 int trace_pgfaults;
195 char *perfconfig_events;
196 struct {
197 struct ordered_events data;
198 u64 last;
199 } oe;
200};
201
/*
 * Accessor for one field of a tracepoint's raw payload: the byte offset of the
 * field inside sample->raw_data plus the function used to read it.
 */
struct tp_field {
	int offset;
	union {
		/* Set by __tp_field__init_uint(): returns the field's value as a u64. */
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		/* Set by __tp_field__init_ptr(): returns a pointer to the field's bytes. */
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
209
210#define TP_UINT_FIELD(bits) \
211static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
212{ \
213 u##bits value; \
214 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
215 return value; \
216}
217
218TP_UINT_FIELD(8);
219TP_UINT_FIELD(16);
220TP_UINT_FIELD(32);
221TP_UINT_FIELD(64);
222
223#define TP_UINT_FIELD__SWAPPED(bits) \
224static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
225{ \
226 u##bits value; \
227 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
228 return bswap_##bits(value);\
229}
230
231TP_UINT_FIELD__SWAPPED(16);
232TP_UINT_FIELD__SWAPPED(32);
233TP_UINT_FIELD__SWAPPED(64);
234
235static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
236{
237 field->offset = offset;
238
239 switch (size) {
240 case 1:
241 field->integer = tp_field__u8;
242 break;
243 case 2:
244 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
245 break;
246 case 4:
247 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
248 break;
249 case 8:
250 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
251 break;
252 default:
253 return -1;
254 }
255
256 return 0;
257}
258
259static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
260{
261 return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
262}
263
264static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
265{
266 return sample->raw_data + field->offset;
267}
268
269static int __tp_field__init_ptr(struct tp_field *field, int offset)
270{
271 field->offset = offset;
272 field->pointer = tp_field__ptr;
273 return 0;
274}
275
276static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
277{
278 return __tp_field__init_ptr(field, format_field->offset);
279}
280
281struct syscall_tp {
282 struct tp_field id;
283 union {
284 struct tp_field args, ret;
285 };
286};
287
288
289
290
291
292
/*
 * Per-evsel private data, stored in evsel->priv by the helpers below.
 * 'sc' must stay the first usable member for __evsel__syscall_tp(); 'fmt' is
 * an array allocated by evsel__syscall_arg_fmt() and released by
 * evsel_trace__delete().
 */
struct evsel_trace {
	struct syscall_tp sc;
	struct syscall_arg_fmt *fmt;
};
297
298static struct evsel_trace *evsel_trace__new(void)
299{
300 return zalloc(sizeof(struct evsel_trace));
301}
302
303static void evsel_trace__delete(struct evsel_trace *et)
304{
305 if (et == NULL)
306 return;
307
308 zfree(&et->fmt);
309 free(et);
310}
311
312
313
314
315
316static inline struct syscall_tp *__evsel__syscall_tp(struct evsel *evsel)
317{
318 struct evsel_trace *et = evsel->priv;
319
320 return &et->sc;
321}
322
323static struct syscall_tp *evsel__syscall_tp(struct evsel *evsel)
324{
325 if (evsel->priv == NULL) {
326 evsel->priv = evsel_trace__new();
327 if (evsel->priv == NULL)
328 return NULL;
329 }
330
331 return __evsel__syscall_tp(evsel);
332}
333
334
335
336
337static inline struct syscall_arg_fmt *__evsel__syscall_arg_fmt(struct evsel *evsel)
338{
339 struct evsel_trace *et = evsel->priv;
340
341 return et->fmt;
342}
343
344static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel)
345{
346 struct evsel_trace *et = evsel->priv;
347
348 if (evsel->priv == NULL) {
349 et = evsel->priv = evsel_trace__new();
350
351 if (et == NULL)
352 return NULL;
353 }
354
355 if (et->fmt == NULL) {
356 et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
357 if (et->fmt == NULL)
358 goto out_delete;
359 }
360
361 return __evsel__syscall_arg_fmt(evsel);
362
363out_delete:
364 evsel_trace__delete(evsel->priv);
365 evsel->priv = NULL;
366 return NULL;
367}
368
369static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name)
370{
371 struct tep_format_field *format_field = evsel__field(evsel, name);
372
373 if (format_field == NULL)
374 return -1;
375
376 return tp_field__init_uint(field, format_field, evsel->needs_swap);
377}
378
/*
 * Initialize member 'name' of evsel's syscall_tp as an unsigned integer field,
 * looking up the tracepoint field of the same name (#name).  Evaluates to the
 * result of evsel__init_tp_uint_field().  Uses __evsel__syscall_tp(), which
 * does not allocate evsel->priv.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
	   evsel__init_tp_uint_field(evsel, &sc->name, #name); })
382
/*
 * Look up the tracepoint field @name of @evsel and set up @field to return a
 * pointer to it.  Returns -1 when the field does not exist, 0 otherwise.
 */
static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name)
{
	struct tep_format_field *ff = evsel__field(evsel, name);

	return ff != NULL ? tp_field__init_ptr(field, ff) : -1;
}
392
/*
 * Initialize member 'name' of evsel's syscall_tp as a pointer field, looking
 * up the tracepoint field of the same name (#name).  Evaluates to the result
 * of evsel__init_tp_ptr_field().  Uses __evsel__syscall_tp(), which does not
 * allocate evsel->priv.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
	   evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
396
397static void evsel__delete_priv(struct evsel *evsel)
398{
399 zfree(&evsel->priv);
400 evsel__delete(evsel);
401}
402
403static int evsel__init_syscall_tp(struct evsel *evsel)
404{
405 struct syscall_tp *sc = evsel__syscall_tp(evsel);
406
407 if (sc != NULL) {
408 if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
409 evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
410 return -ENOENT;
411 return 0;
412 }
413
414 return -ENOMEM;
415}
416
417static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
418{
419 struct syscall_tp *sc = evsel__syscall_tp(evsel);
420
421 if (sc != NULL) {
422 struct tep_format_field *syscall_id = evsel__field(tp, "id");
423 if (syscall_id == NULL)
424 syscall_id = evsel__field(tp, "__syscall_nr");
425 if (syscall_id == NULL ||
426 __tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
427 return -EINVAL;
428
429 return 0;
430 }
431
432 return -ENOMEM;
433}
434
435static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
436{
437 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
438
439 return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
440}
441
442static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
443{
444 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
445
446 return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
447}
448
449static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
450{
451 if (evsel__syscall_tp(evsel) != NULL) {
452 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
453 return -ENOENT;
454
455 evsel->handler = handler;
456 return 0;
457 }
458
459 return -ENOMEM;
460}
461
/*
 * Create the tracepoint evsel "raw_syscalls:<direction>", falling back to
 * "syscalls:<direction>" when the former cannot be created, and initialize it
 * with @handler.
 *
 * Returns the new evsel, or NULL when it cannot be created or initialized (a
 * partially set up evsel is destroyed before returning).
 */
static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
	struct evsel *evsel = evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		evsel = evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (evsel__init_raw_syscall_tp(evsel, handler) == 0)
		return evsel;

	evsel__delete_priv(evsel);
	return NULL;
}
482
/*
 * Read member 'name' of evsel's syscall_tp from 'sample' through its
 * 'integer' accessor.  The accessor must have been initialized beforehand.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = __evsel__syscall_tp(evsel); \
	   fields->name.integer(&fields->name, sample); })

/*
 * Same as perf_evsel__sc_tp_uint() but through the 'pointer' accessor, i.e.
 * evaluates to a pointer into the sample's raw data.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = __evsel__syscall_tp(evsel); \
	   fields->name.pointer(&fields->name, sample); })
490
491size_t strarray__scnprintf_suffix(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_suffix, int val)
492{
493 int idx = val - sa->offset;
494
495 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
496 size_t printed = scnprintf(bf, size, intfmt, val);
497 if (show_suffix)
498 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
499 return printed;
500 }
501
502 return scnprintf(bf, size, "%s%s", sa->entries[idx], show_suffix ? sa->prefix : "");
503}
504
505size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
506{
507 int idx = val - sa->offset;
508
509 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
510 size_t printed = scnprintf(bf, size, intfmt, val);
511 if (show_prefix)
512 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
513 return printed;
514 }
515
516 return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
517}
518
519static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
520 const char *intfmt,
521 struct syscall_arg *arg)
522{
523 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->show_string_prefix, arg->val);
524}
525
/* strarray formatter that prints unnamed values as signed decimals. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	size_t printed = __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);

	return printed;
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
533
534bool syscall_arg__strtoul_strarray(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
535{
536 return strarray__strtoul(arg->parm, bf, size, ret);
537}
538
539bool syscall_arg__strtoul_strarray_flags(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
540{
541 return strarray__strtoul_flags(arg->parm, bf, size, ret);
542}
543
544bool syscall_arg__strtoul_strarrays(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
545{
546 return strarrays__strtoul(arg->parm, bf, size, ret);
547}
548
549size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg)
550{
551 return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val);
552}
553
554size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
555{
556 size_t printed;
557 int i;
558
559 for (i = 0; i < sas->nr_entries; ++i) {
560 struct strarray *sa = sas->entries[i];
561 int idx = val - sa->offset;
562
563 if (idx >= 0 && idx < sa->nr_entries) {
564 if (sa->entries[idx] == NULL)
565 break;
566 return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
567 }
568 }
569
570 printed = scnprintf(bf, size, intfmt, val);
571 if (show_prefix)
572 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sas->entries[0]->prefix);
573 return printed;
574}
575
576bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret)
577{
578 int i;
579
580 for (i = 0; i < sa->nr_entries; ++i) {
581 if (sa->entries[i] && strncmp(sa->entries[i], bf, size) == 0 && sa->entries[i][size] == '\0') {
582 *ret = sa->offset + i;
583 return true;
584 }
585 }
586
587 return false;
588}
589
590bool strarray__strtoul_flags(struct strarray *sa, char *bf, size_t size, u64 *ret)
591{
592 u64 val = 0;
593 char *tok = bf, *sep, *end;
594
595 *ret = 0;
596
597 while (size != 0) {
598 int toklen = size;
599
600 sep = memchr(tok, '|', size);
601 if (sep != NULL) {
602 size -= sep - tok + 1;
603
604 end = sep - 1;
605 while (end > tok && isspace(*end))
606 --end;
607
608 toklen = end - tok + 1;
609 }
610
611 while (isspace(*tok))
612 ++tok;
613
614 if (isalpha(*tok) || *tok == '_') {
615 if (!strarray__strtoul(sa, tok, toklen, &val))
616 return false;
617 } else {
618 bool is_hexa = tok[0] == 0 && (tok[1] = 'x' || tok[1] == 'X');
619
620 val = strtoul(tok, NULL, is_hexa ? 16 : 0);
621 }
622
623 *ret |= (1 << (val - 1));
624
625 if (sep == NULL)
626 break;
627 tok = sep + 1;
628 }
629
630 return true;
631}
632
633bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret)
634{
635 int i;
636
637 for (i = 0; i < sas->nr_entries; ++i) {
638 struct strarray *sa = sas->entries[i];
639
640 if (strarray__strtoul(sa, bf, size, ret))
641 return true;
642 }
643
644 return false;
645}
646
647size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
648 struct syscall_arg *arg)
649{
650 return strarrays__scnprintf(arg->parm, bf, size, "%d", arg->show_string_prefix, arg->val);
651}
652
653#ifndef AT_FDCWD
654#define AT_FDCWD -100
655#endif
656
657static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
658 struct syscall_arg *arg)
659{
660 int fd = arg->val;
661 const char *prefix = "AT_FD";
662
663 if (fd == AT_FDCWD)
664 return scnprintf(bf, size, "%s%s", arg->show_string_prefix ? prefix : "", "CWD");
665
666 return syscall_arg__scnprintf_fd(bf, size, arg);
667}
668
669#define SCA_FDAT syscall_arg__scnprintf_fd_at
670
/*
 * Forward declaration: the formatter is referenced by the "close" entry of
 * syscall_fmts[] (through SCA_CLOSE_FD), ahead of its definition.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
675
676size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
677{
678 return scnprintf(bf, size, "%#lx", arg->val);
679}
680
681size_t syscall_arg__scnprintf_ptr(char *bf, size_t size, struct syscall_arg *arg)
682{
683 if (arg->val == 0)
684 return scnprintf(bf, size, "NULL");
685 return syscall_arg__scnprintf_hex(bf, size, arg);
686}
687
688size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
689{
690 return scnprintf(bf, size, "%d", arg->val);
691}
692
693size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
694{
695 return scnprintf(bf, size, "%ld", arg->val);
696}
697
/*
 * Format arg->val as a double-quoted string.  The number of characters
 * printed is capped by the "%.*s" precision: arg->fmt->nr_entries when that
 * is non-zero, arg->len otherwise.
 * NOTE(review): arg->val is handed to "%s" as is; this assumes it holds a
 * pointer to the characters -- confirm against struct syscall_arg.
 */
static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg)
{



	return scnprintf(bf, size, "\"%-.*s\"", arg->fmt->nr_entries ?: arg->len, arg->val);
}

#define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array
707
/*
 * Names of the bpf() commands, indexed by command number starting at 0; used
 * for argument 0 of the "bpf" entry in syscall_fmts[].  The order is
 * significant: new names must be appended in command-number order.
 */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD", "OBJ_PIN", "OBJ_GET", "PROG_ATTACH",
	"PROG_DETACH", "PROG_TEST_RUN", "PROG_GET_NEXT_ID", "MAP_GET_NEXT_ID",
	"PROG_GET_FD_BY_ID", "MAP_GET_FD_BY_ID", "OBJ_GET_INFO_BY_FD",
	"PROG_QUERY", "RAW_TRACEPOINT_OPEN", "BTF_LOAD", "BTF_GET_FD_BY_ID",
	"TASK_FD_QUERY", "MAP_LOOKUP_AND_DELETE_ELEM", "MAP_FREEZE",
	"BTF_GET_NEXT_ID", "MAP_LOOKUP_BATCH", "MAP_LOOKUP_AND_DELETE_BATCH",
	"MAP_UPDATE_BATCH", "MAP_DELETE_BATCH", "LINK_CREATE", "LINK_UPDATE",
	"LINK_GET_FD_BY_ID", "LINK_GET_NEXT_ID", "ENABLE_STATS", "ITER_CREATE",
	"LINK_DETACH", "PROG_BIND_MAP",
};
static DEFINE_STRARRAY(bpf_cmd, "BPF_");
721
/*
 * fsmount() flag names.  This table is used with STRARRAY_FLAGS, where the
 * entry at index N names bit N - 1 (see strarray__strtoul_flags()).
 */
static const char *fsmount_flags[] = {
	[1] = "CLOEXEC",
};
static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");

/* The fsconfig_cmds[] table comes from this generated file. */
#include "trace/beauty/generated/fsconfig_arrays.c"

static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");
730
/* epoll_ctl() operation names; the table starts at value 1 (offset 1). */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);

/* Interval timer names for getitimer()/setitimer(), indexed from 0. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers, "ITIMER_");
736
/* keyctl() option names, indexed by option number starting at 0. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options, "KEYCTL_");
745
/*
 * lseek() whence names, indexed from 0.  "DATA" and "HOLE" are only included
 * when the build environment defines SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences, "SEEK_");
755
/* Generic fcntl() command names, indexed by command number starting at 0. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds, "F_");

/*
 * Linux specific fcntl() commands; index 0 corresponds to
 * F_LINUX_SPECIFIC_BASE.  Indexes 3 and 4 are intentionally left unnamed.
 */
static const char *fcntl_linux_specific_cmds[] = {
	"SETLEASE", "GETLEASE", "NOTIFY", [5] =	"CANCELLK", "DUPFD_CLOEXEC",
	"SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
	"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
};

static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, "F_", F_LINUX_SPECIFIC_BASE);

/* Both fcntl tables, searched in this order by the strarrays helpers. */
static struct strarray *fcntl_cmds_arrays[] = {
	&strarray__fcntl_cmds,
	&strarray__fcntl_linux_specific_cmds,
};

static DEFINE_STRARRAYS(fcntl_cmds_arrays);
778
/* Resource names for getrlimit()/setrlimit()/prlimit64(), indexed from 0. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources, "RLIMIT_");

/* 'how' names for rt_sigprocmask(), indexed from 0. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow, "SIG_");

/* Clock id names for clock_gettime(), indexed from 0. */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid, "CLOCK_");
795
796static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
797 struct syscall_arg *arg)
798{
799 bool show_prefix = arg->show_string_prefix;
800 const char *suffix = "_OK";
801 size_t printed = 0;
802 int mode = arg->val;
803
804 if (mode == F_OK)
805 return scnprintf(bf, size, "F%s", show_prefix ? suffix : "");
806#define P_MODE(n) \
807 if (mode & n##_OK) { \
808 printed += scnprintf(bf + printed, size - printed, "%s%s", #n, show_prefix ? suffix : ""); \
809 mode &= ~n##_OK; \
810 }
811
812 P_MODE(R);
813 P_MODE(W);
814 P_MODE(X);
815#undef P_MODE
816
817 if (mode)
818 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
819
820 return printed;
821}
822
823#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
824
/*
 * Forward declaration: the formatter is referenced by syscall_fmts[] (through
 * SCA_FILENAME), ahead of its definition.
 */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
829
830static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
831 struct syscall_arg *arg)
832{
833 bool show_prefix = arg->show_string_prefix;
834 const char *prefix = "O_";
835 int printed = 0, flags = arg->val;
836
837#define P_FLAG(n) \
838 if (flags & O_##n) { \
839 printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
840 flags &= ~O_##n; \
841 }
842
843 P_FLAG(CLOEXEC);
844 P_FLAG(NONBLOCK);
845#undef P_FLAG
846
847 if (flags)
848 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
849
850 return printed;
851}
852
853#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
854
855#ifndef GRND_NONBLOCK
856#define GRND_NONBLOCK 0x0001
857#endif
858#ifndef GRND_RANDOM
859#define GRND_RANDOM 0x0002
860#endif
861
862static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
863 struct syscall_arg *arg)
864{
865 bool show_prefix = arg->show_string_prefix;
866 const char *prefix = "GRND_";
867 int printed = 0, flags = arg->val;
868
869#define P_FLAG(n) \
870 if (flags & GRND_##n) { \
871 printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
872 flags &= ~GRND_##n; \
873 }
874
875 P_FLAG(RANDOM);
876 P_FLAG(NONBLOCK);
877#undef P_FLAG
878
879 if (flags)
880 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
881
882 return printed;
883}
884
885#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
886
/*
 * Initializer for a syscall_arg_fmt whose value is one name out of the
 * strarray 'strarray__<array>'.  The 'name' parameter is not used by the
 * expansion; at the call sites it carries the argument's name.
 */
#define STRARRAY(name, array) \
	  { .scnprintf	= SCA_STRARRAY, \
	    .strtoul	= STUL_STRARRAY, \
	    .parm	= &strarray__##array, }

/*
 * Same as STRARRAY() but for arguments that are a set of flags named by
 * 'strarray__<array>'.  'name' is unused here as well.
 */
#define STRARRAY_FLAGS(name, array) \
	  { .scnprintf	= SCA_STRARRAY_FLAGS, \
	    .strtoul	= STUL_STRARRAY_FLAGS, \
	    .parm	= &strarray__##array, }
896
897#include "trace/beauty/arch_errno_names.c"
898#include "trace/beauty/eventfd.c"
899#include "trace/beauty/futex_op.c"
900#include "trace/beauty/futex_val3.c"
901#include "trace/beauty/mmap.c"
902#include "trace/beauty/mode_t.c"
903#include "trace/beauty/msg_flags.c"
904#include "trace/beauty/open_flags.c"
905#include "trace/beauty/perf_event_open.c"
906#include "trace/beauty/pid.c"
907#include "trace/beauty/sched_policy.c"
908#include "trace/beauty/seccomp.c"
909#include "trace/beauty/signum.c"
910#include "trace/beauty/socket_type.c"
911#include "trace/beauty/waitid_options.c"
912
913static struct syscall_fmt syscall_fmts[] = {
914 { .name = "access",
915 .arg = { [1] = { .scnprintf = SCA_ACCMODE, }, }, },
916 { .name = "arch_prctl",
917 .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, },
918 [1] = { .scnprintf = SCA_PTR, }, }, },
919 { .name = "bind",
920 .arg = { [0] = { .scnprintf = SCA_INT, },
921 [1] = { .scnprintf = SCA_SOCKADDR, },
922 [2] = { .scnprintf = SCA_INT, }, }, },
923 { .name = "bpf",
924 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
925 { .name = "brk", .hexret = true,
926 .arg = { [0] = { .scnprintf = SCA_PTR, }, }, },
927 { .name = "clock_gettime",
928 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
929 { .name = "clone", .errpid = true, .nr_args = 5,
930 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
931 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
932 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
933 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
934 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
935 { .name = "close",
936 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, }, }, },
937 { .name = "connect",
938 .arg = { [0] = { .scnprintf = SCA_INT, },
939 [1] = { .scnprintf = SCA_SOCKADDR, },
940 [2] = { .scnprintf = SCA_INT, }, }, },
941 { .name = "epoll_ctl",
942 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
943 { .name = "eventfd2",
944 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, }, }, },
945 { .name = "fchmodat",
946 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
947 { .name = "fchownat",
948 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
949 { .name = "fcntl",
950 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD,
951 .strtoul = STUL_STRARRAYS,
952 .parm = &strarrays__fcntl_cmds_arrays,
953 .show_zero = true, },
954 [2] = { .scnprintf = SCA_FCNTL_ARG, }, }, },
955 { .name = "flock",
956 .arg = { [1] = { .scnprintf = SCA_FLOCK, }, }, },
957 { .name = "fsconfig",
958 .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
959 { .name = "fsmount",
960 .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
961 [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, }, }, },
962 { .name = "fspick",
963 .arg = { [0] = { .scnprintf = SCA_FDAT, },
964 [1] = { .scnprintf = SCA_FILENAME, },
965 [2] = { .scnprintf = SCA_FSPICK_FLAGS, }, }, },
966 { .name = "fstat", .alias = "newfstat", },
967 { .name = "fstatat", .alias = "newfstatat", },
968 { .name = "futex",
969 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, },
970 [5] = { .scnprintf = SCA_FUTEX_VAL3, }, }, },
971 { .name = "futimesat",
972 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
973 { .name = "getitimer",
974 .arg = { [0] = STRARRAY(which, itimers), }, },
975 { .name = "getpid", .errpid = true, },
976 { .name = "getpgid", .errpid = true, },
977 { .name = "getppid", .errpid = true, },
978 { .name = "getrandom",
979 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, }, }, },
980 { .name = "getrlimit",
981 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
982 { .name = "getsockopt",
983 .arg = { [1] = STRARRAY(level, socket_level), }, },
984 { .name = "gettid", .errpid = true, },
985 { .name = "ioctl",
986 .arg = {
987#if defined(__i386__) || defined(__x86_64__)
988
989
990
991 [1] = { .scnprintf = SCA_IOCTL_CMD, },
992 [2] = { .scnprintf = SCA_HEX, }, }, },
993#else
994 [2] = { .scnprintf = SCA_HEX, }, }, },
995#endif
996 { .name = "kcmp", .nr_args = 5,
997 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
998 [1] = { .name = "pid2", .scnprintf = SCA_PID, },
999 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
1000 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
1001 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
1002 { .name = "keyctl",
1003 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
1004 { .name = "kill",
1005 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
1006 { .name = "linkat",
1007 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1008 { .name = "lseek",
1009 .arg = { [2] = STRARRAY(whence, whences), }, },
1010 { .name = "lstat", .alias = "newlstat", },
1011 { .name = "madvise",
1012 .arg = { [0] = { .scnprintf = SCA_HEX, },
1013 [2] = { .scnprintf = SCA_MADV_BHV, }, }, },
1014 { .name = "mkdirat",
1015 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1016 { .name = "mknodat",
1017 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1018 { .name = "mmap", .hexret = true,
1019
1020#if defined(__s390x__)
1021 .alias = "old_mmap",
1022#endif
1023 .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, },
1024 [3] = { .scnprintf = SCA_MMAP_FLAGS,
1025 .strtoul = STUL_STRARRAY_FLAGS,
1026 .parm = &strarray__mmap_flags, },
1027 [5] = { .scnprintf = SCA_HEX, }, }, },
1028 { .name = "mount",
1029 .arg = { [0] = { .scnprintf = SCA_FILENAME, },
1030 [3] = { .scnprintf = SCA_MOUNT_FLAGS,
1031 .mask_val = SCAMV_MOUNT_FLAGS, }, }, },
1032 { .name = "move_mount",
1033 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1034 [1] = { .scnprintf = SCA_FILENAME, },
1035 [2] = { .scnprintf = SCA_FDAT, },
1036 [3] = { .scnprintf = SCA_FILENAME, },
1037 [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, }, }, },
1038 { .name = "mprotect",
1039 .arg = { [0] = { .scnprintf = SCA_HEX, },
1040 [2] = { .scnprintf = SCA_MMAP_PROT, }, }, },
1041 { .name = "mq_unlink",
1042 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1043 { .name = "mremap", .hexret = true,
1044 .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, }, }, },
1045 { .name = "name_to_handle_at",
1046 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1047 { .name = "newfstatat",
1048 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1049 { .name = "open",
1050 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
1051 { .name = "open_by_handle_at",
1052 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1053 [2] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
1054 { .name = "openat",
1055 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1056 [2] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
1057 { .name = "perf_event_open",
1058 .arg = { [2] = { .scnprintf = SCA_INT, },
1059 [3] = { .scnprintf = SCA_FD, },
1060 [4] = { .scnprintf = SCA_PERF_FLAGS, }, }, },
1061 { .name = "pipe2",
1062 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, }, }, },
1063 { .name = "pkey_alloc",
1064 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, }, }, },
1065 { .name = "pkey_free",
1066 .arg = { [0] = { .scnprintf = SCA_INT, }, }, },
1067 { .name = "pkey_mprotect",
1068 .arg = { [0] = { .scnprintf = SCA_HEX, },
1069 [2] = { .scnprintf = SCA_MMAP_PROT, },
1070 [3] = { .scnprintf = SCA_INT, }, }, },
1071 { .name = "poll", .timeout = true, },
1072 { .name = "ppoll", .timeout = true, },
1073 { .name = "prctl",
1074 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION,
1075 .strtoul = STUL_STRARRAY,
1076 .parm = &strarray__prctl_options, },
1077 [1] = { .scnprintf = SCA_PRCTL_ARG2, },
1078 [2] = { .scnprintf = SCA_PRCTL_ARG3, }, }, },
1079 { .name = "pread", .alias = "pread64", },
1080 { .name = "preadv", .alias = "pread", },
1081 { .name = "prlimit64",
1082 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
1083 { .name = "pwrite", .alias = "pwrite64", },
1084 { .name = "readlinkat",
1085 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1086 { .name = "recvfrom",
1087 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1088 { .name = "recvmmsg",
1089 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1090 { .name = "recvmsg",
1091 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1092 { .name = "renameat",
1093 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1094 [2] = { .scnprintf = SCA_FDAT, }, }, },
1095 { .name = "renameat2",
1096 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1097 [2] = { .scnprintf = SCA_FDAT, },
1098 [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, }, }, },
1099 { .name = "rt_sigaction",
1100 .arg = { [0] = { .scnprintf = SCA_SIGNUM, }, }, },
1101 { .name = "rt_sigprocmask",
1102 .arg = { [0] = STRARRAY(how, sighow), }, },
1103 { .name = "rt_sigqueueinfo",
1104 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
1105 { .name = "rt_tgsigqueueinfo",
1106 .arg = { [2] = { .scnprintf = SCA_SIGNUM, }, }, },
1107 { .name = "sched_setscheduler",
1108 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, }, }, },
1109 { .name = "seccomp",
1110 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, },
1111 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, }, }, },
1112 { .name = "select", .timeout = true, },
1113 { .name = "sendfile", .alias = "sendfile64", },
1114 { .name = "sendmmsg",
1115 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1116 { .name = "sendmsg",
1117 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1118 { .name = "sendto",
1119 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, },
1120 [4] = { .scnprintf = SCA_SOCKADDR, }, }, },
1121 { .name = "set_tid_address", .errpid = true, },
1122 { .name = "setitimer",
1123 .arg = { [0] = STRARRAY(which, itimers), }, },
1124 { .name = "setrlimit",
1125 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
1126 { .name = "setsockopt",
1127 .arg = { [1] = STRARRAY(level, socket_level), }, },
1128 { .name = "socket",
1129 .arg = { [0] = STRARRAY(family, socket_families),
1130 [1] = { .scnprintf = SCA_SK_TYPE, },
1131 [2] = { .scnprintf = SCA_SK_PROTO, }, }, },
1132 { .name = "socketpair",
1133 .arg = { [0] = STRARRAY(family, socket_families),
1134 [1] = { .scnprintf = SCA_SK_TYPE, },
1135 [2] = { .scnprintf = SCA_SK_PROTO, }, }, },
1136 { .name = "stat", .alias = "newstat", },
1137 { .name = "statx",
1138 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1139 [2] = { .scnprintf = SCA_STATX_FLAGS, } ,
1140 [3] = { .scnprintf = SCA_STATX_MASK, }, }, },
1141 { .name = "swapoff",
1142 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1143 { .name = "swapon",
1144 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1145 { .name = "symlinkat",
1146 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1147 { .name = "sync_file_range",
1148 .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, }, }, },
1149 { .name = "tgkill",
1150 .arg = { [2] = { .scnprintf = SCA_SIGNUM, }, }, },
1151 { .name = "tkill",
1152 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
1153 { .name = "umount2", .alias = "umount",
1154 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1155 { .name = "uname", .alias = "newuname", },
1156 { .name = "unlinkat",
1157 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1158 { .name = "utimensat",
1159 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1160 { .name = "wait4", .errpid = true,
1161 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, }, }, },
1162 { .name = "waitid", .errpid = true,
1163 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, }, }, },
1164};
1165
1166static int syscall_fmt__cmp(const void *name, const void *fmtp)
1167{
1168 const struct syscall_fmt *fmt = fmtp;
1169 return strcmp(name, fmt->name);
1170}
1171
1172static struct syscall_fmt *__syscall_fmt__find(struct syscall_fmt *fmts, const int nmemb, const char *name)
1173{
1174 return bsearch(name, fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1175}
1176
1177static struct syscall_fmt *syscall_fmt__find(const char *name)
1178{
1179 const int nmemb = ARRAY_SIZE(syscall_fmts);
1180 return __syscall_fmt__find(syscall_fmts, nmemb, name);
1181}
1182
1183static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts, const int nmemb, const char *alias)
1184{
1185 int i;
1186
1187 for (i = 0; i < nmemb; ++i) {
1188 if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0)
1189 return &fmts[i];
1190 }
1191
1192 return NULL;
1193}
1194
1195static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
1196{
1197 const int nmemb = ARRAY_SIZE(syscall_fmts);
1198 return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias);
1199}
1200
1201
1202
1203
1204
1205
1206
/*
 * Per-syscall state, one slot per syscall id in trace->syscalls.table.
 * Filled in lazily by trace__read_syscall_info().
 */
struct syscall {
	/* "syscalls:sys_enter_<name>" tracepoint format; may hold an ERR_PTR value */
	struct tep_event    *tp_format;
	/* number of entries in ->arg_fmt / arguments to print */
	int		    nr_args;
	/* offset + size of the last tracepoint field; payload past it is augmented data */
	int		    args_size;
	/* NOTE(review): presumably the BPF programs attached per syscall - not used in this part of the file */
	struct {
		struct bpf_program *sys_enter,
				   *sys_exit;
	}		    bpf_prog;
	/* name is "exit" or "exit_group" */
	bool		    is_exit;
	/* name is "open" or "openat" */
	bool		    is_open;
	/* the syscall table has no name for this id (a hole in the table) */
	bool		    nonexistent;
	/* tracepoint fields describing the arguments (leading syscall-nr field skipped) */
	struct tep_format_field *args;
	/* syscall name as returned by syscalltbl__name() */
	const char	    *name;
	/* matching entry in syscall_fmts, or NULL */
	struct syscall_fmt  *fmt;
	/* per-argument formatters, calloc()ed by syscall__alloc_arg_fmts() */
	struct syscall_arg_fmt *arg_fmt;
};
1223
1224
1225
1226
1227
1228
/*
 * NOTE(review): looks like the value type of a per-syscall BPF map shared
 * with the BPF side - its users are outside this part of the file, confirm
 * the layout against them before changing it.
 */
struct bpf_map_syscall_entry {
	bool	enabled;
	/* one length per syscall argument slot (6 slots) - presumably 0 means "not a string", TODO confirm */
	u16	string_args_len[6];
};
1233
1234
1235
1236
1237
1238
1239
1240
1241static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
1242{
1243 double duration = (double)t / NSEC_PER_MSEC;
1244 size_t printed = fprintf(fp, "(");
1245
1246 if (!calculated)
1247 printed += fprintf(fp, " ");
1248 else if (duration >= 1.0)
1249 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1250 else if (duration >= 0.01)
1251 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1252 else
1253 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1254 return printed + fprintf(fp, "): ");
1255}
1256
1257
1258
1259
1260
1261
1262
1263
/*
 * Per-thread tracing state, stored as the thread's private data
 * (thread__priv()/thread__set_priv()) and created on demand by
 * thread__trace().
 */
struct thread_trace {
	/* timestamp of the pending syscall entry; used to compute durations */
	u64		  entry_time;
	/* an entry line is buffered in ->entry_str and has not been printed yet */
	bool		  entry_pending;
	/* bumped on every thread__trace() call for this thread */
	unsigned long	  nr_events;
	/* NOTE(review): presumably major/minor page fault counters - updated outside this part of the file */
	unsigned long	  pfmaj, pfmin;
	/* buffered text of the syscall entry line */
	char		  *entry_str;
	double		  runtime_ms;
	/* optional formatter for the return value, reset before each argument formatting pass */
	size_t		  (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	struct {
		/* userspace pointer value of the filename argument */
		unsigned long ptr;
		/* offset in ->entry_str where the filename should go */
		short int     entry_str_pos;
		bool	      pending_open;
		unsigned int  namelen;
		char	      *name;
	} filename;
	struct {
		/* highest valid index in ->table, -1 while the table is empty */
		int	      max;
		/* fd indexed array, grown by thread_trace__files_entry() */
		struct file   *table;
	} files;

	/* syscall id -> struct syscall_stats (in int_node->priv) */
	struct intlist *syscall_stats;
};
1286
1287static struct thread_trace *thread_trace__new(void)
1288{
1289 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1290
1291 if (ttrace) {
1292 ttrace->files.max = -1;
1293 ttrace->syscall_stats = intlist__new(NULL);
1294 }
1295
1296 return ttrace;
1297}
1298
1299static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1300{
1301 struct thread_trace *ttrace;
1302
1303 if (thread == NULL)
1304 goto fail;
1305
1306 if (thread__priv(thread) == NULL)
1307 thread__set_priv(thread, thread_trace__new());
1308
1309 if (thread__priv(thread) == NULL)
1310 goto fail;
1311
1312 ttrace = thread__priv(thread);
1313 ++ttrace->nr_events;
1314
1315 return ttrace;
1316fail:
1317 color_fprintf(fp, PERF_COLOR_RED,
1318 "WARNING: not enough memory, dropping samples!\n");
1319 return NULL;
1320}
1321
1322
1323void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
1324 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
1325{
1326 struct thread_trace *ttrace = thread__priv(arg->thread);
1327
1328 ttrace->ret_scnprintf = ret_scnprintf;
1329}
1330
/*
 * Bit flags - NOTE(review): presumably select major/minor page fault tracing;
 * the users are outside this part of the file.
 */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* NOTE(review): presumably the size of thread_trace->entry_str - confirm at the allocation site */
static const size_t trace__entry_str_size = 2048;
1335
1336static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
1337{
1338 if (fd < 0)
1339 return NULL;
1340
1341 if (fd > ttrace->files.max) {
1342 struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
1343
1344 if (nfiles == NULL)
1345 return NULL;
1346
1347 if (ttrace->files.max != -1) {
1348 memset(nfiles + ttrace->files.max + 1, 0,
1349 (fd - ttrace->files.max) * sizeof(struct file));
1350 } else {
1351 memset(nfiles, 0, (fd + 1) * sizeof(struct file));
1352 }
1353
1354 ttrace->files.table = nfiles;
1355 ttrace->files.max = fd;
1356 }
1357
1358 return ttrace->files.table + fd;
1359}
1360
/* Public wrapper: fd table slot for @fd in @thread's thread_trace. */
struct file *thread__files_entry(struct thread *thread, int fd)
{
	struct thread_trace *ttrace = thread__priv(thread);

	return thread_trace__files_entry(ttrace, fd);
}
1365
1366static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1367{
1368 struct thread_trace *ttrace = thread__priv(thread);
1369 struct file *file = thread_trace__files_entry(ttrace, fd);
1370
1371 if (file != NULL) {
1372 struct stat st;
1373 if (stat(pathname, &st) == 0)
1374 file->dev_maj = major(st.st_rdev);
1375 file->pathname = strdup(pathname);
1376 if (file->pathname)
1377 return 0;
1378 }
1379
1380 return -1;
1381}
1382
1383static int thread__read_fd_path(struct thread *thread, int fd)
1384{
1385 char linkname[PATH_MAX], pathname[PATH_MAX];
1386 struct stat st;
1387 int ret;
1388
1389 if (thread->pid_ == thread->tid) {
1390 scnprintf(linkname, sizeof(linkname),
1391 "/proc/%d/fd/%d", thread->pid_, fd);
1392 } else {
1393 scnprintf(linkname, sizeof(linkname),
1394 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1395 }
1396
1397 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1398 return -1;
1399
1400 ret = readlink(linkname, pathname, sizeof(pathname));
1401
1402 if (ret < 0 || ret > st.st_size)
1403 return -1;
1404
1405 pathname[ret] = '\0';
1406 return trace__set_fd_pathname(thread, fd, pathname);
1407}
1408
1409static const char *thread__fd_path(struct thread *thread, int fd,
1410 struct trace *trace)
1411{
1412 struct thread_trace *ttrace = thread__priv(thread);
1413
1414 if (ttrace == NULL || trace->fd_path_disabled)
1415 return NULL;
1416
1417 if (fd < 0)
1418 return NULL;
1419
1420 if ((fd > ttrace->files.max || ttrace->files.table[fd].pathname == NULL)) {
1421 if (!trace->live)
1422 return NULL;
1423 ++trace->stats.proc_getname;
1424 if (thread__read_fd_path(thread, fd))
1425 return NULL;
1426 }
1427
1428 return ttrace->files.table[fd].pathname;
1429}
1430
1431size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
1432{
1433 int fd = arg->val;
1434 size_t printed = scnprintf(bf, size, "%d", fd);
1435 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1436
1437 if (path)
1438 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1439
1440 return printed;
1441}
1442
1443size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1444{
1445 size_t printed = scnprintf(bf, size, "%d", fd);
1446 struct thread *thread = machine__find_thread(trace->host, pid, pid);
1447
1448 if (thread) {
1449 const char *path = thread__fd_path(thread, fd, trace);
1450
1451 if (path)
1452 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1453
1454 thread__put(thread);
1455 }
1456
1457 return printed;
1458}
1459
1460static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1461 struct syscall_arg *arg)
1462{
1463 int fd = arg->val;
1464 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1465 struct thread_trace *ttrace = thread__priv(arg->thread);
1466
1467 if (ttrace && fd >= 0 && fd <= ttrace->files.max)
1468 zfree(&ttrace->files.table[fd].pathname);
1469
1470 return printed;
1471}
1472
1473static void thread__set_filename_pos(struct thread *thread, const char *bf,
1474 unsigned long ptr)
1475{
1476 struct thread_trace *ttrace = thread__priv(thread);
1477
1478 ttrace->filename.ptr = ptr;
1479 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1480}
1481
1482static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
1483{
1484 struct augmented_arg *augmented_arg = arg->augmented.args;
1485 size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
1486
1487
1488
1489
1490 int consumed = sizeof(*augmented_arg) + augmented_arg->size;
1491
1492 arg->augmented.args = ((void *)arg->augmented.args) + consumed;
1493 arg->augmented.size -= consumed;
1494
1495 return printed;
1496}
1497
1498static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1499 struct syscall_arg *arg)
1500{
1501 unsigned long ptr = arg->val;
1502
1503 if (arg->augmented.args)
1504 return syscall_arg__scnprintf_augmented_string(arg, bf, size);
1505
1506 if (!arg->trace->vfs_getname)
1507 return scnprintf(bf, size, "%#x", ptr);
1508
1509 thread__set_filename_pos(arg->thread, bf, ptr);
1510 return 0;
1511}
1512
1513static bool trace__filter_duration(struct trace *trace, double t)
1514{
1515 return t < (trace->duration_filter * NSEC_PER_MSEC);
1516}
1517
1518static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1519{
1520 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1521
1522 return fprintf(fp, "%10.3f ", ts);
1523}
1524
1525
1526
1527
1528
1529
1530
1531static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1532{
1533 if (tstamp > 0)
1534 return __trace__fprintf_tstamp(trace, tstamp, fp);
1535
1536 return fprintf(fp, " ? ");
1537}
1538
1539static pid_t workload_pid = -1;
1540static bool done = false;
1541static bool interrupted = false;
1542
1543static void sighandler_interrupt(int sig __maybe_unused)
1544{
1545 done = interrupted = true;
1546}
1547
1548static void sighandler_chld(int sig __maybe_unused, siginfo_t *info,
1549 void *context __maybe_unused)
1550{
1551 if (info->si_pid == workload_pid)
1552 done = true;
1553}
1554
1555static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
1556{
1557 size_t printed = 0;
1558
1559 if (trace->multiple_threads) {
1560 if (trace->show_comm)
1561 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1562 printed += fprintf(fp, "%d ", thread->tid);
1563 }
1564
1565 return printed;
1566}
1567
1568static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1569 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1570{
1571 size_t printed = 0;
1572
1573 if (trace->show_tstamp)
1574 printed = trace__fprintf_tstamp(trace, tstamp, fp);
1575 if (trace->show_duration)
1576 printed += fprintf_duration(duration, duration_calculated, fp);
1577 return printed + trace__fprintf_comm_tid(trace, thread, fp);
1578}
1579
1580static int trace__process_event(struct trace *trace, struct machine *machine,
1581 union perf_event *event, struct perf_sample *sample)
1582{
1583 int ret = 0;
1584
1585 switch (event->header.type) {
1586 case PERF_RECORD_LOST:
1587 color_fprintf(trace->output, PERF_COLOR_RED,
1588 "LOST %" PRIu64 " events!\n", event->lost.lost);
1589 ret = machine__process_lost_event(machine, event, sample);
1590 break;
1591 default:
1592 ret = machine__process_event(machine, event, sample);
1593 break;
1594 }
1595
1596 return ret;
1597}
1598
1599static int trace__tool_process(struct perf_tool *tool,
1600 union perf_event *event,
1601 struct perf_sample *sample,
1602 struct machine *machine)
1603{
1604 struct trace *trace = container_of(tool, struct trace, tool);
1605 return trace__process_event(trace, machine, event, sample);
1606}
1607
1608static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1609{
1610 struct machine *machine = vmachine;
1611
1612 if (machine->kptr_restrict_warned)
1613 return NULL;
1614
1615 if (symbol_conf.kptr_restrict) {
1616 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1617 "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1618 "Kernel samples will not be resolved.\n");
1619 machine->kptr_restrict_warned = true;
1620 return NULL;
1621 }
1622
1623 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1624}
1625
1626static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
1627{
1628 int err = symbol__init(NULL);
1629
1630 if (err)
1631 return err;
1632
1633 trace->host = machine__new_host();
1634 if (trace->host == NULL)
1635 return -ENOMEM;
1636
1637 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1638 if (err < 0)
1639 goto out;
1640
1641 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1642 evlist->core.threads, trace__tool_process,
1643 true, false, 1);
1644out:
1645 if (err)
1646 symbol__exit();
1647
1648 return err;
1649}
1650
1651static void trace__symbols__exit(struct trace *trace)
1652{
1653 machine__exit(trace->host);
1654 trace->host = NULL;
1655
1656 symbol__exit();
1657}
1658
1659static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
1660{
1661 int idx;
1662
1663 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1664 nr_args = sc->fmt->nr_args;
1665
1666 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
1667 if (sc->arg_fmt == NULL)
1668 return -1;
1669
1670 for (idx = 0; idx < nr_args; ++idx) {
1671 if (sc->fmt)
1672 sc->arg_fmt[idx] = sc->fmt->arg[idx];
1673 }
1674
1675 sc->nr_args = nr_args;
1676 return 0;
1677}
1678
/*
 * Formatters selected by argument (field) name rather than by syscall.
 * Looked up with bsearch(), so keep the entries sorted by ->name.
 */
static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = {
	{ .name = "msr",	.scnprintf = SCA_X86_MSR,	  .strtoul = STUL_X86_MSR,	   },
	{ .name = "vector",	.scnprintf = SCA_X86_IRQ_VECTORS, .strtoul = STUL_X86_IRQ_VECTORS, },
};
1683
1684static int syscall_arg_fmt__cmp(const void *name, const void *fmtp)
1685{
1686 const struct syscall_arg_fmt *fmt = fmtp;
1687 return strcmp(name, fmt->name);
1688}
1689
1690static struct syscall_arg_fmt *
1691__syscall_arg_fmt__find_by_name(struct syscall_arg_fmt *fmts, const int nmemb, const char *name)
1692{
1693 return bsearch(name, fmts, nmemb, sizeof(struct syscall_arg_fmt), syscall_arg_fmt__cmp);
1694}
1695
1696static struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name)
1697{
1698 const int nmemb = ARRAY_SIZE(syscall_arg_fmts__by_name);
1699 return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
1700}
1701
1702static struct tep_format_field *
1703syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field)
1704{
1705 struct tep_format_field *last_field = NULL;
1706 int len;
1707
1708 for (; field; field = field->next, ++arg) {
1709 last_field = field;
1710
1711 if (arg->scnprintf)
1712 continue;
1713
1714 len = strlen(field->name);
1715
1716 if (strcmp(field->type, "const char *") == 0 &&
1717 ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
1718 strstr(field->name, "path") != NULL))
1719 arg->scnprintf = SCA_FILENAME;
1720 else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
1721 arg->scnprintf = SCA_PTR;
1722 else if (strcmp(field->type, "pid_t") == 0)
1723 arg->scnprintf = SCA_PID;
1724 else if (strcmp(field->type, "umode_t") == 0)
1725 arg->scnprintf = SCA_MODE_T;
1726 else if ((field->flags & TEP_FIELD_IS_ARRAY) && strstr(field->type, "char")) {
1727 arg->scnprintf = SCA_CHAR_ARRAY;
1728 arg->nr_entries = field->arraylen;
1729 } else if ((strcmp(field->type, "int") == 0 ||
1730 strcmp(field->type, "unsigned int") == 0 ||
1731 strcmp(field->type, "long") == 0) &&
1732 len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
1733
1734
1735
1736
1737
1738
1739
1740 arg->scnprintf = SCA_FD;
1741 } else {
1742 struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name);
1743
1744 if (fmt) {
1745 arg->scnprintf = fmt->scnprintf;
1746 arg->strtoul = fmt->strtoul;
1747 }
1748 }
1749 }
1750
1751 return last_field;
1752}
1753
1754static int syscall__set_arg_fmts(struct syscall *sc)
1755{
1756 struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args);
1757
1758 if (last_field)
1759 sc->args_size = last_field->offset + last_field->size;
1760
1761 return 0;
1762}
1763
/*
 * Fill in trace->syscalls.table[id]: name, static formatter entry, the
 * "syscalls:sys_enter_<name>" tracepoint format and per-argument formatters.
 *
 * Returns 0 on success (also for ids already marked or newly found to be
 * nonexistent), -ENOMEM on allocation failure, or the error encoded in the
 * tracepoint format pointer when the tracepoint cannot be read.
 */
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = syscalltbl__name(trace->sctbl, id);

#ifdef HAVE_SYSCALL_TABLE_SUPPORT
	/* The maximum id is known up front: allocate the whole table once. */
	if (trace->syscalls.table == NULL) {
		trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
		if (trace->syscalls.table == NULL)
			return -ENOMEM;
	}
#else
	if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) {
		/* Grow the table as ids show up; max_id tracks its current size. */
		struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (table == NULL)
			return -ENOMEM;

		/* Zero only the slots that were just added. */
		if (trace->syscalls.table == NULL)
			memset(table, 0, (id + 1) * sizeof(*sc));
		else
			memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));

		trace->syscalls.table	      = table;
		trace->sctbl->syscalls.max_id = id;
	}
#endif
	sc = trace->syscalls.table + id;
	if (sc->nonexistent)
		return 0;

	/* No name for this id: remember that so it is not looked up again. */
	if (name == NULL) {
		sc->nonexistent = true;
		return 0;
	}

	sc->name = name;
	sc->fmt  = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	/* Retry with the alias from the formatter table, if there is one. */
	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	/* Without a tracepoint format assume 6 arguments. */
	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
		return -ENOMEM;

	if (IS_ERR(sc->tp_format))
		return PTR_ERR(sc->tp_format);

	sc->args = sc->tp_format->format.fields;
	/*
	 * The first field may be the syscall number ("__syscall_nr" or "nr"),
	 * which is not an argument: skip it and drop it from the count.
	 */
	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
		sc->args = sc->args->next;
		--sc->nr_args;
	}

	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
	sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");

	return syscall__set_arg_fmts(sc);
}
1836
1837static int evsel__init_tp_arg_scnprintf(struct evsel *evsel)
1838{
1839 struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
1840
1841 if (fmt != NULL) {
1842 syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields);
1843 return 0;
1844 }
1845
1846 return -ENOMEM;
1847}
1848
/*
 * qsort()/bsearch() comparator for int elements.
 *
 * Returns a negative, zero or positive value when *a is less than, equal to
 * or greater than *b.  The result is built from two comparisons instead of a
 * subtraction, because "*a - *b" overflows (undefined behaviour, and in
 * practice the wrong sign) when the operands are far apart.
 */
static int intcmp(const void *a, const void *b)
{
	const int one = *(const int *)a, another = *(const int *)b;

	return (one > another) - (one < another);
}
1855
1856static int trace__validate_ev_qualifier(struct trace *trace)
1857{
1858 int err = 0;
1859 bool printed_invalid_prefix = false;
1860 struct str_node *pos;
1861 size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier);
1862
1863 trace->ev_qualifier_ids.entries = malloc(nr_allocated *
1864 sizeof(trace->ev_qualifier_ids.entries[0]));
1865
1866 if (trace->ev_qualifier_ids.entries == NULL) {
1867 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1868 trace->output);
1869 err = -EINVAL;
1870 goto out;
1871 }
1872
1873 strlist__for_each_entry(pos, trace->ev_qualifier) {
1874 const char *sc = pos->s;
1875 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
1876
1877 if (id < 0) {
1878 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1879 if (id >= 0)
1880 goto matches;
1881
1882 if (!printed_invalid_prefix) {
1883 pr_debug("Skipping unknown syscalls: ");
1884 printed_invalid_prefix = true;
1885 } else {
1886 pr_debug(", ");
1887 }
1888
1889 pr_debug("%s", sc);
1890 continue;
1891 }
1892matches:
1893 trace->ev_qualifier_ids.entries[nr_used++] = id;
1894 if (match_next == -1)
1895 continue;
1896
1897 while (1) {
1898 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1899 if (id < 0)
1900 break;
1901 if (nr_allocated == nr_used) {
1902 void *entries;
1903
1904 nr_allocated += 8;
1905 entries = realloc(trace->ev_qualifier_ids.entries,
1906 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1907 if (entries == NULL) {
1908 err = -ENOMEM;
1909 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1910 goto out_free;
1911 }
1912 trace->ev_qualifier_ids.entries = entries;
1913 }
1914 trace->ev_qualifier_ids.entries[nr_used++] = id;
1915 }
1916 }
1917
1918 trace->ev_qualifier_ids.nr = nr_used;
1919 qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp);
1920out:
1921 if (printed_invalid_prefix)
1922 pr_debug("\n");
1923 return err;
1924out_free:
1925 zfree(&trace->ev_qualifier_ids.entries);
1926 trace->ev_qualifier_ids.nr = 0;
1927 goto out;
1928}
1929
1930static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
1931{
1932 bool in_ev_qualifier;
1933
1934 if (trace->ev_qualifier_ids.nr == 0)
1935 return true;
1936
1937 in_ev_qualifier = bsearch(&id, trace->ev_qualifier_ids.entries,
1938 trace->ev_qualifier_ids.nr, sizeof(int), intcmp) != NULL;
1939
1940 if (in_ev_qualifier)
1941 return !trace->not_ev_qualifier;
1942
1943 return trace->not_ev_qualifier;
1944}
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
1955{
1956 unsigned long val;
1957 unsigned char *p = arg->args + sizeof(unsigned long) * idx;
1958
1959 memcpy(&val, p, sizeof(val));
1960 return val;
1961}
1962
1963static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1964 struct syscall_arg *arg)
1965{
1966 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1967 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1968
1969 return scnprintf(bf, size, "arg%d: ", arg->idx);
1970}
1971
1972
1973
1974
1975
1976
1977static unsigned long syscall_arg_fmt__mask_val(struct syscall_arg_fmt *fmt, struct syscall_arg *arg, unsigned long val)
1978{
1979 if (fmt && fmt->mask_val)
1980 return fmt->mask_val(arg, val);
1981
1982 return val;
1983}
1984
1985static size_t syscall_arg_fmt__scnprintf_val(struct syscall_arg_fmt *fmt, char *bf, size_t size,
1986 struct syscall_arg *arg, unsigned long val)
1987{
1988 if (fmt && fmt->scnprintf) {
1989 arg->val = val;
1990 if (fmt->parm)
1991 arg->parm = fmt->parm;
1992 return fmt->scnprintf(bf, size, arg);
1993 }
1994 return scnprintf(bf, size, "%ld", val);
1995}
1996
/*
 * Format all arguments of syscall @sc into @bf, separated by ", ".
 *
 * @args is the raw argument area, @augmented_args/@augmented_args_size the
 * optional extra payload that follows it.  Returns the number of characters
 * written.
 *
 * arg.mask is tested before each argument: an argument whose bit is set is
 * not printed.  The struct is handed to every formatter, so the mask can be
 * changed from there.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, void *augmented_args, int augmented_args_size,
				      struct trace *trace, struct thread *thread)
{
	size_t printed = 0;
	unsigned long val;
	u8 bit = 1;
	struct syscall_arg arg = {
		.args	= args,
		.augmented = {
			.size = augmented_args_size,
			.args = augmented_args,
		},
		.idx	= 0,
		.mask	= 0,
		.trace  = trace,
		.thread = thread,
		.show_string_prefix = trace->show_string_prefix,
	};
	struct thread_trace *ttrace = thread__priv(thread);

	/*
	 * Start without a return value formatter; an argument formatter may
	 * install one through syscall_arg__set_ret_scnprintf().
	 */
	ttrace->ret_scnprintf = NULL;

	if (sc->args != NULL) {
		struct tep_format_field *field;

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			if (arg.mask & bit)
				continue;

			arg.fmt = &sc->arg_fmt[arg.idx];
			val = syscall_arg__val(&arg, arg.idx);
			/* Give the formatter a chance to mask out bits first. */
			val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val);

			/*
			 * Zero valued arguments are suppressed unless the user
			 * asked for zeros, or the formatter has a parameter and
			 * either wants zeros shown or is a string array lookup.
			 */
			if (val == 0 &&
			    !trace->show_zeros &&
			    !(sc->arg_fmt &&
			      (sc->arg_fmt[arg.idx].show_zero ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
			      sc->arg_fmt[arg.idx].parm))
				continue;

			printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");

			if (trace->show_arg_names)
				printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);

			printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx],
								  bf + printed, size - printed, &arg, val);
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * The tracepoint format could not be read, so there are no
		 * field names: print sc->nr_args arguments labelled through
		 * syscall__scnprintf_name().
		 */
		while (arg.idx < sc->nr_args) {
			if (arg.mask & bit)
				goto next_arg;
			val = syscall_arg__val(&arg, arg.idx);
			if (printed)
				printed += scnprintf(bf + printed, size - printed, ", ");
			printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
			printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], bf + printed, size - printed, &arg, val);
next_arg:
			++arg.idx;
			bit <<= 1;
		}
	}

	return printed;
}
2085
/* Signature shared by the per-event sample handlers, e.g. trace__sys_enter(). */
typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
2089
2090static struct syscall *trace__syscall_info(struct trace *trace,
2091 struct evsel *evsel, int id)
2092{
2093 int err = 0;
2094
2095 if (id < 0) {
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107 if (verbose > 1) {
2108 static u64 n;
2109 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
2110 id, evsel__name(evsel), ++n);
2111 }
2112 return NULL;
2113 }
2114
2115 err = -EINVAL;
2116
2117#ifdef HAVE_SYSCALL_TABLE_SUPPORT
2118 if (id > trace->sctbl->syscalls.max_id) {
2119#else
2120 if (id >= trace->sctbl->syscalls.max_id) {
2121
2122
2123
2124
2125
2126 err = trace__read_syscall_info(trace, id);
2127 if (err)
2128#endif
2129 goto out_cant_read;
2130 }
2131
2132 if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
2133 (err = trace__read_syscall_info(trace, id)) != 0)
2134 goto out_cant_read;
2135
2136 if (trace->syscalls.table[id].name == NULL) {
2137 if (trace->syscalls.table[id].nonexistent)
2138 return NULL;
2139 goto out_cant_read;
2140 }
2141
2142 return &trace->syscalls.table[id];
2143
2144out_cant_read:
2145 if (verbose > 0) {
2146 char sbuf[STRERR_BUFSIZE];
2147 fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
2148 if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
2149 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
2150 fputs(" information\n", trace->output);
2151 }
2152 return NULL;
2153}
2154
/* Per-thread, per-syscall statistics; hangs off int_node->priv in thread_trace->syscall_stats. */
struct syscall_stats {
	/* running statistics over the call durations */
	struct stats stats;
	/* number of calls that returned a negative value */
	u64	     nr_failures;
	/* number of entries in ->errnos, i.e. the highest errno seen so far */
	int	     max_errno;
	/* ->errnos[e - 1] counts failures with errno e; only filled for the errno summary */
	u32	     *errnos;
};
2161
2162static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
2163 int id, struct perf_sample *sample, long err, bool errno_summary)
2164{
2165 struct int_node *inode;
2166 struct syscall_stats *stats;
2167 u64 duration = 0;
2168
2169 inode = intlist__findnew(ttrace->syscall_stats, id);
2170 if (inode == NULL)
2171 return;
2172
2173 stats = inode->priv;
2174 if (stats == NULL) {
2175 stats = malloc(sizeof(*stats));
2176 if (stats == NULL)
2177 return;
2178
2179 stats->nr_failures = 0;
2180 stats->max_errno = 0;
2181 stats->errnos = NULL;
2182 init_stats(&stats->stats);
2183 inode->priv = stats;
2184 }
2185
2186 if (ttrace->entry_time && sample->time > ttrace->entry_time)
2187 duration = sample->time - ttrace->entry_time;
2188
2189 update_stats(&stats->stats, duration);
2190
2191 if (err < 0) {
2192 ++stats->nr_failures;
2193
2194 if (!errno_summary)
2195 return;
2196
2197 err = -err;
2198 if (err > stats->max_errno) {
2199 u32 *new_errnos = realloc(stats->errnos, err * sizeof(u32));
2200
2201 if (new_errnos) {
2202 memset(new_errnos + stats->max_errno, 0, (err - stats->max_errno) * sizeof(u32));
2203 } else {
2204 pr_debug("Not enough memory for errno stats for thread \"%s\"(%d/%d), results will be incomplete\n",
2205 thread__comm_str(thread), thread->pid_, thread->tid);
2206 return;
2207 }
2208
2209 stats->errnos = new_errnos;
2210 stats->max_errno = err;
2211 }
2212
2213 ++stats->errnos[err - 1];
2214 }
2215}
2216
2217static int trace__printf_interrupted_entry(struct trace *trace)
2218{
2219 struct thread_trace *ttrace;
2220 size_t printed;
2221 int len;
2222
2223 if (trace->failure_only || trace->current == NULL)
2224 return 0;
2225
2226 ttrace = thread__priv(trace->current);
2227
2228 if (!ttrace->entry_pending)
2229 return 0;
2230
2231 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
2232 printed += len = fprintf(trace->output, "%s)", ttrace->entry_str);
2233
2234 if (len < trace->args_alignment - 4)
2235 printed += fprintf(trace->output, "%-*s", trace->args_alignment - 4 - len, " ");
2236
2237 printed += fprintf(trace->output, " ...\n");
2238
2239 ttrace->entry_pending = false;
2240 ++trace->nr_events_printed;
2241
2242 return printed;
2243}
2244
2245static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
2246 struct perf_sample *sample, struct thread *thread)
2247{
2248 int printed = 0;
2249
2250 if (trace->print_sample) {
2251 double ts = (double)sample->time / NSEC_PER_MSEC;
2252
2253 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
2254 evsel__name(evsel), ts,
2255 thread__comm_str(thread),
2256 sample->pid, sample->tid, sample->cpu);
2257 }
2258
2259 return printed;
2260}
2261
2262static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
2263{
2264 void *augmented_args = NULL;
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279 int args_size = raw_augmented_args_size ?: sc->args_size;
2280
2281 *augmented_args_size = sample->raw_size - args_size;
2282 if (*augmented_args_size > 0)
2283 augmented_args = sample->raw_data + args_size;
2284
2285 return augmented_args;
2286}
2287
2288static void syscall__exit(struct syscall *sc)
2289{
2290 if (!sc)
2291 return;
2292
2293 free(sc->arg_fmt);
2294}
2295
2296static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
2297 union perf_event *event __maybe_unused,
2298 struct perf_sample *sample)
2299{
2300 char *msg;
2301 void *args;
2302 int printed = 0;
2303 struct thread *thread;
2304 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2305 int augmented_args_size = 0;
2306 void *augmented_args = NULL;
2307 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2308 struct thread_trace *ttrace;
2309
2310 if (sc == NULL)
2311 return -1;
2312
2313 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2314 ttrace = thread__trace(thread, trace->output);
2315 if (ttrace == NULL)
2316 goto out_put;
2317
2318 trace__fprintf_sample(trace, evsel, sample, thread);
2319
2320 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2321
2322 if (ttrace->entry_str == NULL) {
2323 ttrace->entry_str = malloc(trace__entry_str_size);
2324 if (!ttrace->entry_str)
2325 goto out_put;
2326 }
2327
2328 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
2329 trace__printf_interrupted_entry(trace);
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340 if (evsel != trace->syscalls.events.sys_enter)
2341 augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
2342 ttrace->entry_time = sample->time;
2343 msg = ttrace->entry_str;
2344 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
2345
2346 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
2347 args, augmented_args, augmented_args_size, trace, thread);
2348
2349 if (sc->is_exit) {
2350 if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
2351 int alignment = 0;
2352
2353 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
2354 printed = fprintf(trace->output, "%s)", ttrace->entry_str);
2355 if (trace->args_alignment > printed)
2356 alignment = trace->args_alignment - printed;
2357 fprintf(trace->output, "%*s= ?\n", alignment, " ");
2358 }
2359 } else {
2360 ttrace->entry_pending = true;
2361
2362 ttrace->filename.pending_open = false;
2363 }
2364
2365 if (trace->current != thread) {
2366 thread__put(trace->current);
2367 trace->current = thread__get(thread);
2368 }
2369 err = 0;
2370out_put:
2371 thread__put(thread);
2372 return err;
2373}
2374
2375static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
2376 struct perf_sample *sample)
2377{
2378 struct thread_trace *ttrace;
2379 struct thread *thread;
2380 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2381 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2382 char msg[1024];
2383 void *args, *augmented_args = NULL;
2384 int augmented_args_size;
2385
2386 if (sc == NULL)
2387 return -1;
2388
2389 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2390 ttrace = thread__trace(thread, trace->output);
2391
2392
2393
2394
2395 if (ttrace == NULL)
2396 goto out_put;
2397
2398 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2399 augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
2400 syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
2401 fprintf(trace->output, "%s", msg);
2402 err = 0;
2403out_put:
2404 thread__put(thread);
2405 return err;
2406}
2407
2408static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
2409 struct perf_sample *sample,
2410 struct callchain_cursor *cursor)
2411{
2412 struct addr_location al;
2413 int max_stack = evsel->core.attr.sample_max_stack ?
2414 evsel->core.attr.sample_max_stack :
2415 trace->max_stack;
2416 int err;
2417
2418 if (machine__resolve(trace->host, &al, sample) < 0)
2419 return -1;
2420
2421 err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
2422 addr_location__put(&al);
2423 return err;
2424}
2425
2426static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
2427{
2428
2429 const unsigned int print_opts = EVSEL__PRINT_SYM |
2430 EVSEL__PRINT_DSO |
2431 EVSEL__PRINT_UNKNOWN_AS_ADDR;
2432
2433 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output);
2434}
2435
/*
 * Translate errno value @err into its name, using the architecture of the
 * perf environment associated with @evsel.
 */
static const char *errno_to_name(struct evsel *evsel, int err)
{
	const char *arch = perf_env__arch(evsel__env(evsel));

	return arch_syscalls__strerrno(arch, err);
}
2443
2444static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
2445 union perf_event *event __maybe_unused,
2446 struct perf_sample *sample)
2447{
2448 long ret;
2449 u64 duration = 0;
2450 bool duration_calculated = false;
2451 struct thread *thread;
2452 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
2453 int alignment = trace->args_alignment;
2454 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2455 struct thread_trace *ttrace;
2456
2457 if (sc == NULL)
2458 return -1;
2459
2460 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2461 ttrace = thread__trace(thread, trace->output);
2462 if (ttrace == NULL)
2463 goto out_put;
2464
2465 trace__fprintf_sample(trace, evsel, sample, thread);
2466
2467 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2468
2469 if (trace->summary)
2470 thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary);
2471
2472 if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
2473 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2474 ttrace->filename.pending_open = false;
2475 ++trace->stats.vfs_getname;
2476 }
2477
2478 if (ttrace->entry_time) {
2479 duration = sample->time - ttrace->entry_time;
2480 if (trace__filter_duration(trace, duration))
2481 goto out;
2482 duration_calculated = true;
2483 } else if (trace->duration_filter)
2484 goto out;
2485
2486 if (sample->callchain) {
2487 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2488 if (callchain_ret == 0) {
2489 if (callchain_cursor.nr < trace->min_stack)
2490 goto out;
2491 callchain_ret = 1;
2492 }
2493 }
2494
2495 if (trace->summary_only || (ret >= 0 && trace->failure_only))
2496 goto out;
2497
2498 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
2499
2500 if (ttrace->entry_pending) {
2501 printed = fprintf(trace->output, "%s", ttrace->entry_str);
2502 } else {
2503 printed += fprintf(trace->output, " ... [");
2504 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2505 printed += 9;
2506 printed += fprintf(trace->output, "]: %s()", sc->name);
2507 }
2508
2509 printed++;
2510
2511 if (alignment > printed)
2512 alignment -= printed;
2513 else
2514 alignment = 0;
2515
2516 fprintf(trace->output, ")%*s= ", alignment, " ");
2517
2518 if (sc->fmt == NULL) {
2519 if (ret < 0)
2520 goto errno_print;
2521signed_print:
2522 fprintf(trace->output, "%ld", ret);
2523 } else if (ret < 0) {
2524errno_print: {
2525 char bf[STRERR_BUFSIZE];
2526 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
2527 *e = errno_to_name(evsel, -ret);
2528
2529 fprintf(trace->output, "-1 %s (%s)", e, emsg);
2530 }
2531 } else if (ret == 0 && sc->fmt->timeout)
2532 fprintf(trace->output, "0 (Timeout)");
2533 else if (ttrace->ret_scnprintf) {
2534 char bf[1024];
2535 struct syscall_arg arg = {
2536 .val = ret,
2537 .thread = thread,
2538 .trace = trace,
2539 };
2540 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
2541 ttrace->ret_scnprintf = NULL;
2542 fprintf(trace->output, "%s", bf);
2543 } else if (sc->fmt->hexret)
2544 fprintf(trace->output, "%#lx", ret);
2545 else if (sc->fmt->errpid) {
2546 struct thread *child = machine__find_thread(trace->host, ret, ret);
2547
2548 if (child != NULL) {
2549 fprintf(trace->output, "%ld", ret);
2550 if (child->comm_set)
2551 fprintf(trace->output, " (%s)", thread__comm_str(child));
2552 thread__put(child);
2553 }
2554 } else
2555 goto signed_print;
2556
2557 fputc('\n', trace->output);
2558
2559
2560
2561
2562
2563 if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
2564 interrupted = true;
2565
2566 if (callchain_ret > 0)
2567 trace__fprintf_callchain(trace, sample);
2568 else if (callchain_ret < 0)
2569 pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
2570out:
2571 ttrace->entry_pending = false;
2572 err = 0;
2573out_put:
2574 thread__put(thread);
2575 return err;
2576}
2577
2578static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
2579 union perf_event *event __maybe_unused,
2580 struct perf_sample *sample)
2581{
2582 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2583 struct thread_trace *ttrace;
2584 size_t filename_len, entry_str_len, to_move;
2585 ssize_t remaining_space;
2586 char *pos;
2587 const char *filename = evsel__rawptr(evsel, sample, "pathname");
2588
2589 if (!thread)
2590 goto out;
2591
2592 ttrace = thread__priv(thread);
2593 if (!ttrace)
2594 goto out_put;
2595
2596 filename_len = strlen(filename);
2597 if (filename_len == 0)
2598 goto out_put;
2599
2600 if (ttrace->filename.namelen < filename_len) {
2601 char *f = realloc(ttrace->filename.name, filename_len + 1);
2602
2603 if (f == NULL)
2604 goto out_put;
2605
2606 ttrace->filename.namelen = filename_len;
2607 ttrace->filename.name = f;
2608 }
2609
2610 strcpy(ttrace->filename.name, filename);
2611 ttrace->filename.pending_open = true;
2612
2613 if (!ttrace->filename.ptr)
2614 goto out_put;
2615
2616 entry_str_len = strlen(ttrace->entry_str);
2617 remaining_space = trace__entry_str_size - entry_str_len - 1;
2618 if (remaining_space <= 0)
2619 goto out_put;
2620
2621 if (filename_len > (size_t)remaining_space) {
2622 filename += filename_len - remaining_space;
2623 filename_len = remaining_space;
2624 }
2625
2626 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1;
2627 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2628 memmove(pos + filename_len, pos, to_move);
2629 memcpy(pos, filename, filename_len);
2630
2631 ttrace->filename.ptr = 0;
2632 ttrace->filename.entry_str_pos = 0;
2633out_put:
2634 thread__put(thread);
2635out:
2636 return 0;
2637}
2638
2639static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
2640 union perf_event *event __maybe_unused,
2641 struct perf_sample *sample)
2642{
2643 u64 runtime = evsel__intval(evsel, sample, "runtime");
2644 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2645 struct thread *thread = machine__findnew_thread(trace->host,
2646 sample->pid,
2647 sample->tid);
2648 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2649
2650 if (ttrace == NULL)
2651 goto out_dump;
2652
2653 ttrace->runtime_ms += runtime_ms;
2654 trace->runtime_ms += runtime_ms;
2655out_put:
2656 thread__put(thread);
2657 return 0;
2658
2659out_dump:
2660 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2661 evsel->name,
2662 evsel__strval(evsel, sample, "comm"),
2663 (pid_t)evsel__intval(evsel, sample, "pid"),
2664 runtime,
2665 evsel__intval(evsel, sample, "vruntime"));
2666 goto out_put;
2667}
2668
2669static int bpf_output__printer(enum binary_printer_ops op,
2670 unsigned int val, void *extra __maybe_unused, FILE *fp)
2671{
2672 unsigned char ch = (unsigned char)val;
2673
2674 switch (op) {
2675 case BINARY_PRINT_CHAR_DATA:
2676 return fprintf(fp, "%c", isprint(ch) ? ch : '.');
2677 case BINARY_PRINT_DATA_BEGIN:
2678 case BINARY_PRINT_LINE_BEGIN:
2679 case BINARY_PRINT_ADDR:
2680 case BINARY_PRINT_NUM_DATA:
2681 case BINARY_PRINT_NUM_PAD:
2682 case BINARY_PRINT_SEP:
2683 case BINARY_PRINT_CHAR_PAD:
2684 case BINARY_PRINT_LINE_END:
2685 case BINARY_PRINT_DATA_END:
2686 default:
2687 break;
2688 }
2689
2690 return 0;
2691}
2692
2693static void bpf_output__fprintf(struct trace *trace,
2694 struct perf_sample *sample)
2695{
2696 binary__fprintf(sample->raw_data, sample->raw_size, 8,
2697 bpf_output__printer, NULL, trace->output);
2698 ++trace->nr_events_printed;
2699}
2700
2701static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, struct perf_sample *sample,
2702 struct thread *thread, void *augmented_args, int augmented_args_size)
2703{
2704 char bf[2048];
2705 size_t size = sizeof(bf);
2706 struct tep_format_field *field = evsel->tp_format->format.fields;
2707 struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel);
2708 size_t printed = 0;
2709 unsigned long val;
2710 u8 bit = 1;
2711 struct syscall_arg syscall_arg = {
2712 .augmented = {
2713 .size = augmented_args_size,
2714 .args = augmented_args,
2715 },
2716 .idx = 0,
2717 .mask = 0,
2718 .trace = trace,
2719 .thread = thread,
2720 .show_string_prefix = trace->show_string_prefix,
2721 };
2722
2723 for (; field && arg; field = field->next, ++syscall_arg.idx, bit <<= 1, ++arg) {
2724 if (syscall_arg.mask & bit)
2725 continue;
2726
2727 syscall_arg.len = 0;
2728 syscall_arg.fmt = arg;
2729 if (field->flags & TEP_FIELD_IS_ARRAY) {
2730 int offset = field->offset;
2731
2732 if (field->flags & TEP_FIELD_IS_DYNAMIC) {
2733 offset = format_field__intval(field, sample, evsel->needs_swap);
2734 syscall_arg.len = offset >> 16;
2735 offset &= 0xffff;
2736 if (field->flags & TEP_FIELD_IS_RELATIVE)
2737 offset += field->offset + field->size;
2738 }
2739
2740 val = (uintptr_t)(sample->raw_data + offset);
2741 } else
2742 val = format_field__intval(field, sample, evsel->needs_swap);
2743
2744
2745
2746
2747 val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val);
2748
2749
2750
2751
2752
2753
2754 if (val == 0 &&
2755 !trace->show_zeros &&
2756 !((arg->show_zero ||
2757 arg->scnprintf == SCA_STRARRAY ||
2758 arg->scnprintf == SCA_STRARRAYS) &&
2759 arg->parm))
2760 continue;
2761
2762 printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
2763
2764
2765
2766
2767
2768 if (1 || trace->show_arg_names)
2769 printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
2770
2771 printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val);
2772 }
2773
2774 return printed + fprintf(trace->output, "%s", bf);
2775}
2776
2777static int trace__event_handler(struct trace *trace, struct evsel *evsel,
2778 union perf_event *event __maybe_unused,
2779 struct perf_sample *sample)
2780{
2781 struct thread *thread;
2782 int callchain_ret = 0;
2783
2784
2785
2786
2787
2788
2789 if (evsel->disabled)
2790 return 0;
2791
2792 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2793
2794 if (sample->callchain) {
2795 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2796 if (callchain_ret == 0) {
2797 if (callchain_cursor.nr < trace->min_stack)
2798 goto out;
2799 callchain_ret = 1;
2800 }
2801 }
2802
2803 trace__printf_interrupted_entry(trace);
2804 trace__fprintf_tstamp(trace, sample->time, trace->output);
2805
2806 if (trace->trace_syscalls && trace->show_duration)
2807 fprintf(trace->output, "( ): ");
2808
2809 if (thread)
2810 trace__fprintf_comm_tid(trace, thread, trace->output);
2811
2812 if (evsel == trace->syscalls.events.augmented) {
2813 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
2814 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2815
2816 if (sc) {
2817 fprintf(trace->output, "%s(", sc->name);
2818 trace__fprintf_sys_enter(trace, evsel, sample);
2819 fputc(')', trace->output);
2820 goto newline;
2821 }
2822
2823
2824
2825
2826
2827
2828 }
2829
2830 fprintf(trace->output, "%s(", evsel->name);
2831
2832 if (evsel__is_bpf_output(evsel)) {
2833 bpf_output__fprintf(trace, sample);
2834 } else if (evsel->tp_format) {
2835 if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
2836 trace__fprintf_sys_enter(trace, evsel, sample)) {
2837 if (trace->libtraceevent_print) {
2838 event_format__fprintf(evsel->tp_format, sample->cpu,
2839 sample->raw_data, sample->raw_size,
2840 trace->output);
2841 } else {
2842 trace__fprintf_tp_fields(trace, evsel, sample, thread, NULL, 0);
2843 }
2844 }
2845 }
2846
2847newline:
2848 fprintf(trace->output, ")\n");
2849
2850 if (callchain_ret > 0)
2851 trace__fprintf_callchain(trace, sample);
2852 else if (callchain_ret < 0)
2853 pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
2854
2855 ++trace->nr_events_printed;
2856
2857 if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
2858 evsel__disable(evsel);
2859 evsel__close(evsel);
2860 }
2861out:
2862 thread__put(thread);
2863 return 0;
2864}
2865
2866static void print_location(FILE *f, struct perf_sample *sample,
2867 struct addr_location *al,
2868 bool print_dso, bool print_sym)
2869{
2870
2871 if ((verbose > 0 || print_dso) && al->map)
2872 fprintf(f, "%s@", al->map->dso->long_name);
2873
2874 if ((verbose > 0 || print_sym) && al->sym)
2875 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2876 al->addr - al->sym->start);
2877 else if (al->map)
2878 fprintf(f, "0x%" PRIx64, al->addr);
2879 else
2880 fprintf(f, "0x%" PRIx64, sample->addr);
2881}
2882
2883static int trace__pgfault(struct trace *trace,
2884 struct evsel *evsel,
2885 union perf_event *event __maybe_unused,
2886 struct perf_sample *sample)
2887{
2888 struct thread *thread;
2889 struct addr_location al;
2890 char map_type = 'd';
2891 struct thread_trace *ttrace;
2892 int err = -1;
2893 int callchain_ret = 0;
2894
2895 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2896
2897 if (sample->callchain) {
2898 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2899 if (callchain_ret == 0) {
2900 if (callchain_cursor.nr < trace->min_stack)
2901 goto out_put;
2902 callchain_ret = 1;
2903 }
2904 }
2905
2906 ttrace = thread__trace(thread, trace->output);
2907 if (ttrace == NULL)
2908 goto out_put;
2909
2910 if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2911 ttrace->pfmaj++;
2912 else
2913 ttrace->pfmin++;
2914
2915 if (trace->summary_only)
2916 goto out;
2917
2918 thread__find_symbol(thread, sample->cpumode, sample->ip, &al);
2919
2920 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
2921
2922 fprintf(trace->output, "%sfault [",
2923 evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2924 "maj" : "min");
2925
2926 print_location(trace->output, sample, &al, false, true);
2927
2928 fprintf(trace->output, "] => ");
2929
2930 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
2931
2932 if (!al.map) {
2933 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
2934
2935 if (al.map)
2936 map_type = 'x';
2937 else
2938 map_type = '?';
2939 }
2940
2941 print_location(trace->output, sample, &al, true, false);
2942
2943 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2944
2945 if (callchain_ret > 0)
2946 trace__fprintf_callchain(trace, sample);
2947 else if (callchain_ret < 0)
2948 pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
2949
2950 ++trace->nr_events_printed;
2951out:
2952 err = 0;
2953out_put:
2954 thread__put(thread);
2955 return err;
2956}
2957
2958static void trace__set_base_time(struct trace *trace,
2959 struct evsel *evsel,
2960 struct perf_sample *sample)
2961{
2962
2963
2964
2965
2966
2967
2968
2969
2970 if (trace->base_time == 0 && !trace->full_time &&
2971 (evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
2972 trace->base_time = sample->time;
2973}
2974
2975static int trace__process_sample(struct perf_tool *tool,
2976 union perf_event *event,
2977 struct perf_sample *sample,
2978 struct evsel *evsel,
2979 struct machine *machine __maybe_unused)
2980{
2981 struct trace *trace = container_of(tool, struct trace, tool);
2982 struct thread *thread;
2983 int err = 0;
2984
2985 tracepoint_handler handler = evsel->handler;
2986
2987 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2988 if (thread && thread__is_filtered(thread))
2989 goto out;
2990
2991 trace__set_base_time(trace, evsel, sample);
2992
2993 if (handler) {
2994 ++trace->nr_events;
2995 handler(trace, evsel, event, sample);
2996 }
2997out:
2998 thread__put(thread);
2999 return err;
3000}
3001
3002static int trace__record(struct trace *trace, int argc, const char **argv)
3003{
3004 unsigned int rec_argc, i, j;
3005 const char **rec_argv;
3006 const char * const record_args[] = {
3007 "record",
3008 "-R",
3009 "-m", "1024",
3010 "-c", "1",
3011 };
3012 pid_t pid = getpid();
3013 char *filter = asprintf__tp_filter_pids(1, &pid);
3014 const char * const sc_args[] = { "-e", };
3015 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
3016 const char * const majpf_args[] = { "-e", "major-faults" };
3017 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
3018 const char * const minpf_args[] = { "-e", "minor-faults" };
3019 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
3020 int err = -1;
3021
3022
3023 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 3 +
3024 majpf_args_nr + minpf_args_nr + argc;
3025 rec_argv = calloc(rec_argc + 1, sizeof(char *));
3026
3027 if (rec_argv == NULL || filter == NULL)
3028 goto out_free;
3029
3030 j = 0;
3031 for (i = 0; i < ARRAY_SIZE(record_args); i++)
3032 rec_argv[j++] = record_args[i];
3033
3034 if (trace->trace_syscalls) {
3035 for (i = 0; i < sc_args_nr; i++)
3036 rec_argv[j++] = sc_args[i];
3037
3038
3039 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
3040 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
3041 else if (is_valid_tracepoint("syscalls:sys_enter"))
3042 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
3043 else {
3044 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
3045 goto out_free;
3046 }
3047 }
3048
3049 rec_argv[j++] = "--filter";
3050 rec_argv[j++] = filter;
3051
3052 if (trace->trace_pgfaults & TRACE_PFMAJ)
3053 for (i = 0; i < majpf_args_nr; i++)
3054 rec_argv[j++] = majpf_args[i];
3055
3056 if (trace->trace_pgfaults & TRACE_PFMIN)
3057 for (i = 0; i < minpf_args_nr; i++)
3058 rec_argv[j++] = minpf_args[i];
3059
3060 for (i = 0; i < (unsigned int)argc; i++)
3061 rec_argv[j++] = argv[i];
3062
3063 err = cmd_record(j, rec_argv);
3064out_free:
3065 free(filter);
3066 free(rec_argv);
3067 return err;
3068}
3069
3070static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
3071
3072static bool evlist__add_vfs_getname(struct evlist *evlist)
3073{
3074 bool found = false;
3075 struct evsel *evsel, *tmp;
3076 struct parse_events_error err;
3077 int ret;
3078
3079 parse_events_error__init(&err);
3080 ret = parse_events(evlist, "probe:vfs_getname*", &err);
3081 parse_events_error__exit(&err);
3082 if (ret)
3083 return false;
3084
3085 evlist__for_each_entry_safe(evlist, evsel, tmp) {
3086 if (!strstarts(evsel__name(evsel), "probe:vfs_getname"))
3087 continue;
3088
3089 if (evsel__field(evsel, "pathname")) {
3090 evsel->handler = trace__vfs_getname;
3091 found = true;
3092 continue;
3093 }
3094
3095 list_del_init(&evsel->core.node);
3096 evsel->evlist = NULL;
3097 evsel__delete(evsel);
3098 }
3099
3100 return found;
3101}
3102
3103static struct evsel *evsel__new_pgfault(u64 config)
3104{
3105 struct evsel *evsel;
3106 struct perf_event_attr attr = {
3107 .type = PERF_TYPE_SOFTWARE,
3108 .mmap_data = 1,
3109 };
3110
3111 attr.config = config;
3112 attr.sample_period = 1;
3113
3114 event_attr_init(&attr);
3115
3116 evsel = evsel__new(&attr);
3117 if (evsel)
3118 evsel->handler = trace__pgfault;
3119
3120 return evsel;
3121}
3122
3123static void evlist__free_syscall_tp_fields(struct evlist *evlist)
3124{
3125 struct evsel *evsel;
3126
3127 evlist__for_each_entry(evlist, evsel) {
3128 struct evsel_trace *et = evsel->priv;
3129
3130 if (!et || !evsel->tp_format || strcmp(evsel->tp_format->system, "syscalls"))
3131 continue;
3132
3133 free(et->fmt);
3134 free(et);
3135 }
3136}
3137
3138static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
3139{
3140 const u32 type = event->header.type;
3141 struct evsel *evsel;
3142
3143 if (type != PERF_RECORD_SAMPLE) {
3144 trace__process_event(trace, trace->host, event, sample);
3145 return;
3146 }
3147
3148 evsel = evlist__id2evsel(trace->evlist, sample->id);
3149 if (evsel == NULL) {
3150 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
3151 return;
3152 }
3153
3154 if (evswitch__discard(&trace->evswitch, evsel))
3155 return;
3156
3157 trace__set_base_time(trace, evsel, sample);
3158
3159 if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
3160 sample->raw_data == NULL) {
3161 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
3162 evsel__name(evsel), sample->tid,
3163 sample->cpu, sample->raw_size);
3164 } else {
3165 tracepoint_handler handler = evsel->handler;
3166 handler(trace, evsel, event, sample);
3167 }
3168
3169 if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
3170 interrupted = true;
3171}
3172
3173static int trace__add_syscall_newtp(struct trace *trace)
3174{
3175 int ret = -1;
3176 struct evlist *evlist = trace->evlist;
3177 struct evsel *sys_enter, *sys_exit;
3178
3179 sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
3180 if (sys_enter == NULL)
3181 goto out;
3182
3183 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
3184 goto out_delete_sys_enter;
3185
3186 sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
3187 if (sys_exit == NULL)
3188 goto out_delete_sys_enter;
3189
3190 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
3191 goto out_delete_sys_exit;
3192
3193 evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
3194 evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
3195
3196 evlist__add(evlist, sys_enter);
3197 evlist__add(evlist, sys_exit);
3198
3199 if (callchain_param.enabled && !trace->kernel_syscallchains) {
3200
3201
3202
3203
3204
3205 sys_exit->core.attr.exclude_callchain_kernel = 1;
3206 }
3207
3208 trace->syscalls.events.sys_enter = sys_enter;
3209 trace->syscalls.events.sys_exit = sys_exit;
3210
3211 ret = 0;
3212out:
3213 return ret;
3214
3215out_delete_sys_exit:
3216 evsel__delete_priv(sys_exit);
3217out_delete_sys_enter:
3218 evsel__delete_priv(sys_enter);
3219 goto out;
3220}
3221
3222static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
3223{
3224 int err = -1;
3225 struct evsel *sys_exit;
3226 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
3227 trace->ev_qualifier_ids.nr,
3228 trace->ev_qualifier_ids.entries);
3229
3230 if (filter == NULL)
3231 goto out_enomem;
3232
3233 if (!evsel__append_tp_filter(trace->syscalls.events.sys_enter, filter)) {
3234 sys_exit = trace->syscalls.events.sys_exit;
3235 err = evsel__append_tp_filter(sys_exit, filter);
3236 }
3237
3238 free(filter);
3239out:
3240 return err;
3241out_enomem:
3242 errno = ENOMEM;
3243 goto out;
3244}
3245
3246#ifdef HAVE_LIBBPF_SUPPORT
3247static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
3248{
3249 if (trace->bpf_obj == NULL)
3250 return NULL;
3251
3252 return bpf_object__find_map_by_name(trace->bpf_obj, name);
3253}
3254
3255static void trace__set_bpf_map_filtered_pids(struct trace *trace)
3256{
3257 trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
3258}
3259
3260static void trace__set_bpf_map_syscalls(struct trace *trace)
3261{
3262 trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
3263 trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
3264 trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
3265}
3266
3267static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
3268{
3269 struct bpf_program *pos, *prog = NULL;
3270 const char *sec_name;
3271
3272 if (trace->bpf_obj == NULL)
3273 return NULL;
3274
3275 bpf_object__for_each_program(pos, trace->bpf_obj) {
3276 sec_name = bpf_program__section_name(pos);
3277 if (sec_name && !strcmp(sec_name, name)) {
3278 prog = pos;
3279 break;
3280 }
3281 }
3282
3283 return prog;
3284}
3285
3286static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
3287 const char *prog_name, const char *type)
3288{
3289 struct bpf_program *prog;
3290
3291 if (prog_name == NULL) {
3292 char default_prog_name[256];
3293 scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
3294 prog = trace__find_bpf_program_by_title(trace, default_prog_name);
3295 if (prog != NULL)
3296 goto out_found;
3297 if (sc->fmt && sc->fmt->alias) {
3298 scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
3299 prog = trace__find_bpf_program_by_title(trace, default_prog_name);
3300 if (prog != NULL)
3301 goto out_found;
3302 }
3303 goto out_unaugmented;
3304 }
3305
3306 prog = trace__find_bpf_program_by_title(trace, prog_name);
3307
3308 if (prog != NULL) {
3309out_found:
3310 return prog;
3311 }
3312
3313 pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
3314 prog_name, type, sc->name);
3315out_unaugmented:
3316 return trace->syscalls.unaugmented_prog;
3317}
3318
3319static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
3320{
3321 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3322
3323 if (sc == NULL)
3324 return;
3325
3326 sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter");
3327 sc->bpf_prog.sys_exit = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit : NULL, "exit");
3328}
3329
3330static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
3331{
3332 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3333 return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
3334}
3335
3336static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
3337{
3338 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3339 return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
3340}
3341
3342static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
3343{
3344 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3345 int arg = 0;
3346
3347 if (sc == NULL)
3348 goto out;
3349
3350 for (; arg < sc->nr_args; ++arg) {
3351 entry->string_args_len[arg] = 0;
3352 if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) {
3353
3354 entry->string_args_len[arg] = PATH_MAX;
3355 }
3356 }
3357out:
3358 for (; arg < 6; ++arg)
3359 entry->string_args_len[arg] = 0;
3360}
3361static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
3362{
3363 int fd = bpf_map__fd(trace->syscalls.map);
3364 struct bpf_map_syscall_entry value = {
3365 .enabled = !trace->not_ev_qualifier,
3366 };
3367 int err = 0;
3368 size_t i;
3369
3370 for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
3371 int key = trace->ev_qualifier_ids.entries[i];
3372
3373 if (value.enabled) {
3374 trace__init_bpf_map_syscall_args(trace, key, &value);
3375 trace__init_syscall_bpf_progs(trace, key);
3376 }
3377
3378 err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
3379 if (err)
3380 break;
3381 }
3382
3383 return err;
3384}
3385
3386static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
3387{
3388 int fd = bpf_map__fd(trace->syscalls.map);
3389 struct bpf_map_syscall_entry value = {
3390 .enabled = enabled,
3391 };
3392 int err = 0, key;
3393
3394 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3395 if (enabled)
3396 trace__init_bpf_map_syscall_args(trace, key, &value);
3397
3398 err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
3399 if (err)
3400 break;
3401 }
3402
3403 return err;
3404}
3405
3406static int trace__init_syscalls_bpf_map(struct trace *trace)
3407{
3408 bool enabled = true;
3409
3410 if (trace->ev_qualifier_ids.nr)
3411 enabled = trace->not_ev_qualifier;
3412
3413 return __trace__init_syscalls_bpf_map(trace, enabled);
3414}
3415
3416static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
3417{
3418 struct tep_format_field *field, *candidate_field;
3419 int id;
3420
3421
3422
3423
3424 for (field = sc->args; field; field = field->next) {
3425 if (field->flags & TEP_FIELD_IS_POINTER)
3426 goto try_to_find_pair;
3427 }
3428
3429 return NULL;
3430
3431try_to_find_pair:
3432 for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
3433 struct syscall *pair = trace__syscall_info(trace, NULL, id);
3434 struct bpf_program *pair_prog;
3435 bool is_candidate = false;
3436
3437 if (pair == NULL || pair == sc ||
3438 pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
3439 continue;
3440
3441 for (field = sc->args, candidate_field = pair->args;
3442 field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
3443 bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
3444 candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
3445
3446 if (is_pointer) {
3447 if (!candidate_is_pointer) {
3448
3449 continue;
3450 }
3451 } else {
3452 if (candidate_is_pointer) {
3453
3454 goto next_candidate;
3455 }
3456 continue;
3457 }
3458
3459 if (strcmp(field->type, candidate_field->type))
3460 goto next_candidate;
3461
3462 is_candidate = true;
3463 }
3464
3465 if (!is_candidate)
3466 goto next_candidate;
3467
3468
3469
3470
3471
3472
3473 if (candidate_field) {
3474 for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
3475 if (candidate_field->flags & TEP_FIELD_IS_POINTER)
3476 goto next_candidate;
3477 }
3478
3479 pair_prog = pair->bpf_prog.sys_enter;
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489 if (pair_prog == NULL) {
3490 pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
3491 if (pair_prog == trace->syscalls.unaugmented_prog)
3492 goto next_candidate;
3493 }
3494
3495 pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
3496 return pair_prog;
3497 next_candidate:
3498 continue;
3499 }
3500
3501 return NULL;
3502}
3503
3504static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
3505{
3506 int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
3507 map_exit_fd = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
3508 int err = 0, key;
3509
3510 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3511 int prog_fd;
3512
3513 if (!trace__syscall_enabled(trace, key))
3514 continue;
3515
3516 trace__init_syscall_bpf_progs(trace, key);
3517
3518
3519 prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
3520 err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3521 if (err)
3522 break;
3523 prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
3524 err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
3525 if (err)
3526 break;
3527 }
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3558 struct syscall *sc = trace__syscall_info(trace, NULL, key);
3559 struct bpf_program *pair_prog;
3560 int prog_fd;
3561
3562 if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
3563 continue;
3564
3565
3566
3567
3568
3569 if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
3570 continue;
3571
3572
3573
3574
3575
3576 pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
3577 if (pair_prog == NULL)
3578 continue;
3579
3580 sc->bpf_prog.sys_enter = pair_prog;
3581
3582
3583
3584
3585
3586 prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
3587 err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3588 if (err)
3589 break;
3590 }
3591
3592
3593 return err;
3594}
3595
3596static void trace__delete_augmented_syscalls(struct trace *trace)
3597{
3598 struct evsel *evsel, *tmp;
3599
3600 evlist__remove(trace->evlist, trace->syscalls.events.augmented);
3601 evsel__delete(trace->syscalls.events.augmented);
3602 trace->syscalls.events.augmented = NULL;
3603
3604 evlist__for_each_entry_safe(trace->evlist, tmp, evsel) {
3605 if (evsel->bpf_obj == trace->bpf_obj) {
3606 evlist__remove(trace->evlist, evsel);
3607 evsel__delete(evsel);
3608 }
3609
3610 }
3611
3612 bpf_object__close(trace->bpf_obj);
3613 trace->bpf_obj = NULL;
3614}
3615#else
3616static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused,
3617 const char *name __maybe_unused)
3618{
3619 return NULL;
3620}
3621
3622static void trace__set_bpf_map_filtered_pids(struct trace *trace __maybe_unused)
3623{
3624}
3625
3626static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused)
3627{
3628}
3629
3630static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
3631{
3632 return 0;
3633}
3634
3635static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
3636{
3637 return 0;
3638}
3639
3640static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
3641 const char *name __maybe_unused)
3642{
3643 return NULL;
3644}
3645
3646static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
3647{
3648 return 0;
3649}
3650
3651static void trace__delete_augmented_syscalls(struct trace *trace __maybe_unused)
3652{
3653}
3654#endif
3655
3656static bool trace__only_augmented_syscalls_evsels(struct trace *trace)
3657{
3658 struct evsel *evsel;
3659
3660 evlist__for_each_entry(trace->evlist, evsel) {
3661 if (evsel == trace->syscalls.events.augmented ||
3662 evsel->bpf_obj == trace->bpf_obj)
3663 continue;
3664
3665 return false;
3666 }
3667
3668 return true;
3669}
3670
3671static int trace__set_ev_qualifier_filter(struct trace *trace)
3672{
3673 if (trace->syscalls.map)
3674 return trace__set_ev_qualifier_bpf_filter(trace);
3675 if (trace->syscalls.events.sys_enter)
3676 return trace__set_ev_qualifier_tp_filter(trace);
3677 return 0;
3678}
3679
3680static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused,
3681 size_t npids __maybe_unused, pid_t *pids __maybe_unused)
3682{
3683 int err = 0;
3684#ifdef HAVE_LIBBPF_SUPPORT
3685 bool value = true;
3686 int map_fd = bpf_map__fd(map);
3687 size_t i;
3688
3689 for (i = 0; i < npids; ++i) {
3690 err = bpf_map_update_elem(map_fd, &pids[i], &value, BPF_ANY);
3691 if (err)
3692 break;
3693 }
3694#endif
3695 return err;
3696}
3697
3698static int trace__set_filter_loop_pids(struct trace *trace)
3699{
3700 unsigned int nr = 1, err;
3701 pid_t pids[32] = {
3702 getpid(),
3703 };
3704 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
3705
3706 while (thread && nr < ARRAY_SIZE(pids)) {
3707 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
3708
3709 if (parent == NULL)
3710 break;
3711
3712 if (!strcmp(thread__comm_str(parent), "sshd") ||
3713 strstarts(thread__comm_str(parent), "gnome-terminal")) {
3714 pids[nr++] = parent->tid;
3715 break;
3716 }
3717 thread = parent;
3718 }
3719
3720 err = evlist__append_tp_filter_pids(trace->evlist, nr, pids);
3721 if (!err && trace->filter_pids.map)
3722 err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
3723
3724 return err;
3725}
3726
3727static int trace__set_filter_pids(struct trace *trace)
3728{
3729 int err = 0;
3730
3731
3732
3733
3734
3735
3736 if (trace->filter_pids.nr > 0) {
3737 err = evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
3738 trace->filter_pids.entries);
3739 if (!err && trace->filter_pids.map) {
3740 err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
3741 trace->filter_pids.entries);
3742 }
3743 } else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) {
3744 err = trace__set_filter_loop_pids(trace);
3745 }
3746
3747 return err;
3748}
3749
3750static int __trace__deliver_event(struct trace *trace, union perf_event *event)
3751{
3752 struct evlist *evlist = trace->evlist;
3753 struct perf_sample sample;
3754 int err = evlist__parse_sample(evlist, event, &sample);
3755
3756 if (err)
3757 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
3758 else
3759 trace__handle_event(trace, event, &sample);
3760
3761 return 0;
3762}
3763
3764static int __trace__flush_events(struct trace *trace)
3765{
3766 u64 first = ordered_events__first_time(&trace->oe.data);
3767 u64 flush = trace->oe.last - NSEC_PER_SEC;
3768
3769
3770 if (first && first < flush)
3771 return ordered_events__flush_time(&trace->oe.data, flush);
3772
3773 return 0;
3774}
3775
3776static int trace__flush_events(struct trace *trace)
3777{
3778 return !trace->sort_events ? 0 : __trace__flush_events(trace);
3779}
3780
3781static int trace__deliver_event(struct trace *trace, union perf_event *event)
3782{
3783 int err;
3784
3785 if (!trace->sort_events)
3786 return __trace__deliver_event(trace, event);
3787
3788 err = evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
3789 if (err && err != -1)
3790 return err;
3791
3792 err = ordered_events__queue(&trace->oe.data, event, trace->oe.last, 0, NULL);
3793 if (err)
3794 return err;
3795
3796 return trace__flush_events(trace);
3797}
3798
3799static int ordered_events__deliver_event(struct ordered_events *oe,
3800 struct ordered_event *event)
3801{
3802 struct trace *trace = container_of(oe, struct trace, oe.data);
3803
3804 return __trace__deliver_event(trace, event->event);
3805}
3806
3807static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg)
3808{
3809 struct tep_format_field *field;
3810 struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
3811
3812 if (evsel->tp_format == NULL || fmt == NULL)
3813 return NULL;
3814
3815 for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt)
3816 if (strcmp(field->name, arg) == 0)
3817 return fmt;
3818
3819 return NULL;
3820}
3821
3822static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel)
3823{
3824 char *tok, *left = evsel->filter, *new_filter = evsel->filter;
3825
3826 while ((tok = strpbrk(left, "=<>!")) != NULL) {
3827 char *right = tok + 1, *right_end;
3828
3829 if (*right == '=')
3830 ++right;
3831
3832 while (isspace(*right))
3833 ++right;
3834
3835 if (*right == '\0')
3836 break;
3837
3838 while (!isalpha(*left))
3839 if (++left == tok) {
3840
3841
3842
3843
3844 return 0;
3845 }
3846
3847 right_end = right + 1;
3848 while (isalnum(*right_end) || *right_end == '_' || *right_end == '|')
3849 ++right_end;
3850
3851 if (isalpha(*right)) {
3852 struct syscall_arg_fmt *fmt;
3853 int left_size = tok - left,
3854 right_size = right_end - right;
3855 char arg[128];
3856
3857 while (isspace(left[left_size - 1]))
3858 --left_size;
3859
3860 scnprintf(arg, sizeof(arg), "%.*s", left_size, left);
3861
3862 fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg);
3863 if (fmt == NULL) {
3864 pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
3865 arg, evsel->name, evsel->filter);
3866 return -1;
3867 }
3868
3869 pr_debug2("trying to expand \"%s\" \"%.*s\" \"%.*s\" -> ",
3870 arg, (int)(right - tok), tok, right_size, right);
3871
3872 if (fmt->strtoul) {
3873 u64 val;
3874 struct syscall_arg syscall_arg = {
3875 .parm = fmt->parm,
3876 };
3877
3878 if (fmt->strtoul(right, right_size, &syscall_arg, &val)) {
3879 char *n, expansion[19];
3880 int expansion_lenght = scnprintf(expansion, sizeof(expansion), "%#" PRIx64, val);
3881 int expansion_offset = right - new_filter;
3882
3883 pr_debug("%s", expansion);
3884
3885 if (asprintf(&n, "%.*s%s%s", expansion_offset, new_filter, expansion, right_end) < 0) {
3886 pr_debug(" out of memory!\n");
3887 free(new_filter);
3888 return -1;
3889 }
3890 if (new_filter != evsel->filter)
3891 free(new_filter);
3892 left = n + expansion_offset + expansion_lenght;
3893 new_filter = n;
3894 } else {
3895 pr_err("\"%.*s\" not found for \"%s\" in \"%s\", can't set filter \"%s\"\n",
3896 right_size, right, arg, evsel->name, evsel->filter);
3897 return -1;
3898 }
3899 } else {
3900 pr_err("No resolver (strtoul) for \"%s\" in \"%s\", can't set filter \"%s\"\n",
3901 arg, evsel->name, evsel->filter);
3902 return -1;
3903 }
3904
3905 pr_debug("\n");
3906 } else {
3907 left = right_end;
3908 }
3909 }
3910
3911 if (new_filter != evsel->filter) {
3912 pr_debug("New filter for %s: %s\n", evsel->name, new_filter);
3913 evsel__set_filter(evsel, new_filter);
3914 free(new_filter);
3915 }
3916
3917 return 0;
3918}
3919
3920static int trace__expand_filters(struct trace *trace, struct evsel **err_evsel)
3921{
3922 struct evlist *evlist = trace->evlist;
3923 struct evsel *evsel;
3924
3925 evlist__for_each_entry(evlist, evsel) {
3926 if (evsel->filter == NULL)
3927 continue;
3928
3929 if (trace__expand_filter(trace, evsel)) {
3930 *err_evsel = evsel;
3931 return -1;
3932 }
3933 }
3934
3935 return 0;
3936}
3937
/*
 * Live tracing: set up the events, open and mmap them, start the workload
 * given in @argv when @argc > 0, and then consume the ring buffers until the
 * 'done' or 'interrupted' flags say otherwise.
 *
 * Every exit path, success or failure, goes through out_delete_evlist, which
 * releases the symbol state, deletes the evlist, drops the cgroup reference
 * and clears trace->evlist and trace->live.
 *
 * Returns the value left in 'err' by the last step executed; it starts as -1.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct evlist *evlist = trace->evlist;
	struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;
	bool draining = false;

	trace->live = true;

	/* The syscall tracepoints are only added when not in raw augmented syscalls mode. */
	if (!trace->raw_augmented_syscalls) {
		if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
			goto out_error_raw_syscalls;

		if (trace->trace_syscalls)
			trace->vfs_getname = evlist__add_vfs_getname(evlist);
	}

	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
		evlist__add(evlist, pgfault_maj);
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
		evlist__add(evlist, pgfault_min);
	}

	/* Ignore missing threads when a uid or a pid target was specified. */
	trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid;

	if (trace->sched &&
	    evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	/* When a cgroup was requested, set it as the evlist's default cgroup. */
	if (trace->cgroup)
		evlist__set_default_cgroup(trace->evlist, trace->cgroup);

	err = evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	evlist__config(evlist, &trace->opts, &callchain_param);

	if (forks) {
		err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
		/* Remember the workload pid in the file scope 'workload_pid'. */
		workload_pid = evlist->workload.pid;
	}

	err = evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		/* NOTE(review): this also prints the evlist open error text for errno. */
		goto out_error_open;
	}

	err = trace__set_filter_pids(trace);
	if (err < 0)
		goto out_error_mem;

	/* The results of the two BPF map initializations below are not checked. */
	if (trace->syscalls.map)
		trace__init_syscalls_bpf_map(trace);

	if (trace->syscalls.prog_array.sys_enter)
		trace__init_syscalls_bpf_prog_array_maps(trace);

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		if (trace->syscalls.events.sys_exit) {
			pr_debug("event qualifier tracepoint filter: %s\n",
				 trace->syscalls.events.sys_exit->filter);
		}
	}

	/*
	 * fd_path_disabled is set when the "close" syscall is not enabled.
	 * NOTE(review): presumably because fd -> path associations can't be
	 * invalidated without seeing close() -- confirm.
	 */
	trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));

	err = trace__expand_filters(trace, &evsel);
	if (err)
		goto out_delete_evlist;
	err = evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	if (trace->dump.map)
		bpf_map__fprintf(trace->dump.map, trace->output);

	err = evlist__mmap(evlist, trace->opts.mmap_pages);
	if (err < 0)
		goto out_error_mmap;

	/* With a target and no initial delay the events are enabled right away. */
	if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
		evlist__enable(evlist);

	if (forks)
		evlist__start_workload(evlist);

	/* initial_delay is multiplied by 1000 for usleep(), i.e. treated as msecs. */
	if (trace->opts.initial_delay) {
		usleep(trace->opts.initial_delay * 1000);
		evlist__enable(evlist);
	}

	trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
				  evlist->core.threads->nr > 1 ||
				  evlist__first(evlist)->core.attr.inherit;

	/*
	 * Evsels that have callchains but no sample_max_stack of their own
	 * get the trace wide max_stack.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel__has_callchain(evsel) &&
		    evsel->core.attr.sample_max_stack == 0)
			evsel->core.attr.sample_max_stack = trace->max_stack;
	}
again:
	/* Snapshot the event count to detect whether this pass read anything. */
	before = trace->nr_events;

	for (i = 0; i < evlist->core.nr_mmaps; i++) {
		union perf_event *event;
		struct mmap *md;

		md = &evlist->mmap[i];
		if (perf_mmap__read_init(&md->core) < 0)
			continue;

		while ((event = perf_mmap__read_event(&md->core)) != NULL) {
			++trace->nr_events;

			err = trace__deliver_event(trace, event);
			if (err)
				goto out_disable;

			perf_mmap__consume(&md->core);

			if (interrupted)
				goto out_disable;

			/* Once 'done' is set, stop producing events and just drain. */
			if (done && !draining) {
				evlist__disable(evlist);
				draining = true;
			}
		}
		perf_mmap__read_done(&md->core);
	}

	if (trace->nr_events == before) {
		/* Nothing was read: block in poll, or wait 100 when 'done' is set. */
		int timeout = done ? 100 : -1;

		if (!draining && evlist__poll(evlist, timeout) > 0) {
			/* No pollable fds left after filtering out the failed ones: drain. */
			if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
				draining = true;

			goto again;
		} else {
			if (trace__flush_events(trace))
				goto out_disable;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	evlist__disable(evlist);

	/* Deliver whatever is still queued when sorting events. */
	if (trace->sort_events)
		ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	trace__symbols__exit(trace);
	evlist__free_syscall_tp_fields(evlist);
	evlist__delete(evlist);
	cgroup__put(trace->cgroup);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * Error reporting labels. The block below is only entered through gotos and
 * gives the labels inside it a shared errbuf; each of them ends by jumping
 * back to out_delete_evlist.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	/* 'evsel' was set by evlist__apply_filters() to the one that failed. */
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, evsel__name(evsel), errno,
		str_error_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
4231
4232static int trace__replay(struct trace *trace)
4233{
4234 const struct evsel_str_handler handlers[] = {
4235 { "probe:vfs_getname", trace__vfs_getname, },
4236 };
4237 struct perf_data data = {
4238 .path = input_name,
4239 .mode = PERF_DATA_MODE_READ,
4240 .force = trace->force,
4241 };
4242 struct perf_session *session;
4243 struct evsel *evsel;
4244 int err = -1;
4245
4246 trace->tool.sample = trace__process_sample;
4247 trace->tool.mmap = perf_event__process_mmap;
4248 trace->tool.mmap2 = perf_event__process_mmap2;
4249 trace->tool.comm = perf_event__process_comm;
4250 trace->tool.exit = perf_event__process_exit;
4251 trace->tool.fork = perf_event__process_fork;
4252 trace->tool.attr = perf_event__process_attr;
4253 trace->tool.tracing_data = perf_event__process_tracing_data;
4254 trace->tool.build_id = perf_event__process_build_id;
4255 trace->tool.namespaces = perf_event__process_namespaces;
4256
4257 trace->tool.ordered_events = true;
4258 trace->tool.ordering_requires_timestamps = true;
4259
4260
4261 trace->multiple_threads = true;
4262
4263 session = perf_session__new(&data, &trace->tool);
4264 if (IS_ERR(session))
4265 return PTR_ERR(session);
4266
4267 if (trace->opts.target.pid)
4268 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
4269
4270 if (trace->opts.target.tid)
4271 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
4272
4273 if (symbol__init(&session->header.env) < 0)
4274 goto out;
4275
4276 trace->host = &session->machines.host;
4277
4278 err = perf_session__set_tracepoints_handlers(session, handlers);
4279 if (err)
4280 goto out;
4281
4282 evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter");
4283 trace->syscalls.events.sys_enter = evsel;
4284
4285 if (evsel == NULL)
4286 evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter");
4287
4288 if (evsel &&
4289 (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
4290 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
4291 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
4292 goto out;
4293 }
4294
4295 evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit");
4296 trace->syscalls.events.sys_exit = evsel;
4297 if (evsel == NULL)
4298 evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit");
4299 if (evsel &&
4300 (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
4301 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
4302 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
4303 goto out;
4304 }
4305
4306 evlist__for_each_entry(session->evlist, evsel) {
4307 if (evsel->core.attr.type == PERF_TYPE_SOFTWARE &&
4308 (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
4309 evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
4310 evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS))
4311 evsel->handler = trace__pgfault;
4312 }
4313
4314 setup_pager();
4315
4316 err = perf_session__process_events(session);
4317 if (err)
4318 pr_err("Failed to process events, error %d", err);
4319
4320 else if (trace->summary)
4321 trace__fprintf_thread_summary(trace, trace->output);
4322
4323out:
4324 perf_session__delete(session);
4325
4326 return err;
4327}
4328
/*
 * Print the heading of the per thread summary to @fp.
 * Returns the fprintf() result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
4337
4338DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
4339 struct syscall_stats *stats;
4340 double msecs;
4341 int syscall;
4342)
4343{
4344 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
4345 struct syscall_stats *stats = source->priv;
4346
4347 entry->syscall = source->i;
4348 entry->stats = stats;
4349 entry->msecs = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0;
4350}
4351
4352static size_t thread__dump_stats(struct thread_trace *ttrace,
4353 struct trace *trace, FILE *fp)
4354{
4355 size_t printed = 0;
4356 struct syscall *sc;
4357 struct rb_node *nd;
4358 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
4359
4360 if (syscall_stats == NULL)
4361 return 0;
4362
4363 printed += fprintf(fp, "\n");
4364
4365 printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
4366 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
4367 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
4368
4369 resort_rb__for_each_entry(nd, syscall_stats) {
4370 struct syscall_stats *stats = syscall_stats_entry->stats;
4371 if (stats) {
4372 double min = (double)(stats->stats.min) / NSEC_PER_MSEC;
4373 double max = (double)(stats->stats.max) / NSEC_PER_MSEC;
4374 double avg = avg_stats(&stats->stats);
4375 double pct;
4376 u64 n = (u64)stats->stats.n;
4377
4378 pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
4379 avg /= NSEC_PER_MSEC;
4380
4381 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
4382 printed += fprintf(fp, " %-15s", sc->name);
4383 printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f",
4384 n, stats->nr_failures, syscall_stats_entry->msecs, min, avg);
4385 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
4386
4387 if (trace->errno_summary && stats->nr_failures) {
4388 const char *arch_name = perf_env__arch(trace->host->env);
4389 int e;
4390
4391 for (e = 0; e < stats->max_errno; ++e) {
4392 if (stats->errnos[e] != 0)
4393 fprintf(fp, "\t\t\t\t%s: %d\n", arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]);
4394 }
4395 }
4396 }
4397 }
4398
4399 resort_rb__delete(syscall_stats);
4400 printed += fprintf(fp, "\n\n");
4401
4402 return printed;
4403}
4404
4405static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
4406{
4407 size_t printed = 0;
4408 struct thread_trace *ttrace = thread__priv(thread);
4409 double ratio;
4410
4411 if (ttrace == NULL)
4412 return 0;
4413
4414 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
4415
4416 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
4417 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
4418 printed += fprintf(fp, "%.1f%%", ratio);
4419 if (ttrace->pfmaj)
4420 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
4421 if (ttrace->pfmin)
4422 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
4423 if (trace->sched)
4424 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
4425 else if (fputc('\n', fp) != EOF)
4426 ++printed;
4427
4428 printed += thread__dump_stats(ttrace, trace, fp);
4429
4430 return printed;
4431}
4432
4433static unsigned long thread__nr_events(struct thread_trace *ttrace)
4434{
4435 return ttrace ? ttrace->nr_events : 0;
4436}
4437
4438DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
4439 struct thread *thread;
4440)
4441{
4442 entry->thread = rb_entry(nd, struct thread, rb_node);
4443}
4444
4445static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
4446{
4447 size_t printed = trace__fprintf_threads_header(fp);
4448 struct rb_node *nd;
4449 int i;
4450
4451 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
4452 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
4453
4454 if (threads == NULL) {
4455 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
4456 return 0;
4457 }
4458
4459 resort_rb__for_each_entry(nd, threads)
4460 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
4461
4462 resort_rb__delete(threads);
4463 }
4464 return printed;
4465}
4466
4467static int trace__set_duration(const struct option *opt, const char *str,
4468 int unset __maybe_unused)
4469{
4470 struct trace *trace = opt->value;
4471
4472 trace->duration_filter = atof(str);
4473 return 0;
4474}
4475
4476static int trace__set_filter_pids_from_option(const struct option *opt, const char *str,
4477 int unset __maybe_unused)
4478{
4479 int ret = -1;
4480 size_t i;
4481 struct trace *trace = opt->value;
4482
4483
4484
4485
4486 struct intlist *list = intlist__new(str);
4487
4488 if (list == NULL)
4489 return -1;
4490
4491 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
4492 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
4493
4494 if (trace->filter_pids.entries == NULL)
4495 goto out;
4496
4497 trace->filter_pids.entries[0] = getpid();
4498
4499 for (i = 1; i < trace->filter_pids.nr; ++i)
4500 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
4501
4502 intlist__delete(list);
4503 ret = 0;
4504out:
4505 return ret;
4506}
4507
4508static int trace__open_output(struct trace *trace, const char *filename)
4509{
4510 struct stat st;
4511
4512 if (!stat(filename, &st) && st.st_size) {
4513 char oldname[PATH_MAX];
4514
4515 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
4516 unlink(oldname);
4517 rename(filename, oldname);
4518 }
4519
4520 trace->output = fopen(filename, "w");
4521
4522 return trace->output == NULL ? -errno : 0;
4523}
4524
4525static int parse_pagefaults(const struct option *opt, const char *str,
4526 int unset __maybe_unused)
4527{
4528 int *trace_pgfaults = opt->value;
4529
4530 if (strcmp(str, "all") == 0)
4531 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
4532 else if (strcmp(str, "maj") == 0)
4533 *trace_pgfaults |= TRACE_PFMAJ;
4534 else if (strcmp(str, "min") == 0)
4535 *trace_pgfaults |= TRACE_PFMIN;
4536 else
4537 return -1;
4538
4539 return 0;
4540}
4541
4542static void evlist__set_default_evsel_handler(struct evlist *evlist, void *handler)
4543{
4544 struct evsel *evsel;
4545
4546 evlist__for_each_entry(evlist, evsel) {
4547 if (evsel->handler == NULL)
4548 evsel->handler = handler;
4549 }
4550}
4551
4552static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name)
4553{
4554 struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
4555
4556 if (fmt) {
4557 struct syscall_fmt *scfmt = syscall_fmt__find(name);
4558
4559 if (scfmt) {
4560 int skip = 0;
4561
4562 if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 ||
4563 strcmp(evsel->tp_format->format.fields->name, "nr") == 0)
4564 ++skip;
4565
4566 memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt));
4567 }
4568 }
4569}
4570
4571static int evlist__set_syscall_tp_fields(struct evlist *evlist)
4572{
4573 struct evsel *evsel;
4574
4575 evlist__for_each_entry(evlist, evsel) {
4576 if (evsel->priv || !evsel->tp_format)
4577 continue;
4578
4579 if (strcmp(evsel->tp_format->system, "syscalls")) {
4580 evsel__init_tp_arg_scnprintf(evsel);
4581 continue;
4582 }
4583
4584 if (evsel__init_syscall_tp(evsel))
4585 return -1;
4586
4587 if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
4588 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
4589
4590 if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
4591 return -1;
4592
4593 evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1);
4594 } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
4595 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
4596
4597 if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
4598 return -1;
4599
4600 evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1);
4601 }
4602 }
4603
4604 return 0;
4605}
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615static int trace__parse_events_option(const struct option *opt, const char *str,
4616 int unset __maybe_unused)
4617{
4618 struct trace *trace = (struct trace *)opt->value;
4619 const char *s = str;
4620 char *sep = NULL, *lists[2] = { NULL, NULL, };
4621 int len = strlen(str) + 1, err = -1, list, idx;
4622 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
4623 char group_name[PATH_MAX];
4624 struct syscall_fmt *fmt;
4625
4626 if (strace_groups_dir == NULL)
4627 return -1;
4628
4629 if (*s == '!') {
4630 ++s;
4631 trace->not_ev_qualifier = true;
4632 }
4633
4634 while (1) {
4635 if ((sep = strchr(s, ',')) != NULL)
4636 *sep = '\0';
4637
4638 list = 0;
4639 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
4640 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
4641 list = 1;
4642 goto do_concat;
4643 }
4644
4645 fmt = syscall_fmt__find_by_alias(s);
4646 if (fmt != NULL) {
4647 list = 1;
4648 s = fmt->name;
4649 } else {
4650 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
4651 if (access(group_name, R_OK) == 0)
4652 list = 1;
4653 }
4654do_concat:
4655 if (lists[list]) {
4656 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
4657 } else {
4658 lists[list] = malloc(len);
4659 if (lists[list] == NULL)
4660 goto out;
4661 strcpy(lists[list], s);
4662 }
4663
4664 if (!sep)
4665 break;
4666
4667 *sep = ',';
4668 s = sep + 1;
4669 }
4670
4671 if (lists[1] != NULL) {
4672 struct strlist_config slist_config = {
4673 .dirname = strace_groups_dir,
4674 };
4675
4676 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
4677 if (trace->ev_qualifier == NULL) {
4678 fputs("Not enough memory to parse event qualifier", trace->output);
4679 goto out;
4680 }
4681
4682 if (trace__validate_ev_qualifier(trace))
4683 goto out;
4684 trace->trace_syscalls = true;
4685 }
4686
4687 err = 0;
4688
4689 if (lists[0]) {
4690 struct option o = {
4691 .value = &trace->evlist,
4692 };
4693 err = parse_events_option(&o, lists[0], 0);
4694 }
4695out:
4696 free(strace_groups_dir);
4697 free(lists[0]);
4698 free(lists[1]);
4699 if (sep)
4700 *sep = ',';
4701
4702 return err;
4703}
4704
4705static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
4706{
4707 struct trace *trace = opt->value;
4708
4709 if (!list_empty(&trace->evlist->core.entries)) {
4710 struct option o = {
4711 .value = &trace->evlist,
4712 };
4713 return parse_cgroups(&o, str, unset);
4714 }
4715 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
4716
4717 return 0;
4718}
4719
4720static int trace__config(const char *var, const char *value, void *arg)
4721{
4722 struct trace *trace = arg;
4723 int err = 0;
4724
4725 if (!strcmp(var, "trace.add_events")) {
4726 trace->perfconfig_events = strdup(value);
4727 if (trace->perfconfig_events == NULL) {
4728 pr_err("Not enough memory for %s\n", "trace.add_events");
4729 return -1;
4730 }
4731 } else if (!strcmp(var, "trace.show_timestamp")) {
4732 trace->show_tstamp = perf_config_bool(var, value);
4733 } else if (!strcmp(var, "trace.show_duration")) {
4734 trace->show_duration = perf_config_bool(var, value);
4735 } else if (!strcmp(var, "trace.show_arg_names")) {
4736 trace->show_arg_names = perf_config_bool(var, value);
4737 if (!trace->show_arg_names)
4738 trace->show_zeros = true;
4739 } else if (!strcmp(var, "trace.show_zeros")) {
4740 bool new_show_zeros = perf_config_bool(var, value);
4741 if (!trace->show_arg_names && !new_show_zeros) {
4742 pr_warning("trace.show_zeros has to be set when trace.show_arg_names=no\n");
4743 goto out;
4744 }
4745 trace->show_zeros = new_show_zeros;
4746 } else if (!strcmp(var, "trace.show_prefix")) {
4747 trace->show_string_prefix = perf_config_bool(var, value);
4748 } else if (!strcmp(var, "trace.no_inherit")) {
4749 trace->opts.no_inherit = perf_config_bool(var, value);
4750 } else if (!strcmp(var, "trace.args_alignment")) {
4751 int args_alignment = 0;
4752 if (perf_config_int(&args_alignment, var, value) == 0)
4753 trace->args_alignment = args_alignment;
4754 } else if (!strcmp(var, "trace.tracepoint_beautifiers")) {
4755 if (strcasecmp(value, "libtraceevent") == 0)
4756 trace->libtraceevent_print = true;
4757 else if (strcasecmp(value, "libbeauty") == 0)
4758 trace->libtraceevent_print = false;
4759 }
4760out:
4761 return err;
4762}
4763
4764static void trace__exit(struct trace *trace)
4765{
4766 int i;
4767
4768 strlist__delete(trace->ev_qualifier);
4769 free(trace->ev_qualifier_ids.entries);
4770 if (trace->syscalls.table) {
4771 for (i = 0; i <= trace->sctbl->syscalls.max_id; i++)
4772 syscall__exit(&trace->syscalls.table[i]);
4773 free(trace->syscalls.table);
4774 }
4775 syscalltbl__delete(trace->sctbl);
4776 zfree(&trace->perfconfig_events);
4777}
4778
4779int cmd_trace(int argc, const char **argv)
4780{
4781 const char *trace_usage[] = {
4782 "perf trace [<options>] [<command>]",
4783 "perf trace [<options>] -- <command> [<options>]",
4784 "perf trace record [<options>] [<command>]",
4785 "perf trace record [<options>] -- <command> [<options>]",
4786 NULL
4787 };
4788 struct trace trace = {
4789 .opts = {
4790 .target = {
4791 .uid = UINT_MAX,
4792 .uses_mmap = true,
4793 },
4794 .user_freq = UINT_MAX,
4795 .user_interval = ULLONG_MAX,
4796 .no_buffering = true,
4797 .mmap_pages = UINT_MAX,
4798 },
4799 .output = stderr,
4800 .show_comm = true,
4801 .show_tstamp = true,
4802 .show_duration = true,
4803 .show_arg_names = true,
4804 .args_alignment = 70,
4805 .trace_syscalls = false,
4806 .kernel_syscallchains = false,
4807 .max_stack = UINT_MAX,
4808 .max_events = ULONG_MAX,
4809 };
4810 const char *map_dump_str = NULL;
4811 const char *output_name = NULL;
4812 const struct option trace_options[] = {
4813 OPT_CALLBACK('e', "event", &trace, "event",
4814 "event/syscall selector. use 'perf list' to list available events",
4815 trace__parse_events_option),
4816 OPT_CALLBACK(0, "filter", &trace.evlist, "filter",
4817 "event filter", parse_filter),
4818 OPT_BOOLEAN(0, "comm", &trace.show_comm,
4819 "show the thread COMM next to its id"),
4820 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
4821 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
4822 trace__parse_events_option),
4823 OPT_STRING('o', "output", &output_name, "file", "output file name"),
4824 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
4825 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
4826 "trace events on existing process id"),
4827 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
4828 "trace events on existing thread id"),
4829 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
4830 "pids to filter (by the kernel)", trace__set_filter_pids_from_option),
4831 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
4832 "system-wide collection from all CPUs"),
4833 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
4834 "list of cpus to monitor"),
4835 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
4836 "child tasks do not inherit counters"),
4837 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
4838 "number of mmap data pages", evlist__parse_mmap_pages),
4839 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
4840 "user to profile"),
4841 OPT_CALLBACK(0, "duration", &trace, "float",
4842 "show only events with duration > N.M ms",
4843 trace__set_duration),
4844#ifdef HAVE_LIBBPF_SUPPORT
4845 OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
4846#endif
4847 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
4848 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4849 OPT_BOOLEAN('T', "time", &trace.full_time,
4850 "Show full timestamp, not time relative to first start"),
4851 OPT_BOOLEAN(0, "failure", &trace.failure_only,
4852 "Show only syscalls that failed"),
4853 OPT_BOOLEAN('s', "summary", &trace.summary_only,
4854 "Show only syscall summary with statistics"),
4855 OPT_BOOLEAN('S', "with-summary", &trace.summary,
4856 "Show all syscalls and summary with statistics"),
4857 OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary,
4858 "Show errno stats per syscall, use with -s or -S"),
4859 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
4860 "Trace pagefaults", parse_pagefaults, "maj"),
4861 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
4862 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
4863 OPT_CALLBACK(0, "call-graph", &trace.opts,
4864 "record_mode[,record_size]", record_callchain_help,
4865 &record_parse_callchain_opt),
4866 OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print,
4867 "Use libtraceevent to print the tracepoint arguments."),
4868 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
4869 "Show the kernel callchains on the syscall exit path"),
4870 OPT_ULONG(0, "max-events", &trace.max_events,
4871 "Set the maximum number of events to print, exit after that is reached. "),
4872 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
4873 "Set the minimum stack depth when parsing the callchain, "
4874 "anything below the specified depth will be ignored."),
4875 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
4876 "Set the maximum stack depth when parsing the callchain, "
4877 "anything beyond the specified depth will be ignored. "
4878 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
4879 OPT_BOOLEAN(0, "sort-events", &trace.sort_events,
4880 "Sort batch of events before processing, use if getting out of order events"),
4881 OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
4882 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
4883 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
4884 "per thread proc mmap processing timeout in ms"),
4885 OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
4886 trace__parse_cgroups),
4887 OPT_INTEGER('D', "delay", &trace.opts.initial_delay,
4888 "ms to wait before starting measurement after program "
4889 "start"),
4890 OPTS_EVSWITCH(&trace.evswitch),
4891 OPT_END()
4892 };
4893 bool __maybe_unused max_stack_user_set = true;
4894 bool mmap_pages_user_set = true;
4895 struct evsel *evsel;
4896 const char * const trace_subcommands[] = { "record", NULL };
4897 int err = -1;
4898 char bf[BUFSIZ];
4899 struct sigaction sigchld_act;
4900
4901 signal(SIGSEGV, sighandler_dump_stack);
4902 signal(SIGFPE, sighandler_dump_stack);
4903 signal(SIGINT, sighandler_interrupt);
4904
4905 memset(&sigchld_act, 0, sizeof(sigchld_act));
4906 sigchld_act.sa_flags = SA_SIGINFO;
4907 sigchld_act.sa_sigaction = sighandler_chld;
4908 sigaction(SIGCHLD, &sigchld_act, NULL);
4909
4910 trace.evlist = evlist__new();
4911 trace.sctbl = syscalltbl__new();
4912
4913 if (trace.evlist == NULL || trace.sctbl == NULL) {
4914 pr_err("Not enough memory to run!\n");
4915 err = -ENOMEM;
4916 goto out;
4917 }
4918
4919
4920
4921
4922
4923
4924
4925
4926 rlimit__bump_memlock();
4927
4928 err = perf_config(trace__config, &trace);
4929 if (err)
4930 goto out;
4931
4932 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
4933 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
4947 trace.evlist->core.nr_entries == 0 ) {
4948 trace.trace_syscalls = true;
4949 }
4950
4951
4952
4953
4954
4955
4956 if (trace.perfconfig_events != NULL) {
4957 struct parse_events_error parse_err;
4958
4959 parse_events_error__init(&parse_err);
4960 err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err);
4961 if (err)
4962 parse_events_error__print(&parse_err, trace.perfconfig_events);
4963 parse_events_error__exit(&parse_err);
4964 if (err)
4965 goto out;
4966 }
4967
4968 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
4969 usage_with_options_msg(trace_usage, trace_options,
4970 "cgroup monitoring only available in system-wide mode");
4971 }
4972
4973 evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
4974 if (IS_ERR(evsel)) {
4975 bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
4976 pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
4977 goto out;
4978 }
4979
4980 if (evsel) {
4981 trace.syscalls.events.augmented = evsel;
4982
4983 evsel = evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
4984 if (evsel == NULL) {
4985 pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
4986 goto out;
4987 }
4988
4989 if (evsel->bpf_obj == NULL) {
4990 pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n");
4991 goto out;
4992 }
4993
4994 trace.bpf_obj = evsel->bpf_obj;
4995
4996
4997
4998
4999
5000
5001 if (!trace.trace_syscalls && trace__only_augmented_syscalls_evsels(&trace))
5002 trace.trace_syscalls = true;
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028 if (!trace.trace_syscalls) {
5029 trace__delete_augmented_syscalls(&trace);
5030 } else {
5031 trace__set_bpf_map_filtered_pids(&trace);
5032 trace__set_bpf_map_syscalls(&trace);
5033 trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
5034 }
5035 }
5036
5037 err = bpf__setup_stdout(trace.evlist);
5038 if (err) {
5039 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
5040 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
5041 goto out;
5042 }
5043
5044 err = -1;
5045
5046 if (map_dump_str) {
5047 trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
5048 if (trace.dump.map == NULL) {
5049 pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
5050 goto out;
5051 }
5052 }
5053
5054 if (trace.trace_pgfaults) {
5055 trace.opts.sample_address = true;
5056 trace.opts.sample_time = true;
5057 }
5058
5059 if (trace.opts.mmap_pages == UINT_MAX)
5060 mmap_pages_user_set = false;
5061
5062 if (trace.max_stack == UINT_MAX) {
5063 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
5064 max_stack_user_set = false;
5065 }
5066
5067#ifdef HAVE_DWARF_UNWIND_SUPPORT
5068 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
5069 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
5070 }
5071#endif
5072
5073 if (callchain_param.enabled) {
5074 if (!mmap_pages_user_set && geteuid() == 0)
5075 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
5076
5077 symbol_conf.use_callchain = true;
5078 }
5079
5080 if (trace.evlist->core.nr_entries > 0) {
5081 evlist__set_default_evsel_handler(trace.evlist, trace__event_handler);
5082 if (evlist__set_syscall_tp_fields(trace.evlist)) {
5083 perror("failed to set syscalls:* tracepoint fields");
5084 goto out;
5085 }
5086 }
5087
5088 if (trace.sort_events) {
5089 ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace);
5090 ordered_events__set_copy_on_queue(&trace.oe.data, true);
5091 }
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104 if (trace.syscalls.events.augmented) {
5105 evlist__for_each_entry(trace.evlist, evsel) {
5106 bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
5107
5108 if (raw_syscalls_sys_exit) {
5109 trace.raw_augmented_syscalls = true;
5110 goto init_augmented_syscall_tp;
5111 }
5112
5113 if (trace.syscalls.events.augmented->priv == NULL &&
5114 strstr(evsel__name(evsel), "syscalls:sys_enter")) {
5115 struct evsel *augmented = trace.syscalls.events.augmented;
5116 if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
5117 evsel__init_augmented_syscall_tp_args(augmented))
5118 goto out;
5119
5120
5121
5122
5123
5124 augmented->handler = trace__sys_enter;
5125
5126
5127
5128
5129
5130
5131 if (evsel__init_augmented_syscall_tp(evsel, evsel) ||
5132 evsel__init_augmented_syscall_tp_args(evsel))
5133 goto out;
5134 evsel->handler = trace__sys_enter;
5135 }
5136
5137 if (strstarts(evsel__name(evsel), "syscalls:sys_exit_")) {
5138 struct syscall_tp *sc;
5139init_augmented_syscall_tp:
5140 if (evsel__init_augmented_syscall_tp(evsel, evsel))
5141 goto out;
5142 sc = __evsel__syscall_tp(evsel);
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162 if (trace.raw_augmented_syscalls)
5163 trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
5164 evsel__init_augmented_syscall_tp_ret(evsel);
5165 evsel->handler = trace__sys_exit;
5166 }
5167 }
5168 }
5169
5170 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
5171 return trace__record(&trace, argc-1, &argv[1]);
5172
5173
5174 if (trace.errno_summary && !trace.summary && !trace.summary_only)
5175 trace.summary_only = true;
5176
5177
5178 if (trace.summary_only)
5179 trace.summary = trace.summary_only;
5180
5181 if (output_name != NULL) {
5182 err = trace__open_output(&trace, output_name);
5183 if (err < 0) {
5184 perror("failed to create output file");
5185 goto out;
5186 }
5187 }
5188
5189 err = evswitch__init(&trace.evswitch, trace.evlist, stderr);
5190 if (err)
5191 goto out_close;
5192
5193 err = target__validate(&trace.opts.target);
5194 if (err) {
5195 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
5196 fprintf(trace.output, "%s", bf);
5197 goto out_close;
5198 }
5199
5200 err = target__parse_uid(&trace.opts.target);
5201 if (err) {
5202 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
5203 fprintf(trace.output, "%s", bf);
5204 goto out_close;
5205 }
5206
5207 if (!argc && target__none(&trace.opts.target))
5208 trace.opts.target.system_wide = true;
5209
5210 if (input_name)
5211 err = trace__replay(&trace);
5212 else
5213 err = trace__run(&trace, argc, argv);
5214
5215out_close:
5216 if (output_name != NULL)
5217 fclose(trace.output);
5218out:
5219 trace__exit(&trace);
5220 return err;
5221}
5222