1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include "util/record.h"
18#include <traceevent/event-parse.h>
19#include <api/fs/tracing_path.h>
20#include <bpf/bpf.h>
21#include "util/bpf_map.h"
22#include "util/rlimit.h"
23#include "builtin.h"
24#include "util/cgroup.h"
25#include "util/color.h"
26#include "util/config.h"
27#include "util/debug.h"
28#include "util/dso.h"
29#include "util/env.h"
30#include "util/event.h"
31#include "util/evsel.h"
32#include "util/evsel_fprintf.h"
33#include "util/synthetic-events.h"
34#include "util/evlist.h"
35#include "util/evswitch.h"
36#include "util/mmap.h"
37#include <subcmd/pager.h>
38#include <subcmd/exec-cmd.h>
39#include "util/machine.h"
40#include "util/map.h"
41#include "util/symbol.h"
42#include "util/path.h"
43#include "util/session.h"
44#include "util/thread.h"
45#include <subcmd/parse-options.h>
46#include "util/strlist.h"
47#include "util/intlist.h"
48#include "util/thread_map.h"
49#include "util/stat.h"
50#include "util/tool.h"
51#include "util/util.h"
52#include "trace/beauty/beauty.h"
53#include "trace-event.h"
54#include "util/parse-events.h"
55#include "util/bpf-loader.h"
56#include "callchain.h"
57#include "print_binary.h"
58#include "string2.h"
59#include "syscalltbl.h"
60#include "rb_resort.h"
61#include "../perf.h"
62
63#include <errno.h>
64#include <inttypes.h>
65#include <poll.h>
66#include <signal.h>
67#include <stdlib.h>
68#include <string.h>
69#include <linux/err.h>
70#include <linux/filter.h>
71#include <linux/kernel.h>
72#include <linux/random.h>
73#include <linux/stringify.h>
74#include <linux/time64.h>
75#include <linux/zalloc.h>
76#include <fcntl.h>
77#include <sys/sysmacros.h>
78
79#include <linux/ctype.h>
80#include <perf/mmap.h>
81
/* Only takes effect when the headers included above did not define O_CLOEXEC. */
#ifndef O_CLOEXEC
# define O_CLOEXEC 02000000
#endif

/*
 * Only takes effect when not already defined; used below as the value offset
 * of the fcntl_linux_specific_cmds[] string table.
 */
#ifndef F_LINUX_SPECIFIC_BASE
# define F_LINUX_SPECIFIC_BASE 1024
#endif
89
90
91
92
/*
 * Formatting hooks for a single argument.
 *
 * scnprintf:  renders the argument described by 'arg' into bf (at most size
 *             bytes) and returns the number of characters produced.
 * strtoul:    goes the other way, from a string in bf/size to a value stored
 *             in *val; returns true on success.
 * mask_val:   given the argument and a value, returns a (possibly adjusted)
 *             value - NOTE(review): exact masking policy is defined by the
 *             per-syscall callbacks, not visible here.
 * parm:       opaque data for the callbacks; the tables in this file point it
 *             at a struct strarray/strarrays.
 * name:       argument name.
 * nr_entries: when non-zero, syscall_arg__scnprintf_char_array() uses it as
 *             the maximum number of characters to print.
 * show_zero:  presumably forces printing of a zero value - TODO confirm
 *             against the code that consumes it.
 */
struct syscall_arg_fmt {
	size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	bool (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val);
	unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
	void *parm;
	const char *name;
	u16 nr_entries;
	bool show_zero;
};
102
/*
 * Per-syscall formatting description, one entry per syscall in syscall_fmts[].
 *
 * name/alias:    syscall name and an alternative name for it (e.g. "fstat" /
 *                "newfstat" in the table below).
 * bpf_prog_name: names of BPF programs for the enter and exit sides -
 *                NOTE(review): how they are resolved is outside this chunk.
 * arg:           formatters indexed by argument position (up to six).
 * nr_args:       explicit argument count, set by a few table entries.
 * errpid, timeout, hexret: flags set per syscall in the table; presumably
 *                they select how the return value is printed - TODO confirm.
 */
struct syscall_fmt {
	const char *name;
	const char *alias;
	struct {
		const char *sys_enter,
			   *sys_exit;
	} bpf_prog_name;
	struct syscall_arg_fmt arg[6];
	u8 nr_args;
	bool errpid;
	bool timeout;
	bool hexret;
};
116
/*
 * Top-level state of the tracer: the perf_tool callbacks, the syscall table
 * and the BPF maps/programs associated with it, the event list and host
 * machine, filters, counters and the switches controlling what gets printed.
 */
struct trace {
	struct perf_tool tool;
	struct syscalltbl *sctbl;
	/* Syscall descriptions plus the BPF maps and events bound to them. */
	struct {
		struct syscall *table;
		struct bpf_map *map;
		struct {
			struct bpf_map *sys_enter,
				       *sys_exit;
		} prog_array;
		struct {
			struct evsel *sys_enter,
				     *sys_exit,
				     *augmented;
		} events;
		struct bpf_program *unaugmented_prog;
	} syscalls;
	struct {
		struct bpf_map *map;
	} dump;
	struct record_opts opts;
	struct evlist *evlist;
	struct machine *host;
	struct thread *current;
	struct bpf_object *bpf_obj;
	struct cgroup *cgroup;
	u64 base_time;
	FILE *output;
	/* Event counters and the optional cap on them. */
	unsigned long nr_events;
	unsigned long nr_events_printed;
	unsigned long max_events;
	struct evswitch evswitch;
	/* Event qualifier: the string list and the ids derived from it. */
	struct strlist *ev_qualifier;
	struct {
		size_t nr;
		int *entries;
	} ev_qualifier_ids;
	/* Pids to filter, as an array and as a BPF map. */
	struct {
		size_t nr;
		pid_t *entries;
		struct bpf_map *map;
	} filter_pids;
	double duration_filter;
	double runtime_ms;
	struct {
		u64 vfs_getname,
		    proc_getname;
	} stats;
	unsigned int max_stack;
	unsigned int min_stack;
	int raw_augmented_syscalls_args_size;
	/* Behaviour and output switches. */
	bool raw_augmented_syscalls;
	bool fd_path_disabled;
	bool sort_events;
	bool not_ev_qualifier;
	bool live;
	bool full_time;
	bool sched;
	bool multiple_threads;
	bool summary;
	bool summary_only;
	bool errno_summary;
	bool failure_only;
	bool show_comm;
	bool print_sample;
	bool show_tool_stats;
	bool trace_syscalls;
	bool libtraceevent_print;
	bool kernel_syscallchains;
	s16 args_alignment;
	bool show_tstamp;
	bool show_duration;
	bool show_zeros;
	bool show_arg_names;
	bool show_string_prefix;
	bool force;
	bool vfs_getname;
	int trace_pgfaults;
	char *perfconfig_events;
	/* Ordered-events queue and a 'last' timestamp/counter kept with it. */
	struct {
		struct ordered_events data;
		u64 last;
	} oe;
};
201
/*
 * Accessor for one field inside a sample's raw data.
 *
 * offset:  byte offset of the field from sample->raw_data.
 * integer: reader returning the field as a u64 (set by __tp_field__init_uint()).
 * pointer: reader returning the address of the field (set by
 *          __tp_field__init_ptr()).
 *
 * The two readers share storage, so a field is either an integer or a
 * pointer field, never both.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
209
210#define TP_UINT_FIELD(bits) \
211static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
212{ \
213 u##bits value; \
214 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
215 return value; \
216}
217
218TP_UINT_FIELD(8);
219TP_UINT_FIELD(16);
220TP_UINT_FIELD(32);
221TP_UINT_FIELD(64);
222
223#define TP_UINT_FIELD__SWAPPED(bits) \
224static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
225{ \
226 u##bits value; \
227 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
228 return bswap_##bits(value);\
229}
230
231TP_UINT_FIELD__SWAPPED(16);
232TP_UINT_FIELD__SWAPPED(32);
233TP_UINT_FIELD__SWAPPED(64);
234
235static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
236{
237 field->offset = offset;
238
239 switch (size) {
240 case 1:
241 field->integer = tp_field__u8;
242 break;
243 case 2:
244 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
245 break;
246 case 4:
247 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
248 break;
249 case 8:
250 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
251 break;
252 default:
253 return -1;
254 }
255
256 return 0;
257}
258
259static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
260{
261 return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
262}
263
264static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
265{
266 return sample->raw_data + field->offset;
267}
268
269static int __tp_field__init_ptr(struct tp_field *field, int offset)
270{
271 field->offset = offset;
272 field->pointer = tp_field__ptr;
273 return 0;
274}
275
276static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
277{
278 return __tp_field__init_ptr(field, format_field->offset);
279}
280
/*
 * Field accessors for a syscall tracepoint: the syscall id plus either the
 * arguments (entry side) or the return value (exit side). 'args' and 'ret'
 * live in a union, so only one of them is meaningful for a given event.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
287
288
289
290
291
292
/*
 * What this file stores in evsel->priv (see __evsel__syscall_tp() and
 * evsel__syscall_arg_fmt()).
 *
 * sc:  syscall tracepoint field accessors.
 * fmt: heap-allocated array of argument formatters, one per tracepoint format
 *      field; released by evsel_trace__delete().
 */
struct evsel_trace {
	struct syscall_tp sc;
	struct syscall_arg_fmt *fmt;
};
297
298static struct evsel_trace *evsel_trace__new(void)
299{
300 return zalloc(sizeof(struct evsel_trace));
301}
302
303static void evsel_trace__delete(struct evsel_trace *et)
304{
305 if (et == NULL)
306 return;
307
308 zfree(&et->fmt);
309 free(et);
310}
311
312
313
314
315
316static inline struct syscall_tp *__evsel__syscall_tp(struct evsel *evsel)
317{
318 struct evsel_trace *et = evsel->priv;
319
320 return &et->sc;
321}
322
323static struct syscall_tp *evsel__syscall_tp(struct evsel *evsel)
324{
325 if (evsel->priv == NULL) {
326 evsel->priv = evsel_trace__new();
327 if (evsel->priv == NULL)
328 return NULL;
329 }
330
331 return __evsel__syscall_tp(evsel);
332}
333
334
335
336
337static inline struct syscall_arg_fmt *__evsel__syscall_arg_fmt(struct evsel *evsel)
338{
339 struct evsel_trace *et = evsel->priv;
340
341 return et->fmt;
342}
343
344static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel)
345{
346 struct evsel_trace *et = evsel->priv;
347
348 if (evsel->priv == NULL) {
349 et = evsel->priv = evsel_trace__new();
350
351 if (et == NULL)
352 return NULL;
353 }
354
355 if (et->fmt == NULL) {
356 et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
357 if (et->fmt == NULL)
358 goto out_delete;
359 }
360
361 return __evsel__syscall_arg_fmt(evsel);
362
363out_delete:
364 evsel_trace__delete(evsel->priv);
365 evsel->priv = NULL;
366 return NULL;
367}
368
369static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name)
370{
371 struct tep_format_field *format_field = evsel__field(evsel, name);
372
373 if (format_field == NULL)
374 return -1;
375
376 return tp_field__init_uint(field, format_field, evsel->needs_swap);
377}
378
/*
 * Initialise the syscall_tp member 'name' of 'evsel' as an unsigned integer
 * accessor for the tracepoint field with that same name.
 * evsel->priv must already be allocated.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ \
		struct syscall_tp *sc_tp__ = __evsel__syscall_tp(evsel); \
		evsel__init_tp_uint_field(evsel, &sc_tp__->name, #name); \
	})
382
/*
 * Look up the tracepoint field called 'name' in 'evsel' and initialise
 * 'field' as a pointer accessor for it. Returns -1 if there is no such field.
 */
static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name)
{
	struct tep_format_field *found = evsel__field(evsel, name);

	if (found != NULL)
		return tp_field__init_ptr(field, found);

	return -1;
}
392
/*
 * Initialise the syscall_tp member 'name' of 'evsel' as a pointer accessor
 * for the tracepoint field with that same name.
 * evsel->priv must already be allocated.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ \
		struct syscall_tp *sc_tp__ = __evsel__syscall_tp(evsel); \
		evsel__init_tp_ptr_field(evsel, &sc_tp__->name, #name); \
	})
396
397static void evsel__delete_priv(struct evsel *evsel)
398{
399 zfree(&evsel->priv);
400 evsel__delete(evsel);
401}
402
403static int evsel__init_syscall_tp(struct evsel *evsel)
404{
405 struct syscall_tp *sc = evsel__syscall_tp(evsel);
406
407 if (sc != NULL) {
408 if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
409 evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
410 return -ENOENT;
411 return 0;
412 }
413
414 return -ENOMEM;
415}
416
417static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
418{
419 struct syscall_tp *sc = evsel__syscall_tp(evsel);
420
421 if (sc != NULL) {
422 struct tep_format_field *syscall_id = evsel__field(tp, "id");
423 if (syscall_id == NULL)
424 syscall_id = evsel__field(tp, "__syscall_nr");
425 if (syscall_id == NULL ||
426 __tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
427 return -EINVAL;
428
429 return 0;
430 }
431
432 return -ENOMEM;
433}
434
435static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
436{
437 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
438
439 return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
440}
441
442static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
443{
444 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
445
446 return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
447}
448
449static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
450{
451 if (evsel__syscall_tp(evsel) != NULL) {
452 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
453 return -ENOENT;
454
455 evsel->handler = handler;
456 return 0;
457 }
458
459 return -ENOMEM;
460}
461
/*
 * Create the tracepoint evsel for "raw_syscalls:<direction>", falling back to
 * "syscalls:<direction>" when the first one cannot be created, and initialise
 * it with 'handler'.
 *
 * Returns the new evsel, or NULL if neither tracepoint is available or the
 * initialisation fails (the evsel is destroyed in the latter case).
 */
static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
	struct evsel *evsel = evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		evsel = evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (evsel__init_raw_syscall_tp(evsel, handler) != 0) {
		evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
482
/*
 * Read the integer syscall_tp member 'name' of 'evsel' out of 'sample' by
 * calling the reader installed for it. evsel->priv must be allocated and the
 * member initialised.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ \
		struct syscall_tp *sc_tp__ = __evsel__syscall_tp(evsel); \
		sc_tp__->name.integer(&sc_tp__->name, sample); \
	})

/* Same as perf_evsel__sc_tp_uint() but for pointer members. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ \
		struct syscall_tp *sc_tp__ = __evsel__syscall_tp(evsel); \
		sc_tp__->name.pointer(&sc_tp__->name, sample); \
	})
490
491size_t strarray__scnprintf_suffix(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_suffix, int val)
492{
493 int idx = val - sa->offset;
494
495 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
496 size_t printed = scnprintf(bf, size, intfmt, val);
497 if (show_suffix)
498 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
499 return printed;
500 }
501
502 return scnprintf(bf, size, "%s%s", sa->entries[idx], show_suffix ? sa->prefix : "");
503}
504
505size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
506{
507 int idx = val - sa->offset;
508
509 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
510 size_t printed = scnprintf(bf, size, intfmt, val);
511 if (show_prefix)
512 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
513 return printed;
514 }
515
516 return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
517}
518
519static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
520 const char *intfmt,
521 struct syscall_arg *arg)
522{
523 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->show_string_prefix, arg->val);
524}
525
/* Strarray formatter that prints unnamed values as plain decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char decimal_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, decimal_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
533
534bool syscall_arg__strtoul_strarray(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
535{
536 return strarray__strtoul(arg->parm, bf, size, ret);
537}
538
539bool syscall_arg__strtoul_strarray_flags(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
540{
541 return strarray__strtoul_flags(arg->parm, bf, size, ret);
542}
543
544bool syscall_arg__strtoul_strarrays(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
545{
546 return strarrays__strtoul(arg->parm, bf, size, ret);
547}
548
549size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg)
550{
551 return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val);
552}
553
554size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
555{
556 size_t printed;
557 int i;
558
559 for (i = 0; i < sas->nr_entries; ++i) {
560 struct strarray *sa = sas->entries[i];
561 int idx = val - sa->offset;
562
563 if (idx >= 0 && idx < sa->nr_entries) {
564 if (sa->entries[idx] == NULL)
565 break;
566 return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
567 }
568 }
569
570 printed = scnprintf(bf, size, intfmt, val);
571 if (show_prefix)
572 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sas->entries[0]->prefix);
573 return printed;
574}
575
576bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret)
577{
578 int i;
579
580 for (i = 0; i < sa->nr_entries; ++i) {
581 if (sa->entries[i] && strncmp(sa->entries[i], bf, size) == 0 && sa->entries[i][size] == '\0') {
582 *ret = sa->offset + i;
583 return true;
584 }
585 }
586
587 return false;
588}
589
590bool strarray__strtoul_flags(struct strarray *sa, char *bf, size_t size, u64 *ret)
591{
592 u64 val = 0;
593 char *tok = bf, *sep, *end;
594
595 *ret = 0;
596
597 while (size != 0) {
598 int toklen = size;
599
600 sep = memchr(tok, '|', size);
601 if (sep != NULL) {
602 size -= sep - tok + 1;
603
604 end = sep - 1;
605 while (end > tok && isspace(*end))
606 --end;
607
608 toklen = end - tok + 1;
609 }
610
611 while (isspace(*tok))
612 ++tok;
613
614 if (isalpha(*tok) || *tok == '_') {
615 if (!strarray__strtoul(sa, tok, toklen, &val))
616 return false;
617 } else {
618 bool is_hexa = tok[0] == 0 && (tok[1] = 'x' || tok[1] == 'X');
619
620 val = strtoul(tok, NULL, is_hexa ? 16 : 0);
621 }
622
623 *ret |= (1 << (val - 1));
624
625 if (sep == NULL)
626 break;
627 tok = sep + 1;
628 }
629
630 return true;
631}
632
633bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret)
634{
635 int i;
636
637 for (i = 0; i < sas->nr_entries; ++i) {
638 struct strarray *sa = sas->entries[i];
639
640 if (strarray__strtoul(sa, bf, size, ret))
641 return true;
642 }
643
644 return false;
645}
646
647size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
648 struct syscall_arg *arg)
649{
650 return strarrays__scnprintf(arg->parm, bf, size, "%d", arg->show_string_prefix, arg->val);
651}
652
653#ifndef AT_FDCWD
654#define AT_FDCWD -100
655#endif
656
657static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
658 struct syscall_arg *arg)
659{
660 int fd = arg->val;
661 const char *prefix = "AT_FD";
662
663 if (fd == AT_FDCWD)
664 return scnprintf(bf, size, "%s%s", arg->show_string_prefix ? prefix : "", "CWD");
665
666 return syscall_arg__scnprintf_fd(bf, size, arg);
667}
668
669#define SCA_FDAT syscall_arg__scnprintf_fd_at
670
671static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
672 struct syscall_arg *arg);
673
674#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
675
676size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
677{
678 return scnprintf(bf, size, "%#lx", arg->val);
679}
680
681size_t syscall_arg__scnprintf_ptr(char *bf, size_t size, struct syscall_arg *arg)
682{
683 if (arg->val == 0)
684 return scnprintf(bf, size, "NULL");
685 return syscall_arg__scnprintf_hex(bf, size, arg);
686}
687
688size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
689{
690 return scnprintf(bf, size, "%d", arg->val);
691}
692
693size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
694{
695 return scnprintf(bf, size, "%ld", arg->val);
696}
697
698static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg)
699{
700
701
702
703 return scnprintf(bf, size, "\"%-.*s\"", arg->fmt->nr_entries ?: arg->len, arg->val);
704}
705
706#define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array
707
/*
 * Names for the first argument of bpf(), without the "BPF_" prefix. The
 * position in the array is the value the strarray helpers map the name
 * to/from (value - offset is used as the index).
 */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD", "OBJ_PIN", "OBJ_GET", "PROG_ATTACH",
	"PROG_DETACH", "PROG_TEST_RUN", "PROG_GET_NEXT_ID", "MAP_GET_NEXT_ID",
	"PROG_GET_FD_BY_ID", "MAP_GET_FD_BY_ID", "OBJ_GET_INFO_BY_FD",
	"PROG_QUERY", "RAW_TRACEPOINT_OPEN", "BTF_LOAD", "BTF_GET_FD_BY_ID",
	"TASK_FD_QUERY", "MAP_LOOKUP_AND_DELETE_ELEM", "MAP_FREEZE",
	"BTF_GET_NEXT_ID", "MAP_LOOKUP_BATCH", "MAP_LOOKUP_AND_DELETE_BATCH",
	"MAP_UPDATE_BATCH", "MAP_DELETE_BATCH", "LINK_CREATE", "LINK_UPDATE",
	"LINK_GET_FD_BY_ID", "LINK_GET_NEXT_ID", "ENABLE_STATS", "ITER_CREATE",
	"LINK_DETACH", "PROG_BIND_MAP",
};
static DEFINE_STRARRAY(bpf_cmd, "BPF_");

/*
 * fsmount() flag names; used through STRARRAY_FLAGS in syscall_fmts[].
 * Slot 0 is intentionally left empty.
 */
static const char *fsmount_flags[] = {
	[1] = "CLOEXEC",
};
static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");

/* Provides the fsconfig_cmds[] table wrapped just below. */
#include "trace/beauty/generated/fsconfig_arrays.c"

static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");
730
/*
 * epoll_ctl() operation names. Declared with an offset of 1, so - presumably,
 * the macro itself is not visible here - the first entry stands for value 1.
 */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);

/* Interval timer names, used for the 'which' argument of getitimer()/setitimer(). */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers, "ITIMER_");

/* keyctl() option names, indexed by option value. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options, "KEYCTL_");

/*
 * lseek() 'whence' names. "DATA" and "HOLE" are only compiled in when the
 * build headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences, "SEEK_");
755
/* fcntl() command names for the low-numbered commands, indexed by command value. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds, "F_");

/*
 * fcntl() command names for the range starting at F_LINUX_SPECIFIC_BASE.
 * The [5] designator leaves slots 3 and 4 without a name.
 */
static const char *fcntl_linux_specific_cmds[] = {
	"SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
	"SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
	"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
};

static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, "F_", F_LINUX_SPECIFIC_BASE);

/* Both fcntl command ranges, searched in this order by the strarrays helpers. */
static struct strarray *fcntl_cmds_arrays[] = {
	&strarray__fcntl_cmds,
	&strarray__fcntl_linux_specific_cmds,
};

static DEFINE_STRARRAYS(fcntl_cmds_arrays);
778
/* Resource names for getrlimit()/setrlimit()/prlimit64(), indexed by resource value. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources, "RLIMIT_");

/* Names for the 'how' argument of rt_sigprocmask(). */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow, "SIG_");

/* Clock id names, used for the first argument of clock_gettime(). */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid, "CLOCK_");
795
796static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
797 struct syscall_arg *arg)
798{
799 bool show_prefix = arg->show_string_prefix;
800 const char *suffix = "_OK";
801 size_t printed = 0;
802 int mode = arg->val;
803
804 if (mode == F_OK)
805 return scnprintf(bf, size, "F%s", show_prefix ? suffix : "");
806#define P_MODE(n) \
807 if (mode & n##_OK) { \
808 printed += scnprintf(bf + printed, size - printed, "%s%s", #n, show_prefix ? suffix : ""); \
809 mode &= ~n##_OK; \
810 }
811
812 P_MODE(R);
813 P_MODE(W);
814 P_MODE(X);
815#undef P_MODE
816
817 if (mode)
818 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
819
820 return printed;
821}
822
823#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
824
825static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
826 struct syscall_arg *arg);
827
828#define SCA_FILENAME syscall_arg__scnprintf_filename
829
830static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
831 struct syscall_arg *arg)
832{
833 bool show_prefix = arg->show_string_prefix;
834 const char *prefix = "O_";
835 int printed = 0, flags = arg->val;
836
837#define P_FLAG(n) \
838 if (flags & O_##n) { \
839 printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
840 flags &= ~O_##n; \
841 }
842
843 P_FLAG(CLOEXEC);
844 P_FLAG(NONBLOCK);
845#undef P_FLAG
846
847 if (flags)
848 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
849
850 return printed;
851}
852
853#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
854
855#ifndef GRND_NONBLOCK
856#define GRND_NONBLOCK 0x0001
857#endif
858#ifndef GRND_RANDOM
859#define GRND_RANDOM 0x0002
860#endif
861
862static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
863 struct syscall_arg *arg)
864{
865 bool show_prefix = arg->show_string_prefix;
866 const char *prefix = "GRND_";
867 int printed = 0, flags = arg->val;
868
869#define P_FLAG(n) \
870 if (flags & GRND_##n) { \
871 printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
872 flags &= ~GRND_##n; \
873 }
874
875 P_FLAG(RANDOM);
876 P_FLAG(NONBLOCK);
877#undef P_FLAG
878
879 if (flags)
880 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
881
882 return printed;
883}
884
885#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
886
/*
 * Initialiser for a struct syscall_arg_fmt whose value is named by the string
 * table strarray__<array>. The 'name' parameter is not used by the expansion;
 * call sites pass the argument name there.
 */
#define STRARRAY(name, array) \
	{ .scnprintf = SCA_STRARRAY, \
	  .strtoul = STUL_STRARRAY, \
	  .parm = &strarray__##array, }

/* Same as STRARRAY() but for arguments that are a set of flags. */
#define STRARRAY_FLAGS(name, array) \
	{ .scnprintf = SCA_STRARRAY_FLAGS, \
	  .strtoul = STUL_STRARRAY_FLAGS, \
	  .parm = &strarray__##array, }
896
897#include "trace/beauty/arch_errno_names.c"
898#include "trace/beauty/eventfd.c"
899#include "trace/beauty/futex_op.c"
900#include "trace/beauty/futex_val3.c"
901#include "trace/beauty/mmap.c"
902#include "trace/beauty/mode_t.c"
903#include "trace/beauty/msg_flags.c"
904#include "trace/beauty/open_flags.c"
905#include "trace/beauty/perf_event_open.c"
906#include "trace/beauty/pid.c"
907#include "trace/beauty/sched_policy.c"
908#include "trace/beauty/seccomp.c"
909#include "trace/beauty/signum.c"
910#include "trace/beauty/socket_type.c"
911#include "trace/beauty/waitid_options.c"
912
913static struct syscall_fmt syscall_fmts[] = {
914 { .name = "access",
915 .arg = { [1] = { .scnprintf = SCA_ACCMODE, }, }, },
916 { .name = "arch_prctl",
917 .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, },
918 [1] = { .scnprintf = SCA_PTR, }, }, },
919 { .name = "bind",
920 .arg = { [0] = { .scnprintf = SCA_INT, },
921 [1] = { .scnprintf = SCA_SOCKADDR, },
922 [2] = { .scnprintf = SCA_INT, }, }, },
923 { .name = "bpf",
924 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
925 { .name = "brk", .hexret = true,
926 .arg = { [0] = { .scnprintf = SCA_PTR, }, }, },
927 { .name = "clock_gettime",
928 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
929 { .name = "clone", .errpid = true, .nr_args = 5,
930 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
931 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
932 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
933 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
934 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
935 { .name = "close",
936 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, }, }, },
937 { .name = "connect",
938 .arg = { [0] = { .scnprintf = SCA_INT, },
939 [1] = { .scnprintf = SCA_SOCKADDR, },
940 [2] = { .scnprintf = SCA_INT, }, }, },
941 { .name = "epoll_ctl",
942 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
943 { .name = "eventfd2",
944 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, }, }, },
945 { .name = "fchmodat",
946 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
947 { .name = "fchownat",
948 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
949 { .name = "fcntl",
950 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD,
951 .strtoul = STUL_STRARRAYS,
952 .parm = &strarrays__fcntl_cmds_arrays,
953 .show_zero = true, },
954 [2] = { .scnprintf = SCA_FCNTL_ARG, }, }, },
955 { .name = "flock",
956 .arg = { [1] = { .scnprintf = SCA_FLOCK, }, }, },
957 { .name = "fsconfig",
958 .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
959 { .name = "fsmount",
960 .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
961 [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, }, }, },
962 { .name = "fspick",
963 .arg = { [0] = { .scnprintf = SCA_FDAT, },
964 [1] = { .scnprintf = SCA_FILENAME, },
965 [2] = { .scnprintf = SCA_FSPICK_FLAGS, }, }, },
966 { .name = "fstat", .alias = "newfstat", },
967 { .name = "fstatat", .alias = "newfstatat", },
968 { .name = "futex",
969 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, },
970 [5] = { .scnprintf = SCA_FUTEX_VAL3, }, }, },
971 { .name = "futimesat",
972 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
973 { .name = "getitimer",
974 .arg = { [0] = STRARRAY(which, itimers), }, },
975 { .name = "getpid", .errpid = true, },
976 { .name = "getpgid", .errpid = true, },
977 { .name = "getppid", .errpid = true, },
978 { .name = "getrandom",
979 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, }, }, },
980 { .name = "getrlimit",
981 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
982 { .name = "gettid", .errpid = true, },
983 { .name = "ioctl",
984 .arg = {
985#if defined(__i386__) || defined(__x86_64__)
986
987
988
989 [1] = { .scnprintf = SCA_IOCTL_CMD, },
990 [2] = { .scnprintf = SCA_HEX, }, }, },
991#else
992 [2] = { .scnprintf = SCA_HEX, }, }, },
993#endif
994 { .name = "kcmp", .nr_args = 5,
995 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
996 [1] = { .name = "pid2", .scnprintf = SCA_PID, },
997 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
998 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
999 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
1000 { .name = "keyctl",
1001 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
1002 { .name = "kill",
1003 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
1004 { .name = "linkat",
1005 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1006 { .name = "lseek",
1007 .arg = { [2] = STRARRAY(whence, whences), }, },
1008 { .name = "lstat", .alias = "newlstat", },
1009 { .name = "madvise",
1010 .arg = { [0] = { .scnprintf = SCA_HEX, },
1011 [2] = { .scnprintf = SCA_MADV_BHV, }, }, },
1012 { .name = "mkdirat",
1013 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1014 { .name = "mknodat",
1015 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1016 { .name = "mmap", .hexret = true,
1017
1018#if defined(__s390x__)
1019 .alias = "old_mmap",
1020#endif
1021 .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, },
1022 [3] = { .scnprintf = SCA_MMAP_FLAGS,
1023 .strtoul = STUL_STRARRAY_FLAGS,
1024 .parm = &strarray__mmap_flags, },
1025 [5] = { .scnprintf = SCA_HEX, }, }, },
1026 { .name = "mount",
1027 .arg = { [0] = { .scnprintf = SCA_FILENAME, },
1028 [3] = { .scnprintf = SCA_MOUNT_FLAGS,
1029 .mask_val = SCAMV_MOUNT_FLAGS, }, }, },
1030 { .name = "move_mount",
1031 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1032 [1] = { .scnprintf = SCA_FILENAME, },
1033 [2] = { .scnprintf = SCA_FDAT, },
1034 [3] = { .scnprintf = SCA_FILENAME, },
1035 [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, }, }, },
1036 { .name = "mprotect",
1037 .arg = { [0] = { .scnprintf = SCA_HEX, },
1038 [2] = { .scnprintf = SCA_MMAP_PROT, }, }, },
1039 { .name = "mq_unlink",
1040 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1041 { .name = "mremap", .hexret = true,
1042 .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, }, }, },
1043 { .name = "name_to_handle_at",
1044 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1045 { .name = "newfstatat",
1046 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1047 { .name = "open",
1048 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
1049 { .name = "open_by_handle_at",
1050 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1051 [2] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
1052 { .name = "openat",
1053 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1054 [2] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
1055 { .name = "perf_event_open",
1056 .arg = { [2] = { .scnprintf = SCA_INT, },
1057 [3] = { .scnprintf = SCA_FD, },
1058 [4] = { .scnprintf = SCA_PERF_FLAGS, }, }, },
1059 { .name = "pipe2",
1060 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, }, }, },
1061 { .name = "pkey_alloc",
1062 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, }, }, },
1063 { .name = "pkey_free",
1064 .arg = { [0] = { .scnprintf = SCA_INT, }, }, },
1065 { .name = "pkey_mprotect",
1066 .arg = { [0] = { .scnprintf = SCA_HEX, },
1067 [2] = { .scnprintf = SCA_MMAP_PROT, },
1068 [3] = { .scnprintf = SCA_INT, }, }, },
1069 { .name = "poll", .timeout = true, },
1070 { .name = "ppoll", .timeout = true, },
1071 { .name = "prctl",
1072 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION,
1073 .strtoul = STUL_STRARRAY,
1074 .parm = &strarray__prctl_options, },
1075 [1] = { .scnprintf = SCA_PRCTL_ARG2, },
1076 [2] = { .scnprintf = SCA_PRCTL_ARG3, }, }, },
1077 { .name = "pread", .alias = "pread64", },
1078 { .name = "preadv", .alias = "pread", },
1079 { .name = "prlimit64",
1080 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
1081 { .name = "pwrite", .alias = "pwrite64", },
1082 { .name = "readlinkat",
1083 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1084 { .name = "recvfrom",
1085 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1086 { .name = "recvmmsg",
1087 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1088 { .name = "recvmsg",
1089 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1090 { .name = "renameat",
1091 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1092 [2] = { .scnprintf = SCA_FDAT, }, }, },
1093 { .name = "renameat2",
1094 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1095 [2] = { .scnprintf = SCA_FDAT, },
1096 [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, }, }, },
1097 { .name = "rt_sigaction",
1098 .arg = { [0] = { .scnprintf = SCA_SIGNUM, }, }, },
1099 { .name = "rt_sigprocmask",
1100 .arg = { [0] = STRARRAY(how, sighow), }, },
1101 { .name = "rt_sigqueueinfo",
1102 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
1103 { .name = "rt_tgsigqueueinfo",
1104 .arg = { [2] = { .scnprintf = SCA_SIGNUM, }, }, },
1105 { .name = "sched_setscheduler",
1106 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, }, }, },
1107 { .name = "seccomp",
1108 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, },
1109 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, }, }, },
1110 { .name = "select", .timeout = true, },
1111 { .name = "sendfile", .alias = "sendfile64", },
1112 { .name = "sendmmsg",
1113 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1114 { .name = "sendmsg",
1115 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1116 { .name = "sendto",
1117 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, },
1118 [4] = { .scnprintf = SCA_SOCKADDR, }, }, },
1119 { .name = "set_tid_address", .errpid = true, },
1120 { .name = "setitimer",
1121 .arg = { [0] = STRARRAY(which, itimers), }, },
1122 { .name = "setrlimit",
1123 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
1124 { .name = "socket",
1125 .arg = { [0] = STRARRAY(family, socket_families),
1126 [1] = { .scnprintf = SCA_SK_TYPE, },
1127 [2] = { .scnprintf = SCA_SK_PROTO, }, }, },
1128 { .name = "socketpair",
1129 .arg = { [0] = STRARRAY(family, socket_families),
1130 [1] = { .scnprintf = SCA_SK_TYPE, },
1131 [2] = { .scnprintf = SCA_SK_PROTO, }, }, },
1132 { .name = "stat", .alias = "newstat", },
1133 { .name = "statx",
1134 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1135 [2] = { .scnprintf = SCA_STATX_FLAGS, } ,
1136 [3] = { .scnprintf = SCA_STATX_MASK, }, }, },
1137 { .name = "swapoff",
1138 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1139 { .name = "swapon",
1140 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1141 { .name = "symlinkat",
1142 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1143 { .name = "sync_file_range",
1144 .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, }, }, },
1145 { .name = "tgkill",
1146 .arg = { [2] = { .scnprintf = SCA_SIGNUM, }, }, },
1147 { .name = "tkill",
1148 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
1149 { .name = "umount2", .alias = "umount",
1150 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1151 { .name = "uname", .alias = "newuname", },
1152 { .name = "unlinkat",
1153 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1154 { .name = "utimensat",
1155 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1156 { .name = "wait4", .errpid = true,
1157 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, }, }, },
1158 { .name = "waitid", .errpid = true,
1159 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, }, }, },
1160};
1161
1162static int syscall_fmt__cmp(const void *name, const void *fmtp)
1163{
1164 const struct syscall_fmt *fmt = fmtp;
1165 return strcmp(name, fmt->name);
1166}
1167
1168static struct syscall_fmt *__syscall_fmt__find(struct syscall_fmt *fmts, const int nmemb, const char *name)
1169{
1170 return bsearch(name, fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1171}
1172
1173static struct syscall_fmt *syscall_fmt__find(const char *name)
1174{
1175 const int nmemb = ARRAY_SIZE(syscall_fmts);
1176 return __syscall_fmt__find(syscall_fmts, nmemb, name);
1177}
1178
1179static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts, const int nmemb, const char *alias)
1180{
1181 int i;
1182
1183 for (i = 0; i < nmemb; ++i) {
1184 if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0)
1185 return &fmts[i];
1186 }
1187
1188 return NULL;
1189}
1190
1191static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
1192{
1193 const int nmemb = ARRAY_SIZE(syscall_fmts);
1194 return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias);
1195}
1196
1197
1198
1199
1200
1201
1202
/*
 * Per-syscall state, one entry per syscall id in trace->syscalls.table,
 * filled in by trace__read_syscall_info().
 *
 * tp_format:   result of trace_event__tp_format() for "sys_enter_<name>" (or
 *              the alias); may be an ERR_PTR, callers test it with IS_ERR()
 * nr_args:     number of entries in @arg_fmt
 * args_size:   offset + size of the last tracepoint field, set by
 *              syscall__set_arg_fmts()
 * is_exit:     the syscall is "exit" or "exit_group"
 * is_open:     the syscall is "open" or "openat"
 * nonexistent: the syscall table has no name for this id
 * args:        first argument field of the tracepoint format (the leading
 *              "__syscall_nr"/"nr" field is skipped)
 * fmt:         matching syscall_fmts[] entry, NULL when there is none
 * arg_fmt:     per-argument formatters, allocated by syscall__alloc_arg_fmts()
 */
struct syscall {
	struct tep_event *tp_format;
	int nr_args;
	int args_size;
	struct {
		struct bpf_program *sys_enter,
				   *sys_exit;
	} bpf_prog;
	bool is_exit;
	bool is_open;
	bool nonexistent;
	struct tep_format_field *args;
	const char *name;
	struct syscall_fmt *fmt;
	struct syscall_arg_fmt *arg_fmt;
};
1219
1220
1221
1222
1223
1224
/*
 * Value type of a per-syscall BPF map entry.
 * NOTE(review): presumably this layout has to match the struct used by the
 * BPF program that reads the map, and the 6 slots correspond to the maximum
 * number of syscall arguments -- confirm against the BPF side.
 */
struct bpf_map_syscall_entry {
	bool enabled;
	u16 string_args_len[6];
};
1229
1230
1231
1232
1233
1234
1235
1236
1237static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
1238{
1239 double duration = (double)t / NSEC_PER_MSEC;
1240 size_t printed = fprintf(fp, "(");
1241
1242 if (!calculated)
1243 printed += fprintf(fp, " ");
1244 else if (duration >= 1.0)
1245 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1246 else if (duration >= 0.01)
1247 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1248 else
1249 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1250 return printed + fprintf(fp, "): ");
1251}
1252
1253
1254
1255
1256
1257
1258
1259
/*
 * Per-thread tracing state, stored as the thread's priv pointer (see
 * thread__trace()).
 *
 * entry_time:     compared with the sample time in thread__update_stats() to
 *                 compute a syscall duration
 * entry_pending:  an entry line is buffered in @entry_str and still has to be
 *                 printed (see trace__printf_interrupted_entry())
 * nr_events:      incremented on every thread__trace() call
 * pfmaj, pfmin:   NOTE(review): presumably major/minor page fault counters
 * entry_str:      buffer of trace__entry_str_size bytes holding the formatted
 *                 syscall entry
 * ret_scnprintf:  optional formatter for the return value, reset before the
 *                 arguments are formatted and set by
 *                 syscall_arg__set_ret_scnprintf()
 * filename:       state for resolving a filename argument later: @ptr is the
 *                 raw argument value and @entry_str_pos the offset in
 *                 @entry_str where it belongs (see thread__set_filename_pos())
 * files:          table of struct file indexed by fd; @max is the highest
 *                 valid index, -1 when the table is empty
 * syscall_stats:  intlist keyed by syscall id, each node's priv holding a
 *                 struct syscall_stats
 */
struct thread_trace {
	u64 entry_time;
	bool entry_pending;
	unsigned long nr_events;
	unsigned long pfmaj, pfmin;
	char *entry_str;
	double runtime_ms;
	size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	struct {
		unsigned long ptr;
		short int entry_str_pos;
		bool pending_open;
		unsigned int namelen;
		char *name;
	} filename;
	struct {
		int max;
		struct file *table;
	} files;

	struct intlist *syscall_stats;
};
1282
1283static struct thread_trace *thread_trace__new(void)
1284{
1285 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1286
1287 if (ttrace) {
1288 ttrace->files.max = -1;
1289 ttrace->syscall_stats = intlist__new(NULL);
1290 }
1291
1292 return ttrace;
1293}
1294
1295static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1296{
1297 struct thread_trace *ttrace;
1298
1299 if (thread == NULL)
1300 goto fail;
1301
1302 if (thread__priv(thread) == NULL)
1303 thread__set_priv(thread, thread_trace__new());
1304
1305 if (thread__priv(thread) == NULL)
1306 goto fail;
1307
1308 ttrace = thread__priv(thread);
1309 ++ttrace->nr_events;
1310
1311 return ttrace;
1312fail:
1313 color_fprintf(fp, PERF_COLOR_RED,
1314 "WARNING: not enough memory, dropping samples!\n");
1315 return NULL;
1316}
1317
1318
1319void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
1320 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
1321{
1322 struct thread_trace *ttrace = thread__priv(arg->thread);
1323
1324 ttrace->ret_scnprintf = ret_scnprintf;
1325}
1326
/* Bit flags. NOTE(review): presumably select major/minor page fault tracing -- users are outside this part of the file. */
#define TRACE_PFMAJ (1 << 0)
#define TRACE_PFMIN (1 << 1)

/* Size in bytes of the per-thread buffer (thread_trace->entry_str) that holds a formatted syscall entry. */
static const size_t trace__entry_str_size = 2048;
1331
1332static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
1333{
1334 if (fd < 0)
1335 return NULL;
1336
1337 if (fd > ttrace->files.max) {
1338 struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
1339
1340 if (nfiles == NULL)
1341 return NULL;
1342
1343 if (ttrace->files.max != -1) {
1344 memset(nfiles + ttrace->files.max + 1, 0,
1345 (fd - ttrace->files.max) * sizeof(struct file));
1346 } else {
1347 memset(nfiles, 0, (fd + 1) * sizeof(struct file));
1348 }
1349
1350 ttrace->files.table = nfiles;
1351 ttrace->files.max = fd;
1352 }
1353
1354 return ttrace->files.table + fd;
1355}
1356
/* Same as thread_trace__files_entry(), using the thread_trace attached to @thread. */
struct file *thread__files_entry(struct thread *thread, int fd)
{
	struct thread_trace *ttrace = thread__priv(thread);

	return thread_trace__files_entry(ttrace, fd);
}
1361
1362static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1363{
1364 struct thread_trace *ttrace = thread__priv(thread);
1365 struct file *file = thread_trace__files_entry(ttrace, fd);
1366
1367 if (file != NULL) {
1368 struct stat st;
1369 if (stat(pathname, &st) == 0)
1370 file->dev_maj = major(st.st_rdev);
1371 file->pathname = strdup(pathname);
1372 if (file->pathname)
1373 return 0;
1374 }
1375
1376 return -1;
1377}
1378
1379static int thread__read_fd_path(struct thread *thread, int fd)
1380{
1381 char linkname[PATH_MAX], pathname[PATH_MAX];
1382 struct stat st;
1383 int ret;
1384
1385 if (thread->pid_ == thread->tid) {
1386 scnprintf(linkname, sizeof(linkname),
1387 "/proc/%d/fd/%d", thread->pid_, fd);
1388 } else {
1389 scnprintf(linkname, sizeof(linkname),
1390 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1391 }
1392
1393 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1394 return -1;
1395
1396 ret = readlink(linkname, pathname, sizeof(pathname));
1397
1398 if (ret < 0 || ret > st.st_size)
1399 return -1;
1400
1401 pathname[ret] = '\0';
1402 return trace__set_fd_pathname(thread, fd, pathname);
1403}
1404
1405static const char *thread__fd_path(struct thread *thread, int fd,
1406 struct trace *trace)
1407{
1408 struct thread_trace *ttrace = thread__priv(thread);
1409
1410 if (ttrace == NULL || trace->fd_path_disabled)
1411 return NULL;
1412
1413 if (fd < 0)
1414 return NULL;
1415
1416 if ((fd > ttrace->files.max || ttrace->files.table[fd].pathname == NULL)) {
1417 if (!trace->live)
1418 return NULL;
1419 ++trace->stats.proc_getname;
1420 if (thread__read_fd_path(thread, fd))
1421 return NULL;
1422 }
1423
1424 return ttrace->files.table[fd].pathname;
1425}
1426
1427size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
1428{
1429 int fd = arg->val;
1430 size_t printed = scnprintf(bf, size, "%d", fd);
1431 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1432
1433 if (path)
1434 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1435
1436 return printed;
1437}
1438
1439size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1440{
1441 size_t printed = scnprintf(bf, size, "%d", fd);
1442 struct thread *thread = machine__find_thread(trace->host, pid, pid);
1443
1444 if (thread) {
1445 const char *path = thread__fd_path(thread, fd, trace);
1446
1447 if (path)
1448 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1449
1450 thread__put(thread);
1451 }
1452
1453 return printed;
1454}
1455
1456static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1457 struct syscall_arg *arg)
1458{
1459 int fd = arg->val;
1460 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1461 struct thread_trace *ttrace = thread__priv(arg->thread);
1462
1463 if (ttrace && fd >= 0 && fd <= ttrace->files.max)
1464 zfree(&ttrace->files.table[fd].pathname);
1465
1466 return printed;
1467}
1468
1469static void thread__set_filename_pos(struct thread *thread, const char *bf,
1470 unsigned long ptr)
1471{
1472 struct thread_trace *ttrace = thread__priv(thread);
1473
1474 ttrace->filename.ptr = ptr;
1475 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1476}
1477
1478static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
1479{
1480 struct augmented_arg *augmented_arg = arg->augmented.args;
1481 size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
1482
1483
1484
1485
1486 int consumed = sizeof(*augmented_arg) + augmented_arg->size;
1487
1488 arg->augmented.args = ((void *)arg->augmented.args) + consumed;
1489 arg->augmented.size -= consumed;
1490
1491 return printed;
1492}
1493
1494static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1495 struct syscall_arg *arg)
1496{
1497 unsigned long ptr = arg->val;
1498
1499 if (arg->augmented.args)
1500 return syscall_arg__scnprintf_augmented_string(arg, bf, size);
1501
1502 if (!arg->trace->vfs_getname)
1503 return scnprintf(bf, size, "%#x", ptr);
1504
1505 thread__set_filename_pos(arg->thread, bf, ptr);
1506 return 0;
1507}
1508
1509static bool trace__filter_duration(struct trace *trace, double t)
1510{
1511 return t < (trace->duration_filter * NSEC_PER_MSEC);
1512}
1513
1514static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1515{
1516 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1517
1518 return fprintf(fp, "%10.3f ", ts);
1519}
1520
1521
1522
1523
1524
1525
1526
1527static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1528{
1529 if (tstamp > 0)
1530 return __trace__fprintf_tstamp(trace, tstamp, fp);
1531
1532 return fprintf(fp, " ? ");
1533}
1534
/*
 * Flags written from the signal handler below and read by the rest of the
 * program. Objects shared with a signal handler must be of type
 * volatile sig_atomic_t for the accesses to be well defined.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/* Request termination; remember whether the signal was SIGINT. */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1543
1544static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
1545{
1546 size_t printed = 0;
1547
1548 if (trace->multiple_threads) {
1549 if (trace->show_comm)
1550 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1551 printed += fprintf(fp, "%d ", thread->tid);
1552 }
1553
1554 return printed;
1555}
1556
1557static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1558 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1559{
1560 size_t printed = 0;
1561
1562 if (trace->show_tstamp)
1563 printed = trace__fprintf_tstamp(trace, tstamp, fp);
1564 if (trace->show_duration)
1565 printed += fprintf_duration(duration, duration_calculated, fp);
1566 return printed + trace__fprintf_comm_tid(trace, thread, fp);
1567}
1568
1569static int trace__process_event(struct trace *trace, struct machine *machine,
1570 union perf_event *event, struct perf_sample *sample)
1571{
1572 int ret = 0;
1573
1574 switch (event->header.type) {
1575 case PERF_RECORD_LOST:
1576 color_fprintf(trace->output, PERF_COLOR_RED,
1577 "LOST %" PRIu64 " events!\n", event->lost.lost);
1578 ret = machine__process_lost_event(machine, event, sample);
1579 break;
1580 default:
1581 ret = machine__process_event(machine, event, sample);
1582 break;
1583 }
1584
1585 return ret;
1586}
1587
1588static int trace__tool_process(struct perf_tool *tool,
1589 union perf_event *event,
1590 struct perf_sample *sample,
1591 struct machine *machine)
1592{
1593 struct trace *trace = container_of(tool, struct trace, tool);
1594 return trace__process_event(trace, machine, event, sample);
1595}
1596
1597static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1598{
1599 struct machine *machine = vmachine;
1600
1601 if (machine->kptr_restrict_warned)
1602 return NULL;
1603
1604 if (symbol_conf.kptr_restrict) {
1605 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1606 "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1607 "Kernel samples will not be resolved.\n");
1608 machine->kptr_restrict_warned = true;
1609 return NULL;
1610 }
1611
1612 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1613}
1614
1615static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
1616{
1617 int err = symbol__init(NULL);
1618
1619 if (err)
1620 return err;
1621
1622 trace->host = machine__new_host();
1623 if (trace->host == NULL)
1624 return -ENOMEM;
1625
1626 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1627 if (err < 0)
1628 goto out;
1629
1630 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1631 evlist->core.threads, trace__tool_process, false,
1632 1);
1633out:
1634 if (err)
1635 symbol__exit();
1636
1637 return err;
1638}
1639
1640static void trace__symbols__exit(struct trace *trace)
1641{
1642 machine__exit(trace->host);
1643 trace->host = NULL;
1644
1645 symbol__exit();
1646}
1647
1648static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
1649{
1650 int idx;
1651
1652 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1653 nr_args = sc->fmt->nr_args;
1654
1655 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
1656 if (sc->arg_fmt == NULL)
1657 return -1;
1658
1659 for (idx = 0; idx < nr_args; ++idx) {
1660 if (sc->fmt)
1661 sc->arg_fmt[idx] = sc->fmt->arg[idx];
1662 }
1663
1664 sc->nr_args = nr_args;
1665 return 0;
1666}
1667
/*
 * Formatters selected by argument (field) name. Looked up with bsearch() in
 * __syscall_arg_fmt__find_by_name(), so entries must stay sorted by ->name.
 */
static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = {
	{ .name = "msr", .scnprintf = SCA_X86_MSR, .strtoul = STUL_X86_MSR, },
	{ .name = "vector", .scnprintf = SCA_X86_IRQ_VECTORS, .strtoul = STUL_X86_IRQ_VECTORS, },
};
1672
1673static int syscall_arg_fmt__cmp(const void *name, const void *fmtp)
1674{
1675 const struct syscall_arg_fmt *fmt = fmtp;
1676 return strcmp(name, fmt->name);
1677}
1678
1679static struct syscall_arg_fmt *
1680__syscall_arg_fmt__find_by_name(struct syscall_arg_fmt *fmts, const int nmemb, const char *name)
1681{
1682 return bsearch(name, fmts, nmemb, sizeof(struct syscall_arg_fmt), syscall_arg_fmt__cmp);
1683}
1684
1685static struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name)
1686{
1687 const int nmemb = ARRAY_SIZE(syscall_arg_fmts__by_name);
1688 return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
1689}
1690
1691static struct tep_format_field *
1692syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field)
1693{
1694 struct tep_format_field *last_field = NULL;
1695 int len;
1696
1697 for (; field; field = field->next, ++arg) {
1698 last_field = field;
1699
1700 if (arg->scnprintf)
1701 continue;
1702
1703 len = strlen(field->name);
1704
1705 if (strcmp(field->type, "const char *") == 0 &&
1706 ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
1707 strstr(field->name, "path") != NULL))
1708 arg->scnprintf = SCA_FILENAME;
1709 else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
1710 arg->scnprintf = SCA_PTR;
1711 else if (strcmp(field->type, "pid_t") == 0)
1712 arg->scnprintf = SCA_PID;
1713 else if (strcmp(field->type, "umode_t") == 0)
1714 arg->scnprintf = SCA_MODE_T;
1715 else if ((field->flags & TEP_FIELD_IS_ARRAY) && strstr(field->type, "char")) {
1716 arg->scnprintf = SCA_CHAR_ARRAY;
1717 arg->nr_entries = field->arraylen;
1718 } else if ((strcmp(field->type, "int") == 0 ||
1719 strcmp(field->type, "unsigned int") == 0 ||
1720 strcmp(field->type, "long") == 0) &&
1721 len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
1722
1723
1724
1725
1726
1727
1728
1729 arg->scnprintf = SCA_FD;
1730 } else {
1731 struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name);
1732
1733 if (fmt) {
1734 arg->scnprintf = fmt->scnprintf;
1735 arg->strtoul = fmt->strtoul;
1736 }
1737 }
1738 }
1739
1740 return last_field;
1741}
1742
1743static int syscall__set_arg_fmts(struct syscall *sc)
1744{
1745 struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args);
1746
1747 if (last_field)
1748 sc->args_size = last_field->offset + last_field->size;
1749
1750 return 0;
1751}
1752
/*
 * Fill in trace->syscalls.table[@id]: name, static format entry, tracepoint
 * format and per-argument formatters.
 *
 * Returns 0 on success, and also when the id has no name (the slot is then
 * marked ->nonexistent); -ENOMEM on allocation failure; the PTR_ERR() of the
 * tracepoint format when it cannot be read.
 */
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = syscalltbl__name(trace->sctbl, id);

#ifdef HAVE_SYSCALL_TABLE_SUPPORT
	/* First call: allocate zeroed slots for every id up to max_id. */
	if (trace->syscalls.table == NULL) {
		trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
		if (trace->syscalls.table == NULL)
			return -ENOMEM;
	}
#else
	if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) {
		/* Grow the table on demand so that it covers @id. */
		struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (table == NULL)
			return -ENOMEM;

		/* realloc() does not zero: clear everything on first allocation, else only the new tail. */
		if (trace->syscalls.table == NULL)
			memset(table, 0, (id + 1) * sizeof(*sc));
		else
			memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));

		trace->syscalls.table = table;
		trace->sctbl->syscalls.max_id = id;
	}
#endif
	sc = trace->syscalls.table + id;
	/* Already known to have no name: nothing to do. */
	if (sc->nonexistent)
		return 0;

	if (name == NULL) {
		sc->nonexistent = true;
		return 0;
	}

	sc->name = name;
	sc->fmt = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	/* Retry with the alias from the static format table, if any. */
	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	/* Without a tracepoint format, fall back to 6 arguments. */
	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
		return -ENOMEM;

	if (IS_ERR(sc->tp_format))
		return PTR_ERR(sc->tp_format);

	sc->args = sc->tp_format->format.fields;

	/* Skip a leading "__syscall_nr"/"nr" field: it is not a syscall argument. */
	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
		sc->args = sc->args->next;
		--sc->nr_args;
	}

	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
	sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");

	return syscall__set_arg_fmts(sc);
}
1825
1826static int evsel__init_tp_arg_scnprintf(struct evsel *evsel)
1827{
1828 struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
1829
1830 if (fmt != NULL) {
1831 syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields);
1832 return 0;
1833 }
1834
1835 return -ENOMEM;
1836}
1837
/*
 * qsort()/bsearch() comparator for arrays of int.
 *
 * Returns a negative, zero or positive value when *a is less than, equal to
 * or greater than *b. The result comes from comparisons rather than a
 * subtraction, which would overflow (undefined behavior) for operands that
 * are far apart, e.g. INT_MIN and 1.
 */
static int intcmp(const void *a, const void *b)
{
	const int one = *(const int *)a, another = *(const int *)b;

	return (one > another) - (one < another);
}
1844
1845static int trace__validate_ev_qualifier(struct trace *trace)
1846{
1847 int err = 0;
1848 bool printed_invalid_prefix = false;
1849 struct str_node *pos;
1850 size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier);
1851
1852 trace->ev_qualifier_ids.entries = malloc(nr_allocated *
1853 sizeof(trace->ev_qualifier_ids.entries[0]));
1854
1855 if (trace->ev_qualifier_ids.entries == NULL) {
1856 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1857 trace->output);
1858 err = -EINVAL;
1859 goto out;
1860 }
1861
1862 strlist__for_each_entry(pos, trace->ev_qualifier) {
1863 const char *sc = pos->s;
1864 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
1865
1866 if (id < 0) {
1867 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1868 if (id >= 0)
1869 goto matches;
1870
1871 if (!printed_invalid_prefix) {
1872 pr_debug("Skipping unknown syscalls: ");
1873 printed_invalid_prefix = true;
1874 } else {
1875 pr_debug(", ");
1876 }
1877
1878 pr_debug("%s", sc);
1879 continue;
1880 }
1881matches:
1882 trace->ev_qualifier_ids.entries[nr_used++] = id;
1883 if (match_next == -1)
1884 continue;
1885
1886 while (1) {
1887 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1888 if (id < 0)
1889 break;
1890 if (nr_allocated == nr_used) {
1891 void *entries;
1892
1893 nr_allocated += 8;
1894 entries = realloc(trace->ev_qualifier_ids.entries,
1895 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1896 if (entries == NULL) {
1897 err = -ENOMEM;
1898 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1899 goto out_free;
1900 }
1901 trace->ev_qualifier_ids.entries = entries;
1902 }
1903 trace->ev_qualifier_ids.entries[nr_used++] = id;
1904 }
1905 }
1906
1907 trace->ev_qualifier_ids.nr = nr_used;
1908 qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp);
1909out:
1910 if (printed_invalid_prefix)
1911 pr_debug("\n");
1912 return err;
1913out_free:
1914 zfree(&trace->ev_qualifier_ids.entries);
1915 trace->ev_qualifier_ids.nr = 0;
1916 goto out;
1917}
1918
1919static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
1920{
1921 bool in_ev_qualifier;
1922
1923 if (trace->ev_qualifier_ids.nr == 0)
1924 return true;
1925
1926 in_ev_qualifier = bsearch(&id, trace->ev_qualifier_ids.entries,
1927 trace->ev_qualifier_ids.nr, sizeof(int), intcmp) != NULL;
1928
1929 if (in_ev_qualifier)
1930 return !trace->not_ev_qualifier;
1931
1932 return trace->not_ev_qualifier;
1933}
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
1944{
1945 unsigned long val;
1946 unsigned char *p = arg->args + sizeof(unsigned long) * idx;
1947
1948 memcpy(&val, p, sizeof(val));
1949 return val;
1950}
1951
1952static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1953 struct syscall_arg *arg)
1954{
1955 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1956 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1957
1958 return scnprintf(bf, size, "arg%d: ", arg->idx);
1959}
1960
1961
1962
1963
1964
1965
1966static unsigned long syscall_arg_fmt__mask_val(struct syscall_arg_fmt *fmt, struct syscall_arg *arg, unsigned long val)
1967{
1968 if (fmt && fmt->mask_val)
1969 return fmt->mask_val(arg, val);
1970
1971 return val;
1972}
1973
1974static size_t syscall_arg_fmt__scnprintf_val(struct syscall_arg_fmt *fmt, char *bf, size_t size,
1975 struct syscall_arg *arg, unsigned long val)
1976{
1977 if (fmt && fmt->scnprintf) {
1978 arg->val = val;
1979 if (fmt->parm)
1980 arg->parm = fmt->parm;
1981 return fmt->scnprintf(bf, size, arg);
1982 }
1983 return scnprintf(bf, size, "%ld", val);
1984}
1985
/*
 * Format the arguments of syscall @sc into @bf (at most @size bytes) as a
 * ", "-separated list and return the number of characters written.
 *
 * @args is the raw argument payload, @augmented_args/@augmented_args_size
 * describe extra data following it (may be NULL/0).
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, void *augmented_args, int augmented_args_size,
				      struct trace *trace, struct thread *thread)
{
	size_t printed = 0;
	unsigned long val;
	u8 bit = 1;
	struct syscall_arg arg = {
		.args = args,
		.augmented = {
			.size = augmented_args_size,
			.args = augmented_args,
		},
		.idx = 0,
		.mask = 0,
		.trace = trace,
		.thread = thread,
		.show_string_prefix = trace->show_string_prefix,
	};
	struct thread_trace *ttrace = thread__priv(thread);

	/*
	 * Start without a return value formatter; an argument formatter may
	 * install one through syscall_arg__set_ret_scnprintf().
	 */
	ttrace->ret_scnprintf = NULL;

	if (sc->args != NULL) {
		struct tep_format_field *field;

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			/*
			 * Skip arguments whose bit is set in arg.mask.
			 * NOTE(review): nothing here sets the mask; presumably
			 * an earlier argument's formatter does -- confirm.
			 */
			if (arg.mask & bit)
				continue;

			arg.fmt = &sc->arg_fmt[arg.idx];
			val = syscall_arg__val(&arg, arg.idx);

			/* Give the formatter a chance to mask the raw value first. */
			val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val);

			/*
			 * Suppress zero values unless trace->show_zeros is set,
			 * or the formatter has ->parm set together with either
			 * ->show_zero or a strarray(s) printer.
			 */
			if (val == 0 &&
			    !trace->show_zeros &&
			    !(sc->arg_fmt &&
			      (sc->arg_fmt[arg.idx].show_zero ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
			       sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
			      sc->arg_fmt[arg.idx].parm))
				continue;

			printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");

			if (trace->show_arg_names)
				printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);

			printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx],
								  bf + printed, size - printed, &arg, val);
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * No tracepoint format could be read for this syscall: print
		 * all sc->nr_args arguments, labelled by syscall__scnprintf_name().
		 */
		while (arg.idx < sc->nr_args) {
			if (arg.mask & bit)
				goto next_arg;
			val = syscall_arg__val(&arg, arg.idx);
			if (printed)
				printed += scnprintf(bf + printed, size - printed, ", ");
			printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
			printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], bf + printed, size - printed, &arg, val);
next_arg:
			++arg.idx;
			bit <<= 1;
		}
	}

	return printed;
}
2074
/* Signature of the per-tracepoint sample handlers, e.g. trace__sys_enter(). */
typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
2078
2079static struct syscall *trace__syscall_info(struct trace *trace,
2080 struct evsel *evsel, int id)
2081{
2082 int err = 0;
2083
2084 if (id < 0) {
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096 if (verbose > 1) {
2097 static u64 n;
2098 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
2099 id, evsel__name(evsel), ++n);
2100 }
2101 return NULL;
2102 }
2103
2104 err = -EINVAL;
2105
2106#ifdef HAVE_SYSCALL_TABLE_SUPPORT
2107 if (id > trace->sctbl->syscalls.max_id) {
2108#else
2109 if (id >= trace->sctbl->syscalls.max_id) {
2110
2111
2112
2113
2114
2115 err = trace__read_syscall_info(trace, id);
2116 if (err)
2117#endif
2118 goto out_cant_read;
2119 }
2120
2121 if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
2122 (err = trace__read_syscall_info(trace, id)) != 0)
2123 goto out_cant_read;
2124
2125 if (trace->syscalls.table[id].name == NULL) {
2126 if (trace->syscalls.table[id].nonexistent)
2127 return NULL;
2128 goto out_cant_read;
2129 }
2130
2131 return &trace->syscalls.table[id];
2132
2133out_cant_read:
2134 if (verbose > 0) {
2135 char sbuf[STRERR_BUFSIZE];
2136 fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
2137 if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
2138 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
2139 fputs(" information\n", trace->output);
2140 }
2141 return NULL;
2142}
2143
/*
 * Per-thread, per-syscall statistics, maintained by thread__update_stats().
 *
 * stats:       running statistics of the call durations
 * nr_failures: number of calls that returned a negative value
 * max_errno:   number of elements in @errnos
 * errnos:      errnos[e - 1] counts the failures with errno e
 */
struct syscall_stats {
	struct stats stats;
	u64 nr_failures;
	int max_errno;
	u32 *errnos;
};
2150
2151static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
2152 int id, struct perf_sample *sample, long err, bool errno_summary)
2153{
2154 struct int_node *inode;
2155 struct syscall_stats *stats;
2156 u64 duration = 0;
2157
2158 inode = intlist__findnew(ttrace->syscall_stats, id);
2159 if (inode == NULL)
2160 return;
2161
2162 stats = inode->priv;
2163 if (stats == NULL) {
2164 stats = malloc(sizeof(*stats));
2165 if (stats == NULL)
2166 return;
2167
2168 stats->nr_failures = 0;
2169 stats->max_errno = 0;
2170 stats->errnos = NULL;
2171 init_stats(&stats->stats);
2172 inode->priv = stats;
2173 }
2174
2175 if (ttrace->entry_time && sample->time > ttrace->entry_time)
2176 duration = sample->time - ttrace->entry_time;
2177
2178 update_stats(&stats->stats, duration);
2179
2180 if (err < 0) {
2181 ++stats->nr_failures;
2182
2183 if (!errno_summary)
2184 return;
2185
2186 err = -err;
2187 if (err > stats->max_errno) {
2188 u32 *new_errnos = realloc(stats->errnos, err * sizeof(u32));
2189
2190 if (new_errnos) {
2191 memset(new_errnos + stats->max_errno, 0, (err - stats->max_errno) * sizeof(u32));
2192 } else {
2193 pr_debug("Not enough memory for errno stats for thread \"%s\"(%d/%d), results will be incomplete\n",
2194 thread__comm_str(thread), thread->pid_, thread->tid);
2195 return;
2196 }
2197
2198 stats->errnos = new_errnos;
2199 stats->max_errno = err;
2200 }
2201
2202 ++stats->errnos[err - 1];
2203 }
2204}
2205
2206static int trace__printf_interrupted_entry(struct trace *trace)
2207{
2208 struct thread_trace *ttrace;
2209 size_t printed;
2210 int len;
2211
2212 if (trace->failure_only || trace->current == NULL)
2213 return 0;
2214
2215 ttrace = thread__priv(trace->current);
2216
2217 if (!ttrace->entry_pending)
2218 return 0;
2219
2220 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
2221 printed += len = fprintf(trace->output, "%s)", ttrace->entry_str);
2222
2223 if (len < trace->args_alignment - 4)
2224 printed += fprintf(trace->output, "%-*s", trace->args_alignment - 4 - len, " ");
2225
2226 printed += fprintf(trace->output, " ...\n");
2227
2228 ttrace->entry_pending = false;
2229 ++trace->nr_events_printed;
2230
2231 return printed;
2232}
2233
2234static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
2235 struct perf_sample *sample, struct thread *thread)
2236{
2237 int printed = 0;
2238
2239 if (trace->print_sample) {
2240 double ts = (double)sample->time / NSEC_PER_MSEC;
2241
2242 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
2243 evsel__name(evsel), ts,
2244 thread__comm_str(thread),
2245 sample->pid, sample->tid, sample->cpu);
2246 }
2247
2248 return printed;
2249}
2250
2251static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
2252{
2253 void *augmented_args = NULL;
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268 int args_size = raw_augmented_args_size ?: sc->args_size;
2269
2270 *augmented_args_size = sample->raw_size - args_size;
2271 if (*augmented_args_size > 0)
2272 augmented_args = sample->raw_data + args_size;
2273
2274 return augmented_args;
2275}
2276
2277static void syscall__exit(struct syscall *sc)
2278{
2279 if (!sc)
2280 return;
2281
2282 free(sc->arg_fmt);
2283}
2284
/*
 * Handler for syscall entry samples.
 *
 * Formats "name(args" into the per-thread ttrace->entry_str buffer and
 * records the entry timestamp.  For syscalls flagged sc->is_exit the line is
 * printed right away, terminated by "= ?"; for all others the entry is only
 * marked as pending here.
 *
 * Returns 0 on success, -1 when there is no syscall info for the sample's id,
 * when the per-thread trace state can't be obtained or when the entry string
 * buffer can't be allocated.
 */
static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	int printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	int augmented_args_size = 0;
	void *augmented_args = NULL;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	trace__fprintf_sample(trace, evsel, sample, thread);

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* The per-thread entry buffer is allocated lazily, on first use. */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	/*
	 * Flush a still pending entry of the current thread before it gets
	 * overwritten, unless one of these filtering/summary modes is active.
	 */
	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
		trace__printf_interrupted_entry(trace);

	/*
	 * Only look for an augmented payload when the sample does not come
	 * from the plain sys_enter evsel.
	 * NOTE(review): presumably the plain sys_enter payload has a fixed
	 * size that would be mistaken for augmented data - confirm.
	 */
	if (evsel != trace->syscalls.events.sys_enter)
		augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, augmented_args, augmented_args_size, trace, thread);

	if (sc->is_exit) {
		/* Printed immediately, with "?" in place of a return value. */
		if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
			int alignment = 0;

			trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
			printed = fprintf(trace->output, "%s)", ttrace->entry_str);
			if (trace->args_alignment > printed)
				alignment = trace->args_alignment - printed;
			fprintf(trace->output, "%*s= ?\n", alignment, " ");
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	/* Remember the thread that issued the most recent syscall entry. */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2363
2364static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
2365 struct perf_sample *sample)
2366{
2367 struct thread_trace *ttrace;
2368 struct thread *thread;
2369 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2370 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2371 char msg[1024];
2372 void *args, *augmented_args = NULL;
2373 int augmented_args_size;
2374
2375 if (sc == NULL)
2376 return -1;
2377
2378 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2379 ttrace = thread__trace(thread, trace->output);
2380
2381
2382
2383
2384 if (ttrace == NULL)
2385 goto out_put;
2386
2387 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2388 augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
2389 syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
2390 fprintf(trace->output, "%s", msg);
2391 err = 0;
2392out_put:
2393 thread__put(thread);
2394 return err;
2395}
2396
2397static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
2398 struct perf_sample *sample,
2399 struct callchain_cursor *cursor)
2400{
2401 struct addr_location al;
2402 int max_stack = evsel->core.attr.sample_max_stack ?
2403 evsel->core.attr.sample_max_stack :
2404 trace->max_stack;
2405 int err;
2406
2407 if (machine__resolve(trace->host, &al, sample) < 0)
2408 return -1;
2409
2410 err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
2411 addr_location__put(&al);
2412 return err;
2413}
2414
2415static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
2416{
2417
2418 const unsigned int print_opts = EVSEL__PRINT_SYM |
2419 EVSEL__PRINT_DSO |
2420 EVSEL__PRINT_UNKNOWN_AS_ADDR;
2421
2422 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output);
2423}
2424
/*
 * Translate errno value @err into its name, using the errno table of the
 * architecture found in the perf environment attached to @evsel.
 */
static const char *errno_to_name(struct evsel *evsel, int err)
{
	const char *arch_name = perf_env__arch(evsel__env(evsel));

	return arch_syscalls__strerrno(arch_name, err);
}
2432
/*
 * Handler for syscall exit samples.
 *
 * Updates the per-thread summary stats when requested, associates a pending
 * vfs_getname pathname with the returned fd, applies the duration, min_stack,
 * summary_only and failure_only filters and, if the event survives them,
 * prints the entry line (or a "continued" marker) followed by the formatted
 * return value and, when one was resolved, the callchain.
 *
 * Returns 0 on success (including when the event was filtered out), -1 when
 * there is no syscall info for the sample's id or the per-thread trace state
 * can't be obtained.
 */
static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	bool duration_calculated = false;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
	int alignment = trace->args_alignment;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	trace__fprintf_sample(trace, evsel, sample, thread);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	if (trace->summary)
		thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary);

	/*
	 * A successful open-like syscall with a pathname captured by
	 * trace__vfs_getname(): remember which path the returned fd refers to.
	 */
	if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	/*
	 * The duration can only be computed when the matching entry was seen;
	 * without it the event is dropped if a duration filter is in place.
	 */
	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
		duration_calculated = true;
	} else if (trace->duration_filter)
		goto out;

	/*
	 * callchain_ret: 0 = no callchain, 1 = resolved and deep enough to be
	 * printed, < 0 = resolving it failed.
	 */
	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	if (trace->summary_only || (ret >= 0 && trace->failure_only))
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);

	if (ttrace->entry_pending) {
		printed = fprintf(trace->output, "%s", ttrace->entry_str);
	} else {
		/* The entry line is not pending anymore: print a continuation marker. */
		printed += fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		/* strlen("continued"), color_fprintf()'s return value isn't used */
		printed += 9;
		printed += fprintf(trace->output, "]: %s()", sc->name);
	}

	/* Account for the ')' printed below. */
	printed++;

	if (alignment > printed)
		alignment -= printed;
	else
		alignment = 0;

	fprintf(trace->output, ")%*s= ", alignment, " ");

	/*
	 * Return value formatting.  The errno_print and signed_print labels
	 * live inside this if/else chain and are also jumped to from other
	 * arms of it.
	 */
	if (sc->fmt == NULL) {
		if (ret < 0)
			goto errno_print;
signed_print:
		fprintf(trace->output, "%ld", ret);
	} else if (ret < 0) {
errno_print: {
		char bf[STRERR_BUFSIZE];
		const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
			   *e = errno_to_name(evsel, -ret);

		fprintf(trace->output, "-1 %s (%s)", e, emsg);
	}
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, "0 (Timeout)");
	else if (ttrace->ret_scnprintf) {
		char bf[1024];
		struct syscall_arg arg = {
			.val	= ret,
			.thread	= thread,
			.trace	= trace,
		};
		ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
		/* One-shot formatter: cleared once used. */
		ttrace->ret_scnprintf = NULL;
		fprintf(trace->output, "%s", bf);
	} else if (sc->fmt->hexret)
		fprintf(trace->output, "%#lx", ret);
	else if (sc->fmt->errpid) {
		struct thread *child = machine__find_thread(trace->host, ret, ret);

		/*
		 * NOTE(review): nothing at all is printed for the return
		 * value when the thread isn't found - confirm this is
		 * intended.
		 */
		if (child != NULL) {
			fprintf(trace->output, "%ld", ret);
			if (child->comm_set)
				fprintf(trace->output, " (%s)", thread__comm_str(child));
			thread__put(child);
		}
	} else
		goto signed_print;

	fputc('\n', trace->output);

	/*
	 * Only syscalls that got this far, i.e. that were not filtered out,
	 * count towards max_events.
	 */
	if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
		interrupted = true;

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2566
2567static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
2568 union perf_event *event __maybe_unused,
2569 struct perf_sample *sample)
2570{
2571 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2572 struct thread_trace *ttrace;
2573 size_t filename_len, entry_str_len, to_move;
2574 ssize_t remaining_space;
2575 char *pos;
2576 const char *filename = evsel__rawptr(evsel, sample, "pathname");
2577
2578 if (!thread)
2579 goto out;
2580
2581 ttrace = thread__priv(thread);
2582 if (!ttrace)
2583 goto out_put;
2584
2585 filename_len = strlen(filename);
2586 if (filename_len == 0)
2587 goto out_put;
2588
2589 if (ttrace->filename.namelen < filename_len) {
2590 char *f = realloc(ttrace->filename.name, filename_len + 1);
2591
2592 if (f == NULL)
2593 goto out_put;
2594
2595 ttrace->filename.namelen = filename_len;
2596 ttrace->filename.name = f;
2597 }
2598
2599 strcpy(ttrace->filename.name, filename);
2600 ttrace->filename.pending_open = true;
2601
2602 if (!ttrace->filename.ptr)
2603 goto out_put;
2604
2605 entry_str_len = strlen(ttrace->entry_str);
2606 remaining_space = trace__entry_str_size - entry_str_len - 1;
2607 if (remaining_space <= 0)
2608 goto out_put;
2609
2610 if (filename_len > (size_t)remaining_space) {
2611 filename += filename_len - remaining_space;
2612 filename_len = remaining_space;
2613 }
2614
2615 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1;
2616 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2617 memmove(pos + filename_len, pos, to_move);
2618 memcpy(pos, filename, filename_len);
2619
2620 ttrace->filename.ptr = 0;
2621 ttrace->filename.entry_str_pos = 0;
2622out_put:
2623 thread__put(thread);
2624out:
2625 return 0;
2626}
2627
2628static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
2629 union perf_event *event __maybe_unused,
2630 struct perf_sample *sample)
2631{
2632 u64 runtime = evsel__intval(evsel, sample, "runtime");
2633 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2634 struct thread *thread = machine__findnew_thread(trace->host,
2635 sample->pid,
2636 sample->tid);
2637 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2638
2639 if (ttrace == NULL)
2640 goto out_dump;
2641
2642 ttrace->runtime_ms += runtime_ms;
2643 trace->runtime_ms += runtime_ms;
2644out_put:
2645 thread__put(thread);
2646 return 0;
2647
2648out_dump:
2649 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2650 evsel->name,
2651 evsel__strval(evsel, sample, "comm"),
2652 (pid_t)evsel__intval(evsel, sample, "pid"),
2653 runtime,
2654 evsel__intval(evsel, sample, "vruntime"));
2655 goto out_put;
2656}
2657
2658static int bpf_output__printer(enum binary_printer_ops op,
2659 unsigned int val, void *extra __maybe_unused, FILE *fp)
2660{
2661 unsigned char ch = (unsigned char)val;
2662
2663 switch (op) {
2664 case BINARY_PRINT_CHAR_DATA:
2665 return fprintf(fp, "%c", isprint(ch) ? ch : '.');
2666 case BINARY_PRINT_DATA_BEGIN:
2667 case BINARY_PRINT_LINE_BEGIN:
2668 case BINARY_PRINT_ADDR:
2669 case BINARY_PRINT_NUM_DATA:
2670 case BINARY_PRINT_NUM_PAD:
2671 case BINARY_PRINT_SEP:
2672 case BINARY_PRINT_CHAR_PAD:
2673 case BINARY_PRINT_LINE_END:
2674 case BINARY_PRINT_DATA_END:
2675 default:
2676 break;
2677 }
2678
2679 return 0;
2680}
2681
2682static void bpf_output__fprintf(struct trace *trace,
2683 struct perf_sample *sample)
2684{
2685 binary__fprintf(sample->raw_data, sample->raw_size, 8,
2686 bpf_output__printer, NULL, trace->output);
2687 ++trace->nr_events_printed;
2688}
2689
2690static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, struct perf_sample *sample,
2691 struct thread *thread, void *augmented_args, int augmented_args_size)
2692{
2693 char bf[2048];
2694 size_t size = sizeof(bf);
2695 struct tep_format_field *field = evsel->tp_format->format.fields;
2696 struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel);
2697 size_t printed = 0;
2698 unsigned long val;
2699 u8 bit = 1;
2700 struct syscall_arg syscall_arg = {
2701 .augmented = {
2702 .size = augmented_args_size,
2703 .args = augmented_args,
2704 },
2705 .idx = 0,
2706 .mask = 0,
2707 .trace = trace,
2708 .thread = thread,
2709 .show_string_prefix = trace->show_string_prefix,
2710 };
2711
2712 for (; field && arg; field = field->next, ++syscall_arg.idx, bit <<= 1, ++arg) {
2713 if (syscall_arg.mask & bit)
2714 continue;
2715
2716 syscall_arg.len = 0;
2717 syscall_arg.fmt = arg;
2718 if (field->flags & TEP_FIELD_IS_ARRAY) {
2719 int offset = field->offset;
2720
2721 if (field->flags & TEP_FIELD_IS_DYNAMIC) {
2722 offset = format_field__intval(field, sample, evsel->needs_swap);
2723 syscall_arg.len = offset >> 16;
2724 offset &= 0xffff;
2725 }
2726
2727 val = (uintptr_t)(sample->raw_data + offset);
2728 } else
2729 val = format_field__intval(field, sample, evsel->needs_swap);
2730
2731
2732
2733
2734 val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val);
2735
2736
2737
2738
2739
2740
2741 if (val == 0 &&
2742 !trace->show_zeros &&
2743 !((arg->show_zero ||
2744 arg->scnprintf == SCA_STRARRAY ||
2745 arg->scnprintf == SCA_STRARRAYS) &&
2746 arg->parm))
2747 continue;
2748
2749 printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
2750
2751
2752
2753
2754
2755 if (1 || trace->show_arg_names)
2756 printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
2757
2758 printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val);
2759 }
2760
2761 return printed + fprintf(trace->output, "%s", bf);
2762}
2763
/*
 * Generic handler for non-syscall events.
 *
 * Prints one line per event - timestamp, optional duration placeholder,
 * comm/tid, then "name(payload)" - followed by the callchain when one was
 * resolved.  The payload is rendered, depending on the event, as syscall
 * arguments, as a binary dump of BPF output, or as tracepoint fields.
 *
 * Always returns 0.
 */
static int trace__event_handler(struct trace *trace, struct evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	struct thread *thread;
	int callchain_ret = 0;

	/*
	 * Drop samples for events that were disabled, which is what happens
	 * at the end of this function once the per event max_events is hit.
	 * NOTE(review): presumably such samples are leftovers still queued in
	 * the ring buffer - confirm.
	 */
	if (evsel->disabled)
		return 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	/*
	 * callchain_ret: 0 = no callchain, 1 = resolved and deep enough to be
	 * printed, < 0 = resolving it failed.
	 */
	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	trace__printf_interrupted_entry(trace);
	trace__fprintf_tstamp(trace, sample->time, trace->output);

	/* Blank filler printed where syscall lines show their duration. */
	if (trace->trace_syscalls && trace->show_duration)
		fprintf(trace->output, "(         ): ");

	if (thread)
		trace__fprintf_comm_tid(trace, thread, trace->output);

	if (evsel == trace->syscalls.events.augmented) {
		int id = perf_evsel__sc_tp_uint(evsel, id, sample);
		struct syscall *sc = trace__syscall_info(trace, evsel, id);

		if (sc) {
			fprintf(trace->output, "%s(", sc->name);
			trace__fprintf_sys_enter(trace, evsel, sample);
			fputc(')', trace->output);
			goto newline;
		}

		/*
		 * No syscall info for this id: fall through and print it
		 * like any other event.
		 */
	}

	fprintf(trace->output, "%s(", evsel->name);

	if (evsel__is_bpf_output(evsel)) {
		bpf_output__fprintf(trace, sample);
	} else if (evsel->tp_format) {
		/*
		 * sys_enter_* tracepoints are first tried as syscall entries;
		 * everything else, or a failure there, goes through the
		 * generic tracepoint printers.
		 */
		if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
		    trace__fprintf_sys_enter(trace, evsel, sample)) {
			if (trace->libtraceevent_print) {
				event_format__fprintf(evsel->tp_format, sample->cpu,
						      sample->raw_data, sample->raw_size,
						      trace->output);
			} else {
				trace__fprintf_tp_fields(trace, evsel, sample, thread, NULL, 0);
			}
		}
	}

newline:
	fprintf(trace->output, ")\n");

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));

	++trace->nr_events_printed;

	/* Per event limit reached: stop and close this event. */
	if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
		evsel__disable(evsel);
		evsel__close(evsel);
	}
out:
	thread__put(thread);
	return 0;
}
2852
2853static void print_location(FILE *f, struct perf_sample *sample,
2854 struct addr_location *al,
2855 bool print_dso, bool print_sym)
2856{
2857
2858 if ((verbose > 0 || print_dso) && al->map)
2859 fprintf(f, "%s@", al->map->dso->long_name);
2860
2861 if ((verbose > 0 || print_sym) && al->sym)
2862 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2863 al->addr - al->sym->start);
2864 else if (al->map)
2865 fprintf(f, "0x%" PRIx64, al->addr);
2866 else
2867 fprintf(f, "0x%" PRIx64, sample->addr);
2868}
2869
/*
 * Handler for the page fault software events.
 *
 * Bumps the per-thread major/minor fault counter and, unless in summary_only
 * mode, prints a line of the form
 *
 *   <head>majfault|minfault [<code location>] => <data location> (<type><level>)
 *
 * followed by the callchain when one was resolved.
 *
 * Returns 0 on success, -1 when the callchain is shallower than
 * trace->min_stack or the per-thread trace state can't be obtained.
 */
static int trace__pgfault(struct trace *trace,
			  struct evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';
	struct thread_trace *ttrace;
	int err = -1;
	int callchain_ret = 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	/*
	 * callchain_ret: 0 = no callchain, 1 = resolved and deep enough to be
	 * printed, < 0 = resolving it failed.
	 */
	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out_put;
			callchain_ret = 1;
		}
	}

	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	/* Where the faulting instruction is. */
	thread__find_symbol(thread, sample->cpumode, sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	/* What address was being accessed. */
	thread__find_symbol(thread, sample->cpumode, sample->addr, &al);

	if (!al.map) {
		/*
		 * NOTE(review): this repeats the lookup just above with
		 * identical arguments - confirm whether a different kind of
		 * lookup was intended for the 'x' case.
		 */
		thread__find_symbol(thread, sample->cpumode, sample->addr, &al);

		if (al.map)
			map_type = 'x';
		else
			map_type = '?';
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));

	++trace->nr_events_printed;
out:
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2944
2945static void trace__set_base_time(struct trace *trace,
2946 struct evsel *evsel,
2947 struct perf_sample *sample)
2948{
2949
2950
2951
2952
2953
2954
2955
2956
2957 if (trace->base_time == 0 && !trace->full_time &&
2958 (evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
2959 trace->base_time = sample->time;
2960}
2961
2962static int trace__process_sample(struct perf_tool *tool,
2963 union perf_event *event,
2964 struct perf_sample *sample,
2965 struct evsel *evsel,
2966 struct machine *machine __maybe_unused)
2967{
2968 struct trace *trace = container_of(tool, struct trace, tool);
2969 struct thread *thread;
2970 int err = 0;
2971
2972 tracepoint_handler handler = evsel->handler;
2973
2974 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2975 if (thread && thread__is_filtered(thread))
2976 goto out;
2977
2978 trace__set_base_time(trace, evsel, sample);
2979
2980 if (handler) {
2981 ++trace->nr_events;
2982 handler(trace, evsel, event, sample);
2983 }
2984out:
2985 thread__put(thread);
2986 return err;
2987}
2988
2989static int trace__record(struct trace *trace, int argc, const char **argv)
2990{
2991 unsigned int rec_argc, i, j;
2992 const char **rec_argv;
2993 const char * const record_args[] = {
2994 "record",
2995 "-R",
2996 "-m", "1024",
2997 "-c", "1",
2998 };
2999 pid_t pid = getpid();
3000 char *filter = asprintf__tp_filter_pids(1, &pid);
3001 const char * const sc_args[] = { "-e", };
3002 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
3003 const char * const majpf_args[] = { "-e", "major-faults" };
3004 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
3005 const char * const minpf_args[] = { "-e", "minor-faults" };
3006 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
3007 int err = -1;
3008
3009
3010 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 3 +
3011 majpf_args_nr + minpf_args_nr + argc;
3012 rec_argv = calloc(rec_argc + 1, sizeof(char *));
3013
3014 if (rec_argv == NULL || filter == NULL)
3015 goto out_free;
3016
3017 j = 0;
3018 for (i = 0; i < ARRAY_SIZE(record_args); i++)
3019 rec_argv[j++] = record_args[i];
3020
3021 if (trace->trace_syscalls) {
3022 for (i = 0; i < sc_args_nr; i++)
3023 rec_argv[j++] = sc_args[i];
3024
3025
3026 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
3027 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
3028 else if (is_valid_tracepoint("syscalls:sys_enter"))
3029 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
3030 else {
3031 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
3032 goto out_free;
3033 }
3034 }
3035
3036 rec_argv[j++] = "--filter";
3037 rec_argv[j++] = filter;
3038
3039 if (trace->trace_pgfaults & TRACE_PFMAJ)
3040 for (i = 0; i < majpf_args_nr; i++)
3041 rec_argv[j++] = majpf_args[i];
3042
3043 if (trace->trace_pgfaults & TRACE_PFMIN)
3044 for (i = 0; i < minpf_args_nr; i++)
3045 rec_argv[j++] = minpf_args[i];
3046
3047 for (i = 0; i < (unsigned int)argc; i++)
3048 rec_argv[j++] = argv[i];
3049
3050 err = cmd_record(j, rec_argv);
3051out_free:
3052 free(filter);
3053 free(rec_argv);
3054 return err;
3055}
3056
3057static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
3058
3059static bool evlist__add_vfs_getname(struct evlist *evlist)
3060{
3061 bool found = false;
3062 struct evsel *evsel, *tmp;
3063 struct parse_events_error err;
3064 int ret;
3065
3066 bzero(&err, sizeof(err));
3067 ret = parse_events(evlist, "probe:vfs_getname*", &err);
3068 if (ret) {
3069 free(err.str);
3070 free(err.help);
3071 free(err.first_str);
3072 free(err.first_help);
3073 return false;
3074 }
3075
3076 evlist__for_each_entry_safe(evlist, evsel, tmp) {
3077 if (!strstarts(evsel__name(evsel), "probe:vfs_getname"))
3078 continue;
3079
3080 if (evsel__field(evsel, "pathname")) {
3081 evsel->handler = trace__vfs_getname;
3082 found = true;
3083 continue;
3084 }
3085
3086 list_del_init(&evsel->core.node);
3087 evsel->evlist = NULL;
3088 evsel__delete(evsel);
3089 }
3090
3091 return found;
3092}
3093
3094static struct evsel *evsel__new_pgfault(u64 config)
3095{
3096 struct evsel *evsel;
3097 struct perf_event_attr attr = {
3098 .type = PERF_TYPE_SOFTWARE,
3099 .mmap_data = 1,
3100 };
3101
3102 attr.config = config;
3103 attr.sample_period = 1;
3104
3105 event_attr_init(&attr);
3106
3107 evsel = evsel__new(&attr);
3108 if (evsel)
3109 evsel->handler = trace__pgfault;
3110
3111 return evsel;
3112}
3113
3114static void evlist__free_syscall_tp_fields(struct evlist *evlist)
3115{
3116 struct evsel *evsel;
3117
3118 evlist__for_each_entry(evlist, evsel) {
3119 struct evsel_trace *et = evsel->priv;
3120
3121 if (!et || !evsel->tp_format || strcmp(evsel->tp_format->system, "syscalls"))
3122 continue;
3123
3124 free(et->fmt);
3125 free(et);
3126 }
3127}
3128
3129static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
3130{
3131 const u32 type = event->header.type;
3132 struct evsel *evsel;
3133
3134 if (type != PERF_RECORD_SAMPLE) {
3135 trace__process_event(trace, trace->host, event, sample);
3136 return;
3137 }
3138
3139 evsel = evlist__id2evsel(trace->evlist, sample->id);
3140 if (evsel == NULL) {
3141 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
3142 return;
3143 }
3144
3145 if (evswitch__discard(&trace->evswitch, evsel))
3146 return;
3147
3148 trace__set_base_time(trace, evsel, sample);
3149
3150 if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
3151 sample->raw_data == NULL) {
3152 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
3153 evsel__name(evsel), sample->tid,
3154 sample->cpu, sample->raw_size);
3155 } else {
3156 tracepoint_handler handler = evsel->handler;
3157 handler(trace, evsel, event, sample);
3158 }
3159
3160 if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
3161 interrupted = true;
3162}
3163
3164static int trace__add_syscall_newtp(struct trace *trace)
3165{
3166 int ret = -1;
3167 struct evlist *evlist = trace->evlist;
3168 struct evsel *sys_enter, *sys_exit;
3169
3170 sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
3171 if (sys_enter == NULL)
3172 goto out;
3173
3174 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
3175 goto out_delete_sys_enter;
3176
3177 sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
3178 if (sys_exit == NULL)
3179 goto out_delete_sys_enter;
3180
3181 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
3182 goto out_delete_sys_exit;
3183
3184 evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
3185 evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
3186
3187 evlist__add(evlist, sys_enter);
3188 evlist__add(evlist, sys_exit);
3189
3190 if (callchain_param.enabled && !trace->kernel_syscallchains) {
3191
3192
3193
3194
3195
3196 sys_exit->core.attr.exclude_callchain_kernel = 1;
3197 }
3198
3199 trace->syscalls.events.sys_enter = sys_enter;
3200 trace->syscalls.events.sys_exit = sys_exit;
3201
3202 ret = 0;
3203out:
3204 return ret;
3205
3206out_delete_sys_exit:
3207 evsel__delete_priv(sys_exit);
3208out_delete_sys_enter:
3209 evsel__delete_priv(sys_enter);
3210 goto out;
3211}
3212
3213static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
3214{
3215 int err = -1;
3216 struct evsel *sys_exit;
3217 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
3218 trace->ev_qualifier_ids.nr,
3219 trace->ev_qualifier_ids.entries);
3220
3221 if (filter == NULL)
3222 goto out_enomem;
3223
3224 if (!evsel__append_tp_filter(trace->syscalls.events.sys_enter, filter)) {
3225 sys_exit = trace->syscalls.events.sys_exit;
3226 err = evsel__append_tp_filter(sys_exit, filter);
3227 }
3228
3229 free(filter);
3230out:
3231 return err;
3232out_enomem:
3233 errno = ENOMEM;
3234 goto out;
3235}
3236
3237#ifdef HAVE_LIBBPF_SUPPORT
3238static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
3239{
3240 if (trace->bpf_obj == NULL)
3241 return NULL;
3242
3243 return bpf_object__find_map_by_name(trace->bpf_obj, name);
3244}
3245
3246static void trace__set_bpf_map_filtered_pids(struct trace *trace)
3247{
3248 trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
3249}
3250
3251static void trace__set_bpf_map_syscalls(struct trace *trace)
3252{
3253 trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
3254 trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
3255 trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
3256}
3257
3258static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
3259{
3260 if (trace->bpf_obj == NULL)
3261 return NULL;
3262
3263 return bpf_object__find_program_by_title(trace->bpf_obj, name);
3264}
3265
3266static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
3267 const char *prog_name, const char *type)
3268{
3269 struct bpf_program *prog;
3270
3271 if (prog_name == NULL) {
3272 char default_prog_name[256];
3273 scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
3274 prog = trace__find_bpf_program_by_title(trace, default_prog_name);
3275 if (prog != NULL)
3276 goto out_found;
3277 if (sc->fmt && sc->fmt->alias) {
3278 scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
3279 prog = trace__find_bpf_program_by_title(trace, default_prog_name);
3280 if (prog != NULL)
3281 goto out_found;
3282 }
3283 goto out_unaugmented;
3284 }
3285
3286 prog = trace__find_bpf_program_by_title(trace, prog_name);
3287
3288 if (prog != NULL) {
3289out_found:
3290 return prog;
3291 }
3292
3293 pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
3294 prog_name, type, sc->name);
3295out_unaugmented:
3296 return trace->syscalls.unaugmented_prog;
3297}
3298
3299static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
3300{
3301 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3302
3303 if (sc == NULL)
3304 return;
3305
3306 sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter");
3307 sc->bpf_prog.sys_exit = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit : NULL, "exit");
3308}
3309
3310static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
3311{
3312 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3313 return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
3314}
3315
3316static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
3317{
3318 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3319 return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
3320}
3321
3322static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
3323{
3324 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3325 int arg = 0;
3326
3327 if (sc == NULL)
3328 goto out;
3329
3330 for (; arg < sc->nr_args; ++arg) {
3331 entry->string_args_len[arg] = 0;
3332 if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) {
3333
3334 entry->string_args_len[arg] = PATH_MAX;
3335 }
3336 }
3337out:
3338 for (; arg < 6; ++arg)
3339 entry->string_args_len[arg] = 0;
3340}
3341static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
3342{
3343 int fd = bpf_map__fd(trace->syscalls.map);
3344 struct bpf_map_syscall_entry value = {
3345 .enabled = !trace->not_ev_qualifier,
3346 };
3347 int err = 0;
3348 size_t i;
3349
3350 for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
3351 int key = trace->ev_qualifier_ids.entries[i];
3352
3353 if (value.enabled) {
3354 trace__init_bpf_map_syscall_args(trace, key, &value);
3355 trace__init_syscall_bpf_progs(trace, key);
3356 }
3357
3358 err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
3359 if (err)
3360 break;
3361 }
3362
3363 return err;
3364}
3365
3366static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
3367{
3368 int fd = bpf_map__fd(trace->syscalls.map);
3369 struct bpf_map_syscall_entry value = {
3370 .enabled = enabled,
3371 };
3372 int err = 0, key;
3373
3374 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3375 if (enabled)
3376 trace__init_bpf_map_syscall_args(trace, key, &value);
3377
3378 err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
3379 if (err)
3380 break;
3381 }
3382
3383 return err;
3384}
3385
3386static int trace__init_syscalls_bpf_map(struct trace *trace)
3387{
3388 bool enabled = true;
3389
3390 if (trace->ev_qualifier_ids.nr)
3391 enabled = trace->not_ev_qualifier;
3392
3393 return __trace__init_syscalls_bpf_map(trace, enabled);
3394}
3395
3396static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
3397{
3398 struct tep_format_field *field, *candidate_field;
3399 int id;
3400
3401
3402
3403
3404 for (field = sc->args; field; field = field->next) {
3405 if (field->flags & TEP_FIELD_IS_POINTER)
3406 goto try_to_find_pair;
3407 }
3408
3409 return NULL;
3410
3411try_to_find_pair:
3412 for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
3413 struct syscall *pair = trace__syscall_info(trace, NULL, id);
3414 struct bpf_program *pair_prog;
3415 bool is_candidate = false;
3416
3417 if (pair == NULL || pair == sc ||
3418 pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
3419 continue;
3420
3421 for (field = sc->args, candidate_field = pair->args;
3422 field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
3423 bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
3424 candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
3425
3426 if (is_pointer) {
3427 if (!candidate_is_pointer) {
3428
3429 continue;
3430 }
3431 } else {
3432 if (candidate_is_pointer) {
3433
3434 goto next_candidate;
3435 }
3436 continue;
3437 }
3438
3439 if (strcmp(field->type, candidate_field->type))
3440 goto next_candidate;
3441
3442 is_candidate = true;
3443 }
3444
3445 if (!is_candidate)
3446 goto next_candidate;
3447
3448
3449
3450
3451
3452
3453 if (candidate_field) {
3454 for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
3455 if (candidate_field->flags & TEP_FIELD_IS_POINTER)
3456 goto next_candidate;
3457 }
3458
3459 pair_prog = pair->bpf_prog.sys_enter;
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469 if (pair_prog == NULL) {
3470 pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
3471 if (pair_prog == trace->syscalls.unaugmented_prog)
3472 goto next_candidate;
3473 }
3474
3475 pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
3476 return pair_prog;
3477 next_candidate:
3478 continue;
3479 }
3480
3481 return NULL;
3482}
3483
3484static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
3485{
3486 int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
3487 map_exit_fd = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
3488 int err = 0, key;
3489
3490 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3491 int prog_fd;
3492
3493 if (!trace__syscall_enabled(trace, key))
3494 continue;
3495
3496 trace__init_syscall_bpf_progs(trace, key);
3497
3498
3499 prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
3500 err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3501 if (err)
3502 break;
3503 prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
3504 err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
3505 if (err)
3506 break;
3507 }
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3538 struct syscall *sc = trace__syscall_info(trace, NULL, key);
3539 struct bpf_program *pair_prog;
3540 int prog_fd;
3541
3542 if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
3543 continue;
3544
3545
3546
3547
3548
3549 if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
3550 continue;
3551
3552
3553
3554
3555
3556 pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
3557 if (pair_prog == NULL)
3558 continue;
3559
3560 sc->bpf_prog.sys_enter = pair_prog;
3561
3562
3563
3564
3565
3566 prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
3567 err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3568 if (err)
3569 break;
3570 }
3571
3572
3573 return err;
3574}
3575
3576static void trace__delete_augmented_syscalls(struct trace *trace)
3577{
3578 struct evsel *evsel, *tmp;
3579
3580 evlist__remove(trace->evlist, trace->syscalls.events.augmented);
3581 evsel__delete(trace->syscalls.events.augmented);
3582 trace->syscalls.events.augmented = NULL;
3583
3584 evlist__for_each_entry_safe(trace->evlist, tmp, evsel) {
3585 if (evsel->bpf_obj == trace->bpf_obj) {
3586 evlist__remove(trace->evlist, evsel);
3587 evsel__delete(evsel);
3588 }
3589
3590 }
3591
3592 bpf_object__close(trace->bpf_obj);
3593 trace->bpf_obj = NULL;
3594}
3595#else
3596static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused,
3597 const char *name __maybe_unused)
3598{
3599 return NULL;
3600}
3601
3602static void trace__set_bpf_map_filtered_pids(struct trace *trace __maybe_unused)
3603{
3604}
3605
3606static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused)
3607{
3608}
3609
3610static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
3611{
3612 return 0;
3613}
3614
3615static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
3616{
3617 return 0;
3618}
3619
3620static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
3621 const char *name __maybe_unused)
3622{
3623 return NULL;
3624}
3625
3626static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
3627{
3628 return 0;
3629}
3630
3631static void trace__delete_augmented_syscalls(struct trace *trace __maybe_unused)
3632{
3633}
3634#endif
3635
3636static bool trace__only_augmented_syscalls_evsels(struct trace *trace)
3637{
3638 struct evsel *evsel;
3639
3640 evlist__for_each_entry(trace->evlist, evsel) {
3641 if (evsel == trace->syscalls.events.augmented ||
3642 evsel->bpf_obj == trace->bpf_obj)
3643 continue;
3644
3645 return false;
3646 }
3647
3648 return true;
3649}
3650
3651static int trace__set_ev_qualifier_filter(struct trace *trace)
3652{
3653 if (trace->syscalls.map)
3654 return trace__set_ev_qualifier_bpf_filter(trace);
3655 if (trace->syscalls.events.sys_enter)
3656 return trace__set_ev_qualifier_tp_filter(trace);
3657 return 0;
3658}
3659
3660static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused,
3661 size_t npids __maybe_unused, pid_t *pids __maybe_unused)
3662{
3663 int err = 0;
3664#ifdef HAVE_LIBBPF_SUPPORT
3665 bool value = true;
3666 int map_fd = bpf_map__fd(map);
3667 size_t i;
3668
3669 for (i = 0; i < npids; ++i) {
3670 err = bpf_map_update_elem(map_fd, &pids[i], &value, BPF_ANY);
3671 if (err)
3672 break;
3673 }
3674#endif
3675 return err;
3676}
3677
3678static int trace__set_filter_loop_pids(struct trace *trace)
3679{
3680 unsigned int nr = 1, err;
3681 pid_t pids[32] = {
3682 getpid(),
3683 };
3684 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
3685
3686 while (thread && nr < ARRAY_SIZE(pids)) {
3687 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
3688
3689 if (parent == NULL)
3690 break;
3691
3692 if (!strcmp(thread__comm_str(parent), "sshd") ||
3693 strstarts(thread__comm_str(parent), "gnome-terminal")) {
3694 pids[nr++] = parent->tid;
3695 break;
3696 }
3697 thread = parent;
3698 }
3699
3700 err = evlist__append_tp_filter_pids(trace->evlist, nr, pids);
3701 if (!err && trace->filter_pids.map)
3702 err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
3703
3704 return err;
3705}
3706
3707static int trace__set_filter_pids(struct trace *trace)
3708{
3709 int err = 0;
3710
3711
3712
3713
3714
3715
3716 if (trace->filter_pids.nr > 0) {
3717 err = evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
3718 trace->filter_pids.entries);
3719 if (!err && trace->filter_pids.map) {
3720 err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
3721 trace->filter_pids.entries);
3722 }
3723 } else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) {
3724 err = trace__set_filter_loop_pids(trace);
3725 }
3726
3727 return err;
3728}
3729
3730static int __trace__deliver_event(struct trace *trace, union perf_event *event)
3731{
3732 struct evlist *evlist = trace->evlist;
3733 struct perf_sample sample;
3734 int err = evlist__parse_sample(evlist, event, &sample);
3735
3736 if (err)
3737 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
3738 else
3739 trace__handle_event(trace, event, &sample);
3740
3741 return 0;
3742}
3743
3744static int __trace__flush_events(struct trace *trace)
3745{
3746 u64 first = ordered_events__first_time(&trace->oe.data);
3747 u64 flush = trace->oe.last - NSEC_PER_SEC;
3748
3749
3750 if (first && first < flush)
3751 return ordered_events__flush_time(&trace->oe.data, flush);
3752
3753 return 0;
3754}
3755
3756static int trace__flush_events(struct trace *trace)
3757{
3758 return !trace->sort_events ? 0 : __trace__flush_events(trace);
3759}
3760
3761static int trace__deliver_event(struct trace *trace, union perf_event *event)
3762{
3763 int err;
3764
3765 if (!trace->sort_events)
3766 return __trace__deliver_event(trace, event);
3767
3768 err = evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
3769 if (err && err != -1)
3770 return err;
3771
3772 err = ordered_events__queue(&trace->oe.data, event, trace->oe.last, 0);
3773 if (err)
3774 return err;
3775
3776 return trace__flush_events(trace);
3777}
3778
3779static int ordered_events__deliver_event(struct ordered_events *oe,
3780 struct ordered_event *event)
3781{
3782 struct trace *trace = container_of(oe, struct trace, oe.data);
3783
3784 return __trace__deliver_event(trace, event->event);
3785}
3786
3787static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg)
3788{
3789 struct tep_format_field *field;
3790 struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
3791
3792 if (evsel->tp_format == NULL || fmt == NULL)
3793 return NULL;
3794
3795 for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt)
3796 if (strcmp(field->name, arg) == 0)
3797 return fmt;
3798
3799 return NULL;
3800}
3801
3802static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel)
3803{
3804 char *tok, *left = evsel->filter, *new_filter = evsel->filter;
3805
3806 while ((tok = strpbrk(left, "=<>!")) != NULL) {
3807 char *right = tok + 1, *right_end;
3808
3809 if (*right == '=')
3810 ++right;
3811
3812 while (isspace(*right))
3813 ++right;
3814
3815 if (*right == '\0')
3816 break;
3817
3818 while (!isalpha(*left))
3819 if (++left == tok) {
3820
3821
3822
3823
3824 return 0;
3825 }
3826
3827 right_end = right + 1;
3828 while (isalnum(*right_end) || *right_end == '_' || *right_end == '|')
3829 ++right_end;
3830
3831 if (isalpha(*right)) {
3832 struct syscall_arg_fmt *fmt;
3833 int left_size = tok - left,
3834 right_size = right_end - right;
3835 char arg[128];
3836
3837 while (isspace(left[left_size - 1]))
3838 --left_size;
3839
3840 scnprintf(arg, sizeof(arg), "%.*s", left_size, left);
3841
3842 fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg);
3843 if (fmt == NULL) {
3844 pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
3845 arg, evsel->name, evsel->filter);
3846 return -1;
3847 }
3848
3849 pr_debug2("trying to expand \"%s\" \"%.*s\" \"%.*s\" -> ",
3850 arg, (int)(right - tok), tok, right_size, right);
3851
3852 if (fmt->strtoul) {
3853 u64 val;
3854 struct syscall_arg syscall_arg = {
3855 .parm = fmt->parm,
3856 };
3857
3858 if (fmt->strtoul(right, right_size, &syscall_arg, &val)) {
3859 char *n, expansion[19];
3860 int expansion_lenght = scnprintf(expansion, sizeof(expansion), "%#" PRIx64, val);
3861 int expansion_offset = right - new_filter;
3862
3863 pr_debug("%s", expansion);
3864
3865 if (asprintf(&n, "%.*s%s%s", expansion_offset, new_filter, expansion, right_end) < 0) {
3866 pr_debug(" out of memory!\n");
3867 free(new_filter);
3868 return -1;
3869 }
3870 if (new_filter != evsel->filter)
3871 free(new_filter);
3872 left = n + expansion_offset + expansion_lenght;
3873 new_filter = n;
3874 } else {
3875 pr_err("\"%.*s\" not found for \"%s\" in \"%s\", can't set filter \"%s\"\n",
3876 right_size, right, arg, evsel->name, evsel->filter);
3877 return -1;
3878 }
3879 } else {
3880 pr_err("No resolver (strtoul) for \"%s\" in \"%s\", can't set filter \"%s\"\n",
3881 arg, evsel->name, evsel->filter);
3882 return -1;
3883 }
3884
3885 pr_debug("\n");
3886 } else {
3887 left = right_end;
3888 }
3889 }
3890
3891 if (new_filter != evsel->filter) {
3892 pr_debug("New filter for %s: %s\n", evsel->name, new_filter);
3893 evsel__set_filter(evsel, new_filter);
3894 free(new_filter);
3895 }
3896
3897 return 0;
3898}
3899
3900static int trace__expand_filters(struct trace *trace, struct evsel **err_evsel)
3901{
3902 struct evlist *evlist = trace->evlist;
3903 struct evsel *evsel;
3904
3905 evlist__for_each_entry(evlist, evsel) {
3906 if (evsel->filter == NULL)
3907 continue;
3908
3909 if (trace__expand_filter(trace, evsel)) {
3910 *err_evsel = evsel;
3911 return -1;
3912 }
3913 }
3914
3915 return 0;
3916}
3917
/*
 * Live tracing: add the requested events to trace->evlist, open, filter and
 * mmap them, optionally start a workload (when @argc > 0, using @argv), then
 * read events from the ring buffers and deliver them until the loop at
 * "again" stops.  Finally print the summary/tool stats (only when err == 0)
 * and release the evlist.
 *
 * Returns the last value stored in err: 0 on success, non-zero otherwise.
 * All exits go through out_delete_evlist, which deletes the evlist and sets
 * trace->evlist to NULL.
 */
3918static int trace__run(struct trace *trace, int argc, const char **argv)
3919{
3920 struct evlist *evlist = trace->evlist;
3921 struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
3922 int err = -1, i;
3923 unsigned long before;
 /* A command line was given: it is run as the workload to trace. */
3924 const bool forks = argc > 0;
 /* Set once the events were disabled / the pollfds went away; stops polling. */
3925 bool draining = false;
3926
3927 trace->live = true;
3928
 /* Syscall tracepoints and vfs_getname are only added when not in raw augmented syscalls mode. */
3929 if (!trace->raw_augmented_syscalls) {
3930 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
3931 goto out_error_raw_syscalls;
3932
3933 if (trace->trace_syscalls)
3934 trace->vfs_getname = evlist__add_vfs_getname(evlist);
3935 }
3936
 /* Optional major/minor page fault software events. */
3937 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
3938 pgfault_maj = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
3939 if (pgfault_maj == NULL)
3940 goto out_error_mem;
3941 evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
3942 evlist__add(evlist, pgfault_maj);
3943 }
3944
3945 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
3946 pgfault_min = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
3947 if (pgfault_min == NULL)
3948 goto out_error_mem;
3949 evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
3950 evlist__add(evlist, pgfault_min);
3951 }
3952
3953 if (trace->sched &&
3954 evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
3955 goto out_error_sched_stat_runtime;
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981 if (trace->cgroup)
3982 evlist__set_default_cgroup(trace->evlist, trace->cgroup);
3983
3984 err = evlist__create_maps(evlist, &trace->opts.target);
3985 if (err < 0) {
3986 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
3987 goto out_delete_evlist;
3988 }
3989
3990 err = trace__symbols_init(trace, evlist);
3991 if (err < 0) {
3992 fprintf(trace->output, "Problems initializing symbol libraries!\n");
3993 goto out_delete_evlist;
3994 }
3995
3996 evlist__config(evlist, &trace->opts, &callchain_param);
3997
 /* The workload is only prepared here; it is started further down, after the mmap. */
3998 if (forks) {
3999 err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL);
4000 if (err < 0) {
4001 fprintf(trace->output, "Couldn't run the workload!\n");
4002 goto out_delete_evlist;
4003 }
4004 }
4005
4006 err = evlist__open(evlist);
4007 if (err < 0)
4008 goto out_error_open;
4009
4010 err = bpf__apply_obj_config();
4011 if (err) {
4012 char errbuf[BUFSIZ];
4013
4014 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
4015 pr_err("ERROR: Apply config to BPF failed: %s\n",
4016 errbuf);
 /* NOTE(review): this path also prints the evlist open error message built from errno -- confirm intended. */
4017 goto out_error_open;
4018 }
4019
4020 err = trace__set_filter_pids(trace);
 /* NOTE(review): any failure here is reported as "Not enough memory to run!". */
4021 if (err < 0)
4022 goto out_error_mem;
4023
 /* The return values of the two BPF map initializers below are not checked. */
4024 if (trace->syscalls.map)
4025 trace__init_syscalls_bpf_map(trace);
4026
4027 if (trace->syscalls.prog_array.sys_enter)
4028 trace__init_syscalls_bpf_prog_array_maps(trace);
4029
4030 if (trace->ev_qualifier_ids.nr > 0) {
4031 err = trace__set_ev_qualifier_filter(trace);
4032 if (err < 0)
4033 goto out_errno;
4034
4035 if (trace->syscalls.events.sys_exit) {
4036 pr_debug("event qualifier tracepoint filter: %s\n",
4037 trace->syscalls.events.sys_exit->filter);
4038 }
4039 }
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
 /* fd -> path handling is disabled when the "close" syscall is not enabled. */
4052 trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));
4053
 /* On failure both calls below store the offending evsel in 'evsel'. */
4054 err = trace__expand_filters(trace, &evsel);
4055 if (err)
4056 goto out_delete_evlist;
4057 err = evlist__apply_filters(evlist, &evsel);
4058 if (err < 0)
4059 goto out_error_apply_filters;
4060
4061 if (trace->dump.map)
4062 bpf_map__fprintf(trace->dump.map, trace->output);
4063
4064 err = evlist__mmap(evlist, trace->opts.mmap_pages);
4065 if (err < 0)
4066 goto out_error_mmap;
4067
 /* With a target and no initial delay, enable the events right away. */
4068 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
4069 evlist__enable(evlist);
4070
4071 if (forks)
4072 evlist__start_workload(evlist);
4073
 /* initial_delay is multiplied by 1000 for usleep(), i.e. it is in msecs. */
4074 if (trace->opts.initial_delay) {
4075 usleep(trace->opts.initial_delay * 1000);
4076 evlist__enable(evlist);
4077 }
4078
4079 trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
4080 evlist->core.threads->nr > 1 ||
4081 evlist__first(evlist)->core.attr.inherit;
4082
4083
4084
4085
4086
4087
4088
 /* Evsels with callchains but no explicit max stack get trace->max_stack. */
4089 evlist__for_each_entry(evlist, evsel) {
4090 if (evsel__has_callchain(evsel) &&
4091 evsel->core.attr.sample_max_stack == 0)
4092 evsel->core.attr.sample_max_stack = trace->max_stack;
4093 }
 /* Main loop: drain every mmap, then poll or flush depending on progress. */
4094again:
4095 before = trace->nr_events;
4096
4097 for (i = 0; i < evlist->core.nr_mmaps; i++) {
4098 union perf_event *event;
4099 struct mmap *md;
4100
4101 md = &evlist->mmap[i];
4102 if (perf_mmap__read_init(&md->core) < 0)
4103 continue;
4104
4105 while ((event = perf_mmap__read_event(&md->core)) != NULL) {
4106 ++trace->nr_events;
4107
4108 err = trace__deliver_event(trace, event);
4109 if (err)
4110 goto out_disable;
4111
4112 perf_mmap__consume(&md->core);
4113
4114 if (interrupted)
4115 goto out_disable;
4116
 /* First time 'done' is seen: stop producing events, keep consuming what is queued. */
4117 if (done && !draining) {
4118 evlist__disable(evlist);
4119 draining = true;
4120 }
4121 }
4122 perf_mmap__read_done(&md->core);
4123 }
4124
 /* No new events in this pass: wait for more, or flush and finish. */
4125 if (trace->nr_events == before) {
4126 int timeout = done ? 100 : -1;
4127
4128 if (!draining && evlist__poll(evlist, timeout) > 0) {
4129 if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
4130 draining = true;
4131
4132 goto again;
4133 } else {
4134 if (trace__flush_events(trace))
4135 goto out_disable;
4136 }
4137 } else {
4138 goto again;
4139 }
4140
4141out_disable:
4142 thread__zput(trace->current);
4143
4144 evlist__disable(evlist);
4145
 /* Deliver whatever is still sitting in the ordered events queue. */
4146 if (trace->sort_events)
4147 ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);
4148
4149 if (!err) {
4150 if (trace->summary)
4151 trace__fprintf_thread_summary(trace, trace->output);
4152
4153 if (trace->show_tool_stats) {
4154 fprintf(trace->output, "Stats:\n "
4155 " vfs_getname : %" PRIu64 "\n"
4156 " proc_getname: %" PRIu64 "\n",
4157 trace->stats.vfs_getname,
4158 trace->stats.proc_getname);
4159 }
4160 }
4161
 /* Common exit: release symbols, syscall tp fields, the evlist and the cgroup reference. */
4162out_delete_evlist:
4163 trace__symbols__exit(trace);
4164 evlist__free_syscall_tp_fields(evlist);
4165 evlist__delete(evlist);
4166 cgroup__put(trace->cgroup);
4167 trace->evlist = NULL;
4168 trace->live = false;
4169 return err;
 /*
  * Error reporting labels.  The block below is only entered via goto; it
  * exists to scope errbuf for the labels that need it.
  */
4170{
4171 char errbuf[BUFSIZ];
4172
4173out_error_sched_stat_runtime:
4174 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
4175 goto out_error;
4176
4177out_error_raw_syscalls:
4178 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
4179 goto out_error;
4180
4181out_error_mmap:
4182 evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
4183 goto out_error;
4184
4185out_error_open:
4186 evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
4187
 /* Falls through from out_error_open: print the message built above. */
4188out_error:
4189 fprintf(trace->output, "%s\n", errbuf);
4190 goto out_delete_evlist;
4191
4192out_error_apply_filters:
4193 fprintf(trace->output,
4194 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
4195 evsel->filter, evsel__name(evsel), errno,
4196 str_error_r(errno, errbuf, sizeof(errbuf)));
4197 goto out_delete_evlist;
4198}
4199out_error_mem:
4200 fprintf(trace->output, "Not enough memory to run!\n");
4201 goto out_delete_evlist;
4202
4203out_errno:
4204 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
4205 goto out_delete_evlist;
4206}
4207
4208static int trace__replay(struct trace *trace)
4209{
4210 const struct evsel_str_handler handlers[] = {
4211 { "probe:vfs_getname", trace__vfs_getname, },
4212 };
4213 struct perf_data data = {
4214 .path = input_name,
4215 .mode = PERF_DATA_MODE_READ,
4216 .force = trace->force,
4217 };
4218 struct perf_session *session;
4219 struct evsel *evsel;
4220 int err = -1;
4221
4222 trace->tool.sample = trace__process_sample;
4223 trace->tool.mmap = perf_event__process_mmap;
4224 trace->tool.mmap2 = perf_event__process_mmap2;
4225 trace->tool.comm = perf_event__process_comm;
4226 trace->tool.exit = perf_event__process_exit;
4227 trace->tool.fork = perf_event__process_fork;
4228 trace->tool.attr = perf_event__process_attr;
4229 trace->tool.tracing_data = perf_event__process_tracing_data;
4230 trace->tool.build_id = perf_event__process_build_id;
4231 trace->tool.namespaces = perf_event__process_namespaces;
4232
4233 trace->tool.ordered_events = true;
4234 trace->tool.ordering_requires_timestamps = true;
4235
4236
4237 trace->multiple_threads = true;
4238
4239 session = perf_session__new(&data, &trace->tool);
4240 if (IS_ERR(session))
4241 return PTR_ERR(session);
4242
4243 if (trace->opts.target.pid)
4244 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
4245
4246 if (trace->opts.target.tid)
4247 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
4248
4249 if (symbol__init(&session->header.env) < 0)
4250 goto out;
4251
4252 trace->host = &session->machines.host;
4253
4254 err = perf_session__set_tracepoints_handlers(session, handlers);
4255 if (err)
4256 goto out;
4257
4258 evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter");
4259
4260 if (evsel == NULL)
4261 evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter");
4262
4263 if (evsel &&
4264 (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
4265 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
4266 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
4267 goto out;
4268 }
4269
4270 evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit");
4271 if (evsel == NULL)
4272 evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit");
4273 if (evsel &&
4274 (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
4275 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
4276 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
4277 goto out;
4278 }
4279
4280 evlist__for_each_entry(session->evlist, evsel) {
4281 if (evsel->core.attr.type == PERF_TYPE_SOFTWARE &&
4282 (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
4283 evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
4284 evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS))
4285 evsel->handler = trace__pgfault;
4286 }
4287
4288 setup_pager();
4289
4290 err = perf_session__process_events(session);
4291 if (err)
4292 pr_err("Failed to process events, error %d", err);
4293
4294 else if (trace->summary)
4295 trace__fprintf_thread_summary(trace, trace->output);
4296
4297out:
4298 perf_session__delete(session);
4299
4300 return err;
4301}
4302
/* Print the title of the per thread summary; returns what fprintf() returned. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
4311
4312DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
4313 struct syscall_stats *stats;
4314 double msecs;
4315 int syscall;
4316)
4317{
4318 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
4319 struct syscall_stats *stats = source->priv;
4320
4321 entry->syscall = source->i;
4322 entry->stats = stats;
4323 entry->msecs = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0;
4324}
4325
4326static size_t thread__dump_stats(struct thread_trace *ttrace,
4327 struct trace *trace, FILE *fp)
4328{
4329 size_t printed = 0;
4330 struct syscall *sc;
4331 struct rb_node *nd;
4332 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
4333
4334 if (syscall_stats == NULL)
4335 return 0;
4336
4337 printed += fprintf(fp, "\n");
4338
4339 printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
4340 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
4341 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
4342
4343 resort_rb__for_each_entry(nd, syscall_stats) {
4344 struct syscall_stats *stats = syscall_stats_entry->stats;
4345 if (stats) {
4346 double min = (double)(stats->stats.min) / NSEC_PER_MSEC;
4347 double max = (double)(stats->stats.max) / NSEC_PER_MSEC;
4348 double avg = avg_stats(&stats->stats);
4349 double pct;
4350 u64 n = (u64)stats->stats.n;
4351
4352 pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
4353 avg /= NSEC_PER_MSEC;
4354
4355 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
4356 printed += fprintf(fp, " %-15s", sc->name);
4357 printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f",
4358 n, stats->nr_failures, syscall_stats_entry->msecs, min, avg);
4359 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
4360
4361 if (trace->errno_summary && stats->nr_failures) {
4362 const char *arch_name = perf_env__arch(trace->host->env);
4363 int e;
4364
4365 for (e = 0; e < stats->max_errno; ++e) {
4366 if (stats->errnos[e] != 0)
4367 fprintf(fp, "\t\t\t\t%s: %d\n", arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]);
4368 }
4369 }
4370 }
4371 }
4372
4373 resort_rb__delete(syscall_stats);
4374 printed += fprintf(fp, "\n\n");
4375
4376 return printed;
4377}
4378
4379static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
4380{
4381 size_t printed = 0;
4382 struct thread_trace *ttrace = thread__priv(thread);
4383 double ratio;
4384
4385 if (ttrace == NULL)
4386 return 0;
4387
4388 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
4389
4390 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
4391 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
4392 printed += fprintf(fp, "%.1f%%", ratio);
4393 if (ttrace->pfmaj)
4394 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
4395 if (ttrace->pfmin)
4396 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
4397 if (trace->sched)
4398 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
4399 else if (fputc('\n', fp) != EOF)
4400 ++printed;
4401
4402 printed += thread__dump_stats(ttrace, trace, fp);
4403
4404 return printed;
4405}
4406
4407static unsigned long thread__nr_events(struct thread_trace *ttrace)
4408{
4409 return ttrace ? ttrace->nr_events : 0;
4410}
4411
4412DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
4413 struct thread *thread;
4414)
4415{
4416 entry->thread = rb_entry(nd, struct thread, rb_node);
4417}
4418
4419static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
4420{
4421 size_t printed = trace__fprintf_threads_header(fp);
4422 struct rb_node *nd;
4423 int i;
4424
4425 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
4426 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
4427
4428 if (threads == NULL) {
4429 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
4430 return 0;
4431 }
4432
4433 resort_rb__for_each_entry(nd, threads)
4434 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
4435
4436 resort_rb__delete(threads);
4437 }
4438 return printed;
4439}
4440
4441static int trace__set_duration(const struct option *opt, const char *str,
4442 int unset __maybe_unused)
4443{
4444 struct trace *trace = opt->value;
4445
4446 trace->duration_filter = atof(str);
4447 return 0;
4448}
4449
4450static int trace__set_filter_pids_from_option(const struct option *opt, const char *str,
4451 int unset __maybe_unused)
4452{
4453 int ret = -1;
4454 size_t i;
4455 struct trace *trace = opt->value;
4456
4457
4458
4459
4460 struct intlist *list = intlist__new(str);
4461
4462 if (list == NULL)
4463 return -1;
4464
4465 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
4466 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
4467
4468 if (trace->filter_pids.entries == NULL)
4469 goto out;
4470
4471 trace->filter_pids.entries[0] = getpid();
4472
4473 for (i = 1; i < trace->filter_pids.nr; ++i)
4474 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
4475
4476 intlist__delete(list);
4477 ret = 0;
4478out:
4479 return ret;
4480}
4481
4482static int trace__open_output(struct trace *trace, const char *filename)
4483{
4484 struct stat st;
4485
4486 if (!stat(filename, &st) && st.st_size) {
4487 char oldname[PATH_MAX];
4488
4489 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
4490 unlink(oldname);
4491 rename(filename, oldname);
4492 }
4493
4494 trace->output = fopen(filename, "w");
4495
4496 return trace->output == NULL ? -errno : 0;
4497}
4498
4499static int parse_pagefaults(const struct option *opt, const char *str,
4500 int unset __maybe_unused)
4501{
4502 int *trace_pgfaults = opt->value;
4503
4504 if (strcmp(str, "all") == 0)
4505 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
4506 else if (strcmp(str, "maj") == 0)
4507 *trace_pgfaults |= TRACE_PFMAJ;
4508 else if (strcmp(str, "min") == 0)
4509 *trace_pgfaults |= TRACE_PFMIN;
4510 else
4511 return -1;
4512
4513 return 0;
4514}
4515
4516static void evlist__set_default_evsel_handler(struct evlist *evlist, void *handler)
4517{
4518 struct evsel *evsel;
4519
4520 evlist__for_each_entry(evlist, evsel) {
4521 if (evsel->handler == NULL)
4522 evsel->handler = handler;
4523 }
4524}
4525
4526static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name)
4527{
4528 struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
4529
4530 if (fmt) {
4531 struct syscall_fmt *scfmt = syscall_fmt__find(name);
4532
4533 if (scfmt) {
4534 int skip = 0;
4535
4536 if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 ||
4537 strcmp(evsel->tp_format->format.fields->name, "nr") == 0)
4538 ++skip;
4539
4540 memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt));
4541 }
4542 }
4543}
4544
4545static int evlist__set_syscall_tp_fields(struct evlist *evlist)
4546{
4547 struct evsel *evsel;
4548
4549 evlist__for_each_entry(evlist, evsel) {
4550 if (evsel->priv || !evsel->tp_format)
4551 continue;
4552
4553 if (strcmp(evsel->tp_format->system, "syscalls")) {
4554 evsel__init_tp_arg_scnprintf(evsel);
4555 continue;
4556 }
4557
4558 if (evsel__init_syscall_tp(evsel))
4559 return -1;
4560
4561 if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
4562 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
4563
4564 if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
4565 return -1;
4566
4567 evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1);
4568 } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
4569 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
4570
4571 if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
4572 return -1;
4573
4574 evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1);
4575 }
4576 }
4577
4578 return 0;
4579}
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589static int trace__parse_events_option(const struct option *opt, const char *str,
4590 int unset __maybe_unused)
4591{
4592 struct trace *trace = (struct trace *)opt->value;
4593 const char *s = str;
4594 char *sep = NULL, *lists[2] = { NULL, NULL, };
4595 int len = strlen(str) + 1, err = -1, list, idx;
4596 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
4597 char group_name[PATH_MAX];
4598 struct syscall_fmt *fmt;
4599
4600 if (strace_groups_dir == NULL)
4601 return -1;
4602
4603 if (*s == '!') {
4604 ++s;
4605 trace->not_ev_qualifier = true;
4606 }
4607
4608 while (1) {
4609 if ((sep = strchr(s, ',')) != NULL)
4610 *sep = '\0';
4611
4612 list = 0;
4613 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
4614 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
4615 list = 1;
4616 goto do_concat;
4617 }
4618
4619 fmt = syscall_fmt__find_by_alias(s);
4620 if (fmt != NULL) {
4621 list = 1;
4622 s = fmt->name;
4623 } else {
4624 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
4625 if (access(group_name, R_OK) == 0)
4626 list = 1;
4627 }
4628do_concat:
4629 if (lists[list]) {
4630 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
4631 } else {
4632 lists[list] = malloc(len);
4633 if (lists[list] == NULL)
4634 goto out;
4635 strcpy(lists[list], s);
4636 }
4637
4638 if (!sep)
4639 break;
4640
4641 *sep = ',';
4642 s = sep + 1;
4643 }
4644
4645 if (lists[1] != NULL) {
4646 struct strlist_config slist_config = {
4647 .dirname = strace_groups_dir,
4648 };
4649
4650 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
4651 if (trace->ev_qualifier == NULL) {
4652 fputs("Not enough memory to parse event qualifier", trace->output);
4653 goto out;
4654 }
4655
4656 if (trace__validate_ev_qualifier(trace))
4657 goto out;
4658 trace->trace_syscalls = true;
4659 }
4660
4661 err = 0;
4662
4663 if (lists[0]) {
4664 struct option o = {
4665 .value = &trace->evlist,
4666 };
4667 err = parse_events_option(&o, lists[0], 0);
4668 }
4669out:
4670 free(strace_groups_dir);
4671 free(lists[0]);
4672 free(lists[1]);
4673 if (sep)
4674 *sep = ',';
4675
4676 return err;
4677}
4678
4679static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
4680{
4681 struct trace *trace = opt->value;
4682
4683 if (!list_empty(&trace->evlist->core.entries)) {
4684 struct option o = {
4685 .value = &trace->evlist,
4686 };
4687 return parse_cgroups(&o, str, unset);
4688 }
4689 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
4690
4691 return 0;
4692}
4693
4694static int trace__config(const char *var, const char *value, void *arg)
4695{
4696 struct trace *trace = arg;
4697 int err = 0;
4698
4699 if (!strcmp(var, "trace.add_events")) {
4700 trace->perfconfig_events = strdup(value);
4701 if (trace->perfconfig_events == NULL) {
4702 pr_err("Not enough memory for %s\n", "trace.add_events");
4703 return -1;
4704 }
4705 } else if (!strcmp(var, "trace.show_timestamp")) {
4706 trace->show_tstamp = perf_config_bool(var, value);
4707 } else if (!strcmp(var, "trace.show_duration")) {
4708 trace->show_duration = perf_config_bool(var, value);
4709 } else if (!strcmp(var, "trace.show_arg_names")) {
4710 trace->show_arg_names = perf_config_bool(var, value);
4711 if (!trace->show_arg_names)
4712 trace->show_zeros = true;
4713 } else if (!strcmp(var, "trace.show_zeros")) {
4714 bool new_show_zeros = perf_config_bool(var, value);
4715 if (!trace->show_arg_names && !new_show_zeros) {
4716 pr_warning("trace.show_zeros has to be set when trace.show_arg_names=no\n");
4717 goto out;
4718 }
4719 trace->show_zeros = new_show_zeros;
4720 } else if (!strcmp(var, "trace.show_prefix")) {
4721 trace->show_string_prefix = perf_config_bool(var, value);
4722 } else if (!strcmp(var, "trace.no_inherit")) {
4723 trace->opts.no_inherit = perf_config_bool(var, value);
4724 } else if (!strcmp(var, "trace.args_alignment")) {
4725 int args_alignment = 0;
4726 if (perf_config_int(&args_alignment, var, value) == 0)
4727 trace->args_alignment = args_alignment;
4728 } else if (!strcmp(var, "trace.tracepoint_beautifiers")) {
4729 if (strcasecmp(value, "libtraceevent") == 0)
4730 trace->libtraceevent_print = true;
4731 else if (strcasecmp(value, "libbeauty") == 0)
4732 trace->libtraceevent_print = false;
4733 }
4734out:
4735 return err;
4736}
4737
4738static void trace__exit(struct trace *trace)
4739{
4740 int i;
4741
4742 strlist__delete(trace->ev_qualifier);
4743 free(trace->ev_qualifier_ids.entries);
4744 if (trace->syscalls.table) {
4745 for (i = 0; i <= trace->sctbl->syscalls.max_id; i++)
4746 syscall__exit(&trace->syscalls.table[i]);
4747 free(trace->syscalls.table);
4748 }
4749 syscalltbl__delete(trace->sctbl);
4750 zfree(&trace->perfconfig_events);
4751}
4752
4753int cmd_trace(int argc, const char **argv)
4754{
4755 const char *trace_usage[] = {
4756 "perf trace [<options>] [<command>]",
4757 "perf trace [<options>] -- <command> [<options>]",
4758 "perf trace record [<options>] [<command>]",
4759 "perf trace record [<options>] -- <command> [<options>]",
4760 NULL
4761 };
4762 struct trace trace = {
4763 .opts = {
4764 .target = {
4765 .uid = UINT_MAX,
4766 .uses_mmap = true,
4767 },
4768 .user_freq = UINT_MAX,
4769 .user_interval = ULLONG_MAX,
4770 .no_buffering = true,
4771 .mmap_pages = UINT_MAX,
4772 },
4773 .output = stderr,
4774 .show_comm = true,
4775 .show_tstamp = true,
4776 .show_duration = true,
4777 .show_arg_names = true,
4778 .args_alignment = 70,
4779 .trace_syscalls = false,
4780 .kernel_syscallchains = false,
4781 .max_stack = UINT_MAX,
4782 .max_events = ULONG_MAX,
4783 };
4784 const char *map_dump_str = NULL;
4785 const char *output_name = NULL;
4786 const struct option trace_options[] = {
4787 OPT_CALLBACK('e', "event", &trace, "event",
4788 "event/syscall selector. use 'perf list' to list available events",
4789 trace__parse_events_option),
4790 OPT_CALLBACK(0, "filter", &trace.evlist, "filter",
4791 "event filter", parse_filter),
4792 OPT_BOOLEAN(0, "comm", &trace.show_comm,
4793 "show the thread COMM next to its id"),
4794 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
4795 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
4796 trace__parse_events_option),
4797 OPT_STRING('o', "output", &output_name, "file", "output file name"),
4798 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
4799 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
4800 "trace events on existing process id"),
4801 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
4802 "trace events on existing thread id"),
4803 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
4804 "pids to filter (by the kernel)", trace__set_filter_pids_from_option),
4805 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
4806 "system-wide collection from all CPUs"),
4807 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
4808 "list of cpus to monitor"),
4809 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
4810 "child tasks do not inherit counters"),
4811 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
4812 "number of mmap data pages", evlist__parse_mmap_pages),
4813 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
4814 "user to profile"),
4815 OPT_CALLBACK(0, "duration", &trace, "float",
4816 "show only events with duration > N.M ms",
4817 trace__set_duration),
4818#ifdef HAVE_LIBBPF_SUPPORT
4819 OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
4820#endif
4821 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
4822 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4823 OPT_BOOLEAN('T', "time", &trace.full_time,
4824 "Show full timestamp, not time relative to first start"),
4825 OPT_BOOLEAN(0, "failure", &trace.failure_only,
4826 "Show only syscalls that failed"),
4827 OPT_BOOLEAN('s', "summary", &trace.summary_only,
4828 "Show only syscall summary with statistics"),
4829 OPT_BOOLEAN('S', "with-summary", &trace.summary,
4830 "Show all syscalls and summary with statistics"),
4831 OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary,
4832 "Show errno stats per syscall, use with -s or -S"),
4833 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
4834 "Trace pagefaults", parse_pagefaults, "maj"),
4835 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
4836 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
4837 OPT_CALLBACK(0, "call-graph", &trace.opts,
4838 "record_mode[,record_size]", record_callchain_help,
4839 &record_parse_callchain_opt),
4840 OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print,
4841 "Use libtraceevent to print the tracepoint arguments."),
4842 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
4843 "Show the kernel callchains on the syscall exit path"),
4844 OPT_ULONG(0, "max-events", &trace.max_events,
4845 "Set the maximum number of events to print, exit after that is reached. "),
4846 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
4847 "Set the minimum stack depth when parsing the callchain, "
4848 "anything below the specified depth will be ignored."),
4849 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
4850 "Set the maximum stack depth when parsing the callchain, "
4851 "anything beyond the specified depth will be ignored. "
4852 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
4853 OPT_BOOLEAN(0, "sort-events", &trace.sort_events,
4854 "Sort batch of events before processing, use if getting out of order events"),
4855 OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
4856 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
4857 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
4858 "per thread proc mmap processing timeout in ms"),
4859 OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
4860 trace__parse_cgroups),
4861 OPT_INTEGER('D', "delay", &trace.opts.initial_delay,
4862 "ms to wait before starting measurement after program "
4863 "start"),
4864 OPTS_EVSWITCH(&trace.evswitch),
4865 OPT_END()
4866 };
4867 bool __maybe_unused max_stack_user_set = true;
4868 bool mmap_pages_user_set = true;
4869 struct evsel *evsel;
4870 const char * const trace_subcommands[] = { "record", NULL };
4871 int err = -1;
4872 char bf[BUFSIZ];
4873
4874 signal(SIGSEGV, sighandler_dump_stack);
4875 signal(SIGFPE, sighandler_dump_stack);
4876 signal(SIGCHLD, sig_handler);
4877 signal(SIGINT, sig_handler);
4878
4879 trace.evlist = evlist__new();
4880 trace.sctbl = syscalltbl__new();
4881
4882 if (trace.evlist == NULL || trace.sctbl == NULL) {
4883 pr_err("Not enough memory to run!\n");
4884 err = -ENOMEM;
4885 goto out;
4886 }
4887
4888
4889
4890
4891
4892
4893
4894
4895 rlimit__bump_memlock();
4896
4897 err = perf_config(trace__config, &trace);
4898 if (err)
4899 goto out;
4900
4901 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
4902 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
4916 trace.evlist->core.nr_entries == 0 ) {
4917 trace.trace_syscalls = true;
4918 }
4919
4920
4921
4922
4923
4924
4925 if (trace.perfconfig_events != NULL) {
4926 struct parse_events_error parse_err;
4927
4928 bzero(&parse_err, sizeof(parse_err));
4929 err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err);
4930 if (err) {
4931 parse_events_print_error(&parse_err, trace.perfconfig_events);
4932 goto out;
4933 }
4934 }
4935
4936 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
4937 usage_with_options_msg(trace_usage, trace_options,
4938 "cgroup monitoring only available in system-wide mode");
4939 }
4940
4941 evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
4942 if (IS_ERR(evsel)) {
4943 bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
4944 pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
4945 goto out;
4946 }
4947
4948 if (evsel) {
4949 trace.syscalls.events.augmented = evsel;
4950
4951 evsel = evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
4952 if (evsel == NULL) {
4953 pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
4954 goto out;
4955 }
4956
4957 if (evsel->bpf_obj == NULL) {
4958 pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n");
4959 goto out;
4960 }
4961
4962 trace.bpf_obj = evsel->bpf_obj;
4963
4964
4965
4966
4967
4968
4969 if (!trace.trace_syscalls && trace__only_augmented_syscalls_evsels(&trace))
4970 trace.trace_syscalls = true;
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996 if (!trace.trace_syscalls) {
4997 trace__delete_augmented_syscalls(&trace);
4998 } else {
4999 trace__set_bpf_map_filtered_pids(&trace);
5000 trace__set_bpf_map_syscalls(&trace);
5001 trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
5002 }
5003 }
5004
5005 err = bpf__setup_stdout(trace.evlist);
5006 if (err) {
5007 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
5008 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
5009 goto out;
5010 }
5011
5012 err = -1;
5013
5014 if (map_dump_str) {
5015 trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
5016 if (trace.dump.map == NULL) {
5017 pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
5018 goto out;
5019 }
5020 }
5021
5022 if (trace.trace_pgfaults) {
5023 trace.opts.sample_address = true;
5024 trace.opts.sample_time = true;
5025 }
5026
5027 if (trace.opts.mmap_pages == UINT_MAX)
5028 mmap_pages_user_set = false;
5029
5030 if (trace.max_stack == UINT_MAX) {
5031 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
5032 max_stack_user_set = false;
5033 }
5034
5035#ifdef HAVE_DWARF_UNWIND_SUPPORT
5036 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
5037 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
5038 }
5039#endif
5040
5041 if (callchain_param.enabled) {
5042 if (!mmap_pages_user_set && geteuid() == 0)
5043 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
5044
5045 symbol_conf.use_callchain = true;
5046 }
5047
5048 if (trace.evlist->core.nr_entries > 0) {
5049 evlist__set_default_evsel_handler(trace.evlist, trace__event_handler);
5050 if (evlist__set_syscall_tp_fields(trace.evlist)) {
5051 perror("failed to set syscalls:* tracepoint fields");
5052 goto out;
5053 }
5054 }
5055
5056 if (trace.sort_events) {
5057 ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace);
5058 ordered_events__set_copy_on_queue(&trace.oe.data, true);
5059 }
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072 if (trace.syscalls.events.augmented) {
5073 evlist__for_each_entry(trace.evlist, evsel) {
5074 bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
5075
5076 if (raw_syscalls_sys_exit) {
5077 trace.raw_augmented_syscalls = true;
5078 goto init_augmented_syscall_tp;
5079 }
5080
5081 if (trace.syscalls.events.augmented->priv == NULL &&
5082 strstr(evsel__name(evsel), "syscalls:sys_enter")) {
5083 struct evsel *augmented = trace.syscalls.events.augmented;
5084 if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
5085 evsel__init_augmented_syscall_tp_args(augmented))
5086 goto out;
5087
5088
5089
5090
5091
5092 augmented->handler = trace__sys_enter;
5093
5094
5095
5096
5097
5098
5099 if (evsel__init_augmented_syscall_tp(evsel, evsel) ||
5100 evsel__init_augmented_syscall_tp_args(evsel))
5101 goto out;
5102 evsel->handler = trace__sys_enter;
5103 }
5104
5105 if (strstarts(evsel__name(evsel), "syscalls:sys_exit_")) {
5106 struct syscall_tp *sc;
5107init_augmented_syscall_tp:
5108 if (evsel__init_augmented_syscall_tp(evsel, evsel))
5109 goto out;
5110 sc = __evsel__syscall_tp(evsel);
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130 if (trace.raw_augmented_syscalls)
5131 trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
5132 evsel__init_augmented_syscall_tp_ret(evsel);
5133 evsel->handler = trace__sys_exit;
5134 }
5135 }
5136 }
5137
5138 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
5139 return trace__record(&trace, argc-1, &argv[1]);
5140
5141
5142 if (trace.errno_summary && !trace.summary && !trace.summary_only)
5143 trace.summary_only = true;
5144
5145
5146 if (trace.summary_only)
5147 trace.summary = trace.summary_only;
5148
5149 if (output_name != NULL) {
5150 err = trace__open_output(&trace, output_name);
5151 if (err < 0) {
5152 perror("failed to create output file");
5153 goto out;
5154 }
5155 }
5156
5157 err = evswitch__init(&trace.evswitch, trace.evlist, stderr);
5158 if (err)
5159 goto out_close;
5160
5161 err = target__validate(&trace.opts.target);
5162 if (err) {
5163 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
5164 fprintf(trace.output, "%s", bf);
5165 goto out_close;
5166 }
5167
5168 err = target__parse_uid(&trace.opts.target);
5169 if (err) {
5170 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
5171 fprintf(trace.output, "%s", bf);
5172 goto out_close;
5173 }
5174
5175 if (!argc && target__none(&trace.opts.target))
5176 trace.opts.target.system_wide = true;
5177
5178 if (input_name)
5179 err = trace__replay(&trace);
5180 else
5181 err = trace__run(&trace, argc, argv);
5182
5183out_close:
5184 if (output_name != NULL)
5185 fclose(trace.output);
5186out:
5187 trace__exit(&trace);
5188 return err;
5189}
5190