1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include "util/record.h"
18#include <traceevent/event-parse.h>
19#include <api/fs/tracing_path.h>
20#include <bpf/bpf.h>
21#include "util/bpf_map.h"
22#include "util/rlimit.h"
23#include "builtin.h"
24#include "util/cgroup.h"
25#include "util/color.h"
26#include "util/config.h"
27#include "util/debug.h"
28#include "util/dso.h"
29#include "util/env.h"
30#include "util/event.h"
31#include "util/evsel.h"
32#include "util/evsel_fprintf.h"
33#include "util/synthetic-events.h"
34#include "util/evlist.h"
35#include "util/evswitch.h"
36#include "util/mmap.h"
37#include <subcmd/pager.h>
38#include <subcmd/exec-cmd.h>
39#include "util/machine.h"
40#include "util/map.h"
41#include "util/symbol.h"
42#include "util/path.h"
43#include "util/session.h"
44#include "util/thread.h"
45#include <subcmd/parse-options.h>
46#include "util/strlist.h"
47#include "util/intlist.h"
48#include "util/thread_map.h"
49#include "util/stat.h"
50#include "util/tool.h"
51#include "util/util.h"
52#include "trace/beauty/beauty.h"
53#include "trace-event.h"
54#include "util/parse-events.h"
55#include "util/bpf-loader.h"
56#include "callchain.h"
57#include "print_binary.h"
58#include "string2.h"
59#include "syscalltbl.h"
60#include "rb_resort.h"
61#include "../perf.h"
62
63#include <errno.h>
64#include <inttypes.h>
65#include <poll.h>
66#include <signal.h>
67#include <stdlib.h>
68#include <string.h>
69#include <linux/err.h>
70#include <linux/filter.h>
71#include <linux/kernel.h>
72#include <linux/random.h>
73#include <linux/stringify.h>
74#include <linux/time64.h>
75#include <linux/zalloc.h>
76#include <fcntl.h>
77#include <sys/sysmacros.h>
78
79#include <linux/ctype.h>
80#include <perf/mmap.h>
81
/* Fallback value, used only when no earlier header defined O_CLOEXEC. */
#if !defined(O_CLOEXEC)
#define O_CLOEXEC 02000000
#endif

/* Fallback value, used only when no earlier header defined F_LINUX_SPECIFIC_BASE. */
#if !defined(F_LINUX_SPECIFIC_BASE)
#define F_LINUX_SPECIFIC_BASE 1024
#endif
89
90
91
92
93struct syscall_arg_fmt {
94 size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
95 bool (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val);
96 unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
97 void *parm;
98 const char *name;
99 u16 nr_entries;
100 bool show_zero;
101};
102
103struct syscall_fmt {
104 const char *name;
105 const char *alias;
106 struct {
107 const char *sys_enter,
108 *sys_exit;
109 } bpf_prog_name;
110 struct syscall_arg_fmt arg[6];
111 u8 nr_args;
112 bool errpid;
113 bool timeout;
114 bool hexret;
115};
116
117struct trace {
118 struct perf_tool tool;
119 struct syscalltbl *sctbl;
120 struct {
121 struct syscall *table;
122 struct bpf_map *map;
123 struct {
124 struct bpf_map *sys_enter,
125 *sys_exit;
126 } prog_array;
127 struct {
128 struct evsel *sys_enter,
129 *sys_exit,
130 *augmented;
131 } events;
132 struct bpf_program *unaugmented_prog;
133 } syscalls;
134 struct {
135 struct bpf_map *map;
136 } dump;
137 struct record_opts opts;
138 struct evlist *evlist;
139 struct machine *host;
140 struct thread *current;
141 struct bpf_object *bpf_obj;
142 struct cgroup *cgroup;
143 u64 base_time;
144 FILE *output;
145 unsigned long nr_events;
146 unsigned long nr_events_printed;
147 unsigned long max_events;
148 struct evswitch evswitch;
149 struct strlist *ev_qualifier;
150 struct {
151 size_t nr;
152 int *entries;
153 } ev_qualifier_ids;
154 struct {
155 size_t nr;
156 pid_t *entries;
157 struct bpf_map *map;
158 } filter_pids;
159 double duration_filter;
160 double runtime_ms;
161 struct {
162 u64 vfs_getname,
163 proc_getname;
164 } stats;
165 unsigned int max_stack;
166 unsigned int min_stack;
167 int raw_augmented_syscalls_args_size;
168 bool raw_augmented_syscalls;
169 bool fd_path_disabled;
170 bool sort_events;
171 bool not_ev_qualifier;
172 bool live;
173 bool full_time;
174 bool sched;
175 bool multiple_threads;
176 bool summary;
177 bool summary_only;
178 bool errno_summary;
179 bool failure_only;
180 bool show_comm;
181 bool print_sample;
182 bool show_tool_stats;
183 bool trace_syscalls;
184 bool libtraceevent_print;
185 bool kernel_syscallchains;
186 s16 args_alignment;
187 bool show_tstamp;
188 bool show_duration;
189 bool show_zeros;
190 bool show_arg_names;
191 bool show_string_prefix;
192 bool force;
193 bool vfs_getname;
194 int trace_pgfaults;
195 char *perfconfig_events;
196 struct {
197 struct ordered_events data;
198 u64 last;
199 } oe;
200};
201
202struct tp_field {
203 int offset;
204 union {
205 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
206 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
207 };
208};
209
210#define TP_UINT_FIELD(bits) \
211static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
212{ \
213 u##bits value; \
214 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
215 return value; \
216}
217
218TP_UINT_FIELD(8);
219TP_UINT_FIELD(16);
220TP_UINT_FIELD(32);
221TP_UINT_FIELD(64);
222
223#define TP_UINT_FIELD__SWAPPED(bits) \
224static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
225{ \
226 u##bits value; \
227 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
228 return bswap_##bits(value);\
229}
230
231TP_UINT_FIELD__SWAPPED(16);
232TP_UINT_FIELD__SWAPPED(32);
233TP_UINT_FIELD__SWAPPED(64);
234
235static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
236{
237 field->offset = offset;
238
239 switch (size) {
240 case 1:
241 field->integer = tp_field__u8;
242 break;
243 case 2:
244 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
245 break;
246 case 4:
247 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
248 break;
249 case 8:
250 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
251 break;
252 default:
253 return -1;
254 }
255
256 return 0;
257}
258
259static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap)
260{
261 return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
262}
263
264static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
265{
266 return sample->raw_data + field->offset;
267}
268
269static int __tp_field__init_ptr(struct tp_field *field, int offset)
270{
271 field->offset = offset;
272 field->pointer = tp_field__ptr;
273 return 0;
274}
275
276static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field)
277{
278 return __tp_field__init_ptr(field, format_field->offset);
279}
280
281struct syscall_tp {
282 struct tp_field id;
283 union {
284 struct tp_field args, ret;
285 };
286};
287
288
289
290
291
292
293struct evsel_trace {
294 struct syscall_tp sc;
295 struct syscall_arg_fmt *fmt;
296};
297
298static struct evsel_trace *evsel_trace__new(void)
299{
300 return zalloc(sizeof(struct evsel_trace));
301}
302
303static void evsel_trace__delete(struct evsel_trace *et)
304{
305 if (et == NULL)
306 return;
307
308 zfree(&et->fmt);
309 free(et);
310}
311
312
313
314
315
316static inline struct syscall_tp *__evsel__syscall_tp(struct evsel *evsel)
317{
318 struct evsel_trace *et = evsel->priv;
319
320 return &et->sc;
321}
322
323static struct syscall_tp *evsel__syscall_tp(struct evsel *evsel)
324{
325 if (evsel->priv == NULL) {
326 evsel->priv = evsel_trace__new();
327 if (evsel->priv == NULL)
328 return NULL;
329 }
330
331 return __evsel__syscall_tp(evsel);
332}
333
334
335
336
337static inline struct syscall_arg_fmt *__evsel__syscall_arg_fmt(struct evsel *evsel)
338{
339 struct evsel_trace *et = evsel->priv;
340
341 return et->fmt;
342}
343
344static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel)
345{
346 struct evsel_trace *et = evsel->priv;
347
348 if (evsel->priv == NULL) {
349 et = evsel->priv = evsel_trace__new();
350
351 if (et == NULL)
352 return NULL;
353 }
354
355 if (et->fmt == NULL) {
356 et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
357 if (et->fmt == NULL)
358 goto out_delete;
359 }
360
361 return __evsel__syscall_arg_fmt(evsel);
362
363out_delete:
364 evsel_trace__delete(evsel->priv);
365 evsel->priv = NULL;
366 return NULL;
367}
368
369static int perf_evsel__init_tp_uint_field(struct evsel *evsel,
370 struct tp_field *field,
371 const char *name)
372{
373 struct tep_format_field *format_field = perf_evsel__field(evsel, name);
374
375 if (format_field == NULL)
376 return -1;
377
378 return tp_field__init_uint(field, format_field, evsel->needs_swap);
379}
380
/*
 * Initialise member 'name' of the evsel's syscall_tp as an unsigned integer
 * reader for the tracepoint field with the same name.  evsel->priv must
 * already be allocated.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc_tp__ = __evsel__syscall_tp(evsel); \
	   perf_evsel__init_tp_uint_field(evsel, &sc_tp__->name, #name); })
384
/*
 * Look up the tracepoint field called @name in @evsel and set up @field as a
 * pointer reader for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct tep_format_field *ff = perf_evsel__field(evsel, name);

	return ff != NULL ? tp_field__init_ptr(field, ff) : -1;
}
396
/*
 * Initialise member 'name' of the evsel's syscall_tp as a pointer reader for
 * the tracepoint field with the same name.  evsel->priv must already be
 * allocated.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc_tp__ = __evsel__syscall_tp(evsel); \
	   perf_evsel__init_tp_ptr_field(evsel, &sc_tp__->name, #name); })
400
401static void evsel__delete_priv(struct evsel *evsel)
402{
403 zfree(&evsel->priv);
404 evsel__delete(evsel);
405}
406
407static int perf_evsel__init_syscall_tp(struct evsel *evsel)
408{
409 struct syscall_tp *sc = evsel__syscall_tp(evsel);
410
411 if (sc != NULL) {
412 if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
413 perf_evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
414 return -ENOENT;
415 return 0;
416 }
417
418 return -ENOMEM;
419}
420
421static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
422{
423 struct syscall_tp *sc = evsel__syscall_tp(evsel);
424
425 if (sc != NULL) {
426 struct tep_format_field *syscall_id = perf_evsel__field(tp, "id");
427 if (syscall_id == NULL)
428 syscall_id = perf_evsel__field(tp, "__syscall_nr");
429 if (syscall_id == NULL ||
430 __tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
431 return -EINVAL;
432
433 return 0;
434 }
435
436 return -ENOMEM;
437}
438
439static int perf_evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
440{
441 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
442
443 return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
444}
445
446static int perf_evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
447{
448 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
449
450 return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
451}
452
453static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
454{
455 if (evsel__syscall_tp(evsel) != NULL) {
456 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
457 return -ENOENT;
458
459 evsel->handler = handler;
460 return 0;
461 }
462
463 return -ENOMEM;
464}
465
/*
 * Create the evsel for a raw syscall tracepoint.
 *
 * @direction is the event name inside the tracepoint subsystem.  The
 * "raw_syscalls" subsystem is tried first and "syscalls" is used as a
 * fallback.  The new evsel gets its id reader and @handler installed.
 *
 * Returns the evsel, or NULL if it could not be created or initialised (in
 * the latter case the evsel is destroyed before returning).
 */
static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
	struct evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_raw_syscall_tp(evsel, handler) != 0) {
		evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
486
/*
 * Read member 'name' of the evsel's syscall_tp from 'sample' through its
 * integer reader.  evsel->priv must already be allocated.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_tp__ = __evsel__syscall_tp(evsel); \
	   sc_tp__->name.integer(&sc_tp__->name, sample); })
490
/*
 * Read member 'name' of the evsel's syscall_tp from 'sample' through its
 * pointer reader.  evsel->priv must already be allocated.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_tp__ = __evsel__syscall_tp(evsel); \
	   sc_tp__->name.pointer(&sc_tp__->name, sample); })
494
495size_t strarray__scnprintf_suffix(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_suffix, int val)
496{
497 int idx = val - sa->offset;
498
499 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
500 size_t printed = scnprintf(bf, size, intfmt, val);
501 if (show_suffix)
502 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
503 return printed;
504 }
505
506 return scnprintf(bf, size, "%s%s", sa->entries[idx], show_suffix ? sa->prefix : "");
507}
508
509size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
510{
511 int idx = val - sa->offset;
512
513 if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) {
514 size_t printed = scnprintf(bf, size, intfmt, val);
515 if (show_prefix)
516 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix);
517 return printed;
518 }
519
520 return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
521}
522
523static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
524 const char *intfmt,
525 struct syscall_arg *arg)
526{
527 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->show_string_prefix, arg->val);
528}
529
/*
 * Format a syscall argument through the strarray in arg->parm; values
 * without a name are printed as a decimal integer.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char unnamed_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, unnamed_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
537
538bool syscall_arg__strtoul_strarray(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
539{
540 return strarray__strtoul(arg->parm, bf, size, ret);
541}
542
543bool syscall_arg__strtoul_strarray_flags(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
544{
545 return strarray__strtoul_flags(arg->parm, bf, size, ret);
546}
547
548bool syscall_arg__strtoul_strarrays(char *bf, size_t size, struct syscall_arg *arg, u64 *ret)
549{
550 return strarrays__strtoul(arg->parm, bf, size, ret);
551}
552
553size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg)
554{
555 return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val);
556}
557
558size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
559{
560 size_t printed;
561 int i;
562
563 for (i = 0; i < sas->nr_entries; ++i) {
564 struct strarray *sa = sas->entries[i];
565 int idx = val - sa->offset;
566
567 if (idx >= 0 && idx < sa->nr_entries) {
568 if (sa->entries[idx] == NULL)
569 break;
570 return scnprintf(bf, size, "%s%s", show_prefix ? sa->prefix : "", sa->entries[idx]);
571 }
572 }
573
574 printed = scnprintf(bf, size, intfmt, val);
575 if (show_prefix)
576 printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sas->entries[0]->prefix);
577 return printed;
578}
579
580bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret)
581{
582 int i;
583
584 for (i = 0; i < sa->nr_entries; ++i) {
585 if (sa->entries[i] && strncmp(sa->entries[i], bf, size) == 0 && sa->entries[i][size] == '\0') {
586 *ret = sa->offset + i;
587 return true;
588 }
589 }
590
591 return false;
592}
593
594bool strarray__strtoul_flags(struct strarray *sa, char *bf, size_t size, u64 *ret)
595{
596 u64 val = 0;
597 char *tok = bf, *sep, *end;
598
599 *ret = 0;
600
601 while (size != 0) {
602 int toklen = size;
603
604 sep = memchr(tok, '|', size);
605 if (sep != NULL) {
606 size -= sep - tok + 1;
607
608 end = sep - 1;
609 while (end > tok && isspace(*end))
610 --end;
611
612 toklen = end - tok + 1;
613 }
614
615 while (isspace(*tok))
616 ++tok;
617
618 if (isalpha(*tok) || *tok == '_') {
619 if (!strarray__strtoul(sa, tok, toklen, &val))
620 return false;
621 } else {
622 bool is_hexa = tok[0] == 0 && (tok[1] = 'x' || tok[1] == 'X');
623
624 val = strtoul(tok, NULL, is_hexa ? 16 : 0);
625 }
626
627 *ret |= (1 << (val - 1));
628
629 if (sep == NULL)
630 break;
631 tok = sep + 1;
632 }
633
634 return true;
635}
636
637bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret)
638{
639 int i;
640
641 for (i = 0; i < sas->nr_entries; ++i) {
642 struct strarray *sa = sas->entries[i];
643
644 if (strarray__strtoul(sa, bf, size, ret))
645 return true;
646 }
647
648 return false;
649}
650
651size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
652 struct syscall_arg *arg)
653{
654 return strarrays__scnprintf(arg->parm, bf, size, "%d", arg->show_string_prefix, arg->val);
655}
656
/* Fallback value, used only when no earlier header defined AT_FDCWD. */
#if !defined(AT_FDCWD)
#define AT_FDCWD -100
#endif
660
661static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
662 struct syscall_arg *arg)
663{
664 int fd = arg->val;
665 const char *prefix = "AT_FD";
666
667 if (fd == AT_FDCWD)
668 return scnprintf(bf, size, "%s%s", arg->show_string_prefix ? prefix : "", "CWD");
669
670 return syscall_arg__scnprintf_fd(bf, size, arg);
671}
672
673#define SCA_FDAT syscall_arg__scnprintf_fd_at
674
/* Forward declaration: needed by the syscall_fmts[] table; defined later in the file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
679
680size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
681{
682 return scnprintf(bf, size, "%#lx", arg->val);
683}
684
685size_t syscall_arg__scnprintf_ptr(char *bf, size_t size, struct syscall_arg *arg)
686{
687 if (arg->val == 0)
688 return scnprintf(bf, size, "NULL");
689 return syscall_arg__scnprintf_hex(bf, size, arg);
690}
691
692size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
693{
694 return scnprintf(bf, size, "%d", arg->val);
695}
696
697size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
698{
699 return scnprintf(bf, size, "%ld", arg->val);
700}
701
702static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg)
703{
704
705
706
707 return scnprintf(bf, size, "\"%-.*s\"", arg->fmt->nr_entries ?: arg->len, arg->val);
708}
709
710#define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array
711
712static const char *bpf_cmd[] = {
713 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
714 "MAP_GET_NEXT_KEY", "PROG_LOAD",
715};
716static DEFINE_STRARRAY(bpf_cmd, "BPF_");
717
718static const char *fsmount_flags[] = {
719 [1] = "CLOEXEC",
720};
721static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");
722
723#include "trace/beauty/generated/fsconfig_arrays.c"
724
725static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");
726
727static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
728static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);
729
730static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
731static DEFINE_STRARRAY(itimers, "ITIMER_");
732
733static const char *keyctl_options[] = {
734 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
735 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
736 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
737 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
738 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
739};
740static DEFINE_STRARRAY(keyctl_options, "KEYCTL_");
741
742static const char *whences[] = { "SET", "CUR", "END",
743#ifdef SEEK_DATA
744"DATA",
745#endif
746#ifdef SEEK_HOLE
747"HOLE",
748#endif
749};
750static DEFINE_STRARRAY(whences, "SEEK_");
751
752static const char *fcntl_cmds[] = {
753 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
754 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
755 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
756 "GETOWNER_UIDS",
757};
758static DEFINE_STRARRAY(fcntl_cmds, "F_");
759
760static const char *fcntl_linux_specific_cmds[] = {
761 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
762 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
763 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
764};
765
766static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, "F_", F_LINUX_SPECIFIC_BASE);
767
768static struct strarray *fcntl_cmds_arrays[] = {
769 &strarray__fcntl_cmds,
770 &strarray__fcntl_linux_specific_cmds,
771};
772
773static DEFINE_STRARRAYS(fcntl_cmds_arrays);
774
775static const char *rlimit_resources[] = {
776 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
777 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
778 "RTTIME",
779};
780static DEFINE_STRARRAY(rlimit_resources, "RLIMIT_");
781
782static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
783static DEFINE_STRARRAY(sighow, "SIG_");
784
785static const char *clockid[] = {
786 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
787 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
788 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
789};
790static DEFINE_STRARRAY(clockid, "CLOCK_");
791
792static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
793 struct syscall_arg *arg)
794{
795 bool show_prefix = arg->show_string_prefix;
796 const char *suffix = "_OK";
797 size_t printed = 0;
798 int mode = arg->val;
799
800 if (mode == F_OK)
801 return scnprintf(bf, size, "F%s", show_prefix ? suffix : "");
802#define P_MODE(n) \
803 if (mode & n##_OK) { \
804 printed += scnprintf(bf + printed, size - printed, "%s%s", #n, show_prefix ? suffix : ""); \
805 mode &= ~n##_OK; \
806 }
807
808 P_MODE(R);
809 P_MODE(W);
810 P_MODE(X);
811#undef P_MODE
812
813 if (mode)
814 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
815
816 return printed;
817}
818
819#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
820
/* Forward declaration: needed by the syscall_fmts[] table; defined later in the file. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
825
826static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
827 struct syscall_arg *arg)
828{
829 bool show_prefix = arg->show_string_prefix;
830 const char *prefix = "O_";
831 int printed = 0, flags = arg->val;
832
833#define P_FLAG(n) \
834 if (flags & O_##n) { \
835 printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
836 flags &= ~O_##n; \
837 }
838
839 P_FLAG(CLOEXEC);
840 P_FLAG(NONBLOCK);
841#undef P_FLAG
842
843 if (flags)
844 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
845
846 return printed;
847}
848
849#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
850
/* Fallback values, used only when no earlier header defined the GRND_* flags. */
#if !defined(GRND_NONBLOCK)
#define GRND_NONBLOCK 0x0001
#endif
#if !defined(GRND_RANDOM)
#define GRND_RANDOM 0x0002
#endif
857
858static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
859 struct syscall_arg *arg)
860{
861 bool show_prefix = arg->show_string_prefix;
862 const char *prefix = "GRND_";
863 int printed = 0, flags = arg->val;
864
865#define P_FLAG(n) \
866 if (flags & GRND_##n) { \
867 printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
868 flags &= ~GRND_##n; \
869 }
870
871 P_FLAG(RANDOM);
872 P_FLAG(NONBLOCK);
873#undef P_FLAG
874
875 if (flags)
876 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
877
878 return printed;
879}
880
881#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
882
/*
 * Initializer for a syscall_arg_fmt whose value is one name out of
 * strarray__<array>.  The 'name' parameter is not used by the expansion; it
 * only documents the argument at the point of use.
 */
#define STRARRAY(name, array) \
	{ .parm	     = &strarray__##array, \
	  .scnprintf = SCA_STRARRAY, \
	  .strtoul   = STUL_STRARRAY, }
887
/*
 * Initializer for a syscall_arg_fmt whose value is a set of flags named by
 * strarray__<array>.  The 'name' parameter is not used by the expansion; it
 * only documents the argument at the point of use.
 */
#define STRARRAY_FLAGS(name, array) \
	{ .parm	     = &strarray__##array, \
	  .scnprintf = SCA_STRARRAY_FLAGS, \
	  .strtoul   = STUL_STRARRAY_FLAGS, }
892
893#include "trace/beauty/arch_errno_names.c"
894#include "trace/beauty/eventfd.c"
895#include "trace/beauty/futex_op.c"
896#include "trace/beauty/futex_val3.c"
897#include "trace/beauty/mmap.c"
898#include "trace/beauty/mode_t.c"
899#include "trace/beauty/msg_flags.c"
900#include "trace/beauty/open_flags.c"
901#include "trace/beauty/perf_event_open.c"
902#include "trace/beauty/pid.c"
903#include "trace/beauty/sched_policy.c"
904#include "trace/beauty/seccomp.c"
905#include "trace/beauty/signum.c"
906#include "trace/beauty/socket_type.c"
907#include "trace/beauty/waitid_options.c"
908
909static struct syscall_fmt syscall_fmts[] = {
910 { .name = "access",
911 .arg = { [1] = { .scnprintf = SCA_ACCMODE, }, }, },
912 { .name = "arch_prctl",
913 .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, },
914 [1] = { .scnprintf = SCA_PTR, }, }, },
915 { .name = "bind",
916 .arg = { [0] = { .scnprintf = SCA_INT, },
917 [1] = { .scnprintf = SCA_SOCKADDR, },
918 [2] = { .scnprintf = SCA_INT, }, }, },
919 { .name = "bpf",
920 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
921 { .name = "brk", .hexret = true,
922 .arg = { [0] = { .scnprintf = SCA_PTR, }, }, },
923 { .name = "clock_gettime",
924 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
925 { .name = "clone", .errpid = true, .nr_args = 5,
926 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
927 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
928 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
929 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
930 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
931 { .name = "close",
932 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, }, }, },
933 { .name = "connect",
934 .arg = { [0] = { .scnprintf = SCA_INT, },
935 [1] = { .scnprintf = SCA_SOCKADDR, },
936 [2] = { .scnprintf = SCA_INT, }, }, },
937 { .name = "epoll_ctl",
938 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
939 { .name = "eventfd2",
940 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, }, }, },
941 { .name = "fchmodat",
942 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
943 { .name = "fchownat",
944 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
945 { .name = "fcntl",
946 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD,
947 .strtoul = STUL_STRARRAYS,
948 .parm = &strarrays__fcntl_cmds_arrays,
949 .show_zero = true, },
950 [2] = { .scnprintf = SCA_FCNTL_ARG, }, }, },
951 { .name = "flock",
952 .arg = { [1] = { .scnprintf = SCA_FLOCK, }, }, },
953 { .name = "fsconfig",
954 .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
955 { .name = "fsmount",
956 .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
957 [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, }, }, },
958 { .name = "fspick",
959 .arg = { [0] = { .scnprintf = SCA_FDAT, },
960 [1] = { .scnprintf = SCA_FILENAME, },
961 [2] = { .scnprintf = SCA_FSPICK_FLAGS, }, }, },
962 { .name = "fstat", .alias = "newfstat", },
963 { .name = "fstatat", .alias = "newfstatat", },
964 { .name = "futex",
965 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, },
966 [5] = { .scnprintf = SCA_FUTEX_VAL3, }, }, },
967 { .name = "futimesat",
968 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
969 { .name = "getitimer",
970 .arg = { [0] = STRARRAY(which, itimers), }, },
971 { .name = "getpid", .errpid = true, },
972 { .name = "getpgid", .errpid = true, },
973 { .name = "getppid", .errpid = true, },
974 { .name = "getrandom",
975 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, }, }, },
976 { .name = "getrlimit",
977 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
978 { .name = "gettid", .errpid = true, },
979 { .name = "ioctl",
980 .arg = {
981#if defined(__i386__) || defined(__x86_64__)
982
983
984
985 [1] = { .scnprintf = SCA_IOCTL_CMD, },
986 [2] = { .scnprintf = SCA_HEX, }, }, },
987#else
988 [2] = { .scnprintf = SCA_HEX, }, }, },
989#endif
990 { .name = "kcmp", .nr_args = 5,
991 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
992 [1] = { .name = "pid2", .scnprintf = SCA_PID, },
993 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
994 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
995 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
996 { .name = "keyctl",
997 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
998 { .name = "kill",
999 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
1000 { .name = "linkat",
1001 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1002 { .name = "lseek",
1003 .arg = { [2] = STRARRAY(whence, whences), }, },
1004 { .name = "lstat", .alias = "newlstat", },
1005 { .name = "madvise",
1006 .arg = { [0] = { .scnprintf = SCA_HEX, },
1007 [2] = { .scnprintf = SCA_MADV_BHV, }, }, },
1008 { .name = "mkdirat",
1009 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1010 { .name = "mknodat",
1011 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1012 { .name = "mmap", .hexret = true,
1013
1014#if defined(__s390x__)
1015 .alias = "old_mmap",
1016#endif
1017 .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, },
1018 [3] = { .scnprintf = SCA_MMAP_FLAGS,
1019 .strtoul = STUL_STRARRAY_FLAGS,
1020 .parm = &strarray__mmap_flags, },
1021 [5] = { .scnprintf = SCA_HEX, }, }, },
1022 { .name = "mount",
1023 .arg = { [0] = { .scnprintf = SCA_FILENAME, },
1024 [3] = { .scnprintf = SCA_MOUNT_FLAGS,
1025 .mask_val = SCAMV_MOUNT_FLAGS, }, }, },
1026 { .name = "move_mount",
1027 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1028 [1] = { .scnprintf = SCA_FILENAME, },
1029 [2] = { .scnprintf = SCA_FDAT, },
1030 [3] = { .scnprintf = SCA_FILENAME, },
1031 [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, }, }, },
1032 { .name = "mprotect",
1033 .arg = { [0] = { .scnprintf = SCA_HEX, },
1034 [2] = { .scnprintf = SCA_MMAP_PROT, }, }, },
1035 { .name = "mq_unlink",
1036 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1037 { .name = "mremap", .hexret = true,
1038 .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, }, }, },
1039 { .name = "name_to_handle_at",
1040 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1041 { .name = "newfstatat",
1042 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1043 { .name = "open",
1044 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
1045 { .name = "open_by_handle_at",
1046 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1047 [2] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
1048 { .name = "openat",
1049 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1050 [2] = { .scnprintf = SCA_OPEN_FLAGS, }, }, },
1051 { .name = "perf_event_open",
1052 .arg = { [2] = { .scnprintf = SCA_INT, },
1053 [3] = { .scnprintf = SCA_FD, },
1054 [4] = { .scnprintf = SCA_PERF_FLAGS, }, }, },
1055 { .name = "pipe2",
1056 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, }, }, },
1057 { .name = "pkey_alloc",
1058 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, }, }, },
1059 { .name = "pkey_free",
1060 .arg = { [0] = { .scnprintf = SCA_INT, }, }, },
1061 { .name = "pkey_mprotect",
1062 .arg = { [0] = { .scnprintf = SCA_HEX, },
1063 [2] = { .scnprintf = SCA_MMAP_PROT, },
1064 [3] = { .scnprintf = SCA_INT, }, }, },
1065 { .name = "poll", .timeout = true, },
1066 { .name = "ppoll", .timeout = true, },
1067 { .name = "prctl",
1068 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, },
1069 [1] = { .scnprintf = SCA_PRCTL_ARG2, },
1070 [2] = { .scnprintf = SCA_PRCTL_ARG3, }, }, },
1071 { .name = "pread", .alias = "pread64", },
1072 { .name = "preadv", .alias = "pread", },
1073 { .name = "prlimit64",
1074 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
1075 { .name = "pwrite", .alias = "pwrite64", },
1076 { .name = "readlinkat",
1077 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1078 { .name = "recvfrom",
1079 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1080 { .name = "recvmmsg",
1081 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1082 { .name = "recvmsg",
1083 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1084 { .name = "renameat",
1085 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1086 [2] = { .scnprintf = SCA_FDAT, }, }, },
1087 { .name = "renameat2",
1088 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1089 [2] = { .scnprintf = SCA_FDAT, },
1090 [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, }, }, },
1091 { .name = "rt_sigaction",
1092 .arg = { [0] = { .scnprintf = SCA_SIGNUM, }, }, },
1093 { .name = "rt_sigprocmask",
1094 .arg = { [0] = STRARRAY(how, sighow), }, },
1095 { .name = "rt_sigqueueinfo",
1096 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
1097 { .name = "rt_tgsigqueueinfo",
1098 .arg = { [2] = { .scnprintf = SCA_SIGNUM, }, }, },
1099 { .name = "sched_setscheduler",
1100 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, }, }, },
1101 { .name = "seccomp",
1102 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, },
1103 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, }, }, },
1104 { .name = "select", .timeout = true, },
1105 { .name = "sendfile", .alias = "sendfile64", },
1106 { .name = "sendmmsg",
1107 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1108 { .name = "sendmsg",
1109 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, }, }, },
1110 { .name = "sendto",
1111 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, },
1112 [4] = { .scnprintf = SCA_SOCKADDR, }, }, },
1113 { .name = "set_tid_address", .errpid = true, },
1114 { .name = "setitimer",
1115 .arg = { [0] = STRARRAY(which, itimers), }, },
1116 { .name = "setrlimit",
1117 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
1118 { .name = "socket",
1119 .arg = { [0] = STRARRAY(family, socket_families),
1120 [1] = { .scnprintf = SCA_SK_TYPE, },
1121 [2] = { .scnprintf = SCA_SK_PROTO, }, }, },
1122 { .name = "socketpair",
1123 .arg = { [0] = STRARRAY(family, socket_families),
1124 [1] = { .scnprintf = SCA_SK_TYPE, },
1125 [2] = { .scnprintf = SCA_SK_PROTO, }, }, },
1126 { .name = "stat", .alias = "newstat", },
1127 { .name = "statx",
1128 .arg = { [0] = { .scnprintf = SCA_FDAT, },
1129 [2] = { .scnprintf = SCA_STATX_FLAGS, } ,
1130 [3] = { .scnprintf = SCA_STATX_MASK, }, }, },
1131 { .name = "swapoff",
1132 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1133 { .name = "swapon",
1134 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1135 { .name = "symlinkat",
1136 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1137 { .name = "sync_file_range",
1138 .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, }, }, },
1139 { .name = "tgkill",
1140 .arg = { [2] = { .scnprintf = SCA_SIGNUM, }, }, },
1141 { .name = "tkill",
1142 .arg = { [1] = { .scnprintf = SCA_SIGNUM, }, }, },
1143 { .name = "umount2", .alias = "umount",
1144 .arg = { [0] = { .scnprintf = SCA_FILENAME, }, }, },
1145 { .name = "uname", .alias = "newuname", },
1146 { .name = "unlinkat",
1147 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1148 { .name = "utimensat",
1149 .arg = { [0] = { .scnprintf = SCA_FDAT, }, }, },
1150 { .name = "wait4", .errpid = true,
1151 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, }, }, },
1152 { .name = "waitid", .errpid = true,
1153 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, }, }, },
1154};
1155
1156static int syscall_fmt__cmp(const void *name, const void *fmtp)
1157{
1158 const struct syscall_fmt *fmt = fmtp;
1159 return strcmp(name, fmt->name);
1160}
1161
1162static struct syscall_fmt *__syscall_fmt__find(struct syscall_fmt *fmts, const int nmemb, const char *name)
1163{
1164 return bsearch(name, fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1165}
1166
1167static struct syscall_fmt *syscall_fmt__find(const char *name)
1168{
1169 const int nmemb = ARRAY_SIZE(syscall_fmts);
1170 return __syscall_fmt__find(syscall_fmts, nmemb, name);
1171}
1172
1173static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts, const int nmemb, const char *alias)
1174{
1175 int i;
1176
1177 for (i = 0; i < nmemb; ++i) {
1178 if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0)
1179 return &fmts[i];
1180 }
1181
1182 return NULL;
1183}
1184
1185static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
1186{
1187 const int nmemb = ARRAY_SIZE(syscall_fmts);
1188 return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias);
1189}
1190
1191
1192
1193
1194
1195
1196
/*
 * Per-syscall state, one slot per syscall id in trace->syscalls.table,
 * filled in lazily by trace__read_syscall_info().
 */
struct syscall {
	/* "syscalls:sys_enter_<name>" tracepoint format; may be an ERR_PTR() */
	struct tep_event    *tp_format;
	/* number of arguments, not counting a leading syscall-number field */
	int		    nr_args;
	/* offset + size of the last tracepoint field */
	int		    args_size;
	/* NOTE(review): presumably per-syscall BPF programs; not used in this chunk */
	struct {
		struct bpf_program *sys_enter;
		struct bpf_program *sys_exit;
	} bpf_prog;
	/* name is "exit" or "exit_group" */
	bool		    is_exit;
	/* name is "open" or "openat" */
	bool		    is_open;
	/* the syscall table has no name for this id */
	bool		    nonexistent;
	/* first real argument field of tp_format (syscall-number field skipped) */
	struct tep_format_field *args;
	const char	    *name;
	/* matching syscall_fmts[] entry, NULL when there is none */
	struct syscall_fmt  *fmt;
	/* array of per-argument formatters, allocated by syscall__alloc_arg_fmts() */
	struct syscall_arg_fmt *arg_fmt;
};
1213
1214
1215
1216
1217
1218
1219struct bpf_map_syscall_entry {
1220 bool enabled;
1221 u16 string_args_len[6];
1222};
1223
1224
1225
1226
1227
1228
1229
1230
1231static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
1232{
1233 double duration = (double)t / NSEC_PER_MSEC;
1234 size_t printed = fprintf(fp, "(");
1235
1236 if (!calculated)
1237 printed += fprintf(fp, " ");
1238 else if (duration >= 1.0)
1239 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1240 else if (duration >= 0.01)
1241 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1242 else
1243 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1244 return printed + fprintf(fp, "): ");
1245}
1246
1247
1248
1249
1250
1251
1252
1253
1254struct thread_trace {
1255 u64 entry_time;
1256 bool entry_pending;
1257 unsigned long nr_events;
1258 unsigned long pfmaj, pfmin;
1259 char *entry_str;
1260 double runtime_ms;
1261 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1262 struct {
1263 unsigned long ptr;
1264 short int entry_str_pos;
1265 bool pending_open;
1266 unsigned int namelen;
1267 char *name;
1268 } filename;
1269 struct {
1270 int max;
1271 struct file *table;
1272 } files;
1273
1274 struct intlist *syscall_stats;
1275};
1276
1277static struct thread_trace *thread_trace__new(void)
1278{
1279 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1280
1281 if (ttrace) {
1282 ttrace->files.max = -1;
1283 ttrace->syscall_stats = intlist__new(NULL);
1284 }
1285
1286 return ttrace;
1287}
1288
1289static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1290{
1291 struct thread_trace *ttrace;
1292
1293 if (thread == NULL)
1294 goto fail;
1295
1296 if (thread__priv(thread) == NULL)
1297 thread__set_priv(thread, thread_trace__new());
1298
1299 if (thread__priv(thread) == NULL)
1300 goto fail;
1301
1302 ttrace = thread__priv(thread);
1303 ++ttrace->nr_events;
1304
1305 return ttrace;
1306fail:
1307 color_fprintf(fp, PERF_COLOR_RED,
1308 "WARNING: not enough memory, dropping samples!\n");
1309 return NULL;
1310}
1311
1312
1313void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
1314 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
1315{
1316 struct thread_trace *ttrace = thread__priv(arg->thread);
1317
1318 ttrace->ret_scnprintf = ret_scnprintf;
1319}
1320
/* NOTE(review): bit flags, presumably selecting major/minor page fault tracing */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size in bytes of each thread's entry_str formatting buffer */
static const size_t trace__entry_str_size = 2048;
1325
1326static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
1327{
1328 if (fd < 0)
1329 return NULL;
1330
1331 if (fd > ttrace->files.max) {
1332 struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
1333
1334 if (nfiles == NULL)
1335 return NULL;
1336
1337 if (ttrace->files.max != -1) {
1338 memset(nfiles + ttrace->files.max + 1, 0,
1339 (fd - ttrace->files.max) * sizeof(struct file));
1340 } else {
1341 memset(nfiles, 0, (fd + 1) * sizeof(struct file));
1342 }
1343
1344 ttrace->files.table = nfiles;
1345 ttrace->files.max = fd;
1346 }
1347
1348 return ttrace->files.table + fd;
1349}
1350
/* Fd table slot for @fd in @thread's private thread_trace; see thread_trace__files_entry(). */
struct file *thread__files_entry(struct thread *thread, int fd)
{
	struct thread_trace *ttrace = thread__priv(thread);

	return thread_trace__files_entry(ttrace, fd);
}
1355
1356static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1357{
1358 struct thread_trace *ttrace = thread__priv(thread);
1359 struct file *file = thread_trace__files_entry(ttrace, fd);
1360
1361 if (file != NULL) {
1362 struct stat st;
1363 if (stat(pathname, &st) == 0)
1364 file->dev_maj = major(st.st_rdev);
1365 file->pathname = strdup(pathname);
1366 if (file->pathname)
1367 return 0;
1368 }
1369
1370 return -1;
1371}
1372
1373static int thread__read_fd_path(struct thread *thread, int fd)
1374{
1375 char linkname[PATH_MAX], pathname[PATH_MAX];
1376 struct stat st;
1377 int ret;
1378
1379 if (thread->pid_ == thread->tid) {
1380 scnprintf(linkname, sizeof(linkname),
1381 "/proc/%d/fd/%d", thread->pid_, fd);
1382 } else {
1383 scnprintf(linkname, sizeof(linkname),
1384 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1385 }
1386
1387 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1388 return -1;
1389
1390 ret = readlink(linkname, pathname, sizeof(pathname));
1391
1392 if (ret < 0 || ret > st.st_size)
1393 return -1;
1394
1395 pathname[ret] = '\0';
1396 return trace__set_fd_pathname(thread, fd, pathname);
1397}
1398
1399static const char *thread__fd_path(struct thread *thread, int fd,
1400 struct trace *trace)
1401{
1402 struct thread_trace *ttrace = thread__priv(thread);
1403
1404 if (ttrace == NULL || trace->fd_path_disabled)
1405 return NULL;
1406
1407 if (fd < 0)
1408 return NULL;
1409
1410 if ((fd > ttrace->files.max || ttrace->files.table[fd].pathname == NULL)) {
1411 if (!trace->live)
1412 return NULL;
1413 ++trace->stats.proc_getname;
1414 if (thread__read_fd_path(thread, fd))
1415 return NULL;
1416 }
1417
1418 return ttrace->files.table[fd].pathname;
1419}
1420
1421size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
1422{
1423 int fd = arg->val;
1424 size_t printed = scnprintf(bf, size, "%d", fd);
1425 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1426
1427 if (path)
1428 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1429
1430 return printed;
1431}
1432
1433size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1434{
1435 size_t printed = scnprintf(bf, size, "%d", fd);
1436 struct thread *thread = machine__find_thread(trace->host, pid, pid);
1437
1438 if (thread) {
1439 const char *path = thread__fd_path(thread, fd, trace);
1440
1441 if (path)
1442 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1443
1444 thread__put(thread);
1445 }
1446
1447 return printed;
1448}
1449
1450static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1451 struct syscall_arg *arg)
1452{
1453 int fd = arg->val;
1454 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1455 struct thread_trace *ttrace = thread__priv(arg->thread);
1456
1457 if (ttrace && fd >= 0 && fd <= ttrace->files.max)
1458 zfree(&ttrace->files.table[fd].pathname);
1459
1460 return printed;
1461}
1462
1463static void thread__set_filename_pos(struct thread *thread, const char *bf,
1464 unsigned long ptr)
1465{
1466 struct thread_trace *ttrace = thread__priv(thread);
1467
1468 ttrace->filename.ptr = ptr;
1469 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1470}
1471
1472static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
1473{
1474 struct augmented_arg *augmented_arg = arg->augmented.args;
1475 size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
1476
1477
1478
1479
1480 int consumed = sizeof(*augmented_arg) + augmented_arg->size;
1481
1482 arg->augmented.args = ((void *)arg->augmented.args) + consumed;
1483 arg->augmented.size -= consumed;
1484
1485 return printed;
1486}
1487
1488static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1489 struct syscall_arg *arg)
1490{
1491 unsigned long ptr = arg->val;
1492
1493 if (arg->augmented.args)
1494 return syscall_arg__scnprintf_augmented_string(arg, bf, size);
1495
1496 if (!arg->trace->vfs_getname)
1497 return scnprintf(bf, size, "%#x", ptr);
1498
1499 thread__set_filename_pos(arg->thread, bf, ptr);
1500 return 0;
1501}
1502
1503static bool trace__filter_duration(struct trace *trace, double t)
1504{
1505 return t < (trace->duration_filter * NSEC_PER_MSEC);
1506}
1507
1508static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1509{
1510 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1511
1512 return fprintf(fp, "%10.3f ", ts);
1513}
1514
1515
1516
1517
1518
1519
1520
1521static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1522{
1523 if (tstamp > 0)
1524 return __trace__fprintf_tstamp(trace, tstamp, fp);
1525
1526 return fprintf(fp, " ? ");
1527}
1528
/*
 * Flags written by sig_handler() and read by normal code.  They must be
 * volatile sig_atomic_t: that is the only object type a signal handler may
 * portably store to, and volatile keeps reads in normal code from being
 * optimised away.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/* Request termination; remember whether the request came from SIGINT. */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1537
1538static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
1539{
1540 size_t printed = 0;
1541
1542 if (trace->multiple_threads) {
1543 if (trace->show_comm)
1544 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1545 printed += fprintf(fp, "%d ", thread->tid);
1546 }
1547
1548 return printed;
1549}
1550
1551static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1552 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1553{
1554 size_t printed = 0;
1555
1556 if (trace->show_tstamp)
1557 printed = trace__fprintf_tstamp(trace, tstamp, fp);
1558 if (trace->show_duration)
1559 printed += fprintf_duration(duration, duration_calculated, fp);
1560 return printed + trace__fprintf_comm_tid(trace, thread, fp);
1561}
1562
1563static int trace__process_event(struct trace *trace, struct machine *machine,
1564 union perf_event *event, struct perf_sample *sample)
1565{
1566 int ret = 0;
1567
1568 switch (event->header.type) {
1569 case PERF_RECORD_LOST:
1570 color_fprintf(trace->output, PERF_COLOR_RED,
1571 "LOST %" PRIu64 " events!\n", event->lost.lost);
1572 ret = machine__process_lost_event(machine, event, sample);
1573 break;
1574 default:
1575 ret = machine__process_event(machine, event, sample);
1576 break;
1577 }
1578
1579 return ret;
1580}
1581
1582static int trace__tool_process(struct perf_tool *tool,
1583 union perf_event *event,
1584 struct perf_sample *sample,
1585 struct machine *machine)
1586{
1587 struct trace *trace = container_of(tool, struct trace, tool);
1588 return trace__process_event(trace, machine, event, sample);
1589}
1590
1591static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1592{
1593 struct machine *machine = vmachine;
1594
1595 if (machine->kptr_restrict_warned)
1596 return NULL;
1597
1598 if (symbol_conf.kptr_restrict) {
1599 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1600 "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1601 "Kernel samples will not be resolved.\n");
1602 machine->kptr_restrict_warned = true;
1603 return NULL;
1604 }
1605
1606 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1607}
1608
1609static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
1610{
1611 int err = symbol__init(NULL);
1612
1613 if (err)
1614 return err;
1615
1616 trace->host = machine__new_host();
1617 if (trace->host == NULL)
1618 return -ENOMEM;
1619
1620 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1621 if (err < 0)
1622 goto out;
1623
1624 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1625 evlist->core.threads, trace__tool_process, false,
1626 1);
1627out:
1628 if (err)
1629 symbol__exit();
1630
1631 return err;
1632}
1633
1634static void trace__symbols__exit(struct trace *trace)
1635{
1636 machine__exit(trace->host);
1637 trace->host = NULL;
1638
1639 symbol__exit();
1640}
1641
1642static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
1643{
1644 int idx;
1645
1646 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1647 nr_args = sc->fmt->nr_args;
1648
1649 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
1650 if (sc->arg_fmt == NULL)
1651 return -1;
1652
1653 for (idx = 0; idx < nr_args; ++idx) {
1654 if (sc->fmt)
1655 sc->arg_fmt[idx] = sc->fmt->arg[idx];
1656 }
1657
1658 sc->nr_args = nr_args;
1659 return 0;
1660}
1661
1662static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = {
1663 { .name = "msr", .scnprintf = SCA_X86_MSR, .strtoul = STUL_X86_MSR, },
1664 { .name = "vector", .scnprintf = SCA_X86_IRQ_VECTORS, .strtoul = STUL_X86_IRQ_VECTORS, },
1665};
1666
1667static int syscall_arg_fmt__cmp(const void *name, const void *fmtp)
1668{
1669 const struct syscall_arg_fmt *fmt = fmtp;
1670 return strcmp(name, fmt->name);
1671}
1672
1673static struct syscall_arg_fmt *
1674__syscall_arg_fmt__find_by_name(struct syscall_arg_fmt *fmts, const int nmemb, const char *name)
1675{
1676 return bsearch(name, fmts, nmemb, sizeof(struct syscall_arg_fmt), syscall_arg_fmt__cmp);
1677}
1678
1679static struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name)
1680{
1681 const int nmemb = ARRAY_SIZE(syscall_arg_fmts__by_name);
1682 return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
1683}
1684
1685static struct tep_format_field *
1686syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field)
1687{
1688 struct tep_format_field *last_field = NULL;
1689 int len;
1690
1691 for (; field; field = field->next, ++arg) {
1692 last_field = field;
1693
1694 if (arg->scnprintf)
1695 continue;
1696
1697 len = strlen(field->name);
1698
1699 if (strcmp(field->type, "const char *") == 0 &&
1700 ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
1701 strstr(field->name, "path") != NULL))
1702 arg->scnprintf = SCA_FILENAME;
1703 else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
1704 arg->scnprintf = SCA_PTR;
1705 else if (strcmp(field->type, "pid_t") == 0)
1706 arg->scnprintf = SCA_PID;
1707 else if (strcmp(field->type, "umode_t") == 0)
1708 arg->scnprintf = SCA_MODE_T;
1709 else if ((field->flags & TEP_FIELD_IS_ARRAY) && strstr(field->type, "char")) {
1710 arg->scnprintf = SCA_CHAR_ARRAY;
1711 arg->nr_entries = field->arraylen;
1712 } else if ((strcmp(field->type, "int") == 0 ||
1713 strcmp(field->type, "unsigned int") == 0 ||
1714 strcmp(field->type, "long") == 0) &&
1715 len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
1716
1717
1718
1719
1720
1721
1722
1723 arg->scnprintf = SCA_FD;
1724 } else {
1725 struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name);
1726
1727 if (fmt) {
1728 arg->scnprintf = fmt->scnprintf;
1729 arg->strtoul = fmt->strtoul;
1730 }
1731 }
1732 }
1733
1734 return last_field;
1735}
1736
1737static int syscall__set_arg_fmts(struct syscall *sc)
1738{
1739 struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args);
1740
1741 if (last_field)
1742 sc->args_size = last_field->offset + last_field->size;
1743
1744 return 0;
1745}
1746
1747static int trace__read_syscall_info(struct trace *trace, int id)
1748{
1749 char tp_name[128];
1750 struct syscall *sc;
1751 const char *name = syscalltbl__name(trace->sctbl, id);
1752
1753 if (trace->syscalls.table == NULL) {
1754 trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
1755 if (trace->syscalls.table == NULL)
1756 return -ENOMEM;
1757 }
1758
1759 sc = trace->syscalls.table + id;
1760 if (sc->nonexistent)
1761 return 0;
1762
1763 if (name == NULL) {
1764 sc->nonexistent = true;
1765 return 0;
1766 }
1767
1768 sc->name = name;
1769 sc->fmt = syscall_fmt__find(sc->name);
1770
1771 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1772 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1773
1774 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1775 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1776 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1777 }
1778
1779 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1780 return -ENOMEM;
1781
1782 if (IS_ERR(sc->tp_format))
1783 return PTR_ERR(sc->tp_format);
1784
1785 sc->args = sc->tp_format->format.fields;
1786
1787
1788
1789
1790
1791 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1792 sc->args = sc->args->next;
1793 --sc->nr_args;
1794 }
1795
1796 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1797 sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
1798
1799 return syscall__set_arg_fmts(sc);
1800}
1801
1802static int perf_evsel__init_tp_arg_scnprintf(struct evsel *evsel)
1803{
1804 struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
1805
1806 if (fmt != NULL) {
1807 syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields);
1808 return 0;
1809 }
1810
1811 return -ENOMEM;
1812}
1813
/*
 * qsort()/bsearch() comparator for int elements.
 *
 * Returns a negative, zero or positive value when *a is less than, equal to
 * or greater than *b.  The result comes from two comparisons rather than a
 * subtraction, which would overflow (undefined behaviour) for operands far
 * apart, e.g. INT_MIN and 1.
 */
static int intcmp(const void *a, const void *b)
{
	const int one = *(const int *)a;
	const int another = *(const int *)b;

	return (one > another) - (one < another);
}
1820
1821static int trace__validate_ev_qualifier(struct trace *trace)
1822{
1823 int err = 0;
1824 bool printed_invalid_prefix = false;
1825 struct str_node *pos;
1826 size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier);
1827
1828 trace->ev_qualifier_ids.entries = malloc(nr_allocated *
1829 sizeof(trace->ev_qualifier_ids.entries[0]));
1830
1831 if (trace->ev_qualifier_ids.entries == NULL) {
1832 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1833 trace->output);
1834 err = -EINVAL;
1835 goto out;
1836 }
1837
1838 strlist__for_each_entry(pos, trace->ev_qualifier) {
1839 const char *sc = pos->s;
1840 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
1841
1842 if (id < 0) {
1843 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1844 if (id >= 0)
1845 goto matches;
1846
1847 if (!printed_invalid_prefix) {
1848 pr_debug("Skipping unknown syscalls: ");
1849 printed_invalid_prefix = true;
1850 } else {
1851 pr_debug(", ");
1852 }
1853
1854 pr_debug("%s", sc);
1855 continue;
1856 }
1857matches:
1858 trace->ev_qualifier_ids.entries[nr_used++] = id;
1859 if (match_next == -1)
1860 continue;
1861
1862 while (1) {
1863 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1864 if (id < 0)
1865 break;
1866 if (nr_allocated == nr_used) {
1867 void *entries;
1868
1869 nr_allocated += 8;
1870 entries = realloc(trace->ev_qualifier_ids.entries,
1871 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1872 if (entries == NULL) {
1873 err = -ENOMEM;
1874 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1875 goto out_free;
1876 }
1877 trace->ev_qualifier_ids.entries = entries;
1878 }
1879 trace->ev_qualifier_ids.entries[nr_used++] = id;
1880 }
1881 }
1882
1883 trace->ev_qualifier_ids.nr = nr_used;
1884 qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp);
1885out:
1886 if (printed_invalid_prefix)
1887 pr_debug("\n");
1888 return err;
1889out_free:
1890 zfree(&trace->ev_qualifier_ids.entries);
1891 trace->ev_qualifier_ids.nr = 0;
1892 goto out;
1893}
1894
1895static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
1896{
1897 bool in_ev_qualifier;
1898
1899 if (trace->ev_qualifier_ids.nr == 0)
1900 return true;
1901
1902 in_ev_qualifier = bsearch(&id, trace->ev_qualifier_ids.entries,
1903 trace->ev_qualifier_ids.nr, sizeof(int), intcmp) != NULL;
1904
1905 if (in_ev_qualifier)
1906 return !trace->not_ev_qualifier;
1907
1908 return trace->not_ev_qualifier;
1909}
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
1920{
1921 unsigned long val;
1922 unsigned char *p = arg->args + sizeof(unsigned long) * idx;
1923
1924 memcpy(&val, p, sizeof(val));
1925 return val;
1926}
1927
1928static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1929 struct syscall_arg *arg)
1930{
1931 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1932 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1933
1934 return scnprintf(bf, size, "arg%d: ", arg->idx);
1935}
1936
1937
1938
1939
1940
1941
1942static unsigned long syscall_arg_fmt__mask_val(struct syscall_arg_fmt *fmt, struct syscall_arg *arg, unsigned long val)
1943{
1944 if (fmt && fmt->mask_val)
1945 return fmt->mask_val(arg, val);
1946
1947 return val;
1948}
1949
1950static size_t syscall_arg_fmt__scnprintf_val(struct syscall_arg_fmt *fmt, char *bf, size_t size,
1951 struct syscall_arg *arg, unsigned long val)
1952{
1953 if (fmt && fmt->scnprintf) {
1954 arg->val = val;
1955 if (fmt->parm)
1956 arg->parm = fmt->parm;
1957 return fmt->scnprintf(bf, size, arg);
1958 }
1959 return scnprintf(bf, size, "%ld", val);
1960}
1961
1962static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1963 unsigned char *args, void *augmented_args, int augmented_args_size,
1964 struct trace *trace, struct thread *thread)
1965{
1966 size_t printed = 0;
1967 unsigned long val;
1968 u8 bit = 1;
1969 struct syscall_arg arg = {
1970 .args = args,
1971 .augmented = {
1972 .size = augmented_args_size,
1973 .args = augmented_args,
1974 },
1975 .idx = 0,
1976 .mask = 0,
1977 .trace = trace,
1978 .thread = thread,
1979 .show_string_prefix = trace->show_string_prefix,
1980 };
1981 struct thread_trace *ttrace = thread__priv(thread);
1982
1983
1984
1985
1986
1987
1988 ttrace->ret_scnprintf = NULL;
1989
1990 if (sc->args != NULL) {
1991 struct tep_format_field *field;
1992
1993 for (field = sc->args; field;
1994 field = field->next, ++arg.idx, bit <<= 1) {
1995 if (arg.mask & bit)
1996 continue;
1997
1998 arg.fmt = &sc->arg_fmt[arg.idx];
1999 val = syscall_arg__val(&arg, arg.idx);
2000
2001
2002
2003
2004 val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val);
2005
2006
2007
2008
2009
2010
2011 if (val == 0 &&
2012 !trace->show_zeros &&
2013 !(sc->arg_fmt &&
2014 (sc->arg_fmt[arg.idx].show_zero ||
2015 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
2016 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
2017 sc->arg_fmt[arg.idx].parm))
2018 continue;
2019
2020 printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
2021
2022 if (trace->show_arg_names)
2023 printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
2024
2025 printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx],
2026 bf + printed, size - printed, &arg, val);
2027 }
2028 } else if (IS_ERR(sc->tp_format)) {
2029
2030
2031
2032
2033
2034 while (arg.idx < sc->nr_args) {
2035 if (arg.mask & bit)
2036 goto next_arg;
2037 val = syscall_arg__val(&arg, arg.idx);
2038 if (printed)
2039 printed += scnprintf(bf + printed, size - printed, ", ");
2040 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
2041 printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], bf + printed, size - printed, &arg, val);
2042next_arg:
2043 ++arg.idx;
2044 bit <<= 1;
2045 }
2046 }
2047
2048 return printed;
2049}
2050
/* Signature shared by the per-tracepoint sample handlers, e.g. trace__sys_enter(). */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
2054
2055static struct syscall *trace__syscall_info(struct trace *trace,
2056 struct evsel *evsel, int id)
2057{
2058 int err = 0;
2059
2060 if (id < 0) {
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072 if (verbose > 1) {
2073 static u64 n;
2074 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
2075 id, perf_evsel__name(evsel), ++n);
2076 }
2077 return NULL;
2078 }
2079
2080 err = -EINVAL;
2081
2082 if (id > trace->sctbl->syscalls.max_id)
2083 goto out_cant_read;
2084
2085 if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
2086 (err = trace__read_syscall_info(trace, id)) != 0)
2087 goto out_cant_read;
2088
2089 if (trace->syscalls.table[id].name == NULL) {
2090 if (trace->syscalls.table[id].nonexistent)
2091 return NULL;
2092 goto out_cant_read;
2093 }
2094
2095 return &trace->syscalls.table[id];
2096
2097out_cant_read:
2098 if (verbose > 0) {
2099 char sbuf[STRERR_BUFSIZE];
2100 fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
2101 if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
2102 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
2103 fputs(" information\n", trace->output);
2104 }
2105 return NULL;
2106}
2107
2108struct syscall_stats {
2109 struct stats stats;
2110 u64 nr_failures;
2111 int max_errno;
2112 u32 *errnos;
2113};
2114
2115static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace,
2116 int id, struct perf_sample *sample, long err, bool errno_summary)
2117{
2118 struct int_node *inode;
2119 struct syscall_stats *stats;
2120 u64 duration = 0;
2121
2122 inode = intlist__findnew(ttrace->syscall_stats, id);
2123 if (inode == NULL)
2124 return;
2125
2126 stats = inode->priv;
2127 if (stats == NULL) {
2128 stats = malloc(sizeof(*stats));
2129 if (stats == NULL)
2130 return;
2131
2132 stats->nr_failures = 0;
2133 stats->max_errno = 0;
2134 stats->errnos = NULL;
2135 init_stats(&stats->stats);
2136 inode->priv = stats;
2137 }
2138
2139 if (ttrace->entry_time && sample->time > ttrace->entry_time)
2140 duration = sample->time - ttrace->entry_time;
2141
2142 update_stats(&stats->stats, duration);
2143
2144 if (err < 0) {
2145 ++stats->nr_failures;
2146
2147 if (!errno_summary)
2148 return;
2149
2150 err = -err;
2151 if (err > stats->max_errno) {
2152 u32 *new_errnos = realloc(stats->errnos, err * sizeof(u32));
2153
2154 if (new_errnos) {
2155 memset(new_errnos + stats->max_errno, 0, (err - stats->max_errno) * sizeof(u32));
2156 } else {
2157 pr_debug("Not enough memory for errno stats for thread \"%s\"(%d/%d), results will be incomplete\n",
2158 thread__comm_str(thread), thread->pid_, thread->tid);
2159 return;
2160 }
2161
2162 stats->errnos = new_errnos;
2163 stats->max_errno = err;
2164 }
2165
2166 ++stats->errnos[err - 1];
2167 }
2168}
2169
2170static int trace__printf_interrupted_entry(struct trace *trace)
2171{
2172 struct thread_trace *ttrace;
2173 size_t printed;
2174 int len;
2175
2176 if (trace->failure_only || trace->current == NULL)
2177 return 0;
2178
2179 ttrace = thread__priv(trace->current);
2180
2181 if (!ttrace->entry_pending)
2182 return 0;
2183
2184 printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
2185 printed += len = fprintf(trace->output, "%s)", ttrace->entry_str);
2186
2187 if (len < trace->args_alignment - 4)
2188 printed += fprintf(trace->output, "%-*s", trace->args_alignment - 4 - len, " ");
2189
2190 printed += fprintf(trace->output, " ...\n");
2191
2192 ttrace->entry_pending = false;
2193 ++trace->nr_events_printed;
2194
2195 return printed;
2196}
2197
2198static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
2199 struct perf_sample *sample, struct thread *thread)
2200{
2201 int printed = 0;
2202
2203 if (trace->print_sample) {
2204 double ts = (double)sample->time / NSEC_PER_MSEC;
2205
2206 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
2207 perf_evsel__name(evsel), ts,
2208 thread__comm_str(thread),
2209 sample->pid, sample->tid, sample->cpu);
2210 }
2211
2212 return printed;
2213}
2214
2215static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
2216{
2217 void *augmented_args = NULL;
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232 int args_size = raw_augmented_args_size ?: sc->args_size;
2233
2234 *augmented_args_size = sample->raw_size - args_size;
2235 if (*augmented_args_size > 0)
2236 augmented_args = sample->raw_data + args_size;
2237
2238 return augmented_args;
2239}
2240
2241static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
2242 union perf_event *event __maybe_unused,
2243 struct perf_sample *sample)
2244{
2245 char *msg;
2246 void *args;
2247 int printed = 0;
2248 struct thread *thread;
2249 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2250 int augmented_args_size = 0;
2251 void *augmented_args = NULL;
2252 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2253 struct thread_trace *ttrace;
2254
2255 if (sc == NULL)
2256 return -1;
2257
2258 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2259 ttrace = thread__trace(thread, trace->output);
2260 if (ttrace == NULL)
2261 goto out_put;
2262
2263 trace__fprintf_sample(trace, evsel, sample, thread);
2264
2265 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2266
2267 if (ttrace->entry_str == NULL) {
2268 ttrace->entry_str = malloc(trace__entry_str_size);
2269 if (!ttrace->entry_str)
2270 goto out_put;
2271 }
2272
2273 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
2274 trace__printf_interrupted_entry(trace);
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285 if (evsel != trace->syscalls.events.sys_enter)
2286 augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
2287 ttrace->entry_time = sample->time;
2288 msg = ttrace->entry_str;
2289 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
2290
2291 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
2292 args, augmented_args, augmented_args_size, trace, thread);
2293
2294 if (sc->is_exit) {
2295 if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
2296 int alignment = 0;
2297
2298 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
2299 printed = fprintf(trace->output, "%s)", ttrace->entry_str);
2300 if (trace->args_alignment > printed)
2301 alignment = trace->args_alignment - printed;
2302 fprintf(trace->output, "%*s= ?\n", alignment, " ");
2303 }
2304 } else {
2305 ttrace->entry_pending = true;
2306
2307 ttrace->filename.pending_open = false;
2308 }
2309
2310 if (trace->current != thread) {
2311 thread__put(trace->current);
2312 trace->current = thread__get(thread);
2313 }
2314 err = 0;
2315out_put:
2316 thread__put(thread);
2317 return err;
2318}
2319
2320static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
2321 struct perf_sample *sample)
2322{
2323 struct thread_trace *ttrace;
2324 struct thread *thread;
2325 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2326 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2327 char msg[1024];
2328 void *args, *augmented_args = NULL;
2329 int augmented_args_size;
2330
2331 if (sc == NULL)
2332 return -1;
2333
2334 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2335 ttrace = thread__trace(thread, trace->output);
2336
2337
2338
2339
2340 if (ttrace == NULL)
2341 goto out_put;
2342
2343 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2344 augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size);
2345 syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
2346 fprintf(trace->output, "%s", msg);
2347 err = 0;
2348out_put:
2349 thread__put(thread);
2350 return err;
2351}
2352
2353static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel,
2354 struct perf_sample *sample,
2355 struct callchain_cursor *cursor)
2356{
2357 struct addr_location al;
2358 int max_stack = evsel->core.attr.sample_max_stack ?
2359 evsel->core.attr.sample_max_stack :
2360 trace->max_stack;
2361 int err;
2362
2363 if (machine__resolve(trace->host, &al, sample) < 0)
2364 return -1;
2365
2366 err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
2367 addr_location__put(&al);
2368 return err;
2369}
2370
2371static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
2372{
2373
2374 const unsigned int print_opts = EVSEL__PRINT_SYM |
2375 EVSEL__PRINT_DSO |
2376 EVSEL__PRINT_UNKNOWN_AS_ADDR;
2377
2378 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, symbol_conf.bt_stop_list, trace->output);
2379}
2380
/*
 * Translate an errno value into its name, using the architecture of the
 * environment the evsel is associated with.
 */
static const char *errno_to_name(struct evsel *evsel, int err)
{
	const char *arch_name = perf_env__arch(perf_evsel__env(evsel));

	return arch_syscalls__strerrno(arch_name, err);
}
2388
2389static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
2390 union perf_event *event __maybe_unused,
2391 struct perf_sample *sample)
2392{
2393 long ret;
2394 u64 duration = 0;
2395 bool duration_calculated = false;
2396 struct thread *thread;
2397 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
2398 int alignment = trace->args_alignment;
2399 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2400 struct thread_trace *ttrace;
2401
2402 if (sc == NULL)
2403 return -1;
2404
2405 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2406 ttrace = thread__trace(thread, trace->output);
2407 if (ttrace == NULL)
2408 goto out_put;
2409
2410 trace__fprintf_sample(trace, evsel, sample, thread);
2411
2412 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2413
2414 if (trace->summary)
2415 thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary);
2416
2417 if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
2418 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2419 ttrace->filename.pending_open = false;
2420 ++trace->stats.vfs_getname;
2421 }
2422
2423 if (ttrace->entry_time) {
2424 duration = sample->time - ttrace->entry_time;
2425 if (trace__filter_duration(trace, duration))
2426 goto out;
2427 duration_calculated = true;
2428 } else if (trace->duration_filter)
2429 goto out;
2430
2431 if (sample->callchain) {
2432 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2433 if (callchain_ret == 0) {
2434 if (callchain_cursor.nr < trace->min_stack)
2435 goto out;
2436 callchain_ret = 1;
2437 }
2438 }
2439
2440 if (trace->summary_only || (ret >= 0 && trace->failure_only))
2441 goto out;
2442
2443 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
2444
2445 if (ttrace->entry_pending) {
2446 printed = fprintf(trace->output, "%s", ttrace->entry_str);
2447 } else {
2448 printed += fprintf(trace->output, " ... [");
2449 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2450 printed += 9;
2451 printed += fprintf(trace->output, "]: %s()", sc->name);
2452 }
2453
2454 printed++;
2455
2456 if (alignment > printed)
2457 alignment -= printed;
2458 else
2459 alignment = 0;
2460
2461 fprintf(trace->output, ")%*s= ", alignment, " ");
2462
2463 if (sc->fmt == NULL) {
2464 if (ret < 0)
2465 goto errno_print;
2466signed_print:
2467 fprintf(trace->output, "%ld", ret);
2468 } else if (ret < 0) {
2469errno_print: {
2470 char bf[STRERR_BUFSIZE];
2471 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
2472 *e = errno_to_name(evsel, -ret);
2473
2474 fprintf(trace->output, "-1 %s (%s)", e, emsg);
2475 }
2476 } else if (ret == 0 && sc->fmt->timeout)
2477 fprintf(trace->output, "0 (Timeout)");
2478 else if (ttrace->ret_scnprintf) {
2479 char bf[1024];
2480 struct syscall_arg arg = {
2481 .val = ret,
2482 .thread = thread,
2483 .trace = trace,
2484 };
2485 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
2486 ttrace->ret_scnprintf = NULL;
2487 fprintf(trace->output, "%s", bf);
2488 } else if (sc->fmt->hexret)
2489 fprintf(trace->output, "%#lx", ret);
2490 else if (sc->fmt->errpid) {
2491 struct thread *child = machine__find_thread(trace->host, ret, ret);
2492
2493 if (child != NULL) {
2494 fprintf(trace->output, "%ld", ret);
2495 if (child->comm_set)
2496 fprintf(trace->output, " (%s)", thread__comm_str(child));
2497 thread__put(child);
2498 }
2499 } else
2500 goto signed_print;
2501
2502 fputc('\n', trace->output);
2503
2504
2505
2506
2507
2508 if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
2509 interrupted = true;
2510
2511 if (callchain_ret > 0)
2512 trace__fprintf_callchain(trace, sample);
2513 else if (callchain_ret < 0)
2514 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2515out:
2516 ttrace->entry_pending = false;
2517 err = 0;
2518out_put:
2519 thread__put(thread);
2520 return err;
2521}
2522
2523static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
2524 union perf_event *event __maybe_unused,
2525 struct perf_sample *sample)
2526{
2527 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2528 struct thread_trace *ttrace;
2529 size_t filename_len, entry_str_len, to_move;
2530 ssize_t remaining_space;
2531 char *pos;
2532 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2533
2534 if (!thread)
2535 goto out;
2536
2537 ttrace = thread__priv(thread);
2538 if (!ttrace)
2539 goto out_put;
2540
2541 filename_len = strlen(filename);
2542 if (filename_len == 0)
2543 goto out_put;
2544
2545 if (ttrace->filename.namelen < filename_len) {
2546 char *f = realloc(ttrace->filename.name, filename_len + 1);
2547
2548 if (f == NULL)
2549 goto out_put;
2550
2551 ttrace->filename.namelen = filename_len;
2552 ttrace->filename.name = f;
2553 }
2554
2555 strcpy(ttrace->filename.name, filename);
2556 ttrace->filename.pending_open = true;
2557
2558 if (!ttrace->filename.ptr)
2559 goto out_put;
2560
2561 entry_str_len = strlen(ttrace->entry_str);
2562 remaining_space = trace__entry_str_size - entry_str_len - 1;
2563 if (remaining_space <= 0)
2564 goto out_put;
2565
2566 if (filename_len > (size_t)remaining_space) {
2567 filename += filename_len - remaining_space;
2568 filename_len = remaining_space;
2569 }
2570
2571 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1;
2572 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2573 memmove(pos + filename_len, pos, to_move);
2574 memcpy(pos, filename, filename_len);
2575
2576 ttrace->filename.ptr = 0;
2577 ttrace->filename.entry_str_pos = 0;
2578out_put:
2579 thread__put(thread);
2580out:
2581 return 0;
2582}
2583
2584static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
2585 union perf_event *event __maybe_unused,
2586 struct perf_sample *sample)
2587{
2588 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2589 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2590 struct thread *thread = machine__findnew_thread(trace->host,
2591 sample->pid,
2592 sample->tid);
2593 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2594
2595 if (ttrace == NULL)
2596 goto out_dump;
2597
2598 ttrace->runtime_ms += runtime_ms;
2599 trace->runtime_ms += runtime_ms;
2600out_put:
2601 thread__put(thread);
2602 return 0;
2603
2604out_dump:
2605 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2606 evsel->name,
2607 perf_evsel__strval(evsel, sample, "comm"),
2608 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2609 runtime,
2610 perf_evsel__intval(evsel, sample, "vruntime"));
2611 goto out_put;
2612}
2613
2614static int bpf_output__printer(enum binary_printer_ops op,
2615 unsigned int val, void *extra __maybe_unused, FILE *fp)
2616{
2617 unsigned char ch = (unsigned char)val;
2618
2619 switch (op) {
2620 case BINARY_PRINT_CHAR_DATA:
2621 return fprintf(fp, "%c", isprint(ch) ? ch : '.');
2622 case BINARY_PRINT_DATA_BEGIN:
2623 case BINARY_PRINT_LINE_BEGIN:
2624 case BINARY_PRINT_ADDR:
2625 case BINARY_PRINT_NUM_DATA:
2626 case BINARY_PRINT_NUM_PAD:
2627 case BINARY_PRINT_SEP:
2628 case BINARY_PRINT_CHAR_PAD:
2629 case BINARY_PRINT_LINE_END:
2630 case BINARY_PRINT_DATA_END:
2631 default:
2632 break;
2633 }
2634
2635 return 0;
2636}
2637
2638static void bpf_output__fprintf(struct trace *trace,
2639 struct perf_sample *sample)
2640{
2641 binary__fprintf(sample->raw_data, sample->raw_size, 8,
2642 bpf_output__printer, NULL, trace->output);
2643 ++trace->nr_events_printed;
2644}
2645
2646static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, struct perf_sample *sample,
2647 struct thread *thread, void *augmented_args, int augmented_args_size)
2648{
2649 char bf[2048];
2650 size_t size = sizeof(bf);
2651 struct tep_format_field *field = evsel->tp_format->format.fields;
2652 struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel);
2653 size_t printed = 0;
2654 unsigned long val;
2655 u8 bit = 1;
2656 struct syscall_arg syscall_arg = {
2657 .augmented = {
2658 .size = augmented_args_size,
2659 .args = augmented_args,
2660 },
2661 .idx = 0,
2662 .mask = 0,
2663 .trace = trace,
2664 .thread = thread,
2665 .show_string_prefix = trace->show_string_prefix,
2666 };
2667
2668 for (; field && arg; field = field->next, ++syscall_arg.idx, bit <<= 1, ++arg) {
2669 if (syscall_arg.mask & bit)
2670 continue;
2671
2672 syscall_arg.len = 0;
2673 syscall_arg.fmt = arg;
2674 if (field->flags & TEP_FIELD_IS_ARRAY) {
2675 int offset = field->offset;
2676
2677 if (field->flags & TEP_FIELD_IS_DYNAMIC) {
2678 offset = format_field__intval(field, sample, evsel->needs_swap);
2679 syscall_arg.len = offset >> 16;
2680 offset &= 0xffff;
2681 }
2682
2683 val = (uintptr_t)(sample->raw_data + offset);
2684 } else
2685 val = format_field__intval(field, sample, evsel->needs_swap);
2686
2687
2688
2689
2690 val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val);
2691
2692
2693
2694
2695
2696
2697 if (val == 0 &&
2698 !trace->show_zeros &&
2699 !((arg->show_zero ||
2700 arg->scnprintf == SCA_STRARRAY ||
2701 arg->scnprintf == SCA_STRARRAYS) &&
2702 arg->parm))
2703 continue;
2704
2705 printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : "");
2706
2707
2708
2709
2710
2711 if (1 || trace->show_arg_names)
2712 printed += scnprintf(bf + printed, size - printed, "%s: ", field->name);
2713
2714 printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val);
2715 }
2716
2717 return printed + fprintf(trace->output, "%s", bf);
2718}
2719
2720static int trace__event_handler(struct trace *trace, struct evsel *evsel,
2721 union perf_event *event __maybe_unused,
2722 struct perf_sample *sample)
2723{
2724 struct thread *thread;
2725 int callchain_ret = 0;
2726
2727
2728
2729
2730
2731
2732 if (evsel->disabled)
2733 return 0;
2734
2735 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2736
2737 if (sample->callchain) {
2738 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2739 if (callchain_ret == 0) {
2740 if (callchain_cursor.nr < trace->min_stack)
2741 goto out;
2742 callchain_ret = 1;
2743 }
2744 }
2745
2746 trace__printf_interrupted_entry(trace);
2747 trace__fprintf_tstamp(trace, sample->time, trace->output);
2748
2749 if (trace->trace_syscalls && trace->show_duration)
2750 fprintf(trace->output, "( ): ");
2751
2752 if (thread)
2753 trace__fprintf_comm_tid(trace, thread, trace->output);
2754
2755 if (evsel == trace->syscalls.events.augmented) {
2756 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
2757 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2758
2759 if (sc) {
2760 fprintf(trace->output, "%s(", sc->name);
2761 trace__fprintf_sys_enter(trace, evsel, sample);
2762 fputc(')', trace->output);
2763 goto newline;
2764 }
2765
2766
2767
2768
2769
2770
2771 }
2772
2773 fprintf(trace->output, "%s(", evsel->name);
2774
2775 if (perf_evsel__is_bpf_output(evsel)) {
2776 bpf_output__fprintf(trace, sample);
2777 } else if (evsel->tp_format) {
2778 if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
2779 trace__fprintf_sys_enter(trace, evsel, sample)) {
2780 if (trace->libtraceevent_print) {
2781 event_format__fprintf(evsel->tp_format, sample->cpu,
2782 sample->raw_data, sample->raw_size,
2783 trace->output);
2784 } else {
2785 trace__fprintf_tp_fields(trace, evsel, sample, thread, NULL, 0);
2786 }
2787 }
2788 }
2789
2790newline:
2791 fprintf(trace->output, ")\n");
2792
2793 if (callchain_ret > 0)
2794 trace__fprintf_callchain(trace, sample);
2795 else if (callchain_ret < 0)
2796 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2797
2798 ++trace->nr_events_printed;
2799
2800 if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
2801 evsel__disable(evsel);
2802 evsel__close(evsel);
2803 }
2804out:
2805 thread__put(thread);
2806 return 0;
2807}
2808
2809static void print_location(FILE *f, struct perf_sample *sample,
2810 struct addr_location *al,
2811 bool print_dso, bool print_sym)
2812{
2813
2814 if ((verbose > 0 || print_dso) && al->map)
2815 fprintf(f, "%s@", al->map->dso->long_name);
2816
2817 if ((verbose > 0 || print_sym) && al->sym)
2818 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2819 al->addr - al->sym->start);
2820 else if (al->map)
2821 fprintf(f, "0x%" PRIx64, al->addr);
2822 else
2823 fprintf(f, "0x%" PRIx64, sample->addr);
2824}
2825
2826static int trace__pgfault(struct trace *trace,
2827 struct evsel *evsel,
2828 union perf_event *event __maybe_unused,
2829 struct perf_sample *sample)
2830{
2831 struct thread *thread;
2832 struct addr_location al;
2833 char map_type = 'd';
2834 struct thread_trace *ttrace;
2835 int err = -1;
2836 int callchain_ret = 0;
2837
2838 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2839
2840 if (sample->callchain) {
2841 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2842 if (callchain_ret == 0) {
2843 if (callchain_cursor.nr < trace->min_stack)
2844 goto out_put;
2845 callchain_ret = 1;
2846 }
2847 }
2848
2849 ttrace = thread__trace(thread, trace->output);
2850 if (ttrace == NULL)
2851 goto out_put;
2852
2853 if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2854 ttrace->pfmaj++;
2855 else
2856 ttrace->pfmin++;
2857
2858 if (trace->summary_only)
2859 goto out;
2860
2861 thread__find_symbol(thread, sample->cpumode, sample->ip, &al);
2862
2863 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
2864
2865 fprintf(trace->output, "%sfault [",
2866 evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2867 "maj" : "min");
2868
2869 print_location(trace->output, sample, &al, false, true);
2870
2871 fprintf(trace->output, "] => ");
2872
2873 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
2874
2875 if (!al.map) {
2876 thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
2877
2878 if (al.map)
2879 map_type = 'x';
2880 else
2881 map_type = '?';
2882 }
2883
2884 print_location(trace->output, sample, &al, true, false);
2885
2886 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2887
2888 if (callchain_ret > 0)
2889 trace__fprintf_callchain(trace, sample);
2890 else if (callchain_ret < 0)
2891 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
2892
2893 ++trace->nr_events_printed;
2894out:
2895 err = 0;
2896out_put:
2897 thread__put(thread);
2898 return err;
2899}
2900
2901static void trace__set_base_time(struct trace *trace,
2902 struct evsel *evsel,
2903 struct perf_sample *sample)
2904{
2905
2906
2907
2908
2909
2910
2911
2912
2913 if (trace->base_time == 0 && !trace->full_time &&
2914 (evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
2915 trace->base_time = sample->time;
2916}
2917
2918static int trace__process_sample(struct perf_tool *tool,
2919 union perf_event *event,
2920 struct perf_sample *sample,
2921 struct evsel *evsel,
2922 struct machine *machine __maybe_unused)
2923{
2924 struct trace *trace = container_of(tool, struct trace, tool);
2925 struct thread *thread;
2926 int err = 0;
2927
2928 tracepoint_handler handler = evsel->handler;
2929
2930 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2931 if (thread && thread__is_filtered(thread))
2932 goto out;
2933
2934 trace__set_base_time(trace, evsel, sample);
2935
2936 if (handler) {
2937 ++trace->nr_events;
2938 handler(trace, evsel, event, sample);
2939 }
2940out:
2941 thread__put(thread);
2942 return err;
2943}
2944
2945static int trace__record(struct trace *trace, int argc, const char **argv)
2946{
2947 unsigned int rec_argc, i, j;
2948 const char **rec_argv;
2949 const char * const record_args[] = {
2950 "record",
2951 "-R",
2952 "-m", "1024",
2953 "-c", "1",
2954 };
2955 pid_t pid = getpid();
2956 char *filter = asprintf__tp_filter_pids(1, &pid);
2957 const char * const sc_args[] = { "-e", };
2958 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2959 const char * const majpf_args[] = { "-e", "major-faults" };
2960 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2961 const char * const minpf_args[] = { "-e", "minor-faults" };
2962 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2963 int err = -1;
2964
2965
2966 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 3 +
2967 majpf_args_nr + minpf_args_nr + argc;
2968 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2969
2970 if (rec_argv == NULL || filter == NULL)
2971 goto out_free;
2972
2973 j = 0;
2974 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2975 rec_argv[j++] = record_args[i];
2976
2977 if (trace->trace_syscalls) {
2978 for (i = 0; i < sc_args_nr; i++)
2979 rec_argv[j++] = sc_args[i];
2980
2981
2982 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2983 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2984 else if (is_valid_tracepoint("syscalls:sys_enter"))
2985 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2986 else {
2987 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2988 goto out_free;
2989 }
2990 }
2991
2992 rec_argv[j++] = "--filter";
2993 rec_argv[j++] = filter;
2994
2995 if (trace->trace_pgfaults & TRACE_PFMAJ)
2996 for (i = 0; i < majpf_args_nr; i++)
2997 rec_argv[j++] = majpf_args[i];
2998
2999 if (trace->trace_pgfaults & TRACE_PFMIN)
3000 for (i = 0; i < minpf_args_nr; i++)
3001 rec_argv[j++] = minpf_args[i];
3002
3003 for (i = 0; i < (unsigned int)argc; i++)
3004 rec_argv[j++] = argv[i];
3005
3006 err = cmd_record(j, rec_argv);
3007out_free:
3008 free(filter);
3009 free(rec_argv);
3010 return err;
3011}
3012
3013static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
3014
3015static bool evlist__add_vfs_getname(struct evlist *evlist)
3016{
3017 bool found = false;
3018 struct evsel *evsel, *tmp;
3019 struct parse_events_error err;
3020 int ret;
3021
3022 bzero(&err, sizeof(err));
3023 ret = parse_events(evlist, "probe:vfs_getname*", &err);
3024 if (ret) {
3025 free(err.str);
3026 free(err.help);
3027 free(err.first_str);
3028 free(err.first_help);
3029 return false;
3030 }
3031
3032 evlist__for_each_entry_safe(evlist, evsel, tmp) {
3033 if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname"))
3034 continue;
3035
3036 if (perf_evsel__field(evsel, "pathname")) {
3037 evsel->handler = trace__vfs_getname;
3038 found = true;
3039 continue;
3040 }
3041
3042 list_del_init(&evsel->core.node);
3043 evsel->evlist = NULL;
3044 evsel__delete(evsel);
3045 }
3046
3047 return found;
3048}
3049
3050static struct evsel *perf_evsel__new_pgfault(u64 config)
3051{
3052 struct evsel *evsel;
3053 struct perf_event_attr attr = {
3054 .type = PERF_TYPE_SOFTWARE,
3055 .mmap_data = 1,
3056 };
3057
3058 attr.config = config;
3059 attr.sample_period = 1;
3060
3061 event_attr_init(&attr);
3062
3063 evsel = evsel__new(&attr);
3064 if (evsel)
3065 evsel->handler = trace__pgfault;
3066
3067 return evsel;
3068}
3069
3070static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
3071{
3072 const u32 type = event->header.type;
3073 struct evsel *evsel;
3074
3075 if (type != PERF_RECORD_SAMPLE) {
3076 trace__process_event(trace, trace->host, event, sample);
3077 return;
3078 }
3079
3080 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
3081 if (evsel == NULL) {
3082 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
3083 return;
3084 }
3085
3086 if (evswitch__discard(&trace->evswitch, evsel))
3087 return;
3088
3089 trace__set_base_time(trace, evsel, sample);
3090
3091 if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
3092 sample->raw_data == NULL) {
3093 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
3094 perf_evsel__name(evsel), sample->tid,
3095 sample->cpu, sample->raw_size);
3096 } else {
3097 tracepoint_handler handler = evsel->handler;
3098 handler(trace, evsel, event, sample);
3099 }
3100
3101 if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
3102 interrupted = true;
3103}
3104
3105static int trace__add_syscall_newtp(struct trace *trace)
3106{
3107 int ret = -1;
3108 struct evlist *evlist = trace->evlist;
3109 struct evsel *sys_enter, *sys_exit;
3110
3111 sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
3112 if (sys_enter == NULL)
3113 goto out;
3114
3115 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
3116 goto out_delete_sys_enter;
3117
3118 sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
3119 if (sys_exit == NULL)
3120 goto out_delete_sys_enter;
3121
3122 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
3123 goto out_delete_sys_exit;
3124
3125 perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
3126 perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
3127
3128 evlist__add(evlist, sys_enter);
3129 evlist__add(evlist, sys_exit);
3130
3131 if (callchain_param.enabled && !trace->kernel_syscallchains) {
3132
3133
3134
3135
3136
3137 sys_exit->core.attr.exclude_callchain_kernel = 1;
3138 }
3139
3140 trace->syscalls.events.sys_enter = sys_enter;
3141 trace->syscalls.events.sys_exit = sys_exit;
3142
3143 ret = 0;
3144out:
3145 return ret;
3146
3147out_delete_sys_exit:
3148 evsel__delete_priv(sys_exit);
3149out_delete_sys_enter:
3150 evsel__delete_priv(sys_enter);
3151 goto out;
3152}
3153
3154static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
3155{
3156 int err = -1;
3157 struct evsel *sys_exit;
3158 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
3159 trace->ev_qualifier_ids.nr,
3160 trace->ev_qualifier_ids.entries);
3161
3162 if (filter == NULL)
3163 goto out_enomem;
3164
3165 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
3166 filter)) {
3167 sys_exit = trace->syscalls.events.sys_exit;
3168 err = perf_evsel__append_tp_filter(sys_exit, filter);
3169 }
3170
3171 free(filter);
3172out:
3173 return err;
3174out_enomem:
3175 errno = ENOMEM;
3176 goto out;
3177}
3178
3179#ifdef HAVE_LIBBPF_SUPPORT
3180static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
3181{
3182 if (trace->bpf_obj == NULL)
3183 return NULL;
3184
3185 return bpf_object__find_program_by_title(trace->bpf_obj, name);
3186}
3187
3188static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
3189 const char *prog_name, const char *type)
3190{
3191 struct bpf_program *prog;
3192
3193 if (prog_name == NULL) {
3194 char default_prog_name[256];
3195 scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
3196 prog = trace__find_bpf_program_by_title(trace, default_prog_name);
3197 if (prog != NULL)
3198 goto out_found;
3199 if (sc->fmt && sc->fmt->alias) {
3200 scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
3201 prog = trace__find_bpf_program_by_title(trace, default_prog_name);
3202 if (prog != NULL)
3203 goto out_found;
3204 }
3205 goto out_unaugmented;
3206 }
3207
3208 prog = trace__find_bpf_program_by_title(trace, prog_name);
3209
3210 if (prog != NULL) {
3211out_found:
3212 return prog;
3213 }
3214
3215 pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
3216 prog_name, type, sc->name);
3217out_unaugmented:
3218 return trace->syscalls.unaugmented_prog;
3219}
3220
3221static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
3222{
3223 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3224
3225 if (sc == NULL)
3226 return;
3227
3228 sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter");
3229 sc->bpf_prog.sys_exit = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit : NULL, "exit");
3230}
3231
3232static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
3233{
3234 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3235 return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
3236}
3237
3238static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
3239{
3240 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3241 return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
3242}
3243
3244static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
3245{
3246 struct syscall *sc = trace__syscall_info(trace, NULL, id);
3247 int arg = 0;
3248
3249 if (sc == NULL)
3250 goto out;
3251
3252 for (; arg < sc->nr_args; ++arg) {
3253 entry->string_args_len[arg] = 0;
3254 if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) {
3255
3256 entry->string_args_len[arg] = PATH_MAX;
3257 }
3258 }
3259out:
3260 for (; arg < 6; ++arg)
3261 entry->string_args_len[arg] = 0;
3262}
3263static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
3264{
3265 int fd = bpf_map__fd(trace->syscalls.map);
3266 struct bpf_map_syscall_entry value = {
3267 .enabled = !trace->not_ev_qualifier,
3268 };
3269 int err = 0;
3270 size_t i;
3271
3272 for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
3273 int key = trace->ev_qualifier_ids.entries[i];
3274
3275 if (value.enabled) {
3276 trace__init_bpf_map_syscall_args(trace, key, &value);
3277 trace__init_syscall_bpf_progs(trace, key);
3278 }
3279
3280 err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
3281 if (err)
3282 break;
3283 }
3284
3285 return err;
3286}
3287
3288static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
3289{
3290 int fd = bpf_map__fd(trace->syscalls.map);
3291 struct bpf_map_syscall_entry value = {
3292 .enabled = enabled,
3293 };
3294 int err = 0, key;
3295
3296 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3297 if (enabled)
3298 trace__init_bpf_map_syscall_args(trace, key, &value);
3299
3300 err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
3301 if (err)
3302 break;
3303 }
3304
3305 return err;
3306}
3307
3308static int trace__init_syscalls_bpf_map(struct trace *trace)
3309{
3310 bool enabled = true;
3311
3312 if (trace->ev_qualifier_ids.nr)
3313 enabled = trace->not_ev_qualifier;
3314
3315 return __trace__init_syscalls_bpf_map(trace, enabled);
3316}
3317
/*
 * Look for another syscall ("pair") whose sys_enter BPF augmenter can be
 * reused for 'sc'.
 *
 * A pair is acceptable when, walking both argument lists in lockstep:
 *  - every pointer argument of the pair sits where 'sc' also has a pointer
 *    argument of the very same type string, and at least one such match
 *    exists;
 *  - the pair has no pointer argument in a position where 'sc' has a
 *    non-pointer one;
 *  - the pair has no additional pointer arguments after the compared ones.
 *
 * Returns the pair's sys_enter program, or NULL when 'sc' has no pointer
 * arguments or no suitable pair exists.
 */
static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
{
 struct tep_format_field *field, *candidate_field;
 int id;

 /* Only bother searching if 'sc' has at least one pointer argument. */
 for (field = sc->args; field; field = field->next) {
 if (field->flags & TEP_FIELD_IS_POINTER)
 goto try_to_find_pair;
 }

 return NULL;

try_to_find_pair:
 for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) {
 struct syscall *pair = trace__syscall_info(trace, NULL, id);
 struct bpf_program *pair_prog;
 bool is_candidate = false;

 /* Skip unknown syscalls, 'sc' itself and syscalls that have no augmenter of their own. */
 if (pair == NULL || pair == sc ||
 pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
 continue;

 for (field = sc->args, candidate_field = pair->args;
 field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
 bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
 candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;

 if (is_pointer) {
 if (!candidate_is_pointer) {
 /* The candidate doesn't handle this pointer arg of ours; keep checking the remaining args. */
 continue;
 }
 } else {
 if (candidate_is_pointer) {
 /* The candidate has a pointer where we have none: reject it. */
 goto next_candidate;
 }
 continue;
 }

 /* Both are pointers: they must have exactly the same type string. */
 if (strcmp(field->type, candidate_field->type))
 goto next_candidate;

 is_candidate = true;
 }

 if (!is_candidate)
 goto next_candidate;

 /*
  * The pair has more arguments than were compared: reject it if any of
  * the following ones is a pointer.
  *
  * NOTE(review): the scan starts at candidate_field->next, so the first
  * uncompared candidate field itself is not checked -- confirm this is
  * intended.
  */
 if (candidate_field) {
 for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next)
 if (candidate_field->flags & TEP_FIELD_IS_POINTER)
 goto next_candidate;
 }

 pair_prog = pair->bpf_prog.sys_enter;

 /*
  * No sys_enter program recorded for the pair yet: look it up now, and
  * give up on this pair if all it gets is the unaugmented program.
  */
 if (pair_prog == NULL) {
 pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
 if (pair_prog == trace->syscalls.unaugmented_prog)
 goto next_candidate;
 }

 pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
 return pair_prog;
 next_candidate:
 continue;
 }

 return NULL;
}
3405
3406static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
3407{
3408 int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
3409 map_exit_fd = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
3410 int err = 0, key;
3411
3412 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3413 int prog_fd;
3414
3415 if (!trace__syscall_enabled(trace, key))
3416 continue;
3417
3418 trace__init_syscall_bpf_progs(trace, key);
3419
3420
3421 prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
3422 err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3423 if (err)
3424 break;
3425 prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
3426 err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
3427 if (err)
3428 break;
3429 }
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
3460 struct syscall *sc = trace__syscall_info(trace, NULL, key);
3461 struct bpf_program *pair_prog;
3462 int prog_fd;
3463
3464 if (sc == NULL || sc->bpf_prog.sys_enter == NULL)
3465 continue;
3466
3467
3468
3469
3470
3471 if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
3472 continue;
3473
3474
3475
3476
3477
3478 pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
3479 if (pair_prog == NULL)
3480 continue;
3481
3482 sc->bpf_prog.sys_enter = pair_prog;
3483
3484
3485
3486
3487
3488 prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter);
3489 err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
3490 if (err)
3491 break;
3492 }
3493
3494
3495 return err;
3496}
3497
3498static void trace__delete_augmented_syscalls(struct trace *trace)
3499{
3500 struct evsel *evsel, *tmp;
3501
3502 evlist__remove(trace->evlist, trace->syscalls.events.augmented);
3503 evsel__delete(trace->syscalls.events.augmented);
3504 trace->syscalls.events.augmented = NULL;
3505
3506 evlist__for_each_entry_safe(trace->evlist, tmp, evsel) {
3507 if (evsel->bpf_obj == trace->bpf_obj) {
3508 evlist__remove(trace->evlist, evsel);
3509 evsel__delete(evsel);
3510 }
3511
3512 }
3513
3514 bpf_object__close(trace->bpf_obj);
3515 trace->bpf_obj = NULL;
3516}
3517#else
/*
 * No-op fallbacks used when the preceding conditional section is compiled
 * out (presumably builds without libbpf support -- the matching #if is
 * outside this view). They let callers stay free of #ifdefs.
 */

/* Stub: nothing to filter, always reports success. */
static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
{
 return 0;
}

/* Stub: there is no "syscalls" BPF map to initialise, always reports success. */
static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
{
 return 0;
}

/* Stub: no BPF object is available, so no program can ever be found. */
static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
 const char *name __maybe_unused)
{
 return NULL;
}

/* Stub: there are no program array maps to populate, always reports success. */
static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
{
 return 0;
}

/* Stub: nothing was created, so there is nothing to delete. */
static void trace__delete_augmented_syscalls(struct trace *trace __maybe_unused)
{
}
3542#endif
3543
3544static bool trace__only_augmented_syscalls_evsels(struct trace *trace)
3545{
3546 struct evsel *evsel;
3547
3548 evlist__for_each_entry(trace->evlist, evsel) {
3549 if (evsel == trace->syscalls.events.augmented ||
3550 evsel->bpf_obj == trace->bpf_obj)
3551 continue;
3552
3553 return false;
3554 }
3555
3556 return true;
3557}
3558
3559static int trace__set_ev_qualifier_filter(struct trace *trace)
3560{
3561 if (trace->syscalls.map)
3562 return trace__set_ev_qualifier_bpf_filter(trace);
3563 if (trace->syscalls.events.sys_enter)
3564 return trace__set_ev_qualifier_tp_filter(trace);
3565 return 0;
3566}
3567
3568static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused,
3569 size_t npids __maybe_unused, pid_t *pids __maybe_unused)
3570{
3571 int err = 0;
3572#ifdef HAVE_LIBBPF_SUPPORT
3573 bool value = true;
3574 int map_fd = bpf_map__fd(map);
3575 size_t i;
3576
3577 for (i = 0; i < npids; ++i) {
3578 err = bpf_map_update_elem(map_fd, &pids[i], &value, BPF_ANY);
3579 if (err)
3580 break;
3581 }
3582#endif
3583 return err;
3584}
3585
3586static int trace__set_filter_loop_pids(struct trace *trace)
3587{
3588 unsigned int nr = 1, err;
3589 pid_t pids[32] = {
3590 getpid(),
3591 };
3592 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
3593
3594 while (thread && nr < ARRAY_SIZE(pids)) {
3595 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
3596
3597 if (parent == NULL)
3598 break;
3599
3600 if (!strcmp(thread__comm_str(parent), "sshd") ||
3601 strstarts(thread__comm_str(parent), "gnome-terminal")) {
3602 pids[nr++] = parent->tid;
3603 break;
3604 }
3605 thread = parent;
3606 }
3607
3608 err = perf_evlist__append_tp_filter_pids(trace->evlist, nr, pids);
3609 if (!err && trace->filter_pids.map)
3610 err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
3611
3612 return err;
3613}
3614
3615static int trace__set_filter_pids(struct trace *trace)
3616{
3617 int err = 0;
3618
3619
3620
3621
3622
3623
3624 if (trace->filter_pids.nr > 0) {
3625 err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
3626 trace->filter_pids.entries);
3627 if (!err && trace->filter_pids.map) {
3628 err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
3629 trace->filter_pids.entries);
3630 }
3631 } else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) {
3632 err = trace__set_filter_loop_pids(trace);
3633 }
3634
3635 return err;
3636}
3637
3638static int __trace__deliver_event(struct trace *trace, union perf_event *event)
3639{
3640 struct evlist *evlist = trace->evlist;
3641 struct perf_sample sample;
3642 int err;
3643
3644 err = perf_evlist__parse_sample(evlist, event, &sample);
3645 if (err)
3646 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
3647 else
3648 trace__handle_event(trace, event, &sample);
3649
3650 return 0;
3651}
3652
3653static int __trace__flush_events(struct trace *trace)
3654{
3655 u64 first = ordered_events__first_time(&trace->oe.data);
3656 u64 flush = trace->oe.last - NSEC_PER_SEC;
3657
3658
3659 if (first && first < flush)
3660 return ordered_events__flush_time(&trace->oe.data, flush);
3661
3662 return 0;
3663}
3664
3665static int trace__flush_events(struct trace *trace)
3666{
3667 return !trace->sort_events ? 0 : __trace__flush_events(trace);
3668}
3669
3670static int trace__deliver_event(struct trace *trace, union perf_event *event)
3671{
3672 int err;
3673
3674 if (!trace->sort_events)
3675 return __trace__deliver_event(trace, event);
3676
3677 err = perf_evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
3678 if (err && err != -1)
3679 return err;
3680
3681 err = ordered_events__queue(&trace->oe.data, event, trace->oe.last, 0);
3682 if (err)
3683 return err;
3684
3685 return trace__flush_events(trace);
3686}
3687
3688static int ordered_events__deliver_event(struct ordered_events *oe,
3689 struct ordered_event *event)
3690{
3691 struct trace *trace = container_of(oe, struct trace, oe.data);
3692
3693 return __trace__deliver_event(trace, event->event);
3694}
3695
3696static struct syscall_arg_fmt *perf_evsel__syscall_arg_fmt(struct evsel *evsel, char *arg)
3697{
3698 struct tep_format_field *field;
3699 struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
3700
3701 if (evsel->tp_format == NULL || fmt == NULL)
3702 return NULL;
3703
3704 for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt)
3705 if (strcmp(field->name, arg) == 0)
3706 return fmt;
3707
3708 return NULL;
3709}
3710
3711static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel)
3712{
3713 char *tok, *left = evsel->filter, *new_filter = evsel->filter;
3714
3715 while ((tok = strpbrk(left, "=<>!")) != NULL) {
3716 char *right = tok + 1, *right_end;
3717
3718 if (*right == '=')
3719 ++right;
3720
3721 while (isspace(*right))
3722 ++right;
3723
3724 if (*right == '\0')
3725 break;
3726
3727 while (!isalpha(*left))
3728 if (++left == tok) {
3729
3730
3731
3732
3733 return 0;
3734 }
3735
3736 right_end = right + 1;
3737 while (isalnum(*right_end) || *right_end == '_' || *right_end == '|')
3738 ++right_end;
3739
3740 if (isalpha(*right)) {
3741 struct syscall_arg_fmt *fmt;
3742 int left_size = tok - left,
3743 right_size = right_end - right;
3744 char arg[128];
3745
3746 while (isspace(left[left_size - 1]))
3747 --left_size;
3748
3749 scnprintf(arg, sizeof(arg), "%.*s", left_size, left);
3750
3751 fmt = perf_evsel__syscall_arg_fmt(evsel, arg);
3752 if (fmt == NULL) {
3753 pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
3754 arg, evsel->name, evsel->filter);
3755 return -1;
3756 }
3757
3758 pr_debug2("trying to expand \"%s\" \"%.*s\" \"%.*s\" -> ",
3759 arg, (int)(right - tok), tok, right_size, right);
3760
3761 if (fmt->strtoul) {
3762 u64 val;
3763 struct syscall_arg syscall_arg = {
3764 .parm = fmt->parm,
3765 };
3766
3767 if (fmt->strtoul(right, right_size, &syscall_arg, &val)) {
3768 char *n, expansion[19];
3769 int expansion_lenght = scnprintf(expansion, sizeof(expansion), "%#" PRIx64, val);
3770 int expansion_offset = right - new_filter;
3771
3772 pr_debug("%s", expansion);
3773
3774 if (asprintf(&n, "%.*s%s%s", expansion_offset, new_filter, expansion, right_end) < 0) {
3775 pr_debug(" out of memory!\n");
3776 free(new_filter);
3777 return -1;
3778 }
3779 if (new_filter != evsel->filter)
3780 free(new_filter);
3781 left = n + expansion_offset + expansion_lenght;
3782 new_filter = n;
3783 } else {
3784 pr_err("\"%.*s\" not found for \"%s\" in \"%s\", can't set filter \"%s\"\n",
3785 right_size, right, arg, evsel->name, evsel->filter);
3786 return -1;
3787 }
3788 } else {
3789 pr_err("No resolver (strtoul) for \"%s\" in \"%s\", can't set filter \"%s\"\n",
3790 arg, evsel->name, evsel->filter);
3791 return -1;
3792 }
3793
3794 pr_debug("\n");
3795 } else {
3796 left = right_end;
3797 }
3798 }
3799
3800 if (new_filter != evsel->filter) {
3801 pr_debug("New filter for %s: %s\n", evsel->name, new_filter);
3802 perf_evsel__set_filter(evsel, new_filter);
3803 free(new_filter);
3804 }
3805
3806 return 0;
3807}
3808
3809static int trace__expand_filters(struct trace *trace, struct evsel **err_evsel)
3810{
3811 struct evlist *evlist = trace->evlist;
3812 struct evsel *evsel;
3813
3814 evlist__for_each_entry(evlist, evsel) {
3815 if (evsel->filter == NULL)
3816 continue;
3817
3818 if (trace__expand_filter(trace, evsel)) {
3819 *err_evsel = evsel;
3820 return -1;
3821 }
3822 }
3823
3824 return 0;
3825}
3826
/*
 * Live tracing entry point: set up the requested events, optionally fork the
 * workload given in argv, then read events from the ring buffers until the
 * session is done or interrupted, printing the summary at the end when
 * requested.
 *
 * The evlist (and the cgroup reference) is released before returning, on
 * both the success and the error paths.
 *
 * Returns 0 on success, a non-zero value otherwise.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
 struct evlist *evlist = trace->evlist;
 struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
 int err = -1, i;
 unsigned long before;
 const bool forks = argc > 0;
 bool draining = false;

 trace->live = true;

 /* The syscall tracepoints are only added here when not using raw augmented syscalls. */
 if (!trace->raw_augmented_syscalls) {
 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
 goto out_error_raw_syscalls;

 if (trace->trace_syscalls)
 trace->vfs_getname = evlist__add_vfs_getname(evlist);
 }

 /* Optional major/minor page fault software events. */
 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
 if (pgfault_maj == NULL)
 goto out_error_mem;
 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
 evlist__add(evlist, pgfault_maj);
 }

 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
 if (pgfault_min == NULL)
 goto out_error_mem;
 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
 evlist__add(evlist, pgfault_min);
 }

 /* sched:sched_stat_runtime is only added when trace->sched is set. */
 if (trace->sched &&
 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
 trace__sched_stat_runtime))
 goto out_error_sched_stat_runtime;

 /*
  * A cgroup stored in trace->cgroup is installed as the evlist's default
  * one (presumably for events that have no cgroup of their own -- see
  * evlist__set_default_cgroup()).
  */
 if (trace->cgroup)
 evlist__set_default_cgroup(trace->evlist, trace->cgroup);

 err = perf_evlist__create_maps(evlist, &trace->opts.target);
 if (err < 0) {
 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
 goto out_delete_evlist;
 }

 err = trace__symbols_init(trace, evlist);
 if (err < 0) {
 fprintf(trace->output, "Problems initializing symbol libraries!\n");
 goto out_delete_evlist;
 }

 perf_evlist__config(evlist, &trace->opts, &callchain_param);

 signal(SIGCHLD, sig_handler);
 signal(SIGINT, sig_handler);

 /* A command was given on the command line: prepare it now, start it later. */
 if (forks) {
 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
 argv, false, NULL);
 if (err < 0) {
 fprintf(trace->output, "Couldn't run the workload!\n");
 goto out_delete_evlist;
 }
 }

 err = evlist__open(evlist);
 if (err < 0)
 goto out_error_open;

 err = bpf__apply_obj_config();
 if (err) {
 char errbuf[BUFSIZ];

 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
 pr_err("ERROR: Apply config to BPF failed: %s\n",
 errbuf);
 goto out_error_open;
 }

 err = trace__set_filter_pids(trace);
 if (err < 0)
 goto out_error_mem;

 /* NOTE(review): the results of the two BPF map initialisations below are ignored. */
 if (trace->syscalls.map)
 trace__init_syscalls_bpf_map(trace);

 if (trace->syscalls.prog_array.sys_enter)
 trace__init_syscalls_bpf_prog_array_maps(trace);

 if (trace->ev_qualifier_ids.nr > 0) {
 err = trace__set_ev_qualifier_filter(trace);
 if (err < 0)
 goto out_errno;

 if (trace->syscalls.events.sys_exit) {
 pr_debug("event qualifier tracepoint filter: %s\n",
 trace->syscalls.events.sys_exit->filter);
 }
 }

 /*
  * fd_path_disabled is set whenever the "close" syscall is not being
  * traced (presumably because cached fd -> path entries could then not
  * be invalidated when an fd is closed -- confirm against the fd/path
  * cache code).
  */
 trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));

 /* Expand symbolic names in the filters before handing them to the kernel. */
 err = trace__expand_filters(trace, &evsel);
 if (err)
 goto out_delete_evlist;
 err = perf_evlist__apply_filters(evlist, &evsel);
 if (err < 0)
 goto out_error_apply_filters;

 if (trace->dump.map)
 bpf_map__fprintf(trace->dump.map, trace->output);

 err = evlist__mmap(evlist, trace->opts.mmap_pages);
 if (err < 0)
 goto out_error_mmap;

 /* With a target and no initial delay the events are enabled right away. */
 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
 evlist__enable(evlist);

 if (forks)
 perf_evlist__start_workload(evlist);

 /* initial_delay is multiplied by 1000 for usleep(), i.e. it is in milliseconds. */
 if (trace->opts.initial_delay) {
 usleep(trace->opts.initial_delay * 1000);
 evlist__enable(evlist);
 }

 trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
 evlist->core.threads->nr > 1 ||
 evlist__first(evlist)->core.attr.inherit;

 /*
  * For events that have callchains but no per-event sample_max_stack,
  * use trace->max_stack. This happens after the events were opened, so
  * it only affects how this tool handles the samples from here on.
  */
 evlist__for_each_entry(evlist, evsel) {
 if (evsel__has_callchain(evsel) &&
 evsel->core.attr.sample_max_stack == 0)
 evsel->core.attr.sample_max_stack = trace->max_stack;
 }
again:
 before = trace->nr_events;

 /* Drain every ring buffer, delivering each event as it is read. */
 for (i = 0; i < evlist->core.nr_mmaps; i++) {
 union perf_event *event;
 struct mmap *md;

 md = &evlist->mmap[i];
 if (perf_mmap__read_init(&md->core) < 0)
 continue;

 while ((event = perf_mmap__read_event(&md->core)) != NULL) {
 ++trace->nr_events;

 err = trace__deliver_event(trace, event);
 if (err)
 goto out_disable;

 perf_mmap__consume(&md->core);

 if (interrupted)
 goto out_disable;

 /* Once done, stop producing new events but keep consuming what is queued. */
 if (done && !draining) {
 evlist__disable(evlist);
 draining = true;
 }
 }
 perf_mmap__read_done(&md->core);
 }

 if (trace->nr_events == before) {
 /* Nothing was read in this pass: block in poll, or wait at most 100ms when done. */
 int timeout = done ? 100 : -1;

 if (!draining && evlist__poll(evlist, timeout) > 0) {
 /* No pollable fds left after filtering out the error/hangup ones: start draining. */
 if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
 draining = true;

 goto again;
 } else {
 if (trace__flush_events(trace))
 goto out_disable;
 }
 } else {
 goto again;
 }

out_disable:
 thread__zput(trace->current);

 evlist__disable(evlist);

 /* Deliver whatever is still sitting in the ordered events queue. */
 if (trace->sort_events)
 ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);

 if (!err) {
 if (trace->summary)
 trace__fprintf_thread_summary(trace, trace->output);

 if (trace->show_tool_stats) {
 fprintf(trace->output, "Stats:\n "
 " vfs_getname : %" PRIu64 "\n"
 " proc_getname: %" PRIu64 "\n",
 trace->stats.vfs_getname,
 trace->stats.proc_getname);
 }
 }

out_delete_evlist:
 trace__symbols__exit(trace);

 evlist__delete(evlist);
 cgroup__put(trace->cgroup);
 trace->evlist = NULL;
 trace->live = false;
 return err;
 /*
  * Error reporting targets. The block below is only entered via goto; it
  * exists to give 'errbuf' a scope of its own. Each label formats a
  * message and then jumps back to out_delete_evlist for the cleanup.
  */
{
 char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
 goto out_error;

out_error_raw_syscalls:
 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
 goto out_error;

out_error_mmap:
 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
 goto out_error;

out_error_open:
 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
 fprintf(trace->output, "%s\n", errbuf);
 goto out_delete_evlist;

out_error_apply_filters:
 fprintf(trace->output,
 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
 evsel->filter, perf_evsel__name(evsel), errno,
 str_error_r(errno, errbuf, sizeof(errbuf)));
 goto out_delete_evlist;
}
out_error_mem:
 fprintf(trace->output, "Not enough memory to run!\n");
 goto out_delete_evlist;

out_errno:
 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
 goto out_delete_evlist;
}
4121
4122static int trace__replay(struct trace *trace)
4123{
4124 const struct evsel_str_handler handlers[] = {
4125 { "probe:vfs_getname", trace__vfs_getname, },
4126 };
4127 struct perf_data data = {
4128 .path = input_name,
4129 .mode = PERF_DATA_MODE_READ,
4130 .force = trace->force,
4131 };
4132 struct perf_session *session;
4133 struct evsel *evsel;
4134 int err = -1;
4135
4136 trace->tool.sample = trace__process_sample;
4137 trace->tool.mmap = perf_event__process_mmap;
4138 trace->tool.mmap2 = perf_event__process_mmap2;
4139 trace->tool.comm = perf_event__process_comm;
4140 trace->tool.exit = perf_event__process_exit;
4141 trace->tool.fork = perf_event__process_fork;
4142 trace->tool.attr = perf_event__process_attr;
4143 trace->tool.tracing_data = perf_event__process_tracing_data;
4144 trace->tool.build_id = perf_event__process_build_id;
4145 trace->tool.namespaces = perf_event__process_namespaces;
4146
4147 trace->tool.ordered_events = true;
4148 trace->tool.ordering_requires_timestamps = true;
4149
4150
4151 trace->multiple_threads = true;
4152
4153 session = perf_session__new(&data, false, &trace->tool);
4154 if (IS_ERR(session))
4155 return PTR_ERR(session);
4156
4157 if (trace->opts.target.pid)
4158 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
4159
4160 if (trace->opts.target.tid)
4161 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
4162
4163 if (symbol__init(&session->header.env) < 0)
4164 goto out;
4165
4166 trace->host = &session->machines.host;
4167
4168 err = perf_session__set_tracepoints_handlers(session, handlers);
4169 if (err)
4170 goto out;
4171
4172 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
4173 "raw_syscalls:sys_enter");
4174
4175 if (evsel == NULL)
4176 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
4177 "syscalls:sys_enter");
4178
4179 if (evsel &&
4180 (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
4181 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
4182 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
4183 goto out;
4184 }
4185
4186 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
4187 "raw_syscalls:sys_exit");
4188 if (evsel == NULL)
4189 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
4190 "syscalls:sys_exit");
4191 if (evsel &&
4192 (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
4193 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
4194 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
4195 goto out;
4196 }
4197
4198 evlist__for_each_entry(session->evlist, evsel) {
4199 if (evsel->core.attr.type == PERF_TYPE_SOFTWARE &&
4200 (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
4201 evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
4202 evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS))
4203 evsel->handler = trace__pgfault;
4204 }
4205
4206 setup_pager();
4207
4208 err = perf_session__process_events(session);
4209 if (err)
4210 pr_err("Failed to process events, error %d", err);
4211
4212 else if (trace->summary)
4213 trace__fprintf_thread_summary(trace, trace->output);
4214
4215out:
4216 perf_session__delete(session);
4217
4218 return err;
4219}
4220
/*
 * Print the heading of the per-thread summary.
 *
 * Returns what fprintf() returned, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
4229
/*
 * Resorted view of a thread's per-syscall statistics, ordered by the
 * 'a->msecs > b->msecs' comparison given below.
 *
 * The block that follows fills one resorted 'entry' from a source rb tree
 * node 'nd', which is an int_node whose ->i is the syscall id and whose
 * ->priv holds the struct syscall_stats (possibly NULL).
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
 struct syscall_stats *stats;
 double msecs;
 int syscall;
)
{
 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
 struct syscall_stats *stats = source->priv;

 entry->syscall = source->i;
 entry->stats = stats;
 /* Total time: number of samples times the average scaled by NSEC_PER_MSEC; 0 without stats. */
 entry->msecs = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0;
}
4243
4244static size_t thread__dump_stats(struct thread_trace *ttrace,
4245 struct trace *trace, FILE *fp)
4246{
4247 size_t printed = 0;
4248 struct syscall *sc;
4249 struct rb_node *nd;
4250 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
4251
4252 if (syscall_stats == NULL)
4253 return 0;
4254
4255 printed += fprintf(fp, "\n");
4256
4257 printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
4258 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
4259 printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
4260
4261 resort_rb__for_each_entry(nd, syscall_stats) {
4262 struct syscall_stats *stats = syscall_stats_entry->stats;
4263 if (stats) {
4264 double min = (double)(stats->stats.min) / NSEC_PER_MSEC;
4265 double max = (double)(stats->stats.max) / NSEC_PER_MSEC;
4266 double avg = avg_stats(&stats->stats);
4267 double pct;
4268 u64 n = (u64)stats->stats.n;
4269
4270 pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
4271 avg /= NSEC_PER_MSEC;
4272
4273 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
4274 printed += fprintf(fp, " %-15s", sc->name);
4275 printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f",
4276 n, stats->nr_failures, syscall_stats_entry->msecs, min, avg);
4277 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
4278
4279 if (trace->errno_summary && stats->nr_failures) {
4280 const char *arch_name = perf_env__arch(trace->host->env);
4281 int e;
4282
4283 for (e = 0; e < stats->max_errno; ++e) {
4284 if (stats->errnos[e] != 0)
4285 fprintf(fp, "\t\t\t\t%s: %d\n", arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]);
4286 }
4287 }
4288 }
4289 }
4290
4291 resort_rb__delete(syscall_stats);
4292 printed += fprintf(fp, "\n\n");
4293
4294 return printed;
4295}
4296
4297static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
4298{
4299 size_t printed = 0;
4300 struct thread_trace *ttrace = thread__priv(thread);
4301 double ratio;
4302
4303 if (ttrace == NULL)
4304 return 0;
4305
4306 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
4307
4308 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
4309 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
4310 printed += fprintf(fp, "%.1f%%", ratio);
4311 if (ttrace->pfmaj)
4312 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
4313 if (ttrace->pfmin)
4314 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
4315 if (trace->sched)
4316 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
4317 else if (fputc('\n', fp) != EOF)
4318 ++printed;
4319
4320 printed += thread__dump_stats(ttrace, trace, fp);
4321
4322 return printed;
4323}
4324
4325static unsigned long thread__nr_events(struct thread_trace *ttrace)
4326{
4327 return ttrace ? ttrace->nr_events : 0;
4328}
4329
/*
 * Resorted view of the machine's threads, ordered by the comparison below on
 * the number of events recorded in each thread's private tracing state.
 *
 * The block that follows fills one resorted 'entry' with the struct thread
 * that embeds the source rb tree node 'nd'.
 */
DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
 struct thread *thread;
)
{
 entry->thread = rb_entry(nd, struct thread, rb_node);
}
4336
4337static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
4338{
4339 size_t printed = trace__fprintf_threads_header(fp);
4340 struct rb_node *nd;
4341 int i;
4342
4343 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
4344 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
4345
4346 if (threads == NULL) {
4347 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
4348 return 0;
4349 }
4350
4351 resort_rb__for_each_entry(nd, threads)
4352 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
4353
4354 resort_rb__delete(threads);
4355 }
4356 return printed;
4357}
4358
4359static int trace__set_duration(const struct option *opt, const char *str,
4360 int unset __maybe_unused)
4361{
4362 struct trace *trace = opt->value;
4363
4364 trace->duration_filter = atof(str);
4365 return 0;
4366}
4367
4368static int trace__set_filter_pids_from_option(const struct option *opt, const char *str,
4369 int unset __maybe_unused)
4370{
4371 int ret = -1;
4372 size_t i;
4373 struct trace *trace = opt->value;
4374
4375
4376
4377
4378 struct intlist *list = intlist__new(str);
4379
4380 if (list == NULL)
4381 return -1;
4382
4383 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
4384 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
4385
4386 if (trace->filter_pids.entries == NULL)
4387 goto out;
4388
4389 trace->filter_pids.entries[0] = getpid();
4390
4391 for (i = 1; i < trace->filter_pids.nr; ++i)
4392 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
4393
4394 intlist__delete(list);
4395 ret = 0;
4396out:
4397 return ret;
4398}
4399
4400static int trace__open_output(struct trace *trace, const char *filename)
4401{
4402 struct stat st;
4403
4404 if (!stat(filename, &st) && st.st_size) {
4405 char oldname[PATH_MAX];
4406
4407 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
4408 unlink(oldname);
4409 rename(filename, oldname);
4410 }
4411
4412 trace->output = fopen(filename, "w");
4413
4414 return trace->output == NULL ? -errno : 0;
4415}
4416
4417static int parse_pagefaults(const struct option *opt, const char *str,
4418 int unset __maybe_unused)
4419{
4420 int *trace_pgfaults = opt->value;
4421
4422 if (strcmp(str, "all") == 0)
4423 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
4424 else if (strcmp(str, "maj") == 0)
4425 *trace_pgfaults |= TRACE_PFMAJ;
4426 else if (strcmp(str, "min") == 0)
4427 *trace_pgfaults |= TRACE_PFMIN;
4428 else
4429 return -1;
4430
4431 return 0;
4432}
4433
4434static void evlist__set_default_evsel_handler(struct evlist *evlist, void *handler)
4435{
4436 struct evsel *evsel;
4437
4438 evlist__for_each_entry(evlist, evsel) {
4439 if (evsel->handler == NULL)
4440 evsel->handler = handler;
4441 }
4442}
4443
4444static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name)
4445{
4446 struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
4447
4448 if (fmt) {
4449 struct syscall_fmt *scfmt = syscall_fmt__find(name);
4450
4451 if (scfmt) {
4452 int skip = 0;
4453
4454 if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 ||
4455 strcmp(evsel->tp_format->format.fields->name, "nr") == 0)
4456 ++skip;
4457
4458 memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt));
4459 }
4460 }
4461}
4462
4463static int evlist__set_syscall_tp_fields(struct evlist *evlist)
4464{
4465 struct evsel *evsel;
4466
4467 evlist__for_each_entry(evlist, evsel) {
4468 if (evsel->priv || !evsel->tp_format)
4469 continue;
4470
4471 if (strcmp(evsel->tp_format->system, "syscalls")) {
4472 perf_evsel__init_tp_arg_scnprintf(evsel);
4473 continue;
4474 }
4475
4476 if (perf_evsel__init_syscall_tp(evsel))
4477 return -1;
4478
4479 if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
4480 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
4481
4482 if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
4483 return -1;
4484
4485 evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1);
4486 } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
4487 struct syscall_tp *sc = __evsel__syscall_tp(evsel);
4488
4489 if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
4490 return -1;
4491
4492 evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1);
4493 }
4494 }
4495
4496 return 0;
4497}
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507static int trace__parse_events_option(const struct option *opt, const char *str,
4508 int unset __maybe_unused)
4509{
4510 struct trace *trace = (struct trace *)opt->value;
4511 const char *s = str;
4512 char *sep = NULL, *lists[2] = { NULL, NULL, };
4513 int len = strlen(str) + 1, err = -1, list, idx;
4514 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
4515 char group_name[PATH_MAX];
4516 struct syscall_fmt *fmt;
4517
4518 if (strace_groups_dir == NULL)
4519 return -1;
4520
4521 if (*s == '!') {
4522 ++s;
4523 trace->not_ev_qualifier = true;
4524 }
4525
4526 while (1) {
4527 if ((sep = strchr(s, ',')) != NULL)
4528 *sep = '\0';
4529
4530 list = 0;
4531 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
4532 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
4533 list = 1;
4534 goto do_concat;
4535 }
4536
4537 fmt = syscall_fmt__find_by_alias(s);
4538 if (fmt != NULL) {
4539 list = 1;
4540 s = fmt->name;
4541 } else {
4542 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
4543 if (access(group_name, R_OK) == 0)
4544 list = 1;
4545 }
4546do_concat:
4547 if (lists[list]) {
4548 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
4549 } else {
4550 lists[list] = malloc(len);
4551 if (lists[list] == NULL)
4552 goto out;
4553 strcpy(lists[list], s);
4554 }
4555
4556 if (!sep)
4557 break;
4558
4559 *sep = ',';
4560 s = sep + 1;
4561 }
4562
4563 if (lists[1] != NULL) {
4564 struct strlist_config slist_config = {
4565 .dirname = strace_groups_dir,
4566 };
4567
4568 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
4569 if (trace->ev_qualifier == NULL) {
4570 fputs("Not enough memory to parse event qualifier", trace->output);
4571 goto out;
4572 }
4573
4574 if (trace__validate_ev_qualifier(trace))
4575 goto out;
4576 trace->trace_syscalls = true;
4577 }
4578
4579 err = 0;
4580
4581 if (lists[0]) {
4582 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
4583 "event selector. use 'perf list' to list available events",
4584 parse_events_option);
4585 err = parse_events_option(&o, lists[0], 0);
4586 }
4587out:
4588 if (sep)
4589 *sep = ',';
4590
4591 return err;
4592}
4593
4594static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
4595{
4596 struct trace *trace = opt->value;
4597
4598 if (!list_empty(&trace->evlist->core.entries))
4599 return parse_cgroups(opt, str, unset);
4600
4601 trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
4602
4603 return 0;
4604}
4605
4606static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
4607{
4608 if (trace->bpf_obj == NULL)
4609 return NULL;
4610
4611 return bpf_object__find_map_by_name(trace->bpf_obj, name);
4612}
4613
4614static void trace__set_bpf_map_filtered_pids(struct trace *trace)
4615{
4616 trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
4617}
4618
4619static void trace__set_bpf_map_syscalls(struct trace *trace)
4620{
4621 trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
4622 trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
4623 trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
4624}
4625
4626static int trace__config(const char *var, const char *value, void *arg)
4627{
4628 struct trace *trace = arg;
4629 int err = 0;
4630
4631 if (!strcmp(var, "trace.add_events")) {
4632 trace->perfconfig_events = strdup(value);
4633 if (trace->perfconfig_events == NULL) {
4634 pr_err("Not enough memory for %s\n", "trace.add_events");
4635 return -1;
4636 }
4637 } else if (!strcmp(var, "trace.show_timestamp")) {
4638 trace->show_tstamp = perf_config_bool(var, value);
4639 } else if (!strcmp(var, "trace.show_duration")) {
4640 trace->show_duration = perf_config_bool(var, value);
4641 } else if (!strcmp(var, "trace.show_arg_names")) {
4642 trace->show_arg_names = perf_config_bool(var, value);
4643 if (!trace->show_arg_names)
4644 trace->show_zeros = true;
4645 } else if (!strcmp(var, "trace.show_zeros")) {
4646 bool new_show_zeros = perf_config_bool(var, value);
4647 if (!trace->show_arg_names && !new_show_zeros) {
4648 pr_warning("trace.show_zeros has to be set when trace.show_arg_names=no\n");
4649 goto out;
4650 }
4651 trace->show_zeros = new_show_zeros;
4652 } else if (!strcmp(var, "trace.show_prefix")) {
4653 trace->show_string_prefix = perf_config_bool(var, value);
4654 } else if (!strcmp(var, "trace.no_inherit")) {
4655 trace->opts.no_inherit = perf_config_bool(var, value);
4656 } else if (!strcmp(var, "trace.args_alignment")) {
4657 int args_alignment = 0;
4658 if (perf_config_int(&args_alignment, var, value) == 0)
4659 trace->args_alignment = args_alignment;
4660 } else if (!strcmp(var, "trace.tracepoint_beautifiers")) {
4661 if (strcasecmp(value, "libtraceevent") == 0)
4662 trace->libtraceevent_print = true;
4663 else if (strcasecmp(value, "libbeauty") == 0)
4664 trace->libtraceevent_print = false;
4665 }
4666out:
4667 return err;
4668}
4669
4670int cmd_trace(int argc, const char **argv)
4671{
4672 const char *trace_usage[] = {
4673 "perf trace [<options>] [<command>]",
4674 "perf trace [<options>] -- <command> [<options>]",
4675 "perf trace record [<options>] [<command>]",
4676 "perf trace record [<options>] -- <command> [<options>]",
4677 NULL
4678 };
4679 struct trace trace = {
4680 .opts = {
4681 .target = {
4682 .uid = UINT_MAX,
4683 .uses_mmap = true,
4684 },
4685 .user_freq = UINT_MAX,
4686 .user_interval = ULLONG_MAX,
4687 .no_buffering = true,
4688 .mmap_pages = UINT_MAX,
4689 },
4690 .output = stderr,
4691 .show_comm = true,
4692 .show_tstamp = true,
4693 .show_duration = true,
4694 .show_arg_names = true,
4695 .args_alignment = 70,
4696 .trace_syscalls = false,
4697 .kernel_syscallchains = false,
4698 .max_stack = UINT_MAX,
4699 .max_events = ULONG_MAX,
4700 };
4701 const char *map_dump_str = NULL;
4702 const char *output_name = NULL;
4703 const struct option trace_options[] = {
4704 OPT_CALLBACK('e', "event", &trace, "event",
4705 "event/syscall selector. use 'perf list' to list available events",
4706 trace__parse_events_option),
4707 OPT_CALLBACK(0, "filter", &trace.evlist, "filter",
4708 "event filter", parse_filter),
4709 OPT_BOOLEAN(0, "comm", &trace.show_comm,
4710 "show the thread COMM next to its id"),
4711 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
4712 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
4713 trace__parse_events_option),
4714 OPT_STRING('o', "output", &output_name, "file", "output file name"),
4715 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
4716 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
4717 "trace events on existing process id"),
4718 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
4719 "trace events on existing thread id"),
4720 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
4721 "pids to filter (by the kernel)", trace__set_filter_pids_from_option),
4722 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
4723 "system-wide collection from all CPUs"),
4724 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
4725 "list of cpus to monitor"),
4726 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
4727 "child tasks do not inherit counters"),
4728 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
4729 "number of mmap data pages",
4730 perf_evlist__parse_mmap_pages),
4731 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
4732 "user to profile"),
4733 OPT_CALLBACK(0, "duration", &trace, "float",
4734 "show only events with duration > N.M ms",
4735 trace__set_duration),
4736#ifdef HAVE_LIBBPF_SUPPORT
4737 OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
4738#endif
4739 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
4740 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4741 OPT_BOOLEAN('T', "time", &trace.full_time,
4742 "Show full timestamp, not time relative to first start"),
4743 OPT_BOOLEAN(0, "failure", &trace.failure_only,
4744 "Show only syscalls that failed"),
4745 OPT_BOOLEAN('s', "summary", &trace.summary_only,
4746 "Show only syscall summary with statistics"),
4747 OPT_BOOLEAN('S', "with-summary", &trace.summary,
4748 "Show all syscalls and summary with statistics"),
4749 OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary,
4750 "Show errno stats per syscall, use with -s or -S"),
4751 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
4752 "Trace pagefaults", parse_pagefaults, "maj"),
4753 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
4754 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
4755 OPT_CALLBACK(0, "call-graph", &trace.opts,
4756 "record_mode[,record_size]", record_callchain_help,
4757 &record_parse_callchain_opt),
4758 OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print,
4759 "Use libtraceevent to print the tracepoint arguments."),
4760 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
4761 "Show the kernel callchains on the syscall exit path"),
4762 OPT_ULONG(0, "max-events", &trace.max_events,
4763 "Set the maximum number of events to print, exit after that is reached. "),
4764 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
4765 "Set the minimum stack depth when parsing the callchain, "
4766 "anything below the specified depth will be ignored."),
4767 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
4768 "Set the maximum stack depth when parsing the callchain, "
4769 "anything beyond the specified depth will be ignored. "
4770 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
4771 OPT_BOOLEAN(0, "sort-events", &trace.sort_events,
4772 "Sort batch of events before processing, use if getting out of order events"),
4773 OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
4774 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
4775 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
4776 "per thread proc mmap processing timeout in ms"),
4777 OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
4778 trace__parse_cgroups),
4779 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
4780 "ms to wait before starting measurement after program "
4781 "start"),
4782 OPTS_EVSWITCH(&trace.evswitch),
4783 OPT_END()
4784 };
4785 bool __maybe_unused max_stack_user_set = true;
4786 bool mmap_pages_user_set = true;
4787 struct evsel *evsel;
4788 const char * const trace_subcommands[] = { "record", NULL };
4789 int err = -1;
4790 char bf[BUFSIZ];
4791
4792 signal(SIGSEGV, sighandler_dump_stack);
4793 signal(SIGFPE, sighandler_dump_stack);
4794
4795 trace.evlist = evlist__new();
4796 trace.sctbl = syscalltbl__new();
4797
4798 if (trace.evlist == NULL || trace.sctbl == NULL) {
4799 pr_err("Not enough memory to run!\n");
4800 err = -ENOMEM;
4801 goto out;
4802 }
4803
4804
4805
4806
4807
4808
4809
4810
4811 rlimit__bump_memlock();
4812
4813 err = perf_config(trace__config, &trace);
4814 if (err)
4815 goto out;
4816
4817 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
4818 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
4832 trace.evlist->core.nr_entries == 0 ) {
4833 trace.trace_syscalls = true;
4834 }
4835
4836
4837
4838
4839
4840
4841 if (trace.perfconfig_events != NULL) {
4842 struct parse_events_error parse_err;
4843
4844 bzero(&parse_err, sizeof(parse_err));
4845 err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err);
4846 if (err) {
4847 parse_events_print_error(&parse_err, trace.perfconfig_events);
4848 goto out;
4849 }
4850 }
4851
4852 if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
4853 usage_with_options_msg(trace_usage, trace_options,
4854 "cgroup monitoring only available in system-wide mode");
4855 }
4856
4857 evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
4858 if (IS_ERR(evsel)) {
4859 bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
4860 pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
4861 goto out;
4862 }
4863
4864 if (evsel) {
4865 trace.syscalls.events.augmented = evsel;
4866
4867 evsel = perf_evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
4868 if (evsel == NULL) {
4869 pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
4870 goto out;
4871 }
4872
4873 if (evsel->bpf_obj == NULL) {
4874 pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n");
4875 goto out;
4876 }
4877
4878 trace.bpf_obj = evsel->bpf_obj;
4879
4880
4881
4882
4883
4884
4885 if (!trace.trace_syscalls && trace__only_augmented_syscalls_evsels(&trace))
4886 trace.trace_syscalls = true;
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912 if (!trace.trace_syscalls) {
4913 trace__delete_augmented_syscalls(&trace);
4914 } else {
4915 trace__set_bpf_map_filtered_pids(&trace);
4916 trace__set_bpf_map_syscalls(&trace);
4917 trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
4918 }
4919 }
4920
4921 err = bpf__setup_stdout(trace.evlist);
4922 if (err) {
4923 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
4924 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
4925 goto out;
4926 }
4927
4928 err = -1;
4929
4930 if (map_dump_str) {
4931 trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
4932 if (trace.dump.map == NULL) {
4933 pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
4934 goto out;
4935 }
4936 }
4937
4938 if (trace.trace_pgfaults) {
4939 trace.opts.sample_address = true;
4940 trace.opts.sample_time = true;
4941 }
4942
4943 if (trace.opts.mmap_pages == UINT_MAX)
4944 mmap_pages_user_set = false;
4945
4946 if (trace.max_stack == UINT_MAX) {
4947 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
4948 max_stack_user_set = false;
4949 }
4950
4951#ifdef HAVE_DWARF_UNWIND_SUPPORT
4952 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
4953 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
4954 }
4955#endif
4956
4957 if (callchain_param.enabled) {
4958 if (!mmap_pages_user_set && geteuid() == 0)
4959 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
4960
4961 symbol_conf.use_callchain = true;
4962 }
4963
4964 if (trace.evlist->core.nr_entries > 0) {
4965 evlist__set_default_evsel_handler(trace.evlist, trace__event_handler);
4966 if (evlist__set_syscall_tp_fields(trace.evlist)) {
4967 perror("failed to set syscalls:* tracepoint fields");
4968 goto out;
4969 }
4970 }
4971
4972 if (trace.sort_events) {
4973 ordered_events__init(&trace.oe.data, ordered_events__deliver_event, &trace);
4974 ordered_events__set_copy_on_queue(&trace.oe.data, true);
4975 }
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988 if (trace.syscalls.events.augmented) {
4989 evlist__for_each_entry(trace.evlist, evsel) {
4990 bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
4991
4992 if (raw_syscalls_sys_exit) {
4993 trace.raw_augmented_syscalls = true;
4994 goto init_augmented_syscall_tp;
4995 }
4996
4997 if (trace.syscalls.events.augmented->priv == NULL &&
4998 strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) {
4999 struct evsel *augmented = trace.syscalls.events.augmented;
5000 if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) ||
5001 perf_evsel__init_augmented_syscall_tp_args(augmented))
5002 goto out;
5003
5004
5005
5006
5007
5008 augmented->handler = trace__sys_enter;
5009
5010
5011
5012
5013
5014
5015 if (perf_evsel__init_augmented_syscall_tp(evsel, evsel) ||
5016 perf_evsel__init_augmented_syscall_tp_args(evsel))
5017 goto out;
5018 evsel->handler = trace__sys_enter;
5019 }
5020
5021 if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
5022 struct syscall_tp *sc;
5023init_augmented_syscall_tp:
5024 if (perf_evsel__init_augmented_syscall_tp(evsel, evsel))
5025 goto out;
5026 sc = __evsel__syscall_tp(evsel);
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046 if (trace.raw_augmented_syscalls)
5047 trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
5048 perf_evsel__init_augmented_syscall_tp_ret(evsel);
5049 evsel->handler = trace__sys_exit;
5050 }
5051 }
5052 }
5053
5054 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
5055 return trace__record(&trace, argc-1, &argv[1]);
5056
5057
5058 if (trace.errno_summary && !trace.summary && !trace.summary_only)
5059 trace.summary_only = true;
5060
5061
5062 if (trace.summary_only)
5063 trace.summary = trace.summary_only;
5064
5065 if (output_name != NULL) {
5066 err = trace__open_output(&trace, output_name);
5067 if (err < 0) {
5068 perror("failed to create output file");
5069 goto out;
5070 }
5071 }
5072
5073 err = evswitch__init(&trace.evswitch, trace.evlist, stderr);
5074 if (err)
5075 goto out_close;
5076
5077 err = target__validate(&trace.opts.target);
5078 if (err) {
5079 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
5080 fprintf(trace.output, "%s", bf);
5081 goto out_close;
5082 }
5083
5084 err = target__parse_uid(&trace.opts.target);
5085 if (err) {
5086 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
5087 fprintf(trace.output, "%s", bf);
5088 goto out_close;
5089 }
5090
5091 if (!argc && target__none(&trace.opts.target))
5092 trace.opts.target.system_wide = true;
5093
5094 if (input_name)
5095 err = trace__replay(&trace);
5096 else
5097 err = trace__run(&trace, argc, argv);
5098
5099out_close:
5100 if (output_name != NULL)
5101 fclose(trace.output);
5102out:
5103 zfree(&trace.perfconfig_events);
5104 return err;
5105}
5106