1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include <traceevent/event-parse.h>
20#include <api/fs/tracing_path.h>
21#include "builtin.h"
22#include "util/color.h"
23#include "util/debug.h"
24#include "util/evlist.h"
25#include <subcmd/exec-cmd.h>
26#include "util/machine.h"
27#include "util/session.h"
28#include "util/thread.h"
29#include <subcmd/parse-options.h>
30#include "util/strlist.h"
31#include "util/intlist.h"
32#include "util/thread_map.h"
33#include "util/stat.h"
34#include "trace-event.h"
35#include "util/parse-events.h"
36
37#include <libaudit.h>
38#include <stdlib.h>
39#include <sys/mman.h>
40#include <linux/futex.h>
41#include <linux/err.h>
42
43
/*
 * Fallback values for flag constants that the included system headers may
 * not provide.  Each constant is defined here only if it is still missing.
 */

/* mmap() / madvise() */
#if !defined(MAP_STACK)
# define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
# define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
# define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
# define MADV_UNMERGEABLE	13
#endif

/* eventfd() */
#if !defined(EFD_SEMAPHORE)
# define EFD_SEMAPHORE		1
#endif

#if !defined(EFD_NONBLOCK)
# define EFD_NONBLOCK		00004000
#endif

#if !defined(EFD_CLOEXEC)
# define EFD_CLOEXEC		02000000
#endif

/* open() */
#if !defined(O_CLOEXEC)
# define O_CLOEXEC		02000000
#endif

/* socket() / message flags */
#if !defined(SOCK_DCCP)
# define SOCK_DCCP		6
#endif

#if !defined(SOCK_CLOEXEC)
# define SOCK_CLOEXEC		02000000
#endif

#if !defined(SOCK_NONBLOCK)
# define SOCK_NONBLOCK		00004000
#endif

#if !defined(MSG_CMSG_CLOEXEC)
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* perf_event_open() */
#if !defined(PERF_FLAG_FD_NO_GROUP)
# define PERF_FLAG_FD_NO_GROUP	(1UL << 0)
#endif

#if !defined(PERF_FLAG_FD_OUTPUT)
# define PERF_FLAG_FD_OUTPUT	(1UL << 1)
#endif

#if !defined(PERF_FLAG_PID_CGROUP)
# define PERF_FLAG_PID_CGROUP	(1UL << 2)
#endif

#if !defined(PERF_FLAG_FD_CLOEXEC)
# define PERF_FLAG_FD_CLOEXEC	(1UL << 3)
#endif
108
109
110struct tp_field {
111 int offset;
112 union {
113 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
114 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
115 };
116};
117
118#define TP_UINT_FIELD(bits) \
119static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
120{ \
121 u##bits value; \
122 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
123 return value; \
124}
125
126TP_UINT_FIELD(8);
127TP_UINT_FIELD(16);
128TP_UINT_FIELD(32);
129TP_UINT_FIELD(64);
130
131#define TP_UINT_FIELD__SWAPPED(bits) \
132static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
133{ \
134 u##bits value; \
135 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136 return bswap_##bits(value);\
137}
138
139TP_UINT_FIELD__SWAPPED(16);
140TP_UINT_FIELD__SWAPPED(32);
141TP_UINT_FIELD__SWAPPED(64);
142
143static int tp_field__init_uint(struct tp_field *field,
144 struct format_field *format_field,
145 bool needs_swap)
146{
147 field->offset = format_field->offset;
148
149 switch (format_field->size) {
150 case 1:
151 field->integer = tp_field__u8;
152 break;
153 case 2:
154 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
155 break;
156 case 4:
157 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
158 break;
159 case 8:
160 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
161 break;
162 default:
163 return -1;
164 }
165
166 return 0;
167}
168
169static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
170{
171 return sample->raw_data + field->offset;
172}
173
174static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
175{
176 field->offset = format_field->offset;
177 field->pointer = tp_field__ptr;
178 return 0;
179}
180
181struct syscall_tp {
182 struct tp_field id;
183 union {
184 struct tp_field args, ret;
185 };
186};
187
188static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
189 struct tp_field *field,
190 const char *name)
191{
192 struct format_field *format_field = perf_evsel__field(evsel, name);
193
194 if (format_field == NULL)
195 return -1;
196
197 return tp_field__init_uint(field, format_field, evsel->needs_swap);
198}
199
/*
 * Initialise member @name of the struct syscall_tp stored in evsel->priv as
 * an integer accessor for the tracepoint field with the same name.
 * Evaluates to the result of perf_evsel__init_tp_uint_field().
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *__sc_tp = (evsel)->priv; \
	   perf_evsel__init_tp_uint_field((evsel), &__sc_tp->name, #name); })
203
/*
 * Look up the tracepoint field called @name in @evsel and initialise @field
 * as a pointer accessor for it.
 *
 * Returns -1 if the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
215
/*
 * Initialise member @name of the struct syscall_tp stored in evsel->priv as
 * a pointer accessor for the tracepoint field with the same name.
 * Evaluates to the result of perf_evsel__init_tp_ptr_field().
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *__sc_tp = (evsel)->priv; \
	   perf_evsel__init_tp_ptr_field((evsel), &__sc_tp->name, #name); })
219
220static void perf_evsel__delete_priv(struct perf_evsel *evsel)
221{
222 zfree(&evsel->priv);
223 perf_evsel__delete(evsel);
224}
225
226static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
227{
228 evsel->priv = malloc(sizeof(struct syscall_tp));
229 if (evsel->priv != NULL) {
230 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
231 goto out_delete;
232
233 evsel->handler = handler;
234 return 0;
235 }
236
237 return -ENOMEM;
238
239out_delete:
240 zfree(&evsel->priv);
241 return -ENOENT;
242}
243
244static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
245{
246 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
247
248
249 if (IS_ERR(evsel))
250 evsel = perf_evsel__newtp("syscalls", direction);
251
252 if (IS_ERR(evsel))
253 return NULL;
254
255 if (perf_evsel__init_syscall_tp(evsel, handler))
256 goto out_delete;
257
258 return evsel;
259
260out_delete:
261 perf_evsel__delete_priv(evsel);
262 return NULL;
263}
264
/*
 * Read integer field @name of @sample through the struct syscall_tp kept in
 * evsel->priv.  Evaluates to the u64 returned by the field's ->integer reader.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *__sc_tp = (evsel)->priv; \
	   __sc_tp->name.integer(&__sc_tp->name, (sample)); })
268
/*
 * Locate pointer field @name of @sample through the struct syscall_tp kept in
 * evsel->priv.  Evaluates to the void * returned by the field's ->pointer
 * reader.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *__sc_tp = (evsel)->priv; \
	   __sc_tp->name.pointer(&__sc_tp->name, (sample)); })
272
273struct syscall_arg {
274 unsigned long val;
275 struct thread *thread;
276 struct trace *trace;
277 void *parm;
278 u8 idx;
279 u8 mask;
280};
281
/*
 * Table mapping a contiguous range of integer values to names.
 *
 * @offset:     value that corresponds to entries[0]
 * @nr_entries: number of slots in @entries
 * @entries:    the names, indexed by (value - offset)
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char  **entries;
};
287
/*
 * Define "strarray__<array>", a struct strarray describing @array with a
 * zero offset, so that value N maps to array[N].
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.offset	    = 0, \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
}
292
/*
 * Define "strarray__<array>", a struct strarray describing @array whose
 * first entry corresponds to the value @off.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
	.offset	    = off, \
}
298
299static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
300 const char *intfmt,
301 struct syscall_arg *arg)
302{
303 struct strarray *sa = arg->parm;
304 int idx = arg->val - sa->offset;
305
306 if (idx < 0 || idx >= sa->nr_entries)
307 return scnprintf(bf, size, intfmt, arg->val);
308
309 return scnprintf(bf, size, "%s", sa->entries[idx]);
310}
311
/*
 * Formatter for arguments backed by a struct strarray: prints the symbolic
 * name, or the value in decimal when it has none.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
319
#if defined(__i386__) || defined(__x86_64__)
/*
 * Same as syscall_arg__scnprintf_strarray(), except that values without a
 * name are printed in hexadecimal ("%#x").  Only built on x86.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif
333
/*
 * File descriptor formatter.  Forward declaration only: the definition is
 * not in this part of the file, but syscall_arg__scnprintf_fd_at() needs it.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
338
339static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
340 struct syscall_arg *arg)
341{
342 int fd = arg->val;
343
344 if (fd == AT_FDCWD)
345 return scnprintf(bf, size, "CWD");
346
347 return syscall_arg__scnprintf_fd(bf, size, arg);
348}
349
350#define SCA_FDAT syscall_arg__scnprintf_fd_at
351
/*
 * Formatter used for the fd argument of close().  Forward declaration only:
 * the definition is not in this part of the file.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
356
357static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
358 struct syscall_arg *arg)
359{
360 return scnprintf(bf, size, "%#lx", arg->val);
361}
362
363#define SCA_HEX syscall_arg__scnprintf_hex
364
365static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
366 struct syscall_arg *arg)
367{
368 return scnprintf(bf, size, "%d", arg->val);
369}
370
371#define SCA_INT syscall_arg__scnprintf_int
372
373static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
374 struct syscall_arg *arg)
375{
376 int printed = 0, prot = arg->val;
377
378 if (prot == PROT_NONE)
379 return scnprintf(bf, size, "NONE");
380#define P_MMAP_PROT(n) \
381 if (prot & PROT_##n) { \
382 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
383 prot &= ~PROT_##n; \
384 }
385
386 P_MMAP_PROT(EXEC);
387 P_MMAP_PROT(READ);
388 P_MMAP_PROT(WRITE);
389#ifdef PROT_SEM
390 P_MMAP_PROT(SEM);
391#endif
392 P_MMAP_PROT(GROWSDOWN);
393 P_MMAP_PROT(GROWSUP);
394#undef P_MMAP_PROT
395
396 if (prot)
397 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
398
399 return printed;
400}
401
402#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
403
404static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
405 struct syscall_arg *arg)
406{
407 int printed = 0, flags = arg->val;
408
409#define P_MMAP_FLAG(n) \
410 if (flags & MAP_##n) { \
411 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
412 flags &= ~MAP_##n; \
413 }
414
415 P_MMAP_FLAG(SHARED);
416 P_MMAP_FLAG(PRIVATE);
417#ifdef MAP_32BIT
418 P_MMAP_FLAG(32BIT);
419#endif
420 P_MMAP_FLAG(ANONYMOUS);
421 P_MMAP_FLAG(DENYWRITE);
422 P_MMAP_FLAG(EXECUTABLE);
423 P_MMAP_FLAG(FILE);
424 P_MMAP_FLAG(FIXED);
425 P_MMAP_FLAG(GROWSDOWN);
426#ifdef MAP_HUGETLB
427 P_MMAP_FLAG(HUGETLB);
428#endif
429 P_MMAP_FLAG(LOCKED);
430 P_MMAP_FLAG(NONBLOCK);
431 P_MMAP_FLAG(NORESERVE);
432 P_MMAP_FLAG(POPULATE);
433 P_MMAP_FLAG(STACK);
434#ifdef MAP_UNINITIALIZED
435 P_MMAP_FLAG(UNINITIALIZED);
436#endif
437#undef P_MMAP_FLAG
438
439 if (flags)
440 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
441
442 return printed;
443}
444
445#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
446
447static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
448 struct syscall_arg *arg)
449{
450 int printed = 0, flags = arg->val;
451
452#define P_MREMAP_FLAG(n) \
453 if (flags & MREMAP_##n) { \
454 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
455 flags &= ~MREMAP_##n; \
456 }
457
458 P_MREMAP_FLAG(MAYMOVE);
459#ifdef MREMAP_FIXED
460 P_MREMAP_FLAG(FIXED);
461#endif
462#undef P_MREMAP_FLAG
463
464 if (flags)
465 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
466
467 return printed;
468}
469
470#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
471
472static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
473 struct syscall_arg *arg)
474{
475 int behavior = arg->val;
476
477 switch (behavior) {
478#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
479 P_MADV_BHV(NORMAL);
480 P_MADV_BHV(RANDOM);
481 P_MADV_BHV(SEQUENTIAL);
482 P_MADV_BHV(WILLNEED);
483 P_MADV_BHV(DONTNEED);
484 P_MADV_BHV(REMOVE);
485 P_MADV_BHV(DONTFORK);
486 P_MADV_BHV(DOFORK);
487 P_MADV_BHV(HWPOISON);
488#ifdef MADV_SOFT_OFFLINE
489 P_MADV_BHV(SOFT_OFFLINE);
490#endif
491 P_MADV_BHV(MERGEABLE);
492 P_MADV_BHV(UNMERGEABLE);
493#ifdef MADV_HUGEPAGE
494 P_MADV_BHV(HUGEPAGE);
495#endif
496#ifdef MADV_NOHUGEPAGE
497 P_MADV_BHV(NOHUGEPAGE);
498#endif
499#ifdef MADV_DONTDUMP
500 P_MADV_BHV(DONTDUMP);
501#endif
502#ifdef MADV_DODUMP
503 P_MADV_BHV(DODUMP);
504#endif
505#undef P_MADV_PHV
506 default: break;
507 }
508
509 return scnprintf(bf, size, "%#x", behavior);
510}
511
512#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
513
514static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
515 struct syscall_arg *arg)
516{
517 int printed = 0, op = arg->val;
518
519 if (op == 0)
520 return scnprintf(bf, size, "NONE");
521#define P_CMD(cmd) \
522 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
523 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
524 op &= ~LOCK_##cmd; \
525 }
526
527 P_CMD(SH);
528 P_CMD(EX);
529 P_CMD(NB);
530 P_CMD(UN);
531 P_CMD(MAND);
532 P_CMD(RW);
533 P_CMD(READ);
534 P_CMD(WRITE);
535#undef P_OP
536
537 if (op)
538 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
539
540 return printed;
541}
542
543#define SCA_FLOCK syscall_arg__scnprintf_flock
544
545static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
546{
547 enum syscall_futex_args {
548 SCF_UADDR = (1 << 0),
549 SCF_OP = (1 << 1),
550 SCF_VAL = (1 << 2),
551 SCF_TIMEOUT = (1 << 3),
552 SCF_UADDR2 = (1 << 4),
553 SCF_VAL3 = (1 << 5),
554 };
555 int op = arg->val;
556 int cmd = op & FUTEX_CMD_MASK;
557 size_t printed = 0;
558
559 switch (cmd) {
560#define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
561 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
562 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
563 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
564 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
565 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
566 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
567 P_FUTEX_OP(WAKE_OP); break;
568 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
569 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
570 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
571 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
572 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
573 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
574 default: printed = scnprintf(bf, size, "%#x", cmd); break;
575 }
576
577 if (op & FUTEX_PRIVATE_FLAG)
578 printed += scnprintf(bf + printed, size - printed, "|PRIV");
579
580 if (op & FUTEX_CLOCK_REALTIME)
581 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
582
583 return printed;
584}
585
586#define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
587
/* Names for the first argument of bpf(), indexed by command value from 0. */
static const char *bpf_cmd[] = {
	"MAP_CREATE",
	"MAP_LOOKUP_ELEM",
	"MAP_UPDATE_ELEM",
	"MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY",
	"PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);
593
594static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
595static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
596
/* Names for the interval timer selector (used for getitimer()), from 0. */
static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);
599
/* Names for the keyctl() option argument, indexed by value from 0. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID",
	"JOIN_SESSION_KEYRING",
	"UPDATE",
	"REVOKE",
	"CHOWN",
	"SETPERM",
	"DESCRIBE",
	"CLEAR",
	"LINK",
	"UNLINK",
	"SEARCH",
	"READ",
	"INSTANTIATE",
	"NEGATE",
	"SET_REQKEY_KEYRING",
	"SET_TIMEOUT",
	"ASSUME_AUTHORITY",
	"GET_SECURITY",
	"SESSION_TO_PARENT",
	"REJECT",
	"INSTANTIATE_IOV",
	"INVALIDATE",
	"GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);
608
/*
 * Names for the lseek() whence argument, indexed by value from 0.  "DATA"
 * and "HOLE" are present only when the headers define SEEK_DATA/SEEK_HOLE.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
618
/*
 * Names for the fcntl() command argument, indexed by value from 0.
 * NOTE(review): the later entries keep an "F_" prefix that the earlier ones
 * do not have; the strings are printed as they are.
 */
static const char *fcntl_cmds[] = {
	"DUPFD",
	"GETFD",
	"SETFD",
	"GETFL",
	"SETFL",
	"GETLK",
	"SETLK",
	"SETLKW",
	"SETOWN",
	"GETOWN",
	"SETSIG",
	"GETSIG",
	"F_GETLK64",
	"F_SETLK64",
	"F_SETLKW64",
	"F_SETOWN_EX",
	"F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
626
/* Names for the rlimit resource argument, indexed by value from 0. */
static const char *rlimit_resources[] = {
	"CPU",
	"FSIZE",
	"DATA",
	"STACK",
	"CORE",
	"RSS",
	"NPROC",
	"NOFILE",
	"MEMLOCK",
	"AS",
	"LOCKS",
	"SIGPENDING",
	"MSGQUEUE",
	"NICE",
	"RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
633
/* Names for a signal-mask "how" argument, indexed by value from 0. */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
636
/* Names for a clock id argument, indexed by value from 0. */
static const char *clockid[] = {
	"REALTIME",
	"MONOTONIC",
	"PROCESS_CPUTIME_ID",
	"THREAD_CPUTIME_ID",
	"MONOTONIC_RAW",
	"REALTIME_COARSE",
	"MONOTONIC_COARSE",
	"BOOTTIME",
	"REALTIME_ALARM",
	"BOOTTIME_ALARM",
	"SGI_CYCLE",
	"TAI",
};
static DEFINE_STRARRAY(clockid);
643
/* Names for a socket address/protocol family, indexed by value from 0. */
static const char *socket_families[] = {
	"UNSPEC",
	"LOCAL",
	"INET",
	"AX25",
	"IPX",
	"APPLETALK",
	"NETROM",
	"BRIDGE",
	"ATMPVC",
	"X25",
	"INET6",
	"ROSE",
	"DECnet",
	"NETBEUI",
	"SECURITY",
	"KEY",
	"NETLINK",
	"PACKET",
	"ASH",
	"ECONET",
	"ATMSVC",
	"RDS",
	"SNA",
	"IRDA",
	"PPPOX",
	"WANPIPE",
	"LLC",
	"IB",
	"CAN",
	"TIPC",
	"BLUETOOTH",
	"IUCV",
	"RXRPC",
	"ISDN",
	"PHONET",
	"IEEE802154",
	"CAIF",
	"ALG",
	"NFC",
	"VSOCK",
};
static DEFINE_STRARRAY(socket_families);
653
654#ifndef SOCK_TYPE_MASK
655#define SOCK_TYPE_MASK 0xf
656#endif
657
658static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
659 struct syscall_arg *arg)
660{
661 size_t printed;
662 int type = arg->val,
663 flags = type & ~SOCK_TYPE_MASK;
664
665 type &= SOCK_TYPE_MASK;
666
667
668
669 switch (type) {
670#define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
671 P_SK_TYPE(STREAM);
672 P_SK_TYPE(DGRAM);
673 P_SK_TYPE(RAW);
674 P_SK_TYPE(RDM);
675 P_SK_TYPE(SEQPACKET);
676 P_SK_TYPE(DCCP);
677 P_SK_TYPE(PACKET);
678#undef P_SK_TYPE
679 default:
680 printed = scnprintf(bf, size, "%#x", type);
681 }
682
683#define P_SK_FLAG(n) \
684 if (flags & SOCK_##n) { \
685 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
686 flags &= ~SOCK_##n; \
687 }
688
689 P_SK_FLAG(CLOEXEC);
690 P_SK_FLAG(NONBLOCK);
691#undef P_SK_FLAG
692
693 if (flags)
694 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
695
696 return printed;
697}
698
699#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
700
701#ifndef MSG_PROBE
702#define MSG_PROBE 0x10
703#endif
704#ifndef MSG_WAITFORONE
705#define MSG_WAITFORONE 0x10000
706#endif
707#ifndef MSG_SENDPAGE_NOTLAST
708#define MSG_SENDPAGE_NOTLAST 0x20000
709#endif
710#ifndef MSG_FASTOPEN
711#define MSG_FASTOPEN 0x20000000
712#endif
713
714static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
715 struct syscall_arg *arg)
716{
717 int printed = 0, flags = arg->val;
718
719 if (flags == 0)
720 return scnprintf(bf, size, "NONE");
721#define P_MSG_FLAG(n) \
722 if (flags & MSG_##n) { \
723 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
724 flags &= ~MSG_##n; \
725 }
726
727 P_MSG_FLAG(OOB);
728 P_MSG_FLAG(PEEK);
729 P_MSG_FLAG(DONTROUTE);
730 P_MSG_FLAG(TRYHARD);
731 P_MSG_FLAG(CTRUNC);
732 P_MSG_FLAG(PROBE);
733 P_MSG_FLAG(TRUNC);
734 P_MSG_FLAG(DONTWAIT);
735 P_MSG_FLAG(EOR);
736 P_MSG_FLAG(WAITALL);
737 P_MSG_FLAG(FIN);
738 P_MSG_FLAG(SYN);
739 P_MSG_FLAG(CONFIRM);
740 P_MSG_FLAG(RST);
741 P_MSG_FLAG(ERRQUEUE);
742 P_MSG_FLAG(NOSIGNAL);
743 P_MSG_FLAG(MORE);
744 P_MSG_FLAG(WAITFORONE);
745 P_MSG_FLAG(SENDPAGE_NOTLAST);
746 P_MSG_FLAG(FASTOPEN);
747 P_MSG_FLAG(CMSG_CLOEXEC);
748#undef P_MSG_FLAG
749
750 if (flags)
751 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752
753 return printed;
754}
755
756#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
757
758static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
759 struct syscall_arg *arg)
760{
761 size_t printed = 0;
762 int mode = arg->val;
763
764 if (mode == F_OK)
765 return scnprintf(bf, size, "F");
766#define P_MODE(n) \
767 if (mode & n##_OK) { \
768 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
769 mode &= ~n##_OK; \
770 }
771
772 P_MODE(R);
773 P_MODE(W);
774 P_MODE(X);
775#undef P_MODE
776
777 if (mode)
778 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
779
780 return printed;
781}
782
783#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
784
/*
 * Formatter for pathname arguments.  Forward declaration only: the
 * definition is not in this part of the file.
 */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
789
790static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
791 struct syscall_arg *arg)
792{
793 int printed = 0, flags = arg->val;
794
795 if (!(flags & O_CREAT))
796 arg->mask |= 1 << (arg->idx + 1);
797
798 if (flags == 0)
799 return scnprintf(bf, size, "RDONLY");
800#define P_FLAG(n) \
801 if (flags & O_##n) { \
802 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
803 flags &= ~O_##n; \
804 }
805
806 P_FLAG(APPEND);
807 P_FLAG(ASYNC);
808 P_FLAG(CLOEXEC);
809 P_FLAG(CREAT);
810 P_FLAG(DIRECT);
811 P_FLAG(DIRECTORY);
812 P_FLAG(EXCL);
813 P_FLAG(LARGEFILE);
814 P_FLAG(NOATIME);
815 P_FLAG(NOCTTY);
816#ifdef O_NONBLOCK
817 P_FLAG(NONBLOCK);
818#elif O_NDELAY
819 P_FLAG(NDELAY);
820#endif
821#ifdef O_PATH
822 P_FLAG(PATH);
823#endif
824 P_FLAG(RDWR);
825#ifdef O_DSYNC
826 if ((flags & O_SYNC) == O_SYNC)
827 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
828 else {
829 P_FLAG(DSYNC);
830 }
831#else
832 P_FLAG(SYNC);
833#endif
834 P_FLAG(TRUNC);
835 P_FLAG(WRONLY);
836#undef P_FLAG
837
838 if (flags)
839 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
840
841 return printed;
842}
843
844#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
845
846static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
847 struct syscall_arg *arg)
848{
849 int printed = 0, flags = arg->val;
850
851 if (flags == 0)
852 return 0;
853
854#define P_FLAG(n) \
855 if (flags & PERF_FLAG_##n) { \
856 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
857 flags &= ~PERF_FLAG_##n; \
858 }
859
860 P_FLAG(FD_NO_GROUP);
861 P_FLAG(FD_OUTPUT);
862 P_FLAG(PID_CGROUP);
863 P_FLAG(FD_CLOEXEC);
864#undef P_FLAG
865
866 if (flags)
867 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
868
869 return printed;
870}
871
872#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
873
874static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
875 struct syscall_arg *arg)
876{
877 int printed = 0, flags = arg->val;
878
879 if (flags == 0)
880 return scnprintf(bf, size, "NONE");
881#define P_FLAG(n) \
882 if (flags & EFD_##n) { \
883 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
884 flags &= ~EFD_##n; \
885 }
886
887 P_FLAG(SEMAPHORE);
888 P_FLAG(CLOEXEC);
889 P_FLAG(NONBLOCK);
890#undef P_FLAG
891
892 if (flags)
893 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
894
895 return printed;
896}
897
898#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
899
900static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
901 struct syscall_arg *arg)
902{
903 int printed = 0, flags = arg->val;
904
905#define P_FLAG(n) \
906 if (flags & O_##n) { \
907 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
908 flags &= ~O_##n; \
909 }
910
911 P_FLAG(CLOEXEC);
912 P_FLAG(NONBLOCK);
913#undef P_FLAG
914
915 if (flags)
916 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
917
918 return printed;
919}
920
921#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
922
923static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
924{
925 int sig = arg->val;
926
927 switch (sig) {
928#define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
929 P_SIGNUM(HUP);
930 P_SIGNUM(INT);
931 P_SIGNUM(QUIT);
932 P_SIGNUM(ILL);
933 P_SIGNUM(TRAP);
934 P_SIGNUM(ABRT);
935 P_SIGNUM(BUS);
936 P_SIGNUM(FPE);
937 P_SIGNUM(KILL);
938 P_SIGNUM(USR1);
939 P_SIGNUM(SEGV);
940 P_SIGNUM(USR2);
941 P_SIGNUM(PIPE);
942 P_SIGNUM(ALRM);
943 P_SIGNUM(TERM);
944 P_SIGNUM(CHLD);
945 P_SIGNUM(CONT);
946 P_SIGNUM(STOP);
947 P_SIGNUM(TSTP);
948 P_SIGNUM(TTIN);
949 P_SIGNUM(TTOU);
950 P_SIGNUM(URG);
951 P_SIGNUM(XCPU);
952 P_SIGNUM(XFSZ);
953 P_SIGNUM(VTALRM);
954 P_SIGNUM(PROF);
955 P_SIGNUM(WINCH);
956 P_SIGNUM(IO);
957 P_SIGNUM(PWR);
958 P_SIGNUM(SYS);
959#ifdef SIGEMT
960 P_SIGNUM(EMT);
961#endif
962#ifdef SIGSTKFLT
963 P_SIGNUM(STKFLT);
964#endif
965#ifdef SIGSWI
966 P_SIGNUM(SWI);
967#endif
968 default: break;
969 }
970
971 return scnprintf(bf, size, "%#x", sig);
972}
973
974#define SCA_SIGNUM syscall_arg__scnprintf_signum
975
976#if defined(__i386__) || defined(__x86_64__)
977
978
979
980#define TCGETS 0x5401
981
982static const char *tioctls[] = {
983 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
984 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
985 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
986 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
987 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
988 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
989 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
990 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
991 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
992 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
993 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
994 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
995 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
996 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
997 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
998};
999
1000static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
1001#endif
1002
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number @arg with SCA_STRARRAY, using the table strarray__<array>.
 * @name is not used by the expansion; it only labels the argument at the
 * point of use.
 */
#define STRARRAY(arg, name, array) \
	.arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	.arg_parm      = { [arg] = &strarray__##array, }
1006
1007static struct syscall_fmt {
1008 const char *name;
1009 const char *alias;
1010 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1011 void *arg_parm[6];
1012 bool errmsg;
1013 bool timeout;
1014 bool hexret;
1015} syscall_fmts[] = {
1016 { .name = "access", .errmsg = true,
1017 .arg_scnprintf = { [0] = SCA_FILENAME,
1018 [1] = SCA_ACCMODE, }, },
1019 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
1020 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
1021 { .name = "brk", .hexret = true,
1022 .arg_scnprintf = { [0] = SCA_HEX, }, },
1023 { .name = "chdir", .errmsg = true,
1024 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1025 { .name = "chmod", .errmsg = true,
1026 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1027 { .name = "chroot", .errmsg = true,
1028 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1029 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
1030 { .name = "close", .errmsg = true,
1031 .arg_scnprintf = { [0] = SCA_CLOSE_FD, }, },
1032 { .name = "connect", .errmsg = true, },
1033 { .name = "creat", .errmsg = true,
1034 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1035 { .name = "dup", .errmsg = true,
1036 .arg_scnprintf = { [0] = SCA_FD, }, },
1037 { .name = "dup2", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_FD, }, },
1039 { .name = "dup3", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_FD, }, },
1041 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1042 { .name = "eventfd2", .errmsg = true,
1043 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, }, },
1044 { .name = "faccessat", .errmsg = true,
1045 .arg_scnprintf = { [0] = SCA_FDAT,
1046 [1] = SCA_FILENAME, }, },
1047 { .name = "fadvise64", .errmsg = true,
1048 .arg_scnprintf = { [0] = SCA_FD, }, },
1049 { .name = "fallocate", .errmsg = true,
1050 .arg_scnprintf = { [0] = SCA_FD, }, },
1051 { .name = "fchdir", .errmsg = true,
1052 .arg_scnprintf = { [0] = SCA_FD, }, },
1053 { .name = "fchmod", .errmsg = true,
1054 .arg_scnprintf = { [0] = SCA_FD, }, },
1055 { .name = "fchmodat", .errmsg = true,
1056 .arg_scnprintf = { [0] = SCA_FDAT,
1057 [1] = SCA_FILENAME, }, },
1058 { .name = "fchown", .errmsg = true,
1059 .arg_scnprintf = { [0] = SCA_FD, }, },
1060 { .name = "fchownat", .errmsg = true,
1061 .arg_scnprintf = { [0] = SCA_FDAT,
1062 [1] = SCA_FILENAME, }, },
1063 { .name = "fcntl", .errmsg = true,
1064 .arg_scnprintf = { [0] = SCA_FD,
1065 [1] = SCA_STRARRAY, },
1066 .arg_parm = { [1] = &strarray__fcntl_cmds, }, },
1067 { .name = "fdatasync", .errmsg = true,
1068 .arg_scnprintf = { [0] = SCA_FD, }, },
1069 { .name = "flock", .errmsg = true,
1070 .arg_scnprintf = { [0] = SCA_FD,
1071 [1] = SCA_FLOCK, }, },
1072 { .name = "fsetxattr", .errmsg = true,
1073 .arg_scnprintf = { [0] = SCA_FD, }, },
1074 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1075 .arg_scnprintf = { [0] = SCA_FD, }, },
1076 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1077 .arg_scnprintf = { [0] = SCA_FDAT,
1078 [1] = SCA_FILENAME, }, },
1079 { .name = "fstatfs", .errmsg = true,
1080 .arg_scnprintf = { [0] = SCA_FD, }, },
1081 { .name = "fsync", .errmsg = true,
1082 .arg_scnprintf = { [0] = SCA_FD, }, },
1083 { .name = "ftruncate", .errmsg = true,
1084 .arg_scnprintf = { [0] = SCA_FD, }, },
1085 { .name = "futex", .errmsg = true,
1086 .arg_scnprintf = { [1] = SCA_FUTEX_OP, }, },
1087 { .name = "futimesat", .errmsg = true,
1088 .arg_scnprintf = { [0] = SCA_FDAT,
1089 [1] = SCA_FILENAME, }, },
1090 { .name = "getdents", .errmsg = true,
1091 .arg_scnprintf = { [0] = SCA_FD, }, },
1092 { .name = "getdents64", .errmsg = true,
1093 .arg_scnprintf = { [0] = SCA_FD, }, },
1094 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1095 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1096 { .name = "getxattr", .errmsg = true,
1097 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1098 { .name = "inotify_add_watch", .errmsg = true,
1099 .arg_scnprintf = { [1] = SCA_FILENAME, }, },
1100 { .name = "ioctl", .errmsg = true,
1101 .arg_scnprintf = { [0] = SCA_FD,
1102#if defined(__i386__) || defined(__x86_64__)
1103
1104
1105
1106 [1] = SCA_STRHEXARRAY,
1107 [2] = SCA_HEX, },
1108 .arg_parm = { [1] = &strarray__tioctls, }, },
1109#else
1110 [2] = SCA_HEX, }, },
1111#endif
1112 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
1113 { .name = "kill", .errmsg = true,
1114 .arg_scnprintf = { [1] = SCA_SIGNUM, }, },
1115 { .name = "lchown", .errmsg = true,
1116 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1117 { .name = "lgetxattr", .errmsg = true,
1118 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1119 { .name = "linkat", .errmsg = true,
1120 .arg_scnprintf = { [0] = SCA_FDAT, }, },
1121 { .name = "listxattr", .errmsg = true,
1122 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1123 { .name = "llistxattr", .errmsg = true,
1124 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1125 { .name = "lremovexattr", .errmsg = true,
1126 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1127 { .name = "lseek", .errmsg = true,
1128 .arg_scnprintf = { [0] = SCA_FD,
1129 [2] = SCA_STRARRAY, },
1130 .arg_parm = { [2] = &strarray__whences, }, },
1131 { .name = "lsetxattr", .errmsg = true,
1132 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1133 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1134 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1135 { .name = "lsxattr", .errmsg = true,
1136 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1137 { .name = "madvise", .errmsg = true,
1138 .arg_scnprintf = { [0] = SCA_HEX,
1139 [2] = SCA_MADV_BHV, }, },
1140 { .name = "mkdir", .errmsg = true,
1141 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1142 { .name = "mkdirat", .errmsg = true,
1143 .arg_scnprintf = { [0] = SCA_FDAT,
1144 [1] = SCA_FILENAME, }, },
1145 { .name = "mknod", .errmsg = true,
1146 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1147 { .name = "mknodat", .errmsg = true,
1148 .arg_scnprintf = { [0] = SCA_FDAT,
1149 [1] = SCA_FILENAME, }, },
1150 { .name = "mlock", .errmsg = true,
1151 .arg_scnprintf = { [0] = SCA_HEX, }, },
1152 { .name = "mlockall", .errmsg = true,
1153 .arg_scnprintf = { [0] = SCA_HEX, }, },
1154 { .name = "mmap", .hexret = true,
1155 .arg_scnprintf = { [0] = SCA_HEX,
1156 [2] = SCA_MMAP_PROT,
1157 [3] = SCA_MMAP_FLAGS,
1158 [4] = SCA_FD, }, },
1159 { .name = "mprotect", .errmsg = true,
1160 .arg_scnprintf = { [0] = SCA_HEX,
1161 [2] = SCA_MMAP_PROT, }, },
1162 { .name = "mq_unlink", .errmsg = true,
1163 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1164 { .name = "mremap", .hexret = true,
1165 .arg_scnprintf = { [0] = SCA_HEX,
1166 [3] = SCA_MREMAP_FLAGS,
1167 [4] = SCA_HEX, }, },
1168 { .name = "munlock", .errmsg = true,
1169 .arg_scnprintf = { [0] = SCA_HEX, }, },
1170 { .name = "munmap", .errmsg = true,
1171 .arg_scnprintf = { [0] = SCA_HEX, }, },
1172 { .name = "name_to_handle_at", .errmsg = true,
1173 .arg_scnprintf = { [0] = SCA_FDAT, }, },
1174 { .name = "newfstatat", .errmsg = true,
1175 .arg_scnprintf = { [0] = SCA_FDAT,
1176 [1] = SCA_FILENAME, }, },
1177 { .name = "open", .errmsg = true,
1178 .arg_scnprintf = { [0] = SCA_FILENAME,
1179 [1] = SCA_OPEN_FLAGS, }, },
1180 { .name = "open_by_handle_at", .errmsg = true,
1181 .arg_scnprintf = { [0] = SCA_FDAT,
1182 [2] = SCA_OPEN_FLAGS, }, },
1183 { .name = "openat", .errmsg = true,
1184 .arg_scnprintf = { [0] = SCA_FDAT,
1185 [1] = SCA_FILENAME,
1186 [2] = SCA_OPEN_FLAGS, }, },
1187 { .name = "perf_event_open", .errmsg = true,
1188 .arg_scnprintf = { [1] = SCA_INT,
1189 [2] = SCA_INT,
1190 [3] = SCA_FD,
1191 [4] = SCA_PERF_FLAGS, }, },
1192 { .name = "pipe2", .errmsg = true,
1193 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, }, },
1194 { .name = "poll", .errmsg = true, .timeout = true, },
1195 { .name = "ppoll", .errmsg = true, .timeout = true, },
1196 { .name = "pread", .errmsg = true, .alias = "pread64",
1197 .arg_scnprintf = { [0] = SCA_FD, }, },
1198 { .name = "preadv", .errmsg = true, .alias = "pread",
1199 .arg_scnprintf = { [0] = SCA_FD, }, },
1200 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1201 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1202 .arg_scnprintf = { [0] = SCA_FD, }, },
1203 { .name = "pwritev", .errmsg = true,
1204 .arg_scnprintf = { [0] = SCA_FD, }, },
1205 { .name = "read", .errmsg = true,
1206 .arg_scnprintf = { [0] = SCA_FD, }, },
1207 { .name = "readlink", .errmsg = true,
1208 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1209 { .name = "readlinkat", .errmsg = true,
1210 .arg_scnprintf = { [0] = SCA_FDAT,
1211 [1] = SCA_FILENAME, }, },
1212 { .name = "readv", .errmsg = true,
1213 .arg_scnprintf = { [0] = SCA_FD, }, },
1214 { .name = "recvfrom", .errmsg = true,
1215 .arg_scnprintf = { [0] = SCA_FD,
1216 [3] = SCA_MSG_FLAGS, }, },
1217 { .name = "recvmmsg", .errmsg = true,
1218 .arg_scnprintf = { [0] = SCA_FD,
1219 [3] = SCA_MSG_FLAGS, }, },
1220 { .name = "recvmsg", .errmsg = true,
1221 .arg_scnprintf = { [0] = SCA_FD,
1222 [2] = SCA_MSG_FLAGS, }, },
1223 { .name = "removexattr", .errmsg = true,
1224 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1225 { .name = "renameat", .errmsg = true,
1226 .arg_scnprintf = { [0] = SCA_FDAT, }, },
1227 { .name = "rmdir", .errmsg = true,
1228 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1229 { .name = "rt_sigaction", .errmsg = true,
1230 .arg_scnprintf = { [0] = SCA_SIGNUM, }, },
1231 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1232 { .name = "rt_sigqueueinfo", .errmsg = true,
1233 .arg_scnprintf = { [1] = SCA_SIGNUM, }, },
1234 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1235 .arg_scnprintf = { [2] = SCA_SIGNUM, }, },
1236 { .name = "select", .errmsg = true, .timeout = true, },
1237 { .name = "sendmmsg", .errmsg = true,
1238 .arg_scnprintf = { [0] = SCA_FD,
1239 [3] = SCA_MSG_FLAGS, }, },
1240 { .name = "sendmsg", .errmsg = true,
1241 .arg_scnprintf = { [0] = SCA_FD,
1242 [2] = SCA_MSG_FLAGS, }, },
1243 { .name = "sendto", .errmsg = true,
1244 .arg_scnprintf = { [0] = SCA_FD,
1245 [3] = SCA_MSG_FLAGS, }, },
1246 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1247 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1248 { .name = "setxattr", .errmsg = true,
1249 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1250 { .name = "shutdown", .errmsg = true,
1251 .arg_scnprintf = { [0] = SCA_FD, }, },
1252 { .name = "socket", .errmsg = true,
1253 .arg_scnprintf = { [0] = SCA_STRARRAY,
1254 [1] = SCA_SK_TYPE, },
1255 .arg_parm = { [0] = &strarray__socket_families, }, },
1256 { .name = "socketpair", .errmsg = true,
1257 .arg_scnprintf = { [0] = SCA_STRARRAY,
1258 [1] = SCA_SK_TYPE, },
1259 .arg_parm = { [0] = &strarray__socket_families, }, },
1260 { .name = "stat", .errmsg = true, .alias = "newstat",
1261 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1262 { .name = "statfs", .errmsg = true,
1263 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1264 { .name = "swapoff", .errmsg = true,
1265 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1266 { .name = "swapon", .errmsg = true,
1267 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1268 { .name = "symlinkat", .errmsg = true,
1269 .arg_scnprintf = { [0] = SCA_FDAT, }, },
1270 { .name = "tgkill", .errmsg = true,
1271 .arg_scnprintf = { [2] = SCA_SIGNUM, }, },
1272 { .name = "tkill", .errmsg = true,
1273 .arg_scnprintf = { [1] = SCA_SIGNUM, }, },
1274 { .name = "truncate", .errmsg = true,
1275 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1276 { .name = "uname", .errmsg = true, .alias = "newuname", },
1277 { .name = "unlinkat", .errmsg = true,
1278 .arg_scnprintf = { [0] = SCA_FDAT,
1279 [1] = SCA_FILENAME, }, },
1280 { .name = "utime", .errmsg = true,
1281 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1282 { .name = "utimensat", .errmsg = true,
1283 .arg_scnprintf = { [0] = SCA_FDAT,
1284 [1] = SCA_FILENAME, }, },
1285 { .name = "utimes", .errmsg = true,
1286 .arg_scnprintf = { [0] = SCA_FILENAME, }, },
1287 { .name = "vmsplice", .errmsg = true,
1288 .arg_scnprintf = { [0] = SCA_FD, }, },
1289 { .name = "write", .errmsg = true,
1290 .arg_scnprintf = { [0] = SCA_FD, }, },
1291 { .name = "writev", .errmsg = true,
1292 .arg_scnprintf = { [0] = SCA_FD, }, },
1293};
1294
1295static int syscall_fmt__cmp(const void *name, const void *fmtp)
1296{
1297 const struct syscall_fmt *fmt = fmtp;
1298 return strcmp(name, fmt->name);
1299}
1300
1301static struct syscall_fmt *syscall_fmt__find(const char *name)
1302{
1303 const int nmemb = ARRAY_SIZE(syscall_fmts);
1304 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1305}
1306
/*
 * Per syscall-id description, one slot of trace->syscalls.table.
 * Filled in by trace__read_syscall_info().
 */
struct syscall {
	/* tracepoint format looked up as "syscalls:sys_enter_<name>" (or the alias) */
	struct event_format *tp_format;
	/* number of entries in @args (the leading "nr" field, if any, is not counted) */
	int nr_args;
	/* linked list of argument fields taken from tp_format->format.fields */
	struct format_field *args;
	/* name as returned by audit_syscall_to_name(); NULL means "slot not read yet" */
	const char *name;
	/* true for "exit" and "exit_group" */
	bool is_exit;
	/* matching syscall_fmts entry, NULL when the syscall has none */
	struct syscall_fmt *fmt;
	/* per-argument formatters, allocated in syscall__set_arg_fmts() */
	size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per-argument formatter parameters, borrowed from fmt->arg_parm */
	void **arg_parm;
};
1317
1318static size_t fprintf_duration(unsigned long t, FILE *fp)
1319{
1320 double duration = (double)t / NSEC_PER_MSEC;
1321 size_t printed = fprintf(fp, "(");
1322
1323 if (duration >= 1.0)
1324 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1325 else if (duration >= 0.01)
1326 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1327 else
1328 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1329 return printed + fprintf(fp, "): ");
1330}
1331
1332
1333
1334
1335
1336
/*
 * Per-thread tracing state, kept as the thread's private data
 * (see thread__trace(), which creates it on first use).
 */
struct thread_trace {
	/* sample->time of the most recent sys_enter for this thread */
	u64 entry_time;
	/* sample->time of the most recent sys_exit for this thread */
	u64 exit_time;
	/* set on sys_enter (non-exit syscalls), cleared once the line is printed */
	bool entry_pending;
	/* incremented on every thread__trace() call */
	unsigned long nr_events;
	/* major / minor page fault counters, see trace__pgfault() */
	unsigned long pfmaj, pfmin;
	/* trace__entry_str_size bytes holding the formatted "name(args" text */
	char *entry_str;
	/* accumulated by trace__sched_stat_runtime() */
	double runtime_ms;
	struct {
		/* raw value of the filename argument awaiting vfs_getname; 0 when none */
		unsigned long ptr;
		/* offset into entry_str where the resolved filename is inserted */
		short int entry_str_pos;
		/* set by trace__vfs_getname(), consumed by trace__sys_exit() for open */
		bool pending_open;
		/* longest string @name can hold, not counting the terminating NUL */
		unsigned int namelen;
		/* copy of the last pathname seen by trace__vfs_getname() */
		char *name;
	} filename;
	struct {
		/* highest valid index in @table; -1 while the table is empty */
		int max;
		/* fd -> strdup()'ed pathname; NULL for fds with no known path */
		char **table;
	} paths;

	/* syscall id -> struct stats (in int_node->priv), see thread__update_stats() */
	struct intlist *syscall_stats;
};
1359
1360static struct thread_trace *thread_trace__new(void)
1361{
1362 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1363
1364 if (ttrace)
1365 ttrace->paths.max = -1;
1366
1367 ttrace->syscall_stats = intlist__new(NULL);
1368
1369 return ttrace;
1370}
1371
1372static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1373{
1374 struct thread_trace *ttrace;
1375
1376 if (thread == NULL)
1377 goto fail;
1378
1379 if (thread__priv(thread) == NULL)
1380 thread__set_priv(thread, thread_trace__new());
1381
1382 if (thread__priv(thread) == NULL)
1383 goto fail;
1384
1385 ttrace = thread__priv(thread);
1386 ++ttrace->nr_events;
1387
1388 return ttrace;
1389fail:
1390 color_fprintf(fp, PERF_COLOR_RED,
1391 "WARNING: not enough memory, dropping samples!\n");
1392 return NULL;
1393}
1394
/*
 * Bit flags for major / minor page fault tracing.
 * NOTE(review): presumably stored in trace->trace_pgfaults; the users are
 * outside this part of the file, confirm there.
 */
#define TRACE_PFMAJ (1 << 0)
#define TRACE_PFMIN (1 << 1)

/* Size in bytes of each thread_trace->entry_str buffer (see trace__sys_enter()). */
static const size_t trace__entry_str_size = 2048;
1399
/*
 * Global state of one 'perf trace' session. Embeds the perf_tool so that
 * tool callbacks can get back to it with container_of().
 */
struct trace {
	struct perf_tool tool;
	struct {
		/* machine type passed to the audit_*() name/id translation helpers */
		int machine;
		/* syscall id compared against in trace__sys_exit() to track opened fds */
		int open_id;
	} audit;
	struct {
		/* highest valid index in @table; compared with -1 for "empty" */
		int max;
		/* syscall id -> struct syscall, grown by trace__read_syscall_info() */
		struct syscall *table;
		struct {
			struct perf_evsel *sys_enter,
					  *sys_exit;
		} events;
	} syscalls;
	struct record_opts opts;
	struct perf_evlist *evlist;
	/* host machine, used to find/create threads for samples */
	struct machine *host;
	/* thread of the last sys_enter processed (reference held) */
	struct thread *current;
	/* timestamps are printed relative to this; set from the first sample unless full_time */
	u64 base_time;
	/* stream that all trace output goes to */
	FILE *output;
	unsigned long nr_events;
	/* syscall names given by the user, validated by trace__validate_ev_qualifier() */
	struct strlist *ev_qualifier;
	struct {
		size_t nr;
		/* syscall ids matching ev_qualifier, in list order */
		int *entries;
	} ev_qualifier_ids;
	/* when set, only samples whose tid/pid is listed are processed (see skip_sample()) */
	struct intlist *tid_list;
	struct intlist *pid_list;
	struct {
		size_t nr;
		pid_t *entries;
	} filter_pids;
	/* in milliseconds; syscalls shorter than this are not printed */
	double duration_filter;
	/* total runtime accumulated by trace__sched_stat_runtime() */
	double runtime_ms;
	struct {
		/* fd paths learned via vfs_getname vs. read from /proc */
		u64 vfs_getname,
		    proc_getname;
	} stats;
	bool not_ev_qualifier;
	/* when false, thread__fd_path() does not consult /proc */
	bool live;
	bool full_time;
	bool sched;
	/* print the tid (and comm, if show_comm) in front of each line */
	bool multiple_threads;
	/* collect per-thread syscall statistics on sys_exit */
	bool summary;
	/* suppress per-event output */
	bool summary_only;
	bool show_comm;
	bool show_tool_stats;
	bool trace_syscalls;
	bool force;
	/* filename arguments are filled in later by trace__vfs_getname() */
	bool vfs_getname;
	int trace_pgfaults;
};
1452
1453static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1454{
1455 struct thread_trace *ttrace = thread__priv(thread);
1456
1457 if (fd > ttrace->paths.max) {
1458 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1459
1460 if (npath == NULL)
1461 return -1;
1462
1463 if (ttrace->paths.max != -1) {
1464 memset(npath + ttrace->paths.max + 1, 0,
1465 (fd - ttrace->paths.max) * sizeof(char *));
1466 } else {
1467 memset(npath, 0, (fd + 1) * sizeof(char *));
1468 }
1469
1470 ttrace->paths.table = npath;
1471 ttrace->paths.max = fd;
1472 }
1473
1474 ttrace->paths.table[fd] = strdup(pathname);
1475
1476 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1477}
1478
1479static int thread__read_fd_path(struct thread *thread, int fd)
1480{
1481 char linkname[PATH_MAX], pathname[PATH_MAX];
1482 struct stat st;
1483 int ret;
1484
1485 if (thread->pid_ == thread->tid) {
1486 scnprintf(linkname, sizeof(linkname),
1487 "/proc/%d/fd/%d", thread->pid_, fd);
1488 } else {
1489 scnprintf(linkname, sizeof(linkname),
1490 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1491 }
1492
1493 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1494 return -1;
1495
1496 ret = readlink(linkname, pathname, sizeof(pathname));
1497
1498 if (ret < 0 || ret > st.st_size)
1499 return -1;
1500
1501 pathname[ret] = '\0';
1502 return trace__set_fd_pathname(thread, fd, pathname);
1503}
1504
1505static const char *thread__fd_path(struct thread *thread, int fd,
1506 struct trace *trace)
1507{
1508 struct thread_trace *ttrace = thread__priv(thread);
1509
1510 if (ttrace == NULL)
1511 return NULL;
1512
1513 if (fd < 0)
1514 return NULL;
1515
1516 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1517 if (!trace->live)
1518 return NULL;
1519 ++trace->stats.proc_getname;
1520 if (thread__read_fd_path(thread, fd))
1521 return NULL;
1522 }
1523
1524 return ttrace->paths.table[fd];
1525}
1526
1527static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1528 struct syscall_arg *arg)
1529{
1530 int fd = arg->val;
1531 size_t printed = scnprintf(bf, size, "%d", fd);
1532 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1533
1534 if (path)
1535 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1536
1537 return printed;
1538}
1539
1540static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1541 struct syscall_arg *arg)
1542{
1543 int fd = arg->val;
1544 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1545 struct thread_trace *ttrace = thread__priv(arg->thread);
1546
1547 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1548 zfree(&ttrace->paths.table[fd]);
1549
1550 return printed;
1551}
1552
1553static void thread__set_filename_pos(struct thread *thread, const char *bf,
1554 unsigned long ptr)
1555{
1556 struct thread_trace *ttrace = thread__priv(thread);
1557
1558 ttrace->filename.ptr = ptr;
1559 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1560}
1561
1562static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1563 struct syscall_arg *arg)
1564{
1565 unsigned long ptr = arg->val;
1566
1567 if (!arg->trace->vfs_getname)
1568 return scnprintf(bf, size, "%#x", ptr);
1569
1570 thread__set_filename_pos(arg->thread, bf, ptr);
1571 return 0;
1572}
1573
1574static bool trace__filter_duration(struct trace *trace, double t)
1575{
1576 return t < (trace->duration_filter * NSEC_PER_MSEC);
1577}
1578
1579static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1580{
1581 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1582
1583 return fprintf(fp, "%10.3f ", ts);
1584}
1585
/*
 * Flags set asynchronously by sig_handler() and polled by the main loop.
 * volatile sig_atomic_t is the only object type ISO C guarantees can be
 * safely written from a signal handler and read from normal code.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and record whether the signal was
 * SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1594
1595static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1596 u64 duration, u64 tstamp, FILE *fp)
1597{
1598 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1599 printed += fprintf_duration(duration, fp);
1600
1601 if (trace->multiple_threads) {
1602 if (trace->show_comm)
1603 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1604 printed += fprintf(fp, "%d ", thread->tid);
1605 }
1606
1607 return printed;
1608}
1609
1610static int trace__process_event(struct trace *trace, struct machine *machine,
1611 union perf_event *event, struct perf_sample *sample)
1612{
1613 int ret = 0;
1614
1615 switch (event->header.type) {
1616 case PERF_RECORD_LOST:
1617 color_fprintf(trace->output, PERF_COLOR_RED,
1618 "LOST %" PRIu64 " events!\n", event->lost.lost);
1619 ret = machine__process_lost_event(machine, event, sample);
1620 default:
1621 ret = machine__process_event(machine, event, sample);
1622 break;
1623 }
1624
1625 return ret;
1626}
1627
1628static int trace__tool_process(struct perf_tool *tool,
1629 union perf_event *event,
1630 struct perf_sample *sample,
1631 struct machine *machine)
1632{
1633 struct trace *trace = container_of(tool, struct trace, tool);
1634 return trace__process_event(trace, machine, event, sample);
1635}
1636
1637static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1638{
1639 int err = symbol__init(NULL);
1640
1641 if (err)
1642 return err;
1643
1644 trace->host = machine__new_host();
1645 if (trace->host == NULL)
1646 return -ENOMEM;
1647
1648 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1649 return -errno;
1650
1651 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1652 evlist->threads, trace__tool_process, false,
1653 trace->opts.proc_map_timeout);
1654 if (err)
1655 symbol__exit();
1656
1657 return err;
1658}
1659
1660static int syscall__set_arg_fmts(struct syscall *sc)
1661{
1662 struct format_field *field;
1663 int idx = 0;
1664
1665 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1666 if (sc->arg_scnprintf == NULL)
1667 return -1;
1668
1669 if (sc->fmt)
1670 sc->arg_parm = sc->fmt->arg_parm;
1671
1672 for (field = sc->args; field; field = field->next) {
1673 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1674 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1675 else if (field->flags & FIELD_IS_POINTER)
1676 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1677 ++idx;
1678 }
1679
1680 return 0;
1681}
1682
1683static int trace__read_syscall_info(struct trace *trace, int id)
1684{
1685 char tp_name[128];
1686 struct syscall *sc;
1687 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1688
1689 if (name == NULL)
1690 return -1;
1691
1692 if (id > trace->syscalls.max) {
1693 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1694
1695 if (nsyscalls == NULL)
1696 return -1;
1697
1698 if (trace->syscalls.max != -1) {
1699 memset(nsyscalls + trace->syscalls.max + 1, 0,
1700 (id - trace->syscalls.max) * sizeof(*sc));
1701 } else {
1702 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1703 }
1704
1705 trace->syscalls.table = nsyscalls;
1706 trace->syscalls.max = id;
1707 }
1708
1709 sc = trace->syscalls.table + id;
1710 sc->name = name;
1711
1712 sc->fmt = syscall_fmt__find(sc->name);
1713
1714 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1715 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1716
1717 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1718 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1719 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1720 }
1721
1722 if (IS_ERR(sc->tp_format))
1723 return -1;
1724
1725 sc->args = sc->tp_format->format.fields;
1726 sc->nr_args = sc->tp_format->format.nr_fields;
1727
1728 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1729 sc->args = sc->args->next;
1730 --sc->nr_args;
1731 }
1732
1733 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1734
1735 return syscall__set_arg_fmts(sc);
1736}
1737
1738static int trace__validate_ev_qualifier(struct trace *trace)
1739{
1740 int err = 0, i;
1741 struct str_node *pos;
1742
1743 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1744 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1745 sizeof(trace->ev_qualifier_ids.entries[0]));
1746
1747 if (trace->ev_qualifier_ids.entries == NULL) {
1748 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1749 trace->output);
1750 err = -EINVAL;
1751 goto out;
1752 }
1753
1754 i = 0;
1755
1756 strlist__for_each(pos, trace->ev_qualifier) {
1757 const char *sc = pos->s;
1758 int id = audit_name_to_syscall(sc, trace->audit.machine);
1759
1760 if (id < 0) {
1761 if (err == 0) {
1762 fputs("Error:\tInvalid syscall ", trace->output);
1763 err = -EINVAL;
1764 } else {
1765 fputs(", ", trace->output);
1766 }
1767
1768 fputs(sc, trace->output);
1769 }
1770
1771 trace->ev_qualifier_ids.entries[i++] = id;
1772 }
1773
1774 if (err < 0) {
1775 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1776 "\nHint:\tand: 'man syscalls'\n", trace->output);
1777 zfree(&trace->ev_qualifier_ids.entries);
1778 trace->ev_qualifier_ids.nr = 0;
1779 }
1780out:
1781 return err;
1782}
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
/*
 * Format the arguments of syscall @sc into @bf (at most @size bytes).
 *
 * @args points at the raw argument area of the sample, laid out as
 * consecutive unsigned longs. When the syscall's argument fields are known
 * each one is printed as "name: value" using its formatter, if it has one;
 * otherwise six generic "argN: value" entries are printed.
 *
 * Returns the number of characters written.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;
	unsigned char *p;
	unsigned long val;

	if (sc->args != NULL) {
		struct format_field *field;
		/* bit tracks arg.idx as a mask bit, to test against arg.mask */
		u8 bit = 1;
		struct syscall_arg arg = {
			.idx = 0,
			.mask = 0,
			.trace = trace,
			.thread = thread,
		};

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			/* skip arguments whose bit is set in arg.mask */
			if (arg.mask & bit)
				continue;

			/* copy out with memcpy(): @args may not be suitably aligned */
			p = args + sizeof(unsigned long) * arg.idx;
			memcpy(&val, p, sizeof(val));

			/*
			 * Suppress this argument if its value is zero, unless
			 * it is formatted through a string array that has a
			 * table associated with it.
			 */
			if (val == 0 &&
			    !(sc->arg_scnprintf &&
			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
			      sc->arg_parm[arg.idx]))
				continue;

			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
				arg.val = val;
				if (sc->arg_parm)
					arg.parm = sc->arg_parm[arg.idx];
				printed += sc->arg_scnprintf[arg.idx](bf + printed,
								      size - printed, &arg);
			} else {
				printed += scnprintf(bf + printed, size - printed,
						     "%ld", val);
			}
		}
	} else {
		int i = 0;

		/* no field information: dump six raw arguments */
		while (i < 6) {
			/* copy out with memcpy(): @args may not be suitably aligned */
			p = args + sizeof(unsigned long) * i;
			memcpy(&val, p, sizeof(val));
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, val);
			++i;
		}
	}

	return printed;
}
1860
/*
 * Signature of the per-event sample handlers in this file (trace__sys_enter(),
 * trace__sys_exit(), trace__pgfault(), ...), as stored in evsel->handler.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1864
1865static struct syscall *trace__syscall_info(struct trace *trace,
1866 struct perf_evsel *evsel, int id)
1867{
1868
1869 if (id < 0) {
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881 if (verbose > 1) {
1882 static u64 n;
1883 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1884 id, perf_evsel__name(evsel), ++n);
1885 }
1886 return NULL;
1887 }
1888
1889 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1890 trace__read_syscall_info(trace, id))
1891 goto out_cant_read;
1892
1893 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1894 goto out_cant_read;
1895
1896 return &trace->syscalls.table[id];
1897
1898out_cant_read:
1899 if (verbose) {
1900 fprintf(trace->output, "Problems reading syscall %d", id);
1901 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1902 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1903 fputs(" information\n", trace->output);
1904 }
1905 return NULL;
1906}
1907
1908static void thread__update_stats(struct thread_trace *ttrace,
1909 int id, struct perf_sample *sample)
1910{
1911 struct int_node *inode;
1912 struct stats *stats;
1913 u64 duration = 0;
1914
1915 inode = intlist__findnew(ttrace->syscall_stats, id);
1916 if (inode == NULL)
1917 return;
1918
1919 stats = inode->priv;
1920 if (stats == NULL) {
1921 stats = malloc(sizeof(struct stats));
1922 if (stats == NULL)
1923 return;
1924 init_stats(stats);
1925 inode->priv = stats;
1926 }
1927
1928 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1929 duration = sample->time - ttrace->entry_time;
1930
1931 update_stats(stats, duration);
1932}
1933
1934static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1935{
1936 struct thread_trace *ttrace;
1937 u64 duration;
1938 size_t printed;
1939
1940 if (trace->current == NULL)
1941 return 0;
1942
1943 ttrace = thread__priv(trace->current);
1944
1945 if (!ttrace->entry_pending)
1946 return 0;
1947
1948 duration = sample->time - ttrace->entry_time;
1949
1950 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1951 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1952 ttrace->entry_pending = false;
1953
1954 return printed;
1955}
1956
/*
 * raw_syscalls:sys_enter handler.
 *
 * Formats "name(args" into the thread's entry_str. For exit/exit_group the
 * line is printed right away, since no sys_exit will follow; for any other
 * syscall it is left pending until trace__sys_exit() (or an intervening
 * event) prints it.
 *
 * Returns 0 on success, -1 if the syscall is unknown or memory is short.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	/* reference taken here is dropped at out_put */
	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* the entry buffer is allocated once per thread and then reused */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	/* flush a still-pending entry of the current thread before starting a new one */
	if (!trace->summary_only)
		trace__printf_interrupted_entry(trace, sample);

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, trace, thread);

	if (sc->is_exit) {
		if (!trace->duration_filter && !trace->summary_only) {
			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
		}
	} else {
		ttrace->entry_pending = true;
		/* see trace__vfs_getname() and trace__sys_exit() */
		ttrace->filename.pending_open = false;
	}

	/* trace->current holds its own reference to the thread */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2015
/*
 * raw_syscalls:sys_exit handler.
 *
 * Updates the per-thread statistics (when a summary was requested), records
 * the path of a successfully opened fd, and prints the completed syscall
 * line with its return value unless filtered out by duration or by
 * summary_only.
 *
 * Returns 0 on success, -1 if the syscall is unknown or memory is short.
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	/* reference taken here is dropped at out_put */
	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/* a successful open: associate the returned fd with the name seen by vfs_getname */
	if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	ttrace->exit_time = sample->time;

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
	} else if (trace->duration_filter)
		/* no entry time means no duration: drop it when filtering by duration */
		goto out;

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		/* the entry line was already printed (interrupted), so mark this as its continuation */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	if (sc->fmt == NULL) {
signed_print:
		/* default: plain signed return value; also reached by goto from below */
		fprintf(trace->output, ") = %ld", ret);
	} else if (ret < 0 && sc->fmt->errmsg) {
		char bf[STRERR_BUFSIZE];
		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#lx", ret);
	else
		goto signed_print;

	fputc('\n', trace->output);
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2092
2093static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2094 union perf_event *event __maybe_unused,
2095 struct perf_sample *sample)
2096{
2097 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2098 struct thread_trace *ttrace;
2099 size_t filename_len, entry_str_len, to_move;
2100 ssize_t remaining_space;
2101 char *pos;
2102 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2103
2104 if (!thread)
2105 goto out;
2106
2107 ttrace = thread__priv(thread);
2108 if (!ttrace)
2109 goto out;
2110
2111 filename_len = strlen(filename);
2112
2113 if (ttrace->filename.namelen < filename_len) {
2114 char *f = realloc(ttrace->filename.name, filename_len + 1);
2115
2116 if (f == NULL)
2117 goto out;
2118
2119 ttrace->filename.namelen = filename_len;
2120 ttrace->filename.name = f;
2121 }
2122
2123 strcpy(ttrace->filename.name, filename);
2124 ttrace->filename.pending_open = true;
2125
2126 if (!ttrace->filename.ptr)
2127 goto out;
2128
2129 entry_str_len = strlen(ttrace->entry_str);
2130 remaining_space = trace__entry_str_size - entry_str_len - 1;
2131 if (remaining_space <= 0)
2132 goto out;
2133
2134 if (filename_len > (size_t)remaining_space) {
2135 filename += filename_len - remaining_space;
2136 filename_len = remaining_space;
2137 }
2138
2139 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1;
2140 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2141 memmove(pos + filename_len, pos, to_move);
2142 memcpy(pos, filename, filename_len);
2143
2144 ttrace->filename.ptr = 0;
2145 ttrace->filename.entry_str_pos = 0;
2146out:
2147 return 0;
2148}
2149
2150static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2151 union perf_event *event __maybe_unused,
2152 struct perf_sample *sample)
2153{
2154 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2155 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2156 struct thread *thread = machine__findnew_thread(trace->host,
2157 sample->pid,
2158 sample->tid);
2159 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2160
2161 if (ttrace == NULL)
2162 goto out_dump;
2163
2164 ttrace->runtime_ms += runtime_ms;
2165 trace->runtime_ms += runtime_ms;
2166 thread__put(thread);
2167 return 0;
2168
2169out_dump:
2170 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2171 evsel->name,
2172 perf_evsel__strval(evsel, sample, "comm"),
2173 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2174 runtime,
2175 perf_evsel__intval(evsel, sample, "vruntime"));
2176 thread__put(thread);
2177 return 0;
2178}
2179
2180static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2181 union perf_event *event __maybe_unused,
2182 struct perf_sample *sample)
2183{
2184 trace__printf_interrupted_entry(trace, sample);
2185 trace__fprintf_tstamp(trace, sample->time, trace->output);
2186
2187 if (trace->trace_syscalls)
2188 fprintf(trace->output, "( ): ");
2189
2190 fprintf(trace->output, "%s:", evsel->name);
2191
2192 if (evsel->tp_format) {
2193 event_format__fprintf(evsel->tp_format, sample->cpu,
2194 sample->raw_data, sample->raw_size,
2195 trace->output);
2196 }
2197
2198 fprintf(trace->output, ")\n");
2199 return 0;
2200}
2201
2202static void print_location(FILE *f, struct perf_sample *sample,
2203 struct addr_location *al,
2204 bool print_dso, bool print_sym)
2205{
2206
2207 if ((verbose || print_dso) && al->map)
2208 fprintf(f, "%s@", al->map->dso->long_name);
2209
2210 if ((verbose || print_sym) && al->sym)
2211 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2212 al->addr - al->sym->start);
2213 else if (al->map)
2214 fprintf(f, "0x%" PRIx64, al->addr);
2215 else
2216 fprintf(f, "0x%" PRIx64, sample->addr);
2217}
2218
2219static int trace__pgfault(struct trace *trace,
2220 struct perf_evsel *evsel,
2221 union perf_event *event,
2222 struct perf_sample *sample)
2223{
2224 struct thread *thread;
2225 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2226 struct addr_location al;
2227 char map_type = 'd';
2228 struct thread_trace *ttrace;
2229 int err = -1;
2230
2231 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2232 ttrace = thread__trace(thread, trace->output);
2233 if (ttrace == NULL)
2234 goto out_put;
2235
2236 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2237 ttrace->pfmaj++;
2238 else
2239 ttrace->pfmin++;
2240
2241 if (trace->summary_only)
2242 goto out;
2243
2244 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2245 sample->ip, &al);
2246
2247 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2248
2249 fprintf(trace->output, "%sfault [",
2250 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2251 "maj" : "min");
2252
2253 print_location(trace->output, sample, &al, false, true);
2254
2255 fprintf(trace->output, "] => ");
2256
2257 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2258 sample->addr, &al);
2259
2260 if (!al.map) {
2261 thread__find_addr_location(thread, cpumode,
2262 MAP__FUNCTION, sample->addr, &al);
2263
2264 if (al.map)
2265 map_type = 'x';
2266 else
2267 map_type = '?';
2268 }
2269
2270 print_location(trace->output, sample, &al, true, false);
2271
2272 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2273out:
2274 err = 0;
2275out_put:
2276 thread__put(thread);
2277 return err;
2278}
2279
2280static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2281{
2282 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2283 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2284 return false;
2285
2286 if (trace->pid_list || trace->tid_list)
2287 return true;
2288
2289 return false;
2290}
2291
2292static int trace__process_sample(struct perf_tool *tool,
2293 union perf_event *event,
2294 struct perf_sample *sample,
2295 struct perf_evsel *evsel,
2296 struct machine *machine __maybe_unused)
2297{
2298 struct trace *trace = container_of(tool, struct trace, tool);
2299 int err = 0;
2300
2301 tracepoint_handler handler = evsel->handler;
2302
2303 if (skip_sample(trace, sample))
2304 return 0;
2305
2306 if (!trace->full_time && trace->base_time == 0)
2307 trace->base_time = sample->time;
2308
2309 if (handler) {
2310 ++trace->nr_events;
2311 handler(trace, evsel, event, sample);
2312 }
2313
2314 return err;
2315}
2316
2317static int parse_target_str(struct trace *trace)
2318{
2319 if (trace->opts.target.pid) {
2320 trace->pid_list = intlist__new(trace->opts.target.pid);
2321 if (trace->pid_list == NULL) {
2322 pr_err("Error parsing process id string\n");
2323 return -EINVAL;
2324 }
2325 }
2326
2327 if (trace->opts.target.tid) {
2328 trace->tid_list = intlist__new(trace->opts.target.tid);
2329 if (trace->tid_list == NULL) {
2330 pr_err("Error parsing thread id string\n");
2331 return -EINVAL;
2332 }
2333 }
2334
2335 return 0;
2336}
2337
2338static int trace__record(struct trace *trace, int argc, const char **argv)
2339{
2340 unsigned int rec_argc, i, j;
2341 const char **rec_argv;
2342 const char * const record_args[] = {
2343 "record",
2344 "-R",
2345 "-m", "1024",
2346 "-c", "1",
2347 };
2348
2349 const char * const sc_args[] = { "-e", };
2350 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2351 const char * const majpf_args[] = { "-e", "major-faults" };
2352 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2353 const char * const minpf_args[] = { "-e", "minor-faults" };
2354 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2355
2356
2357 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2358 majpf_args_nr + minpf_args_nr + argc;
2359 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2360
2361 if (rec_argv == NULL)
2362 return -ENOMEM;
2363
2364 j = 0;
2365 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2366 rec_argv[j++] = record_args[i];
2367
2368 if (trace->trace_syscalls) {
2369 for (i = 0; i < sc_args_nr; i++)
2370 rec_argv[j++] = sc_args[i];
2371
2372
2373 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2374 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2375 else if (is_valid_tracepoint("syscalls:sys_enter"))
2376 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2377 else {
2378 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2379 return -1;
2380 }
2381 }
2382
2383 if (trace->trace_pgfaults & TRACE_PFMAJ)
2384 for (i = 0; i < majpf_args_nr; i++)
2385 rec_argv[j++] = majpf_args[i];
2386
2387 if (trace->trace_pgfaults & TRACE_PFMIN)
2388 for (i = 0; i < minpf_args_nr; i++)
2389 rec_argv[j++] = minpf_args[i];
2390
2391 for (i = 0; i < (unsigned int)argc; i++)
2392 rec_argv[j++] = argv[i];
2393
2394 return cmd_record(j, rec_argv, NULL);
2395}
2396
2397static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2398
2399static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2400{
2401 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2402
2403 if (IS_ERR(evsel))
2404 return false;
2405
2406 if (perf_evsel__field(evsel, "pathname") == NULL) {
2407 perf_evsel__delete(evsel);
2408 return false;
2409 }
2410
2411 evsel->handler = trace__vfs_getname;
2412 perf_evlist__add(evlist, evsel);
2413 return true;
2414}
2415
2416static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2417 u64 config)
2418{
2419 struct perf_evsel *evsel;
2420 struct perf_event_attr attr = {
2421 .type = PERF_TYPE_SOFTWARE,
2422 .mmap_data = 1,
2423 };
2424
2425 attr.config = config;
2426 attr.sample_period = 1;
2427
2428 event_attr_init(&attr);
2429
2430 evsel = perf_evsel__new(&attr);
2431 if (!evsel)
2432 return -ENOMEM;
2433
2434 evsel->handler = trace__pgfault;
2435 perf_evlist__add(evlist, evsel);
2436
2437 return 0;
2438}
2439
2440static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2441{
2442 const u32 type = event->header.type;
2443 struct perf_evsel *evsel;
2444
2445 if (!trace->full_time && trace->base_time == 0)
2446 trace->base_time = sample->time;
2447
2448 if (type != PERF_RECORD_SAMPLE) {
2449 trace__process_event(trace, trace->host, event, sample);
2450 return;
2451 }
2452
2453 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2454 if (evsel == NULL) {
2455 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2456 return;
2457 }
2458
2459 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2460 sample->raw_data == NULL) {
2461 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2462 perf_evsel__name(evsel), sample->tid,
2463 sample->cpu, sample->raw_size);
2464 } else {
2465 tracepoint_handler handler = evsel->handler;
2466 handler(trace, evsel, event, sample);
2467 }
2468}
2469
2470static int trace__add_syscall_newtp(struct trace *trace)
2471{
2472 int ret = -1;
2473 struct perf_evlist *evlist = trace->evlist;
2474 struct perf_evsel *sys_enter, *sys_exit;
2475
2476 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2477 if (sys_enter == NULL)
2478 goto out;
2479
2480 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2481 goto out_delete_sys_enter;
2482
2483 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2484 if (sys_exit == NULL)
2485 goto out_delete_sys_enter;
2486
2487 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2488 goto out_delete_sys_exit;
2489
2490 perf_evlist__add(evlist, sys_enter);
2491 perf_evlist__add(evlist, sys_exit);
2492
2493 trace->syscalls.events.sys_enter = sys_enter;
2494 trace->syscalls.events.sys_exit = sys_exit;
2495
2496 ret = 0;
2497out:
2498 return ret;
2499
2500out_delete_sys_exit:
2501 perf_evsel__delete_priv(sys_exit);
2502out_delete_sys_enter:
2503 perf_evsel__delete_priv(sys_enter);
2504 goto out;
2505}
2506
2507static int trace__set_ev_qualifier_filter(struct trace *trace)
2508{
2509 int err = -1;
2510 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2511 trace->ev_qualifier_ids.nr,
2512 trace->ev_qualifier_ids.entries);
2513
2514 if (filter == NULL)
2515 goto out_enomem;
2516
2517 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2518 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2519
2520 free(filter);
2521out:
2522 return err;
2523out_enomem:
2524 errno = ENOMEM;
2525 goto out;
2526}
2527
/*
 * Live tracing.
 *
 * Adds the requested events (syscalls, vfs_getname probe, page faults,
 * sched_stat_runtime) to trace->evlist, creates the target maps, optionally
 * prepares the workload given in @argv (when @argc > 0), opens, filters and
 * mmaps the events, then reads and dispatches events until interrupted or
 * until there is nothing left to read.
 *
 * Every setup failure prints a message to trace->output.  On all paths the
 * evlist is deleted and trace->evlist is reset to NULL before returning.
 *
 * NOTE(review): err is also overwritten by perf_evlist__parse_sample() inside
 * the read loop, so after a normal run the returned value (and whether the
 * summary is printed) depends on the last sample parsed.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *evsel;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;
	bool draining = false;

	trace->live = true;

	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
		goto out_error_raw_syscalls;

	/* Optional: the result only records whether the probe is available. */
	if (trace->trace_syscalls)
		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);

	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
		goto out_error_mem;
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
		goto out_error_mem;

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts);

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	/*
	 * Install pid filters: the explicit --filter-pids list if one was
	 * given, otherwise, when the first thread map entry is -1, this
	 * process' own pid.
	 * NOTE(review): presumably this keeps perf from tracing itself --
	 * confirm against perf_evlist__set_filter_pid().
	 */
	if (trace->filter_pids.nr > 0)
		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
	else if (thread_map__pid(evlist->threads, 0) == -1)
		err = perf_evlist__set_filter_pid(evlist, getpid());

	if (err < 0)
		goto out_error_mem;

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		pr_debug("event qualifier tracepoint filter: %s\n",
			 trace->syscalls.events.sys_exit->filter);
	}

	/* On failure evsel is what the error message below reports on. */
	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target))
		perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
				  evlist->threads->nr > 1 ||
				  perf_evlist__first(evlist)->attr.inherit;
again:
	/* Snapshot used below to tell whether this pass read anything. */
	before = trace->nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			trace__handle_event(trace, event, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;

			/* Once done: stop producing events, keep consuming what is buffered. */
			if (done && !draining) {
				perf_evlist__disable(evlist);
				draining = true;
			}
		}
	}

	if (trace->nr_events == before) {
		/* Nothing was read: wait for more, forever unless already done. */
		int timeout = done ? 100 : -1;

		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
			/* No pollable fds left after dropping errored/hung-up ones. */
			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
				draining = true;

			goto again;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	perf_evlist__delete(evlist);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * Error labels that need a scratch buffer live in their own block so that
 * errbuf is only declared for them; it is reached exclusively through gotos.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		strerror_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
2733
2734static int trace__replay(struct trace *trace)
2735{
2736 const struct perf_evsel_str_handler handlers[] = {
2737 { "probe:vfs_getname", trace__vfs_getname, },
2738 };
2739 struct perf_data_file file = {
2740 .path = input_name,
2741 .mode = PERF_DATA_MODE_READ,
2742 .force = trace->force,
2743 };
2744 struct perf_session *session;
2745 struct perf_evsel *evsel;
2746 int err = -1;
2747
2748 trace->tool.sample = trace__process_sample;
2749 trace->tool.mmap = perf_event__process_mmap;
2750 trace->tool.mmap2 = perf_event__process_mmap2;
2751 trace->tool.comm = perf_event__process_comm;
2752 trace->tool.exit = perf_event__process_exit;
2753 trace->tool.fork = perf_event__process_fork;
2754 trace->tool.attr = perf_event__process_attr;
2755 trace->tool.tracing_data = perf_event__process_tracing_data;
2756 trace->tool.build_id = perf_event__process_build_id;
2757
2758 trace->tool.ordered_events = true;
2759 trace->tool.ordering_requires_timestamps = true;
2760
2761
2762 trace->multiple_threads = true;
2763
2764 session = perf_session__new(&file, false, &trace->tool);
2765 if (session == NULL)
2766 return -1;
2767
2768 if (symbol__init(&session->header.env) < 0)
2769 goto out;
2770
2771 trace->host = &session->machines.host;
2772
2773 err = perf_session__set_tracepoints_handlers(session, handlers);
2774 if (err)
2775 goto out;
2776
2777 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2778 "raw_syscalls:sys_enter");
2779
2780 if (evsel == NULL)
2781 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2782 "syscalls:sys_enter");
2783
2784 if (evsel &&
2785 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2786 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2787 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2788 goto out;
2789 }
2790
2791 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2792 "raw_syscalls:sys_exit");
2793 if (evsel == NULL)
2794 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2795 "syscalls:sys_exit");
2796 if (evsel &&
2797 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2798 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2799 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2800 goto out;
2801 }
2802
2803 evlist__for_each(session->evlist, evsel) {
2804 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2805 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2806 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2807 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2808 evsel->handler = trace__pgfault;
2809 }
2810
2811 err = parse_target_str(trace);
2812 if (err != 0)
2813 goto out;
2814
2815 setup_pager();
2816
2817 err = perf_session__process_events(session);
2818 if (err)
2819 pr_err("Failed to process events, error %d", err);
2820
2821 else if (trace->summary)
2822 trace__fprintf_thread_summary(trace, trace->output);
2823
2824out:
2825 perf_session__delete(session);
2826
2827 return err;
2828}
2829
/* Print the heading of the per-thread summary; returns fprintf()'s result. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2838
2839static size_t thread__dump_stats(struct thread_trace *ttrace,
2840 struct trace *trace, FILE *fp)
2841{
2842 struct stats *stats;
2843 size_t printed = 0;
2844 struct syscall *sc;
2845 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2846
2847 if (inode == NULL)
2848 return 0;
2849
2850 printed += fprintf(fp, "\n");
2851
2852 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2853 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2854 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2855
2856
2857 while (inode) {
2858 stats = inode->priv;
2859 if (stats) {
2860 double min = (double)(stats->min) / NSEC_PER_MSEC;
2861 double max = (double)(stats->max) / NSEC_PER_MSEC;
2862 double avg = avg_stats(stats);
2863 double pct;
2864 u64 n = (u64) stats->n;
2865
2866 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2867 avg /= NSEC_PER_MSEC;
2868
2869 sc = &trace->syscalls.table[inode->i];
2870 printed += fprintf(fp, " %-15s", sc->name);
2871 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2872 n, avg * n, min, avg);
2873 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2874 }
2875
2876 inode = intlist__next(inode);
2877 }
2878
2879 printed += fprintf(fp, "\n\n");
2880
2881 return printed;
2882}
2883
2884
/*
 * Context handed to trace__fprintf_one_thread() through
 * machine__for_each_thread()'s private pointer.
 */
struct summary_data {
	FILE *fp;		/* where the summary is written */
	struct trace *trace;	/* tracer state (event totals, syscall table) */
	size_t printed;		/* running total of the fprintf() results */
};
2890
2891static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2892{
2893 struct summary_data *data = priv;
2894 FILE *fp = data->fp;
2895 size_t printed = data->printed;
2896 struct trace *trace = data->trace;
2897 struct thread_trace *ttrace = thread__priv(thread);
2898 double ratio;
2899
2900 if (ttrace == NULL)
2901 return 0;
2902
2903 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2904
2905 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2906 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2907 printed += fprintf(fp, "%.1f%%", ratio);
2908 if (ttrace->pfmaj)
2909 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2910 if (ttrace->pfmin)
2911 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2912 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2913 printed += thread__dump_stats(ttrace, trace, fp);
2914
2915 data->printed += printed;
2916
2917 return 0;
2918}
2919
2920static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2921{
2922 struct summary_data data = {
2923 .fp = fp,
2924 .trace = trace
2925 };
2926 data.printed = trace__fprintf_threads_header(fp);
2927
2928 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2929
2930 return data.printed;
2931}
2932
2933static int trace__set_duration(const struct option *opt, const char *str,
2934 int unset __maybe_unused)
2935{
2936 struct trace *trace = opt->value;
2937
2938 trace->duration_filter = atof(str);
2939 return 0;
2940}
2941
2942static int trace__set_filter_pids(const struct option *opt, const char *str,
2943 int unset __maybe_unused)
2944{
2945 int ret = -1;
2946 size_t i;
2947 struct trace *trace = opt->value;
2948
2949
2950
2951
2952 struct intlist *list = intlist__new(str);
2953
2954 if (list == NULL)
2955 return -1;
2956
2957 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2958 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2959
2960 if (trace->filter_pids.entries == NULL)
2961 goto out;
2962
2963 trace->filter_pids.entries[0] = getpid();
2964
2965 for (i = 1; i < trace->filter_pids.nr; ++i)
2966 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2967
2968 intlist__delete(list);
2969 ret = 0;
2970out:
2971 return ret;
2972}
2973
2974static int trace__open_output(struct trace *trace, const char *filename)
2975{
2976 struct stat st;
2977
2978 if (!stat(filename, &st) && st.st_size) {
2979 char oldname[PATH_MAX];
2980
2981 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2982 unlink(oldname);
2983 rename(filename, oldname);
2984 }
2985
2986 trace->output = fopen(filename, "w");
2987
2988 return trace->output == NULL ? -errno : 0;
2989}
2990
2991static int parse_pagefaults(const struct option *opt, const char *str,
2992 int unset __maybe_unused)
2993{
2994 int *trace_pgfaults = opt->value;
2995
2996 if (strcmp(str, "all") == 0)
2997 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2998 else if (strcmp(str, "maj") == 0)
2999 *trace_pgfaults |= TRACE_PFMAJ;
3000 else if (strcmp(str, "min") == 0)
3001 *trace_pgfaults |= TRACE_PFMIN;
3002 else
3003 return -1;
3004
3005 return 0;
3006}
3007
3008static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3009{
3010 struct perf_evsel *evsel;
3011
3012 evlist__for_each(evlist, evsel)
3013 evsel->handler = handler;
3014}
3015
/*
 * Entry point of 'perf trace'.
 *
 * Parses the command line, then either hands over to 'perf trace record',
 * replays an input file (-i) or traces live, optionally running the command
 * left in @argv after option parsing.
 *
 * Returns the result of the selected mode, or a negative value when setup
 * fails.
 */
int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char *trace_usage[] = {
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
		"perf trace record [<options>] [<command>]",
		"perf trace record [<options>] -- <command> [<options>]",
		NULL
	};
	/* Defaults; every field not named here starts out zeroed. */
	struct trace trace = {
		.audit = {
			.machine = audit_detect_machine(),
			.open_id = audit_name_to_syscall("open", trace.audit.machine),
		},
		.syscalls = {
			. max = -1,
		},
		.opts = {
			.target = {
				.uid	   = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq     = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_buffering  = true,
			.mmap_pages    = UINT_MAX,
			.proc_map_timeout  = 500,
		},
		.output = stderr,
		.show_comm = true,
		.trace_syscalls = true,
	};
	const char *output_name = NULL;
	const char *ev_qualifier_str = NULL;
	const struct option trace_options[] = {
	OPT_CALLBACK(0, "event", &trace.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_BOOLEAN(0, "comm", &trace.show_comm,
		    "show the thread COMM next to its id"),
	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		    "trace events on existing process id"),
	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
		    "trace events on existing thread id"),
	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
		     "pids to filter (by the kernel)", trace__set_filter_pids),
	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
		   "user to profile"),
	OPT_CALLBACK(0, "duration", &trace, "float",
		     "show only events with duration > N.M ms",
		     trace__set_duration),
	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
	OPT_BOOLEAN('T', "time", &trace.full_time,
		    "Show full timestamp, not time relative to first start"),
	OPT_BOOLEAN('s', "summary", &trace.summary_only,
		    "Show only syscall summary with statistics"),
	OPT_BOOLEAN('S', "with-summary", &trace.summary,
		    "Show all syscalls and summary with statistics"),
	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
		     "Trace pagefaults", parse_pagefaults, "maj"),
	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_END()
	};
	const char * const trace_subcommands[] = { "record", NULL };
	int err;
	char bf[BUFSIZ];

	signal(SIGSEGV, sighandler_dump_stack);
	signal(SIGFPE, sighandler_dump_stack);

	/* Must exist before option parsing: --event adds to it. */
	trace.evlist = perf_evlist__new();

	if (trace.evlist == NULL) {
		pr_err("Not enough memory to run!\n");
		err = -ENOMEM;
		goto out;
	}

	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);

	/* Page-fault output needs the faulting address and a timestamp. */
	if (trace.trace_pgfaults) {
		trace.opts.sample_address = true;
		trace.opts.sample_time = true;
	}

	/* Whatever is in the evlist at this point came from --event. */
	if (trace.evlist->nr_entries > 0)
		evlist__set_evsel_handler(trace.evlist, trace__event_handler);

	/* 'perf trace record ...': skip the subcommand word itself. */
	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
		return trace__record(&trace, argc-1, &argv[1]);

	/* summary_only implies summary */
	if (trace.summary_only)
		trace.summary = trace.summary_only;

	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
	    trace.evlist->nr_entries == 0 ) {
		pr_err("Please specify something to trace.\n");
		return -1;
	}

	if (output_name != NULL) {
		err = trace__open_output(&trace, output_name);
		if (err < 0) {
			perror("failed to create output file");
			goto out;
		}
	}

	if (ev_qualifier_str != NULL) {
		const char *s = ev_qualifier_str;
		struct strlist_config slist_config = {
			.dirname = system_path(STRACE_GROUPS_DIR),
		};

		/* A leading '!' negates the qualifier list. */
		trace.not_ev_qualifier = *s == '!';
		if (trace.not_ev_qualifier)
			++s;
		trace.ev_qualifier = strlist__new(s, &slist_config);
		if (trace.ev_qualifier == NULL) {
			fputs("Not enough memory to parse event qualifier",
			      trace.output);
			err = -ENOMEM;
			goto out_close;
		}

		err = trace__validate_ev_qualifier(&trace);
		if (err)
			goto out_close;
	}

	err = target__validate(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	err = target__parse_uid(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	/* Neither a workload nor a target was given: trace the whole system. */
	if (!argc && target__none(&trace.opts.target))
		trace.opts.target.system_wide = true;

	if (input_name)
		err = trace__replay(&trace);
	else
		err = trace__run(&trace, argc, argv);

out_close:
	/* Only close what trace__open_output() opened, never stderr. */
	if (output_name != NULL)
		fclose(trace.output);
out:
	return err;
}
3193