linux/tools/perf/builtin-trace.c
<<
>>
Prefs
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"
#include "trace-event.h"
#include "util/parse-events.h"

#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
  22
  23/* For older distros: */
  24#ifndef MAP_STACK
  25# define MAP_STACK              0x20000
  26#endif
  27
  28#ifndef MADV_HWPOISON
  29# define MADV_HWPOISON          100
  30#endif
  31
  32#ifndef MADV_MERGEABLE
  33# define MADV_MERGEABLE         12
  34#endif
  35
  36#ifndef MADV_UNMERGEABLE
  37# define MADV_UNMERGEABLE       13
  38#endif
  39
  40#ifndef EFD_SEMAPHORE
  41# define EFD_SEMAPHORE          1
  42#endif
  43
  44struct tp_field {
  45        int offset;
  46        union {
  47                u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
  48                void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
  49        };
  50};
  51
  52#define TP_UINT_FIELD(bits) \
  53static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
  54{ \
  55        return *(u##bits *)(sample->raw_data + field->offset); \
  56}
  57
  58TP_UINT_FIELD(8);
  59TP_UINT_FIELD(16);
  60TP_UINT_FIELD(32);
  61TP_UINT_FIELD(64);
  62
  63#define TP_UINT_FIELD__SWAPPED(bits) \
  64static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
  65{ \
  66        u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
  67        return bswap_##bits(value);\
  68}
  69
  70TP_UINT_FIELD__SWAPPED(16);
  71TP_UINT_FIELD__SWAPPED(32);
  72TP_UINT_FIELD__SWAPPED(64);
  73
  74static int tp_field__init_uint(struct tp_field *field,
  75                               struct format_field *format_field,
  76                               bool needs_swap)
  77{
  78        field->offset = format_field->offset;
  79
  80        switch (format_field->size) {
  81        case 1:
  82                field->integer = tp_field__u8;
  83                break;
  84        case 2:
  85                field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
  86                break;
  87        case 4:
  88                field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
  89                break;
  90        case 8:
  91                field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
  92                break;
  93        default:
  94                return -1;
  95        }
  96
  97        return 0;
  98}
  99
 100static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
 101{
 102        return sample->raw_data + field->offset;
 103}
 104
 105static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
 106{
 107        field->offset = format_field->offset;
 108        field->pointer = tp_field__ptr;
 109        return 0;
 110}
 111
 112struct syscall_tp {
 113        struct tp_field id;
 114        union {
 115                struct tp_field args, ret;
 116        };
 117};
 118
 119static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 120                                          struct tp_field *field,
 121                                          const char *name)
 122{
 123        struct format_field *format_field = perf_evsel__field(evsel, name);
 124
 125        if (format_field == NULL)
 126                return -1;
 127
 128        return tp_field__init_uint(field, format_field, evsel->needs_swap);
 129}
 130
 131#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
 132        ({ struct syscall_tp *sc = evsel->priv;\
 133           perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 134
 135static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 136                                         struct tp_field *field,
 137                                         const char *name)
 138{
 139        struct format_field *format_field = perf_evsel__field(evsel, name);
 140
 141        if (format_field == NULL)
 142                return -1;
 143
 144        return tp_field__init_ptr(field, format_field);
 145}
 146
 147#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
 148        ({ struct syscall_tp *sc = evsel->priv;\
 149           perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 150
 151static void perf_evsel__delete_priv(struct perf_evsel *evsel)
 152{
 153        zfree(&evsel->priv);
 154        perf_evsel__delete(evsel);
 155}
 156
 157static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
 158{
 159        evsel->priv = malloc(sizeof(struct syscall_tp));
 160        if (evsel->priv != NULL) {
 161                if (perf_evsel__init_sc_tp_uint_field(evsel, id))
 162                        goto out_delete;
 163
 164                evsel->handler = handler;
 165                return 0;
 166        }
 167
 168        return -ENOMEM;
 169
 170out_delete:
 171        zfree(&evsel->priv);
 172        return -ENOENT;
 173}
 174
 175static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
 176{
 177        struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 178
 179        /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 180        if (evsel == NULL)
 181                evsel = perf_evsel__newtp("syscalls", direction);
 182
 183        if (evsel) {
 184                if (perf_evsel__init_syscall_tp(evsel, handler))
 185                        goto out_delete;
 186        }
 187
 188        return evsel;
 189
 190out_delete:
 191        perf_evsel__delete_priv(evsel);
 192        return NULL;
 193}
 194
 195#define perf_evsel__sc_tp_uint(evsel, name, sample) \
 196        ({ struct syscall_tp *fields = evsel->priv; \
 197           fields->name.integer(&fields->name, sample); })
 198
 199#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
 200        ({ struct syscall_tp *fields = evsel->priv; \
 201           fields->name.pointer(&fields->name, sample); })
 202
 203static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
 204                                          void *sys_enter_handler,
 205                                          void *sys_exit_handler)
 206{
 207        int ret = -1;
 208        struct perf_evsel *sys_enter, *sys_exit;
 209
 210        sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
 211        if (sys_enter == NULL)
 212                goto out;
 213
 214        if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
 215                goto out_delete_sys_enter;
 216
 217        sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
 218        if (sys_exit == NULL)
 219                goto out_delete_sys_enter;
 220
 221        if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
 222                goto out_delete_sys_exit;
 223
 224        perf_evlist__add(evlist, sys_enter);
 225        perf_evlist__add(evlist, sys_exit);
 226
 227        ret = 0;
 228out:
 229        return ret;
 230
 231out_delete_sys_exit:
 232        perf_evsel__delete_priv(sys_exit);
 233out_delete_sys_enter:
 234        perf_evsel__delete_priv(sys_enter);
 235        goto out;
 236}
 237
 238
 239struct syscall_arg {
 240        unsigned long val;
 241        struct thread *thread;
 242        struct trace  *trace;
 243        void          *parm;
 244        u8            idx;
 245        u8            mask;
 246};
 247
 248struct strarray {
 249        int         offset;
 250        int         nr_entries;
 251        const char **entries;
 252};
 253
 254#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
 255        .nr_entries = ARRAY_SIZE(array), \
 256        .entries = array, \
 257}
 258
 259#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
 260        .offset     = off, \
 261        .nr_entries = ARRAY_SIZE(array), \
 262        .entries = array, \
 263}
 264
 265static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
 266                                                const char *intfmt,
 267                                                struct syscall_arg *arg)
 268{
 269        struct strarray *sa = arg->parm;
 270        int idx = arg->val - sa->offset;
 271
 272        if (idx < 0 || idx >= sa->nr_entries)
 273                return scnprintf(bf, size, intfmt, arg->val);
 274
 275        return scnprintf(bf, size, "%s", sa->entries[idx]);
 276}
 277
 278static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
 279                                              struct syscall_arg *arg)
 280{
 281        return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
 282}
 283
 284#define SCA_STRARRAY syscall_arg__scnprintf_strarray
 285
 286#if defined(__i386__) || defined(__x86_64__)
 287/*
 288 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 289 *        gets rewritten to support all arches.
 290 */
 291static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
 292                                                 struct syscall_arg *arg)
 293{
 294        return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
 295}
 296
 297#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
 298#endif /* defined(__i386__) || defined(__x86_64__) */
 299
 300static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
 301                                        struct syscall_arg *arg);
 302
 303#define SCA_FD syscall_arg__scnprintf_fd
 304
 305static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
 306                                           struct syscall_arg *arg)
 307{
 308        int fd = arg->val;
 309
 310        if (fd == AT_FDCWD)
 311                return scnprintf(bf, size, "CWD");
 312
 313        return syscall_arg__scnprintf_fd(bf, size, arg);
 314}
 315
 316#define SCA_FDAT syscall_arg__scnprintf_fd_at
 317
 318static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
 319                                              struct syscall_arg *arg);
 320
 321#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
 322
 323static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
 324                                         struct syscall_arg *arg)
 325{
 326        return scnprintf(bf, size, "%#lx", arg->val);
 327}
 328
 329#define SCA_HEX syscall_arg__scnprintf_hex
 330
 331static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 332                                               struct syscall_arg *arg)
 333{
 334        int printed = 0, prot = arg->val;
 335
 336        if (prot == PROT_NONE)
 337                return scnprintf(bf, size, "NONE");
 338#define P_MMAP_PROT(n) \
 339        if (prot & PROT_##n) { \
 340                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 341                prot &= ~PROT_##n; \
 342        }
 343
 344        P_MMAP_PROT(EXEC);
 345        P_MMAP_PROT(READ);
 346        P_MMAP_PROT(WRITE);
 347#ifdef PROT_SEM
 348        P_MMAP_PROT(SEM);
 349#endif
 350        P_MMAP_PROT(GROWSDOWN);
 351        P_MMAP_PROT(GROWSUP);
 352#undef P_MMAP_PROT
 353
 354        if (prot)
 355                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
 356
 357        return printed;
 358}
 359
 360#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 361
 362static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 363                                                struct syscall_arg *arg)
 364{
 365        int printed = 0, flags = arg->val;
 366
 367#define P_MMAP_FLAG(n) \
 368        if (flags & MAP_##n) { \
 369                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 370                flags &= ~MAP_##n; \
 371        }
 372
 373        P_MMAP_FLAG(SHARED);
 374        P_MMAP_FLAG(PRIVATE);
 375#ifdef MAP_32BIT
 376        P_MMAP_FLAG(32BIT);
 377#endif
 378        P_MMAP_FLAG(ANONYMOUS);
 379        P_MMAP_FLAG(DENYWRITE);
 380        P_MMAP_FLAG(EXECUTABLE);
 381        P_MMAP_FLAG(FILE);
 382        P_MMAP_FLAG(FIXED);
 383        P_MMAP_FLAG(GROWSDOWN);
 384#ifdef MAP_HUGETLB
 385        P_MMAP_FLAG(HUGETLB);
 386#endif
 387        P_MMAP_FLAG(LOCKED);
 388        P_MMAP_FLAG(NONBLOCK);
 389        P_MMAP_FLAG(NORESERVE);
 390        P_MMAP_FLAG(POPULATE);
 391        P_MMAP_FLAG(STACK);
 392#ifdef MAP_UNINITIALIZED
 393        P_MMAP_FLAG(UNINITIALIZED);
 394#endif
 395#undef P_MMAP_FLAG
 396
 397        if (flags)
 398                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 399
 400        return printed;
 401}
 402
 403#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
 404
 405static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
 406                                                  struct syscall_arg *arg)
 407{
 408        int printed = 0, flags = arg->val;
 409
 410#define P_MREMAP_FLAG(n) \
 411        if (flags & MREMAP_##n) { \
 412                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 413                flags &= ~MREMAP_##n; \
 414        }
 415
 416        P_MREMAP_FLAG(MAYMOVE);
 417#ifdef MREMAP_FIXED
 418        P_MREMAP_FLAG(FIXED);
 419#endif
 420#undef P_MREMAP_FLAG
 421
 422        if (flags)
 423                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 424
 425        return printed;
 426}
 427
 428#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
 429
 430static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
 431                                                      struct syscall_arg *arg)
 432{
 433        int behavior = arg->val;
 434
 435        switch (behavior) {
 436#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
 437        P_MADV_BHV(NORMAL);
 438        P_MADV_BHV(RANDOM);
 439        P_MADV_BHV(SEQUENTIAL);
 440        P_MADV_BHV(WILLNEED);
 441        P_MADV_BHV(DONTNEED);
 442        P_MADV_BHV(REMOVE);
 443        P_MADV_BHV(DONTFORK);
 444        P_MADV_BHV(DOFORK);
 445        P_MADV_BHV(HWPOISON);
 446#ifdef MADV_SOFT_OFFLINE
 447        P_MADV_BHV(SOFT_OFFLINE);
 448#endif
 449        P_MADV_BHV(MERGEABLE);
 450        P_MADV_BHV(UNMERGEABLE);
 451#ifdef MADV_HUGEPAGE
 452        P_MADV_BHV(HUGEPAGE);
 453#endif
 454#ifdef MADV_NOHUGEPAGE
 455        P_MADV_BHV(NOHUGEPAGE);
 456#endif
 457#ifdef MADV_DONTDUMP
 458        P_MADV_BHV(DONTDUMP);
 459#endif
 460#ifdef MADV_DODUMP
 461        P_MADV_BHV(DODUMP);
 462#endif
 463#undef P_MADV_PHV
 464        default: break;
 465        }
 466
 467        return scnprintf(bf, size, "%#x", behavior);
 468}
 469
 470#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
 471
 472static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
 473                                           struct syscall_arg *arg)
 474{
 475        int printed = 0, op = arg->val;
 476
 477        if (op == 0)
 478                return scnprintf(bf, size, "NONE");
 479#define P_CMD(cmd) \
 480        if ((op & LOCK_##cmd) == LOCK_##cmd) { \
 481                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
 482                op &= ~LOCK_##cmd; \
 483        }
 484
 485        P_CMD(SH);
 486        P_CMD(EX);
 487        P_CMD(NB);
 488        P_CMD(UN);
 489        P_CMD(MAND);
 490        P_CMD(RW);
 491        P_CMD(READ);
 492        P_CMD(WRITE);
 493#undef P_OP
 494
 495        if (op)
 496                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
 497
 498        return printed;
 499}
 500
 501#define SCA_FLOCK syscall_arg__scnprintf_flock
 502
 503static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
 504{
 505        enum syscall_futex_args {
 506                SCF_UADDR   = (1 << 0),
 507                SCF_OP      = (1 << 1),
 508                SCF_VAL     = (1 << 2),
 509                SCF_TIMEOUT = (1 << 3),
 510                SCF_UADDR2  = (1 << 4),
 511                SCF_VAL3    = (1 << 5),
 512        };
 513        int op = arg->val;
 514        int cmd = op & FUTEX_CMD_MASK;
 515        size_t printed = 0;
 516
 517        switch (cmd) {
 518#define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
 519        P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
 520        P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 521        P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 522        P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
 523        P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
 524        P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
 525        P_FUTEX_OP(WAKE_OP);                                                      break;
 526        P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 527        P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 528        P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
 529        P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
 530        P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
 531        P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
 532        default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
 533        }
 534
 535        if (op & FUTEX_PRIVATE_FLAG)
 536                printed += scnprintf(bf + printed, size - printed, "|PRIV");
 537
 538        if (op & FUTEX_CLOCK_REALTIME)
 539                printed += scnprintf(bf + printed, size - printed, "|CLKRT");
 540
 541        return printed;
 542}
 543
 544#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
 545
 546static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
 547static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
 548
 549static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
 550static DEFINE_STRARRAY(itimers);
 551
 552static const char *whences[] = { "SET", "CUR", "END",
 553#ifdef SEEK_DATA
 554"DATA",
 555#endif
 556#ifdef SEEK_HOLE
 557"HOLE",
 558#endif
 559};
 560static DEFINE_STRARRAY(whences);
 561
 562static const char *fcntl_cmds[] = {
 563        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
 564        "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
 565        "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
 566        "F_GETOWNER_UIDS",
 567};
 568static DEFINE_STRARRAY(fcntl_cmds);
 569
 570static const char *rlimit_resources[] = {
 571        "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
 572        "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
 573        "RTTIME",
 574};
 575static DEFINE_STRARRAY(rlimit_resources);
 576
 577static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
 578static DEFINE_STRARRAY(sighow);
 579
 580static const char *clockid[] = {
 581        "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
 582        "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
 583};
 584static DEFINE_STRARRAY(clockid);
 585
 586static const char *socket_families[] = {
 587        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
 588        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
 589        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
 590        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
 591        "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
 592        "ALG", "NFC", "VSOCK",
 593};
 594static DEFINE_STRARRAY(socket_families);
 595
 596#ifndef SOCK_TYPE_MASK
 597#define SOCK_TYPE_MASK 0xf
 598#endif
 599
 600static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
 601                                                      struct syscall_arg *arg)
 602{
 603        size_t printed;
 604        int type = arg->val,
 605            flags = type & ~SOCK_TYPE_MASK;
 606
 607        type &= SOCK_TYPE_MASK;
 608        /*
 609         * Can't use a strarray, MIPS may override for ABI reasons.
 610         */
 611        switch (type) {
 612#define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
 613        P_SK_TYPE(STREAM);
 614        P_SK_TYPE(DGRAM);
 615        P_SK_TYPE(RAW);
 616        P_SK_TYPE(RDM);
 617        P_SK_TYPE(SEQPACKET);
 618        P_SK_TYPE(DCCP);
 619        P_SK_TYPE(PACKET);
 620#undef P_SK_TYPE
 621        default:
 622                printed = scnprintf(bf, size, "%#x", type);
 623        }
 624
 625#define P_SK_FLAG(n) \
 626        if (flags & SOCK_##n) { \
 627                printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
 628                flags &= ~SOCK_##n; \
 629        }
 630
 631        P_SK_FLAG(CLOEXEC);
 632        P_SK_FLAG(NONBLOCK);
 633#undef P_SK_FLAG
 634
 635        if (flags)
 636                printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
 637
 638        return printed;
 639}
 640
 641#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
 642
 643#ifndef MSG_PROBE
 644#define MSG_PROBE            0x10
 645#endif
 646#ifndef MSG_WAITFORONE
 647#define MSG_WAITFORONE  0x10000
 648#endif
 649#ifndef MSG_SENDPAGE_NOTLAST
 650#define MSG_SENDPAGE_NOTLAST 0x20000
 651#endif
 652#ifndef MSG_FASTOPEN
 653#define MSG_FASTOPEN         0x20000000
 654#endif
 655
 656static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
 657                                               struct syscall_arg *arg)
 658{
 659        int printed = 0, flags = arg->val;
 660
 661        if (flags == 0)
 662                return scnprintf(bf, size, "NONE");
 663#define P_MSG_FLAG(n) \
 664        if (flags & MSG_##n) { \
 665                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 666                flags &= ~MSG_##n; \
 667        }
 668
 669        P_MSG_FLAG(OOB);
 670        P_MSG_FLAG(PEEK);
 671        P_MSG_FLAG(DONTROUTE);
 672        P_MSG_FLAG(TRYHARD);
 673        P_MSG_FLAG(CTRUNC);
 674        P_MSG_FLAG(PROBE);
 675        P_MSG_FLAG(TRUNC);
 676        P_MSG_FLAG(DONTWAIT);
 677        P_MSG_FLAG(EOR);
 678        P_MSG_FLAG(WAITALL);
 679        P_MSG_FLAG(FIN);
 680        P_MSG_FLAG(SYN);
 681        P_MSG_FLAG(CONFIRM);
 682        P_MSG_FLAG(RST);
 683        P_MSG_FLAG(ERRQUEUE);
 684        P_MSG_FLAG(NOSIGNAL);
 685        P_MSG_FLAG(MORE);
 686        P_MSG_FLAG(WAITFORONE);
 687        P_MSG_FLAG(SENDPAGE_NOTLAST);
 688        P_MSG_FLAG(FASTOPEN);
 689        P_MSG_FLAG(CMSG_CLOEXEC);
 690#undef P_MSG_FLAG
 691
 692        if (flags)
 693                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 694
 695        return printed;
 696}
 697
 698#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
 699
 700static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
 701                                                 struct syscall_arg *arg)
 702{
 703        size_t printed = 0;
 704        int mode = arg->val;
 705
 706        if (mode == F_OK) /* 0 */
 707                return scnprintf(bf, size, "F");
 708#define P_MODE(n) \
 709        if (mode & n##_OK) { \
 710                printed += scnprintf(bf + printed, size - printed, "%s", #n); \
 711                mode &= ~n##_OK; \
 712        }
 713
 714        P_MODE(R);
 715        P_MODE(W);
 716        P_MODE(X);
 717#undef P_MODE
 718
 719        if (mode)
 720                printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
 721
 722        return printed;
 723}
 724
 725#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
 726
 727static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
 728                                               struct syscall_arg *arg)
 729{
 730        int printed = 0, flags = arg->val;
 731
 732        if (!(flags & O_CREAT))
 733                arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
 734
 735        if (flags == 0)
 736                return scnprintf(bf, size, "RDONLY");
 737#define P_FLAG(n) \
 738        if (flags & O_##n) { \
 739                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 740                flags &= ~O_##n; \
 741        }
 742
 743        P_FLAG(APPEND);
 744        P_FLAG(ASYNC);
 745        P_FLAG(CLOEXEC);
 746        P_FLAG(CREAT);
 747        P_FLAG(DIRECT);
 748        P_FLAG(DIRECTORY);
 749        P_FLAG(EXCL);
 750        P_FLAG(LARGEFILE);
 751        P_FLAG(NOATIME);
 752        P_FLAG(NOCTTY);
 753#ifdef O_NONBLOCK
 754        P_FLAG(NONBLOCK);
 755#elif O_NDELAY
 756        P_FLAG(NDELAY);
 757#endif
 758#ifdef O_PATH
 759        P_FLAG(PATH);
 760#endif
 761        P_FLAG(RDWR);
 762#ifdef O_DSYNC
 763        if ((flags & O_SYNC) == O_SYNC)
 764                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
 765        else {
 766                P_FLAG(DSYNC);
 767        }
 768#else
 769        P_FLAG(SYNC);
 770#endif
 771        P_FLAG(TRUNC);
 772        P_FLAG(WRONLY);
 773#undef P_FLAG
 774
 775        if (flags)
 776                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 777
 778        return printed;
 779}
 780
 781#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
 782
 783static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
 784                                                   struct syscall_arg *arg)
 785{
 786        int printed = 0, flags = arg->val;
 787
 788        if (flags == 0)
 789                return scnprintf(bf, size, "NONE");
 790#define P_FLAG(n) \
 791        if (flags & EFD_##n) { \
 792                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 793                flags &= ~EFD_##n; \
 794        }
 795
 796        P_FLAG(SEMAPHORE);
 797        P_FLAG(CLOEXEC);
 798        P_FLAG(NONBLOCK);
 799#undef P_FLAG
 800
 801        if (flags)
 802                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 803
 804        return printed;
 805}
 806
 807#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
 808
 809static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
 810                                                struct syscall_arg *arg)
 811{
 812        int printed = 0, flags = arg->val;
 813
 814#define P_FLAG(n) \
 815        if (flags & O_##n) { \
 816                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 817                flags &= ~O_##n; \
 818        }
 819
 820        P_FLAG(CLOEXEC);
 821        P_FLAG(NONBLOCK);
 822#undef P_FLAG
 823
 824        if (flags)
 825                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 826
 827        return printed;
 828}
 829
 830#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 831
 832static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
 833{
 834        int sig = arg->val;
 835
 836        switch (sig) {
 837#define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
 838        P_SIGNUM(HUP);
 839        P_SIGNUM(INT);
 840        P_SIGNUM(QUIT);
 841        P_SIGNUM(ILL);
 842        P_SIGNUM(TRAP);
 843        P_SIGNUM(ABRT);
 844        P_SIGNUM(BUS);
 845        P_SIGNUM(FPE);
 846        P_SIGNUM(KILL);
 847        P_SIGNUM(USR1);
 848        P_SIGNUM(SEGV);
 849        P_SIGNUM(USR2);
 850        P_SIGNUM(PIPE);
 851        P_SIGNUM(ALRM);
 852        P_SIGNUM(TERM);
 853        P_SIGNUM(CHLD);
 854        P_SIGNUM(CONT);
 855        P_SIGNUM(STOP);
 856        P_SIGNUM(TSTP);
 857        P_SIGNUM(TTIN);
 858        P_SIGNUM(TTOU);
 859        P_SIGNUM(URG);
 860        P_SIGNUM(XCPU);
 861        P_SIGNUM(XFSZ);
 862        P_SIGNUM(VTALRM);
 863        P_SIGNUM(PROF);
 864        P_SIGNUM(WINCH);
 865        P_SIGNUM(IO);
 866        P_SIGNUM(PWR);
 867        P_SIGNUM(SYS);
 868#ifdef SIGEMT
 869        P_SIGNUM(EMT);
 870#endif
 871#ifdef SIGSTKFLT
 872        P_SIGNUM(STKFLT);
 873#endif
 874#ifdef SIGSWI
 875        P_SIGNUM(SWI);
 876#endif
 877        default: break;
 878        }
 879
 880        return scnprintf(bf, size, "%#x", sig);
 881}
 882
 883#define SCA_SIGNUM syscall_arg__scnprintf_signum
 884
 885#if defined(__i386__) || defined(__x86_64__)
 886/*
 887 * FIXME: Make this available to all arches.
 888 */
 889#define TCGETS          0x5401
 890
 891static const char *tioctls[] = {
 892        "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
 893        "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
 894        "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
 895        "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
 896        "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
 897        "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
 898        "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
 899        "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
 900        "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
 901        "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
 902        "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
 903        [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
 904        "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
 905        "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
 906        "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
 907};
 908
 909static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
 910#endif /* defined(__i386__) || defined(__x86_64__) */
 911
 912#define STRARRAY(arg, name, array) \
 913          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
 914          .arg_parm      = { [arg] = &strarray__##array, }
 915
/*
 * Per-syscall pretty-printing hints.
 *
 * syscall_fmt__find() looks entries up with bsearch() using strcmp() on
 * ->name, so the table below MUST stay sorted by name in strcmp() order
 * (note that '_' sorts before lowercase letters: "open_by_handle_at" comes
 * before "openat").
 */
static struct syscall_fmt {
        const char *name;       /* syscall name, the bsearch() key */
        const char *alias;      /* alternative name tried for the sys_enter_* tracepoint when sys_enter_<name> is not found */
        /* per-argument formatter, indexed by argument position; NULL means use the default */
        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
        void       *arg_parm[6]; /* per-argument extra parameter handed to the formatter via arg->parm */
        bool       errmsg;      /* consumed outside this table; presumably: decode negative returns as errno -- confirm */
        bool       timeout;     /* consumed outside this table; presumably: a zero return means "timed out" -- confirm */
        bool       hexret;      /* consumed outside this table; presumably: print the return value in hex -- confirm */
} syscall_fmts[] = {
        { .name     = "access",     .errmsg = true,
          .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
        { .name     = "brk",        .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
        { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
        { .name     = "close",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
        { .name     = "connect",    .errmsg = true, },
        { .name     = "dup",        .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup2",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup3",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
        { .name     = "eventfd2",   .errmsg = true,
          .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
        { .name     = "faccessat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "fadvise64",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fallocate",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchdir",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmod",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmodat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "fchown",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchownat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "fcntl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_STRARRAY, /* cmd */ },
          .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
        { .name     = "fdatasync",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "flock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_FLOCK, /* cmd */ }, },
        { .name     = "fsetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "fstatfs",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fsync",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "ftruncate", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "futex",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
        { .name     = "futimesat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "getdents",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getdents64", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        /*
         * The "ioctl" entry is closed inside each preprocessor branch below:
         * only x86 has the tioctls table to decode the cmd argument.
         */
        { .name     = "ioctl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
                             [1] = SCA_STRHEXARRAY, /* cmd */
                             [2] = SCA_HEX, /* arg */ },
          .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
#else
                             [2] = SCA_HEX, /* arg */ }, },
#endif
        { .name     = "kill",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "linkat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "lseek",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_STRARRAY, /* whence */ },
          .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
        { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
        { .name     = "madvise",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX,      /* start */
                             [2] = SCA_MADV_BHV, /* behavior */ }, },
        { .name     = "mkdirat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "mknodat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "mlock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        /* NOTE(review): mlockall(2) takes a flags word, not an address -- confirm the "addr" label below */
        { .name     = "mlockall",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "mmap",       .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
                             [2] = SCA_MMAP_PROT, /* prot */
                             [3] = SCA_MMAP_FLAGS, /* flags */
                             [4] = SCA_FD,        /* fd */ }, },
        { .name     = "mprotect",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* start */
                             [2] = SCA_MMAP_PROT, /* prot */ }, },
        { .name     = "mremap",     .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */
                             [3] = SCA_MREMAP_FLAGS, /* flags */
                             [4] = SCA_HEX, /* new_addr */ }, },
        { .name     = "munlock",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "munmap",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "name_to_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "newfstatat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "open",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "open_by_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "openat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "pipe2",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
        { .name     = "poll",       .errmsg = true, .timeout = true, },
        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
        { .name     = "pread",      .errmsg = true, .alias = "pread64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        /* NOTE(review): alias "pread" looks copied from the entry above; confirm a sys_enter_pread tracepoint is really the intended fallback */
        { .name     = "preadv",     .errmsg = true, .alias = "pread",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
        { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "pwritev",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "read",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "readlinkat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "readv",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "recvfrom",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmmsg",   .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "renameat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "rt_sigaction", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
        { .name     = "rt_sigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "select",     .errmsg = true, .timeout = true, },
        { .name     = "sendmmsg",    .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendmsg",    .errmsg = true,
          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendto",     .errmsg = true,
          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "shutdown",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "socket",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "socketpair", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "stat",       .errmsg = true, .alias = "newstat", },
        { .name     = "symlinkat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "tgkill",     .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "tkill",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "uname",      .errmsg = true, .alias = "newuname", },
        { .name     = "unlinkat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "utimensat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
        { .name     = "write",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "writev",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
};
1120
1121static int syscall_fmt__cmp(const void *name, const void *fmtp)
1122{
1123        const struct syscall_fmt *fmt = fmtp;
1124        return strcmp(name, fmt->name);
1125}
1126
1127static struct syscall_fmt *syscall_fmt__find(const char *name)
1128{
1129        const int nmemb = ARRAY_SIZE(syscall_fmts);
1130        return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1131}
1132
/*
 * Cached per-syscall state, one slot per syscall id in trace->syscalls.table.
 * A slot is filled in lazily by trace__read_syscall_info().
 */
struct syscall {
        struct event_format *tp_format;   /* format of the syscalls:sys_enter_<name|alias> tracepoint, NULL if not found */
        const char          *name;        /* name from audit_syscall_to_name(); NULL means "slot not read yet" */
        bool                filtered;     /* excluded by the event qualifier list: handlers return early */
        bool                is_exit;      /* true for "exit" and "exit_group" */
        struct syscall_fmt  *fmt;         /* matching syscall_fmts[] entry, or NULL */
        /* per-argument formatters, allocated by syscall__set_arg_fmts() */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        void                **arg_parm;   /* points at fmt->arg_parm when fmt is set */
};
1142
1143static size_t fprintf_duration(unsigned long t, FILE *fp)
1144{
1145        double duration = (double)t / NSEC_PER_MSEC;
1146        size_t printed = fprintf(fp, "(");
1147
1148        if (duration >= 1.0)
1149                printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1150        else if (duration >= 0.01)
1151                printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1152        else
1153                printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1154        return printed + fprintf(fp, "): ");
1155}
1156
/*
 * Per-thread tracing state, stored as the thread's private data
 * (see thread__trace() / thread__priv()).
 */
struct thread_trace {
        u64               entry_time;    /* sample time of the last sys_enter seen for this thread */
        u64               exit_time;     /* not written in this part of the file; presumably last sys_exit time -- confirm */
        bool              entry_pending; /* a sys_enter line was formatted into entry_str and is not printed yet */
        unsigned long     nr_events;     /* bumped on every thread__trace() call for this thread */
        unsigned long     pfmaj, pfmin;  /* presumably major/minor page fault counters -- confirm */
        char              *entry_str;    /* lazily allocated buffer holding the formatted sys_enter text */
        double            runtime_ms;    /* not written in this part of the file; presumably accumulated runtime -- confirm */
        struct {
                int       max;           /* highest fd index valid in table, -1 while the table is empty */
                char      **table;       /* fd -> strdup()ed pathname, NULL for unknown fds */
        } paths;

        struct intlist *syscall_stats;   /* keyed by syscall id; each node's priv is a struct stats */
};
1172
1173static struct thread_trace *thread_trace__new(void)
1174{
1175        struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1176
1177        if (ttrace)
1178                ttrace->paths.max = -1;
1179
1180        ttrace->syscall_stats = intlist__new(NULL);
1181
1182        return ttrace;
1183}
1184
1185static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1186{
1187        struct thread_trace *ttrace;
1188
1189        if (thread == NULL)
1190                goto fail;
1191
1192        if (thread__priv(thread) == NULL)
1193                thread__set_priv(thread, thread_trace__new());
1194
1195        if (thread__priv(thread) == NULL)
1196                goto fail;
1197
1198        ttrace = thread__priv(thread);
1199        ++ttrace->nr_events;
1200
1201        return ttrace;
1202fail:
1203        color_fprintf(fp, PERF_COLOR_RED,
1204                      "WARNING: not enough memory, dropping samples!\n");
1205        return NULL;
1206}
1207
/* Bit flags; presumably the values stored in trace->trace_pgfaults -- confirm */
#define TRACE_PFMAJ             (1 << 0)
#define TRACE_PFMIN             (1 << 1)

/* Global state of one 'perf trace' run. */
struct trace {
        struct perf_tool        tool;             /* embedded so that tool callbacks can container_of() back to the trace */
        struct {
                int             machine;          /* passed to audit_syscall_to_name() */
                int             open_id;
        }                       audit;
        struct {
                int             max;              /* highest syscall id valid in table, -1 while empty */
                struct syscall  *table;           /* indexed by syscall id, grown on demand */
        } syscalls;
        struct record_opts      opts;
        struct machine          *host;            /* created by trace__symbols_init() */
        u64                     base_time;        /* subtracted from sample timestamps before printing */
        FILE                    *output;          /* where trace lines and diagnostics are written */
        unsigned long           nr_events;
        struct strlist          *ev_qualifier;    /* syscall names to select (or exclude, see not_ev_qualifier) */
        const char              *last_vfs_getname;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        double                  duration_filter;  /* in milliseconds, see trace__filter_duration() */
        double                  runtime_ms;
        struct {
                u64             vfs_getname,
                                proc_getname;     /* number of fd path lookups that went to /proc */
        } stats;
        bool                    not_ev_qualifier; /* ev_qualifier lists syscalls to exclude instead of include */
        bool                    live;             /* fd paths may be resolved by reading /proc */
        bool                    full_time;
        bool                    sched;
        bool                    multiple_threads; /* prefix each line with the thread id */
        bool                    summary;
        bool                    summary_only;     /* suppress per-syscall lines */
        bool                    show_comm;        /* also print the thread's comm in the prefix */
        bool                    show_tool_stats;
        bool                    trace_syscalls;
        int                     trace_pgfaults;
};
1248
1249static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1250{
1251        struct thread_trace *ttrace = thread__priv(thread);
1252
1253        if (fd > ttrace->paths.max) {
1254                char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1255
1256                if (npath == NULL)
1257                        return -1;
1258
1259                if (ttrace->paths.max != -1) {
1260                        memset(npath + ttrace->paths.max + 1, 0,
1261                               (fd - ttrace->paths.max) * sizeof(char *));
1262                } else {
1263                        memset(npath, 0, (fd + 1) * sizeof(char *));
1264                }
1265
1266                ttrace->paths.table = npath;
1267                ttrace->paths.max   = fd;
1268        }
1269
1270        ttrace->paths.table[fd] = strdup(pathname);
1271
1272        return ttrace->paths.table[fd] != NULL ? 0 : -1;
1273}
1274
1275static int thread__read_fd_path(struct thread *thread, int fd)
1276{
1277        char linkname[PATH_MAX], pathname[PATH_MAX];
1278        struct stat st;
1279        int ret;
1280
1281        if (thread->pid_ == thread->tid) {
1282                scnprintf(linkname, sizeof(linkname),
1283                          "/proc/%d/fd/%d", thread->pid_, fd);
1284        } else {
1285                scnprintf(linkname, sizeof(linkname),
1286                          "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1287        }
1288
1289        if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1290                return -1;
1291
1292        ret = readlink(linkname, pathname, sizeof(pathname));
1293
1294        if (ret < 0 || ret > st.st_size)
1295                return -1;
1296
1297        pathname[ret] = '\0';
1298        return trace__set_fd_pathname(thread, fd, pathname);
1299}
1300
1301static const char *thread__fd_path(struct thread *thread, int fd,
1302                                   struct trace *trace)
1303{
1304        struct thread_trace *ttrace = thread__priv(thread);
1305
1306        if (ttrace == NULL)
1307                return NULL;
1308
1309        if (fd < 0)
1310                return NULL;
1311
1312        if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1313                if (!trace->live)
1314                        return NULL;
1315                ++trace->stats.proc_getname;
1316                if (thread__read_fd_path(thread, fd))
1317                        return NULL;
1318        }
1319
1320        return ttrace->paths.table[fd];
1321}
1322
1323static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1324                                        struct syscall_arg *arg)
1325{
1326        int fd = arg->val;
1327        size_t printed = scnprintf(bf, size, "%d", fd);
1328        const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1329
1330        if (path)
1331                printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1332
1333        return printed;
1334}
1335
1336static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1337                                              struct syscall_arg *arg)
1338{
1339        int fd = arg->val;
1340        size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1341        struct thread_trace *ttrace = thread__priv(arg->thread);
1342
1343        if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1344                zfree(&ttrace->paths.table[fd]);
1345
1346        return printed;
1347}
1348
1349static bool trace__filter_duration(struct trace *trace, double t)
1350{
1351        return t < (trace->duration_filter * NSEC_PER_MSEC);
1352}
1353
1354static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1355{
1356        double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1357
1358        return fprintf(fp, "%10.3f ", ts);
1359}
1360
/*
 * Flags written from the signal handler and read by the rest of the tool.
 * volatile sig_atomic_t is the type the C standard allows a handler to
 * write to safely.
 */
static volatile sig_atomic_t done = 0;
static volatile sig_atomic_t interrupted = 0;

/*
 * Signal handler: request termination, and record whether the request came
 * from SIGINT (as opposed to any other signal this handler is installed for).
 */
static void sig_handler(int sig)
{
        done = 1;
        interrupted = (sig == SIGINT);
}
1369
1370static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1371                                        u64 duration, u64 tstamp, FILE *fp)
1372{
1373        size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1374        printed += fprintf_duration(duration, fp);
1375
1376        if (trace->multiple_threads) {
1377                if (trace->show_comm)
1378                        printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1379                printed += fprintf(fp, "%d ", thread->tid);
1380        }
1381
1382        return printed;
1383}
1384
1385static int trace__process_event(struct trace *trace, struct machine *machine,
1386                                union perf_event *event, struct perf_sample *sample)
1387{
1388        int ret = 0;
1389
1390        switch (event->header.type) {
1391        case PERF_RECORD_LOST:
1392                color_fprintf(trace->output, PERF_COLOR_RED,
1393                              "LOST %" PRIu64 " events!\n", event->lost.lost);
1394                ret = machine__process_lost_event(machine, event, sample);
1395        default:
1396                ret = machine__process_event(machine, event, sample);
1397                break;
1398        }
1399
1400        return ret;
1401}
1402
1403static int trace__tool_process(struct perf_tool *tool,
1404                               union perf_event *event,
1405                               struct perf_sample *sample,
1406                               struct machine *machine)
1407{
1408        struct trace *trace = container_of(tool, struct trace, tool);
1409        return trace__process_event(trace, machine, event, sample);
1410}
1411
1412static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1413{
1414        int err = symbol__init(NULL);
1415
1416        if (err)
1417                return err;
1418
1419        trace->host = machine__new_host();
1420        if (trace->host == NULL)
1421                return -ENOMEM;
1422
1423        err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1424                                            evlist->threads, trace__tool_process, false);
1425        if (err)
1426                symbol__exit();
1427
1428        return err;
1429}
1430
1431static int syscall__set_arg_fmts(struct syscall *sc)
1432{
1433        struct format_field *field;
1434        int idx = 0;
1435
1436        sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1437        if (sc->arg_scnprintf == NULL)
1438                return -1;
1439
1440        if (sc->fmt)
1441                sc->arg_parm = sc->fmt->arg_parm;
1442
1443        for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1444                if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1445                        sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1446                else if (field->flags & FIELD_IS_POINTER)
1447                        sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1448                ++idx;
1449        }
1450
1451        return 0;
1452}
1453
1454static int trace__read_syscall_info(struct trace *trace, int id)
1455{
1456        char tp_name[128];
1457        struct syscall *sc;
1458        const char *name = audit_syscall_to_name(id, trace->audit.machine);
1459
1460        if (name == NULL)
1461                return -1;
1462
1463        if (id > trace->syscalls.max) {
1464                struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1465
1466                if (nsyscalls == NULL)
1467                        return -1;
1468
1469                if (trace->syscalls.max != -1) {
1470                        memset(nsyscalls + trace->syscalls.max + 1, 0,
1471                               (id - trace->syscalls.max) * sizeof(*sc));
1472                } else {
1473                        memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1474                }
1475
1476                trace->syscalls.table = nsyscalls;
1477                trace->syscalls.max   = id;
1478        }
1479
1480        sc = trace->syscalls.table + id;
1481        sc->name = name;
1482
1483        if (trace->ev_qualifier) {
1484                bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1485
1486                if (!(in ^ trace->not_ev_qualifier)) {
1487                        sc->filtered = true;
1488                        /*
1489                         * No need to do read tracepoint information since this will be
1490                         * filtered out.
1491                         */
1492                        return 0;
1493                }
1494        }
1495
1496        sc->fmt  = syscall_fmt__find(sc->name);
1497
1498        snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1499        sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1500
1501        if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1502                snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1503                sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1504        }
1505
1506        if (sc->tp_format == NULL)
1507                return -1;
1508
1509        sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1510
1511        return syscall__set_arg_fmts(sc);
1512}
1513
/*
 * Format the arguments of syscall 'sc' into 'bf' as "name: value, ...".
 *
 * With a tracepoint format available, argument names come from its fields
 * (the first field is skipped) and values are printed through the
 * per-argument formatter when one is set, as a signed long otherwise.
 * Without a format, all six raw values are printed as "argN: value".
 *
 * Returns the number of characters written.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
                                      unsigned long *args, struct trace *trace,
                                      struct thread *thread)
{
        size_t printed = 0;

        if (sc->tp_format != NULL) {
                struct format_field *field;
                u8 bit = 1;     /* mask bit of the argument being visited */
                struct syscall_arg arg = {
                        .idx    = 0,
                        .mask   = 0,
                        .trace  = trace,
                        .thread = thread,
                };

                for (field = sc->tp_format->format.fields->next; field;
                     field = field->next, ++arg.idx, bit <<= 1) {
                        /*
                         * Skip arguments whose bit is set in arg.mask.
                         * NOTE(review): nothing here sets the mask; presumably
                         * formatter callbacks do, to hide arguments -- confirm.
                         */
                        if (arg.mask & bit)
                                continue;
                        /*
                         * Suppress this argument if its value is zero and
                         * we don't have a string associated in a
                         * strarray for it.
                         */
                        if (args[arg.idx] == 0 &&
                            !(sc->arg_scnprintf &&
                              sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
                              sc->arg_parm[arg.idx]))
                                continue;

                        /* ", " separator for every argument but the first one printed */
                        printed += scnprintf(bf + printed, size - printed,
                                             "%s%s: ", printed ? ", " : "", field->name);
                        if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
                                arg.val = args[arg.idx];
                                if (sc->arg_parm)
                                        arg.parm = sc->arg_parm[arg.idx];
                                printed += sc->arg_scnprintf[arg.idx](bf + printed,
                                                                      size - printed, &arg);
                        } else {
                                printed += scnprintf(bf + printed, size - printed,
                                                     "%ld", args[arg.idx]);
                        }
                }
        } else {
                int i = 0;

                /* No tracepoint format: dump all six raw argument slots. */
                while (i < 6) {
                        printed += scnprintf(bf + printed, size - printed,
                                             "%sarg%d: %ld",
                                             printed ? ", " : "", i, args[i]);
                        ++i;
                }
        }

        return printed;
}
1571
/*
 * Signature shared by the per-tracepoint sample handlers in this file,
 * e.g. trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1575
1576static struct syscall *trace__syscall_info(struct trace *trace,
1577                                           struct perf_evsel *evsel, int id)
1578{
1579
1580        if (id < 0) {
1581
1582                /*
1583                 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1584                 * before that, leaving at a higher verbosity level till that is
1585                 * explained. Reproduced with plain ftrace with:
1586                 *
1587                 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1588                 * grep "NR -1 " /t/trace_pipe
1589                 *
1590                 * After generating some load on the machine.
1591                 */
1592                if (verbose > 1) {
1593                        static u64 n;
1594                        fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1595                                id, perf_evsel__name(evsel), ++n);
1596                }
1597                return NULL;
1598        }
1599
1600        if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1601            trace__read_syscall_info(trace, id))
1602                goto out_cant_read;
1603
1604        if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1605                goto out_cant_read;
1606
1607        return &trace->syscalls.table[id];
1608
1609out_cant_read:
1610        if (verbose) {
1611                fprintf(trace->output, "Problems reading syscall %d", id);
1612                if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1613                        fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1614                fputs(" information\n", trace->output);
1615        }
1616        return NULL;
1617}
1618
1619static void thread__update_stats(struct thread_trace *ttrace,
1620                                 int id, struct perf_sample *sample)
1621{
1622        struct int_node *inode;
1623        struct stats *stats;
1624        u64 duration = 0;
1625
1626        inode = intlist__findnew(ttrace->syscall_stats, id);
1627        if (inode == NULL)
1628                return;
1629
1630        stats = inode->priv;
1631        if (stats == NULL) {
1632                stats = malloc(sizeof(struct stats));
1633                if (stats == NULL)
1634                        return;
1635                init_stats(stats);
1636                inode->priv = stats;
1637        }
1638
1639        if (ttrace->entry_time && sample->time > ttrace->entry_time)
1640                duration = sample->time - ttrace->entry_time;
1641
1642        update_stats(stats, duration);
1643}
1644
1645static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1646                            union perf_event *event __maybe_unused,
1647                            struct perf_sample *sample)
1648{
1649        char *msg;
1650        void *args;
1651        size_t printed = 0;
1652        struct thread *thread;
1653        int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1654        struct syscall *sc = trace__syscall_info(trace, evsel, id);
1655        struct thread_trace *ttrace;
1656
1657        if (sc == NULL)
1658                return -1;
1659
1660        if (sc->filtered)
1661                return 0;
1662
1663        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1664        ttrace = thread__trace(thread, trace->output);
1665        if (ttrace == NULL)
1666                return -1;
1667
1668        args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1669
1670        if (ttrace->entry_str == NULL) {
1671                ttrace->entry_str = malloc(1024);
1672                if (!ttrace->entry_str)
1673                        return -1;
1674        }
1675
1676        ttrace->entry_time = sample->time;
1677        msg = ttrace->entry_str;
1678        printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1679
1680        printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1681                                           args, trace, thread);
1682
1683        if (sc->is_exit) {
1684                if (!trace->duration_filter && !trace->summary_only) {
1685                        trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1686                        fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1687                }
1688        } else
1689                ttrace->entry_pending = true;
1690
1691        return 0;
1692}
1693
1694static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1695                           union perf_event *event __maybe_unused,
1696                           struct perf_sample *sample)
1697{
1698        long ret;
1699        u64 duration = 0;
1700        struct thread *thread;
1701        int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1702        struct syscall *sc = trace__syscall_info(trace, evsel, id);
1703        struct thread_trace *ttrace;
1704
1705        if (sc == NULL)
1706                return -1;
1707
1708        if (sc->filtered)
1709                return 0;
1710
1711        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1712        ttrace = thread__trace(thread, trace->output);
1713        if (ttrace == NULL)
1714                return -1;
1715
1716        if (trace->summary)
1717                thread__update_stats(ttrace, id, sample);
1718
1719        ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1720
1721        if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1722                trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1723                trace->last_vfs_getname = NULL;
1724                ++trace->stats.vfs_getname;
1725        }
1726
1727        ttrace->exit_time = sample->time;
1728
1729        if (ttrace->entry_time) {
1730                duration = sample->time - ttrace->entry_time;
1731                if (trace__filter_duration(trace, duration))
1732                        goto out;
1733        } else if (trace->duration_filter)
1734                goto out;
1735
1736        if (trace->summary_only)
1737                goto out;
1738
1739        trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1740
1741        if (ttrace->entry_pending) {
1742                fprintf(trace->output, "%-70s", ttrace->entry_str);
1743        } else {
1744                fprintf(trace->output, " ... [");
1745                color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1746                fprintf(trace->output, "]: %s()", sc->name);
1747        }
1748
1749        if (sc->fmt == NULL) {
1750signed_print:
1751                fprintf(trace->output, ") = %ld", ret);
1752        } else if (ret < 0 && sc->fmt->errmsg) {
1753                char bf[STRERR_BUFSIZE];
1754                const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1755                           *e = audit_errno_to_name(-ret);
1756
1757                fprintf(trace->output, ") = -1 %s %s", e, emsg);
1758        } else if (ret == 0 && sc->fmt->timeout)
1759                fprintf(trace->output, ") = 0 Timeout");
1760        else if (sc->fmt->hexret)
1761                fprintf(trace->output, ") = %#lx", ret);
1762        else
1763                goto signed_print;
1764
1765        fputc('\n', trace->output);
1766out:
1767        ttrace->entry_pending = false;
1768
1769        return 0;
1770}
1771
1772static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1773                              union perf_event *event __maybe_unused,
1774                              struct perf_sample *sample)
1775{
1776        trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1777        return 0;
1778}
1779
1780static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1781                                     union perf_event *event __maybe_unused,
1782                                     struct perf_sample *sample)
1783{
1784        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1785        double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1786        struct thread *thread = machine__findnew_thread(trace->host,
1787                                                        sample->pid,
1788                                                        sample->tid);
1789        struct thread_trace *ttrace = thread__trace(thread, trace->output);
1790
1791        if (ttrace == NULL)
1792                goto out_dump;
1793
1794        ttrace->runtime_ms += runtime_ms;
1795        trace->runtime_ms += runtime_ms;
1796        return 0;
1797
1798out_dump:
1799        fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1800               evsel->name,
1801               perf_evsel__strval(evsel, sample, "comm"),
1802               (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1803               runtime,
1804               perf_evsel__intval(evsel, sample, "vruntime"));
1805        return 0;
1806}
1807
1808static void print_location(FILE *f, struct perf_sample *sample,
1809                           struct addr_location *al,
1810                           bool print_dso, bool print_sym)
1811{
1812
1813        if ((verbose || print_dso) && al->map)
1814                fprintf(f, "%s@", al->map->dso->long_name);
1815
1816        if ((verbose || print_sym) && al->sym)
1817                fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1818                        al->addr - al->sym->start);
1819        else if (al->map)
1820                fprintf(f, "0x%" PRIx64, al->addr);
1821        else
1822                fprintf(f, "0x%" PRIx64, sample->addr);
1823}
1824
1825static int trace__pgfault(struct trace *trace,
1826                          struct perf_evsel *evsel,
1827                          union perf_event *event,
1828                          struct perf_sample *sample)
1829{
1830        struct thread *thread;
1831        u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1832        struct addr_location al;
1833        char map_type = 'd';
1834        struct thread_trace *ttrace;
1835
1836        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1837        ttrace = thread__trace(thread, trace->output);
1838        if (ttrace == NULL)
1839                return -1;
1840
1841        if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1842                ttrace->pfmaj++;
1843        else
1844                ttrace->pfmin++;
1845
1846        if (trace->summary_only)
1847                return 0;
1848
1849        thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1850                              sample->ip, &al);
1851
1852        trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1853
1854        fprintf(trace->output, "%sfault [",
1855                evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1856                "maj" : "min");
1857
1858        print_location(trace->output, sample, &al, false, true);
1859
1860        fprintf(trace->output, "] => ");
1861
1862        thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
1863                                   sample->addr, &al);
1864
1865        if (!al.map) {
1866                thread__find_addr_location(thread, cpumode,
1867                                           MAP__FUNCTION, sample->addr, &al);
1868
1869                if (al.map)
1870                        map_type = 'x';
1871                else
1872                        map_type = '?';
1873        }
1874
1875        print_location(trace->output, sample, &al, true, false);
1876
1877        fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1878
1879        return 0;
1880}
1881
1882static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1883{
1884        if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1885            (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1886                return false;
1887
1888        if (trace->pid_list || trace->tid_list)
1889                return true;
1890
1891        return false;
1892}
1893
1894static int trace__process_sample(struct perf_tool *tool,
1895                                 union perf_event *event,
1896                                 struct perf_sample *sample,
1897                                 struct perf_evsel *evsel,
1898                                 struct machine *machine __maybe_unused)
1899{
1900        struct trace *trace = container_of(tool, struct trace, tool);
1901        int err = 0;
1902
1903        tracepoint_handler handler = evsel->handler;
1904
1905        if (skip_sample(trace, sample))
1906                return 0;
1907
1908        if (!trace->full_time && trace->base_time == 0)
1909                trace->base_time = sample->time;
1910
1911        if (handler) {
1912                ++trace->nr_events;
1913                handler(trace, evsel, event, sample);
1914        }
1915
1916        return err;
1917}
1918
1919static int parse_target_str(struct trace *trace)
1920{
1921        if (trace->opts.target.pid) {
1922                trace->pid_list = intlist__new(trace->opts.target.pid);
1923                if (trace->pid_list == NULL) {
1924                        pr_err("Error parsing process id string\n");
1925                        return -EINVAL;
1926                }
1927        }
1928
1929        if (trace->opts.target.tid) {
1930                trace->tid_list = intlist__new(trace->opts.target.tid);
1931                if (trace->tid_list == NULL) {
1932                        pr_err("Error parsing thread id string\n");
1933                        return -EINVAL;
1934                }
1935        }
1936
1937        return 0;
1938}
1939
1940static int trace__record(struct trace *trace, int argc, const char **argv)
1941{
1942        unsigned int rec_argc, i, j;
1943        const char **rec_argv;
1944        const char * const record_args[] = {
1945                "record",
1946                "-R",
1947                "-m", "1024",
1948                "-c", "1",
1949        };
1950
1951        const char * const sc_args[] = { "-e", };
1952        unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1953        const char * const majpf_args[] = { "-e", "major-faults" };
1954        unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1955        const char * const minpf_args[] = { "-e", "minor-faults" };
1956        unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1957
1958        /* +1 is for the event string below */
1959        rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1960                majpf_args_nr + minpf_args_nr + argc;
1961        rec_argv = calloc(rec_argc + 1, sizeof(char *));
1962
1963        if (rec_argv == NULL)
1964                return -ENOMEM;
1965
1966        j = 0;
1967        for (i = 0; i < ARRAY_SIZE(record_args); i++)
1968                rec_argv[j++] = record_args[i];
1969
1970        if (trace->trace_syscalls) {
1971                for (i = 0; i < sc_args_nr; i++)
1972                        rec_argv[j++] = sc_args[i];
1973
1974                /* event string may be different for older kernels - e.g., RHEL6 */
1975                if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1976                        rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1977                else if (is_valid_tracepoint("syscalls:sys_enter"))
1978                        rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
1979                else {
1980                        pr_err("Neither raw_syscalls nor syscalls events exist.\n");
1981                        return -1;
1982                }
1983        }
1984
1985        if (trace->trace_pgfaults & TRACE_PFMAJ)
1986                for (i = 0; i < majpf_args_nr; i++)
1987                        rec_argv[j++] = majpf_args[i];
1988
1989        if (trace->trace_pgfaults & TRACE_PFMIN)
1990                for (i = 0; i < minpf_args_nr; i++)
1991                        rec_argv[j++] = minpf_args[i];
1992
1993        for (i = 0; i < (unsigned int)argc; i++)
1994                rec_argv[j++] = argv[i];
1995
1996        return cmd_record(j, rec_argv, NULL);
1997}
1998
1999static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2000
2001static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2002{
2003        struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2004        if (evsel == NULL)
2005                return;
2006
2007        if (perf_evsel__field(evsel, "pathname") == NULL) {
2008                perf_evsel__delete(evsel);
2009                return;
2010        }
2011
2012        evsel->handler = trace__vfs_getname;
2013        perf_evlist__add(evlist, evsel);
2014}
2015
2016static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2017                                    u64 config)
2018{
2019        struct perf_evsel *evsel;
2020        struct perf_event_attr attr = {
2021                .type = PERF_TYPE_SOFTWARE,
2022                .mmap_data = 1,
2023        };
2024
2025        attr.config = config;
2026        attr.sample_period = 1;
2027
2028        event_attr_init(&attr);
2029
2030        evsel = perf_evsel__new(&attr);
2031        if (!evsel)
2032                return -ENOMEM;
2033
2034        evsel->handler = trace__pgfault;
2035        perf_evlist__add(evlist, evsel);
2036
2037        return 0;
2038}
2039
2040static int trace__run(struct trace *trace, int argc, const char **argv)
2041{
2042        struct perf_evlist *evlist = perf_evlist__new();
2043        struct perf_evsel *evsel;
2044        int err = -1, i;
2045        unsigned long before;
2046        const bool forks = argc > 0;
2047        bool draining = false;
2048
2049        trace->live = true;
2050
2051        if (evlist == NULL) {
2052                fprintf(trace->output, "Not enough memory to run!\n");
2053                goto out;
2054        }
2055
2056        if (trace->trace_syscalls &&
2057            perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2058                                           trace__sys_exit))
2059                goto out_error_raw_syscalls;
2060
2061        if (trace->trace_syscalls)
2062                perf_evlist__add_vfs_getname(evlist);
2063
2064        if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2065            perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2066                goto out_error_mem;
2067        }
2068
2069        if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2070            perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2071                goto out_error_mem;
2072
2073        if (trace->sched &&
2074            perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2075                                   trace__sched_stat_runtime))
2076                goto out_error_sched_stat_runtime;
2077
2078        err = perf_evlist__create_maps(evlist, &trace->opts.target);
2079        if (err < 0) {
2080                fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2081                goto out_delete_evlist;
2082        }
2083
2084        err = trace__symbols_init(trace, evlist);
2085        if (err < 0) {
2086                fprintf(trace->output, "Problems initializing symbol libraries!\n");
2087                goto out_delete_evlist;
2088        }
2089
2090        perf_evlist__config(evlist, &trace->opts);
2091
2092        signal(SIGCHLD, sig_handler);
2093        signal(SIGINT, sig_handler);
2094
2095        if (forks) {
2096                err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2097                                                    argv, false, NULL);
2098                if (err < 0) {
2099                        fprintf(trace->output, "Couldn't run the workload!\n");
2100                        goto out_delete_evlist;
2101                }
2102        }
2103
2104        err = perf_evlist__open(evlist);
2105        if (err < 0)
2106                goto out_error_open;
2107
2108        err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2109        if (err < 0)
2110                goto out_error_mmap;
2111
2112        perf_evlist__enable(evlist);
2113
2114        if (forks)
2115                perf_evlist__start_workload(evlist);
2116
2117        trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
2118again:
2119        before = trace->nr_events;
2120
2121        for (i = 0; i < evlist->nr_mmaps; i++) {
2122                union perf_event *event;
2123
2124                while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2125                        const u32 type = event->header.type;
2126                        tracepoint_handler handler;
2127                        struct perf_sample sample;
2128
2129                        ++trace->nr_events;
2130
2131                        err = perf_evlist__parse_sample(evlist, event, &sample);
2132                        if (err) {
2133                                fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2134                                goto next_event;
2135                        }
2136
2137                        if (!trace->full_time && trace->base_time == 0)
2138                                trace->base_time = sample.time;
2139
2140                        if (type != PERF_RECORD_SAMPLE) {
2141                                trace__process_event(trace, trace->host, event, &sample);
2142                                continue;
2143                        }
2144
2145                        evsel = perf_evlist__id2evsel(evlist, sample.id);
2146                        if (evsel == NULL) {
2147                                fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
2148                                goto next_event;
2149                        }
2150
2151                        if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2152                            sample.raw_data == NULL) {
2153                                fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2154                                       perf_evsel__name(evsel), sample.tid,
2155                                       sample.cpu, sample.raw_size);
2156                                goto next_event;
2157                        }
2158
2159                        handler = evsel->handler;
2160                        handler(trace, evsel, event, &sample);
2161next_event:
2162                        perf_evlist__mmap_consume(evlist, i);
2163
2164                        if (interrupted)
2165                                goto out_disable;
2166                }
2167        }
2168
2169        if (trace->nr_events == before) {
2170                int timeout = done ? 100 : -1;
2171
2172                if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2173                        if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2174                                draining = true;
2175
2176                        goto again;
2177                }
2178        } else {
2179                goto again;
2180        }
2181
2182out_disable:
2183        perf_evlist__disable(evlist);
2184
2185        if (!err) {
2186                if (trace->summary)
2187                        trace__fprintf_thread_summary(trace, trace->output);
2188
2189                if (trace->show_tool_stats) {
2190                        fprintf(trace->output, "Stats:\n "
2191                                               " vfs_getname : %" PRIu64 "\n"
2192                                               " proc_getname: %" PRIu64 "\n",
2193                                trace->stats.vfs_getname,
2194                                trace->stats.proc_getname);
2195                }
2196        }
2197
2198out_delete_evlist:
2199        perf_evlist__delete(evlist);
2200out:
2201        trace->live = false;
2202        return err;
2203{
2204        char errbuf[BUFSIZ];
2205
2206out_error_sched_stat_runtime:
2207        debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2208        goto out_error;
2209
2210out_error_raw_syscalls:
2211        debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2212        goto out_error;
2213
2214out_error_mmap:
2215        perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2216        goto out_error;
2217
2218out_error_open:
2219        perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2220
2221out_error:
2222        fprintf(trace->output, "%s\n", errbuf);
2223        goto out_delete_evlist;
2224}
2225out_error_mem:
2226        fprintf(trace->output, "Not enough memory to run!\n");
2227        goto out_delete_evlist;
2228}
2229
2230static int trace__replay(struct trace *trace)
2231{
2232        const struct perf_evsel_str_handler handlers[] = {
2233                { "probe:vfs_getname",       trace__vfs_getname, },
2234        };
2235        struct perf_data_file file = {
2236                .path  = input_name,
2237                .mode  = PERF_DATA_MODE_READ,
2238        };
2239        struct perf_session *session;
2240        struct perf_evsel *evsel;
2241        int err = -1;
2242
2243        trace->tool.sample        = trace__process_sample;
2244        trace->tool.mmap          = perf_event__process_mmap;
2245        trace->tool.mmap2         = perf_event__process_mmap2;
2246        trace->tool.comm          = perf_event__process_comm;
2247        trace->tool.exit          = perf_event__process_exit;
2248        trace->tool.fork          = perf_event__process_fork;
2249        trace->tool.attr          = perf_event__process_attr;
2250        trace->tool.tracing_data = perf_event__process_tracing_data;
2251        trace->tool.build_id      = perf_event__process_build_id;
2252
2253        trace->tool.ordered_events = true;
2254        trace->tool.ordering_requires_timestamps = true;
2255
2256        /* add tid to output */
2257        trace->multiple_threads = true;
2258
2259        session = perf_session__new(&file, false, &trace->tool);
2260        if (session == NULL)
2261                return -1;
2262
2263        if (symbol__init(&session->header.env) < 0)
2264                goto out;
2265
2266        trace->host = &session->machines.host;
2267
2268        err = perf_session__set_tracepoints_handlers(session, handlers);
2269        if (err)
2270                goto out;
2271
2272        evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2273                                                     "raw_syscalls:sys_enter");
2274        /* older kernels have syscalls tp versus raw_syscalls */
2275        if (evsel == NULL)
2276                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2277                                                             "syscalls:sys_enter");
2278
2279        if (evsel &&
2280            (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2281            perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2282                pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2283                goto out;
2284        }
2285
2286        evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2287                                                     "raw_syscalls:sys_exit");
2288        if (evsel == NULL)
2289                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2290                                                             "syscalls:sys_exit");
2291        if (evsel &&
2292            (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2293            perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2294                pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2295                goto out;
2296        }
2297
2298        evlist__for_each(session->evlist, evsel) {
2299                if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2300                    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2301                     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2302                     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2303                        evsel->handler = trace__pgfault;
2304        }
2305
2306        err = parse_target_str(trace);
2307        if (err != 0)
2308                goto out;
2309
2310        setup_pager();
2311
2312        err = perf_session__process_events(session, &trace->tool);
2313        if (err)
2314                pr_err("Failed to process events, error %d", err);
2315
2316        else if (trace->summary)
2317                trace__fprintf_thread_summary(trace, trace->output);
2318
2319out:
2320        perf_session__delete(session);
2321
2322        return err;
2323}
2324
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        return fprintf(fp, "\n Summary of events:\n\n");
}
2333
2334static size_t thread__dump_stats(struct thread_trace *ttrace,
2335                                 struct trace *trace, FILE *fp)
2336{
2337        struct stats *stats;
2338        size_t printed = 0;
2339        struct syscall *sc;
2340        struct int_node *inode = intlist__first(ttrace->syscall_stats);
2341
2342        if (inode == NULL)
2343                return 0;
2344
2345        printed += fprintf(fp, "\n");
2346
2347        printed += fprintf(fp, "   syscall            calls      min       avg       max      stddev\n");
2348        printed += fprintf(fp, "                               (msec)    (msec)    (msec)        (%%)\n");
2349        printed += fprintf(fp, "   --------------- -------- --------- --------- ---------     ------\n");
2350
2351        /* each int_node is a syscall */
2352        while (inode) {
2353                stats = inode->priv;
2354                if (stats) {
2355                        double min = (double)(stats->min) / NSEC_PER_MSEC;
2356                        double max = (double)(stats->max) / NSEC_PER_MSEC;
2357                        double avg = avg_stats(stats);
2358                        double pct;
2359                        u64 n = (u64) stats->n;
2360
2361                        pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2362                        avg /= NSEC_PER_MSEC;
2363
2364                        sc = &trace->syscalls.table[inode->i];
2365                        printed += fprintf(fp, "   %-15s", sc->name);
2366                        printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2367                                           n, min, avg);
2368                        printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2369                }
2370
2371                inode = intlist__next(inode);
2372        }
2373
2374        printed += fprintf(fp, "\n\n");
2375
2376        return printed;
2377}
2378
/*
 * Context handed to the per-thread summary callback through its opaque
 * pointer argument.
 */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* tracer state the summary is about */
        size_t printed;         /* running count of characters printed */
};
2385
2386static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2387{
2388        struct summary_data *data = priv;
2389        FILE *fp = data->fp;
2390        size_t printed = data->printed;
2391        struct trace *trace = data->trace;
2392        struct thread_trace *ttrace = thread__priv(thread);
2393        double ratio;
2394
2395        if (ttrace == NULL)
2396                return 0;
2397
2398        ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2399
2400        printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2401        printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2402        printed += fprintf(fp, "%.1f%%", ratio);
2403        if (ttrace->pfmaj)
2404                printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2405        if (ttrace->pfmin)
2406                printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2407        printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2408        printed += thread__dump_stats(ttrace, trace, fp);
2409
2410        data->printed += printed;
2411
2412        return 0;
2413}
2414
2415static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2416{
2417        struct summary_data data = {
2418                .fp = fp,
2419                .trace = trace
2420        };
2421        data.printed = trace__fprintf_threads_header(fp);
2422
2423        machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2424
2425        return data.printed;
2426}
2427
2428static int trace__set_duration(const struct option *opt, const char *str,
2429                               int unset __maybe_unused)
2430{
2431        struct trace *trace = opt->value;
2432
2433        trace->duration_filter = atof(str);
2434        return 0;
2435}
2436
2437static int trace__open_output(struct trace *trace, const char *filename)
2438{
2439        struct stat st;
2440
2441        if (!stat(filename, &st) && st.st_size) {
2442                char oldname[PATH_MAX];
2443
2444                scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2445                unlink(oldname);
2446                rename(filename, oldname);
2447        }
2448
2449        trace->output = fopen(filename, "w");
2450
2451        return trace->output == NULL ? -errno : 0;
2452}
2453
2454static int parse_pagefaults(const struct option *opt, const char *str,
2455                            int unset __maybe_unused)
2456{
2457        int *trace_pgfaults = opt->value;
2458
2459        if (strcmp(str, "all") == 0)
2460                *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2461        else if (strcmp(str, "maj") == 0)
2462                *trace_pgfaults |= TRACE_PFMAJ;
2463        else if (strcmp(str, "min") == 0)
2464                *trace_pgfaults |= TRACE_PFMIN;
2465        else
2466                return -1;
2467
2468        return 0;
2469}
2470
2471int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2472{
2473        const char * const trace_usage[] = {
2474                "perf trace [<options>] [<command>]",
2475                "perf trace [<options>] -- <command> [<options>]",
2476                "perf trace record [<options>] [<command>]",
2477                "perf trace record [<options>] -- <command> [<options>]",
2478                NULL
2479        };
2480        struct trace trace = {
2481                .audit = {
2482                        .machine = audit_detect_machine(),
2483                        .open_id = audit_name_to_syscall("open", trace.audit.machine),
2484                },
2485                .syscalls = {
2486                        . max = -1,
2487                },
2488                .opts = {
2489                        .target = {
2490                                .uid       = UINT_MAX,
2491                                .uses_mmap = true,
2492                        },
2493                        .user_freq     = UINT_MAX,
2494                        .user_interval = ULLONG_MAX,
2495                        .no_buffering  = true,
2496                        .mmap_pages    = UINT_MAX,
2497                },
2498                .output = stdout,
2499                .show_comm = true,
2500                .trace_syscalls = true,
2501        };
2502        const char *output_name = NULL;
2503        const char *ev_qualifier_str = NULL;
2504        const struct option trace_options[] = {
2505        OPT_BOOLEAN(0, "comm", &trace.show_comm,
2506                    "show the thread COMM next to its id"),
2507        OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2508        OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2509                    "list of events to trace"),
2510        OPT_STRING('o', "output", &output_name, "file", "output file name"),
2511        OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2512        OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2513                    "trace events on existing process id"),
2514        OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2515                    "trace events on existing thread id"),
2516        OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2517                    "system-wide collection from all CPUs"),
2518        OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2519                    "list of cpus to monitor"),
2520        OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2521                    "child tasks do not inherit counters"),
2522        OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2523                     "number of mmap data pages",
2524                     perf_evlist__parse_mmap_pages),
2525        OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2526                   "user to profile"),
2527        OPT_CALLBACK(0, "duration", &trace, "float",
2528                     "show only events with duration > N.M ms",
2529                     trace__set_duration),
2530        OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2531        OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2532        OPT_BOOLEAN('T', "time", &trace.full_time,
2533                    "Show full timestamp, not time relative to first start"),
2534        OPT_BOOLEAN('s', "summary", &trace.summary_only,
2535                    "Show only syscall summary with statistics"),
2536        OPT_BOOLEAN('S', "with-summary", &trace.summary,
2537                    "Show all syscalls and summary with statistics"),
2538        OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2539                     "Trace pagefaults", parse_pagefaults, "maj"),
2540        OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2541        OPT_END()
2542        };
2543        int err;
2544        char bf[BUFSIZ];
2545
2546        argc = parse_options(argc, argv, trace_options, trace_usage,
2547                             PARSE_OPT_STOP_AT_NON_OPTION);
2548
2549        if (trace.trace_pgfaults) {
2550                trace.opts.sample_address = true;
2551                trace.opts.sample_time = true;
2552        }
2553
2554        if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2555                return trace__record(&trace, argc-1, &argv[1]);
2556
2557        /* summary_only implies summary option, but don't overwrite summary if set */
2558        if (trace.summary_only)
2559                trace.summary = trace.summary_only;
2560
2561        if (!trace.trace_syscalls && !trace.trace_pgfaults) {
2562                pr_err("Please specify something to trace.\n");
2563                return -1;
2564        }
2565
2566        if (output_name != NULL) {
2567                err = trace__open_output(&trace, output_name);
2568                if (err < 0) {
2569                        perror("failed to create output file");
2570                        goto out;
2571                }
2572        }
2573
2574        if (ev_qualifier_str != NULL) {
2575                const char *s = ev_qualifier_str;
2576
2577                trace.not_ev_qualifier = *s == '!';
2578                if (trace.not_ev_qualifier)
2579                        ++s;
2580                trace.ev_qualifier = strlist__new(true, s);
2581                if (trace.ev_qualifier == NULL) {
2582                        fputs("Not enough memory to parse event qualifier",
2583                              trace.output);
2584                        err = -ENOMEM;
2585                        goto out_close;
2586                }
2587        }
2588
2589        err = target__validate(&trace.opts.target);
2590        if (err) {
2591                target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2592                fprintf(trace.output, "%s", bf);
2593                goto out_close;
2594        }
2595
2596        err = target__parse_uid(&trace.opts.target);
2597        if (err) {
2598                target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2599                fprintf(trace.output, "%s", bf);
2600                goto out_close;
2601        }
2602
2603        if (!argc && target__none(&trace.opts.target))
2604                trace.opts.target.system_wide = true;
2605
2606        if (input_name)
2607                err = trace__replay(&trace);
2608        else
2609                err = trace__run(&trace, argc, argv);
2610
2611out_close:
2612        if (output_name != NULL)
2613                fclose(trace.output);
2614out:
2615        return err;
2616}
2617