linux/tools/perf/builtin-trace.c
<<
>>
Prefs
   1/*
   2 * builtin-trace.c
   3 *
   4 * Builtin 'trace' command:
   5 *
   6 * Display a continuously updated trace of any workload, CPU, specific PID,
   7 * system wide, etc.  Default format is loosely strace like, but any other
   8 * event may be specified using --event.
   9 *
  10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
  11 *
  12 * Initially based on the 'trace' prototype by Thomas Gleixner:
  13 *
  14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
  15 *
  16 * Released under the GPL v2. (and only v2, not any later version)
  17 */
  18
  19#include <traceevent/event-parse.h>
  20#include <api/fs/tracing_path.h>
  21#include "builtin.h"
  22#include "util/color.h"
  23#include "util/debug.h"
  24#include "util/evlist.h"
  25#include <subcmd/exec-cmd.h>
  26#include "util/machine.h"
  27#include "util/session.h"
  28#include "util/thread.h"
  29#include <subcmd/parse-options.h>
  30#include "util/strlist.h"
  31#include "util/intlist.h"
  32#include "util/thread_map.h"
  33#include "util/stat.h"
  34#include "trace-event.h"
  35#include "util/parse-events.h"
  36
  37#include <libaudit.h>
  38#include <stdlib.h>
  39#include <sys/mman.h>
  40#include <linux/futex.h>
  41#include <linux/err.h>
  42
  43/* For older distros: */
  44#ifndef MAP_STACK
  45# define MAP_STACK              0x20000
  46#endif
  47
  48#ifndef MADV_HWPOISON
  49# define MADV_HWPOISON          100
  50
  51#endif
  52
  53#ifndef MADV_MERGEABLE
  54# define MADV_MERGEABLE         12
  55#endif
  56
  57#ifndef MADV_UNMERGEABLE
  58# define MADV_UNMERGEABLE       13
  59#endif
  60
  61#ifndef EFD_SEMAPHORE
  62# define EFD_SEMAPHORE          1
  63#endif
  64
  65#ifndef EFD_NONBLOCK
  66# define EFD_NONBLOCK           00004000
  67#endif
  68
  69#ifndef EFD_CLOEXEC
  70# define EFD_CLOEXEC            02000000
  71#endif
  72
  73#ifndef O_CLOEXEC
  74# define O_CLOEXEC              02000000
  75#endif
  76
  77#ifndef SOCK_DCCP
  78# define SOCK_DCCP              6
  79#endif
  80
  81#ifndef SOCK_CLOEXEC
  82# define SOCK_CLOEXEC           02000000
  83#endif
  84
  85#ifndef SOCK_NONBLOCK
  86# define SOCK_NONBLOCK          00004000
  87#endif
  88
  89#ifndef MSG_CMSG_CLOEXEC
  90# define MSG_CMSG_CLOEXEC       0x40000000
  91#endif
  92
  93#ifndef PERF_FLAG_FD_NO_GROUP
  94# define PERF_FLAG_FD_NO_GROUP          (1UL << 0)
  95#endif
  96
  97#ifndef PERF_FLAG_FD_OUTPUT
  98# define PERF_FLAG_FD_OUTPUT            (1UL << 1)
  99#endif
 100
 101#ifndef PERF_FLAG_PID_CGROUP
 102# define PERF_FLAG_PID_CGROUP           (1UL << 2) /* pid=cgroup id, per-cpu mode only */
 103#endif
 104
 105#ifndef PERF_FLAG_FD_CLOEXEC
 106# define PERF_FLAG_FD_CLOEXEC           (1UL << 3) /* O_CLOEXEC */
 107#endif
 108
 109
/*
 * Accessor for one field inside a tracepoint sample's raw payload.
 *
 * offset is added to sample->raw_data by the reader functions in this file.
 * Exactly one of the two callbacks is meaningful for a given field (they
 * share storage): 'integer' returns the field widened to u64, 'pointer'
 * returns the address of the field inside the payload.
 */
  110struct tp_field {
  111        int offset;
  112        union {
  113                u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
  114                void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
  115        };
  116};
 117
/*
 * TP_UINT_FIELD(bits) defines tp_field__u<bits>(), which copies an unsigned
 * integer of that width out of sample->raw_data at field->offset and returns
 * it as a u64.  memcpy() is used, so the field does not have to be aligned
 * inside the payload.  No byte swapping is done here.
 */
  118#define TP_UINT_FIELD(bits) \
  119static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
  120{ \
  121        u##bits value; \
  122        memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
  123        return value;  \
  124}
  125
/* Instantiate the native-endian readers for 8, 16, 32 and 64 bit fields. */
  126TP_UINT_FIELD(8);
  127TP_UINT_FIELD(16);
  128TP_UINT_FIELD(32);
  129TP_UINT_FIELD(64);
 130
/*
 * TP_UINT_FIELD__SWAPPED(bits) defines tp_field__swapped_u<bits>(): same as
 * the plain reader, but the value goes through bswap_<bits>() before being
 * returned.  tp_field__init_uint() selects these when needs_swap is set.
 */
  131#define TP_UINT_FIELD__SWAPPED(bits) \
  132static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
  133{ \
  134        u##bits value; \
  135        memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
  136        return bswap_##bits(value);\
  137}
  138
/* No 8-bit variant: a single byte has nothing to swap. */
  139TP_UINT_FIELD__SWAPPED(16);
  140TP_UINT_FIELD__SWAPPED(32);
  141TP_UINT_FIELD__SWAPPED(64);
 142
 143static int tp_field__init_uint(struct tp_field *field,
 144                               struct format_field *format_field,
 145                               bool needs_swap)
 146{
 147        field->offset = format_field->offset;
 148
 149        switch (format_field->size) {
 150        case 1:
 151                field->integer = tp_field__u8;
 152                break;
 153        case 2:
 154                field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
 155                break;
 156        case 4:
 157                field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
 158                break;
 159        case 8:
 160                field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
 161                break;
 162        default:
 163                return -1;
 164        }
 165
 166        return 0;
 167}
 168
 169static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
 170{
 171        return sample->raw_data + field->offset;
 172}
 173
 174static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
 175{
 176        field->offset = format_field->offset;
 177        field->pointer = tp_field__ptr;
 178        return 0;
 179}
 180
/*
 * Per-evsel private data (stored in evsel->priv) for the syscall
 * tracepoints: the accessor for the syscall id plus one more accessor.
 * 'args' and 'ret' share storage, so a given evsel uses only one of them.
 * NOTE(review): presumably 'args' is for the enter event and 'ret' for the
 * exit event - confirm against the code that initializes them.
 */
  181struct syscall_tp {
  182        struct tp_field id;
  183        union {
  184                struct tp_field args, ret;
  185        };
  186};
 187
 188static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 189                                          struct tp_field *field,
 190                                          const char *name)
 191{
 192        struct format_field *format_field = perf_evsel__field(evsel, name);
 193
 194        if (format_field == NULL)
 195                return -1;
 196
 197        return tp_field__init_uint(field, format_field, evsel->needs_swap);
 198}
 199
 200#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
 201        ({ struct syscall_tp *sc = evsel->priv;\
 202           perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 203
 204static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 205                                         struct tp_field *field,
 206                                         const char *name)
 207{
 208        struct format_field *format_field = perf_evsel__field(evsel, name);
 209
 210        if (format_field == NULL)
 211                return -1;
 212
 213        return tp_field__init_ptr(field, format_field);
 214}
 215
 216#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
 217        ({ struct syscall_tp *sc = evsel->priv;\
 218           perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 219
 220static void perf_evsel__delete_priv(struct perf_evsel *evsel)
 221{
 222        zfree(&evsel->priv);
 223        perf_evsel__delete(evsel);
 224}
 225
 226static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
 227{
 228        evsel->priv = malloc(sizeof(struct syscall_tp));
 229        if (evsel->priv != NULL) {
 230                if (perf_evsel__init_sc_tp_uint_field(evsel, id))
 231                        goto out_delete;
 232
 233                evsel->handler = handler;
 234                return 0;
 235        }
 236
 237        return -ENOMEM;
 238
 239out_delete:
 240        zfree(&evsel->priv);
 241        return -ENOENT;
 242}
 243
 244static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
 245{
 246        struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 247
 248        /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 249        if (IS_ERR(evsel))
 250                evsel = perf_evsel__newtp("syscalls", direction);
 251
 252        if (IS_ERR(evsel))
 253                return NULL;
 254
 255        if (perf_evsel__init_syscall_tp(evsel, handler))
 256                goto out_delete;
 257
 258        return evsel;
 259
 260out_delete:
 261        perf_evsel__delete_priv(evsel);
 262        return NULL;
 263}
 264
 265#define perf_evsel__sc_tp_uint(evsel, name, sample) \
 266        ({ struct syscall_tp *fields = evsel->priv; \
 267           fields->name.integer(&fields->name, sample); })
 268
 269#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
 270        ({ struct syscall_tp *fields = evsel->priv; \
 271           fields->name.pointer(&fields->name, sample); })
 272
/*
 * One syscall argument as handed to the syscall_arg__scnprintf_*()
 * formatters in this file.
 *
 *  val:    raw argument value.
 *  thread, trace: context pointers; not dereferenced by the formatters
 *          visible in this part of the file.
 *  parm:   formatter-specific parameter; the strarray formatter treats it
 *          as a struct strarray *.
 *  idx:    position of this argument (the open-flags formatter uses
 *          idx + 1 to address the following argument).
 *  mask:   one bit per argument position; formatters set bits here
 *          (e.g. "Mask the mode parm" in the open-flags formatter).
 *          NOTE(review): presumably the caller skips arguments whose bit
 *          is set - confirm against the printing loop.
 */
  273struct syscall_arg {
  274        unsigned long val;
  275        struct thread *thread;
  276        struct trace  *trace;
  277        void          *parm;
  278        u8            idx;
  279        u8            mask;
  280};
 281
/*
 * Table mapping integer values to names.  A value v is looked up as
 * entries[v - offset]; nr_entries bounds the valid index range.
 */
  282struct strarray {
  283        int         offset;
  284        int         nr_entries;
  285        const char **entries;
  286};
  287
/*
 * Define strarray__<array> wrapping the string array <array>, with the
 * first entry corresponding to value 0 (offset is left zero-initialized).
 */
  288#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
  289        .nr_entries = ARRAY_SIZE(array), \
  290        .entries = array, \
  291}
  292
/* Same, but the first entry corresponds to value <off> instead of 0. */
  293#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
  294        .offset     = off, \
  295        .nr_entries = ARRAY_SIZE(array), \
  296        .entries = array, \
  297}
 298
 299static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
 300                                                const char *intfmt,
 301                                                struct syscall_arg *arg)
 302{
 303        struct strarray *sa = arg->parm;
 304        int idx = arg->val - sa->offset;
 305
 306        if (idx < 0 || idx >= sa->nr_entries)
 307                return scnprintf(bf, size, intfmt, arg->val);
 308
 309        return scnprintf(bf, size, "%s", sa->entries[idx]);
 310}
 311
 312static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
 313                                              struct syscall_arg *arg)
 314{
 315        return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
 316}
 317
 318#define SCA_STRARRAY syscall_arg__scnprintf_strarray
 319
 320#if defined(__i386__) || defined(__x86_64__)
 321/*
 322 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 323 *        gets rewritten to support all arches.
 324 */
 325static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
 326                                                 struct syscall_arg *arg)
 327{
 328        return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
 329}
 330
 331#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
 332#endif /* defined(__i386__) || defined(__x86_64__) */
 333
 334static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
 335                                        struct syscall_arg *arg);
 336
 337#define SCA_FD syscall_arg__scnprintf_fd
 338
 339static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
 340                                           struct syscall_arg *arg)
 341{
 342        int fd = arg->val;
 343
 344        if (fd == AT_FDCWD)
 345                return scnprintf(bf, size, "CWD");
 346
 347        return syscall_arg__scnprintf_fd(bf, size, arg);
 348}
 349
 350#define SCA_FDAT syscall_arg__scnprintf_fd_at
 351
 352static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
 353                                              struct syscall_arg *arg);
 354
 355#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
 356
 357static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
 358                                         struct syscall_arg *arg)
 359{
 360        return scnprintf(bf, size, "%#lx", arg->val);
 361}
 362
 363#define SCA_HEX syscall_arg__scnprintf_hex
 364
 365static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
 366                                         struct syscall_arg *arg)
 367{
 368        return scnprintf(bf, size, "%d", arg->val);
 369}
 370
 371#define SCA_INT syscall_arg__scnprintf_int
 372
 373static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 374                                               struct syscall_arg *arg)
 375{
 376        int printed = 0, prot = arg->val;
 377
 378        if (prot == PROT_NONE)
 379                return scnprintf(bf, size, "NONE");
 380#define P_MMAP_PROT(n) \
 381        if (prot & PROT_##n) { \
 382                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 383                prot &= ~PROT_##n; \
 384        }
 385
 386        P_MMAP_PROT(EXEC);
 387        P_MMAP_PROT(READ);
 388        P_MMAP_PROT(WRITE);
 389#ifdef PROT_SEM
 390        P_MMAP_PROT(SEM);
 391#endif
 392        P_MMAP_PROT(GROWSDOWN);
 393        P_MMAP_PROT(GROWSUP);
 394#undef P_MMAP_PROT
 395
 396        if (prot)
 397                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
 398
 399        return printed;
 400}
 401
 402#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 403
 404static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 405                                                struct syscall_arg *arg)
 406{
 407        int printed = 0, flags = arg->val;
 408
 409#define P_MMAP_FLAG(n) \
 410        if (flags & MAP_##n) { \
 411                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 412                flags &= ~MAP_##n; \
 413        }
 414
 415        P_MMAP_FLAG(SHARED);
 416        P_MMAP_FLAG(PRIVATE);
 417#ifdef MAP_32BIT
 418        P_MMAP_FLAG(32BIT);
 419#endif
 420        P_MMAP_FLAG(ANONYMOUS);
 421        P_MMAP_FLAG(DENYWRITE);
 422        P_MMAP_FLAG(EXECUTABLE);
 423        P_MMAP_FLAG(FILE);
 424        P_MMAP_FLAG(FIXED);
 425        P_MMAP_FLAG(GROWSDOWN);
 426#ifdef MAP_HUGETLB
 427        P_MMAP_FLAG(HUGETLB);
 428#endif
 429        P_MMAP_FLAG(LOCKED);
 430        P_MMAP_FLAG(NONBLOCK);
 431        P_MMAP_FLAG(NORESERVE);
 432        P_MMAP_FLAG(POPULATE);
 433        P_MMAP_FLAG(STACK);
 434#ifdef MAP_UNINITIALIZED
 435        P_MMAP_FLAG(UNINITIALIZED);
 436#endif
 437#undef P_MMAP_FLAG
 438
 439        if (flags)
 440                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 441
 442        return printed;
 443}
 444
 445#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
 446
 447static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
 448                                                  struct syscall_arg *arg)
 449{
 450        int printed = 0, flags = arg->val;
 451
 452#define P_MREMAP_FLAG(n) \
 453        if (flags & MREMAP_##n) { \
 454                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 455                flags &= ~MREMAP_##n; \
 456        }
 457
 458        P_MREMAP_FLAG(MAYMOVE);
 459#ifdef MREMAP_FIXED
 460        P_MREMAP_FLAG(FIXED);
 461#endif
 462#undef P_MREMAP_FLAG
 463
 464        if (flags)
 465                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 466
 467        return printed;
 468}
 469
 470#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
 471
 472static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
 473                                                      struct syscall_arg *arg)
 474{
 475        int behavior = arg->val;
 476
 477        switch (behavior) {
 478#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
 479        P_MADV_BHV(NORMAL);
 480        P_MADV_BHV(RANDOM);
 481        P_MADV_BHV(SEQUENTIAL);
 482        P_MADV_BHV(WILLNEED);
 483        P_MADV_BHV(DONTNEED);
 484        P_MADV_BHV(REMOVE);
 485        P_MADV_BHV(DONTFORK);
 486        P_MADV_BHV(DOFORK);
 487        P_MADV_BHV(HWPOISON);
 488#ifdef MADV_SOFT_OFFLINE
 489        P_MADV_BHV(SOFT_OFFLINE);
 490#endif
 491        P_MADV_BHV(MERGEABLE);
 492        P_MADV_BHV(UNMERGEABLE);
 493#ifdef MADV_HUGEPAGE
 494        P_MADV_BHV(HUGEPAGE);
 495#endif
 496#ifdef MADV_NOHUGEPAGE
 497        P_MADV_BHV(NOHUGEPAGE);
 498#endif
 499#ifdef MADV_DONTDUMP
 500        P_MADV_BHV(DONTDUMP);
 501#endif
 502#ifdef MADV_DODUMP
 503        P_MADV_BHV(DODUMP);
 504#endif
 505#undef P_MADV_PHV
 506        default: break;
 507        }
 508
 509        return scnprintf(bf, size, "%#x", behavior);
 510}
 511
 512#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
 513
 514static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
 515                                           struct syscall_arg *arg)
 516{
 517        int printed = 0, op = arg->val;
 518
 519        if (op == 0)
 520                return scnprintf(bf, size, "NONE");
 521#define P_CMD(cmd) \
 522        if ((op & LOCK_##cmd) == LOCK_##cmd) { \
 523                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
 524                op &= ~LOCK_##cmd; \
 525        }
 526
 527        P_CMD(SH);
 528        P_CMD(EX);
 529        P_CMD(NB);
 530        P_CMD(UN);
 531        P_CMD(MAND);
 532        P_CMD(RW);
 533        P_CMD(READ);
 534        P_CMD(WRITE);
 535#undef P_OP
 536
 537        if (op)
 538                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
 539
 540        return printed;
 541}
 542
 543#define SCA_FLOCK syscall_arg__scnprintf_flock
 544
 545static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
 546{
 547        enum syscall_futex_args {
 548                SCF_UADDR   = (1 << 0),
 549                SCF_OP      = (1 << 1),
 550                SCF_VAL     = (1 << 2),
 551                SCF_TIMEOUT = (1 << 3),
 552                SCF_UADDR2  = (1 << 4),
 553                SCF_VAL3    = (1 << 5),
 554        };
 555        int op = arg->val;
 556        int cmd = op & FUTEX_CMD_MASK;
 557        size_t printed = 0;
 558
 559        switch (cmd) {
 560#define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
 561        P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
 562        P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 563        P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 564        P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
 565        P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
 566        P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
 567        P_FUTEX_OP(WAKE_OP);                                                      break;
 568        P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 569        P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 570        P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
 571        P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
 572        P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
 573        P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
 574        default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
 575        }
 576
 577        if (op & FUTEX_PRIVATE_FLAG)
 578                printed += scnprintf(bf + printed, size - printed, "|PRIV");
 579
 580        if (op & FUTEX_CLOCK_REALTIME)
 581                printed += scnprintf(bf + printed, size - printed, "|CLKRT");
 582
 583        return printed;
 584}
 585
 586#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
 587
/*
 * Value-to-name tables.  Each string array is wrapped in a
 * strarray__<name> object; the array index, plus the table's offset when
 * one is given, is the numeric value the name stands for.
 * NOTE(review): which syscall argument each table decodes is inferred from
 * the table names only - confirm against the syscall_fmt entries.
 */
  588static const char *bpf_cmd[] = {
  589        "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
  590        "MAP_GET_NEXT_KEY", "PROG_LOAD",
  591};
  592static DEFINE_STRARRAY(bpf_cmd);
  593
/* Offset 1: value 1 maps to "ADD", 2 to "DEL", 3 to "MOD". */
  594static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
  595static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
  596
  597static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
  598static DEFINE_STRARRAY(itimers);
  599
  600static const char *keyctl_options[] = {
  601        "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
  602        "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
  603        "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
  604        "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
  605        "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
  606};
  607static DEFINE_STRARRAY(keyctl_options);
  608
/* "DATA" and "HOLE" are only present when the build headers define them. */
  609static const char *whences[] = { "SET", "CUR", "END",
  610#ifdef SEEK_DATA
  611"DATA",
  612#endif
  613#ifdef SEEK_HOLE
  614"HOLE",
  615#endif
  616};
  617static DEFINE_STRARRAY(whences);
  618
/* Note the later entries keep their "F_" prefix while the first ones do not. */
  619static const char *fcntl_cmds[] = {
  620        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
  621        "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
  622        "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
  623        "F_GETOWNER_UIDS",
  624};
  625static DEFINE_STRARRAY(fcntl_cmds);
  626
  627static const char *rlimit_resources[] = {
  628        "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
  629        "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
  630        "RTTIME",
  631};
  632static DEFINE_STRARRAY(rlimit_resources);
  633
  634static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
  635static DEFINE_STRARRAY(sighow);
  636
  637static const char *clockid[] = {
  638        "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
  639        "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
  640        "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
  641};
  642static DEFINE_STRARRAY(clockid);
  643
  644static const char *socket_families[] = {
  645        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
  646        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
  647        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
  648        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
  649        "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
  650        "ALG", "NFC", "VSOCK",
  651};
  652static DEFINE_STRARRAY(socket_families);
 653
 654#ifndef SOCK_TYPE_MASK
 655#define SOCK_TYPE_MASK 0xf
 656#endif
 657
 658static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
 659                                                      struct syscall_arg *arg)
 660{
 661        size_t printed;
 662        int type = arg->val,
 663            flags = type & ~SOCK_TYPE_MASK;
 664
 665        type &= SOCK_TYPE_MASK;
 666        /*
 667         * Can't use a strarray, MIPS may override for ABI reasons.
 668         */
 669        switch (type) {
 670#define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
 671        P_SK_TYPE(STREAM);
 672        P_SK_TYPE(DGRAM);
 673        P_SK_TYPE(RAW);
 674        P_SK_TYPE(RDM);
 675        P_SK_TYPE(SEQPACKET);
 676        P_SK_TYPE(DCCP);
 677        P_SK_TYPE(PACKET);
 678#undef P_SK_TYPE
 679        default:
 680                printed = scnprintf(bf, size, "%#x", type);
 681        }
 682
 683#define P_SK_FLAG(n) \
 684        if (flags & SOCK_##n) { \
 685                printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
 686                flags &= ~SOCK_##n; \
 687        }
 688
 689        P_SK_FLAG(CLOEXEC);
 690        P_SK_FLAG(NONBLOCK);
 691#undef P_SK_FLAG
 692
 693        if (flags)
 694                printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
 695
 696        return printed;
 697}
 698
 699#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
 700
 701#ifndef MSG_PROBE
 702#define MSG_PROBE            0x10
 703#endif
 704#ifndef MSG_WAITFORONE
 705#define MSG_WAITFORONE  0x10000
 706#endif
 707#ifndef MSG_SENDPAGE_NOTLAST
 708#define MSG_SENDPAGE_NOTLAST 0x20000
 709#endif
 710#ifndef MSG_FASTOPEN
 711#define MSG_FASTOPEN         0x20000000
 712#endif
 713
 714static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
 715                                               struct syscall_arg *arg)
 716{
 717        int printed = 0, flags = arg->val;
 718
 719        if (flags == 0)
 720                return scnprintf(bf, size, "NONE");
 721#define P_MSG_FLAG(n) \
 722        if (flags & MSG_##n) { \
 723                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 724                flags &= ~MSG_##n; \
 725        }
 726
 727        P_MSG_FLAG(OOB);
 728        P_MSG_FLAG(PEEK);
 729        P_MSG_FLAG(DONTROUTE);
 730        P_MSG_FLAG(TRYHARD);
 731        P_MSG_FLAG(CTRUNC);
 732        P_MSG_FLAG(PROBE);
 733        P_MSG_FLAG(TRUNC);
 734        P_MSG_FLAG(DONTWAIT);
 735        P_MSG_FLAG(EOR);
 736        P_MSG_FLAG(WAITALL);
 737        P_MSG_FLAG(FIN);
 738        P_MSG_FLAG(SYN);
 739        P_MSG_FLAG(CONFIRM);
 740        P_MSG_FLAG(RST);
 741        P_MSG_FLAG(ERRQUEUE);
 742        P_MSG_FLAG(NOSIGNAL);
 743        P_MSG_FLAG(MORE);
 744        P_MSG_FLAG(WAITFORONE);
 745        P_MSG_FLAG(SENDPAGE_NOTLAST);
 746        P_MSG_FLAG(FASTOPEN);
 747        P_MSG_FLAG(CMSG_CLOEXEC);
 748#undef P_MSG_FLAG
 749
 750        if (flags)
 751                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 752
 753        return printed;
 754}
 755
 756#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
 757
 758static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
 759                                                 struct syscall_arg *arg)
 760{
 761        size_t printed = 0;
 762        int mode = arg->val;
 763
 764        if (mode == F_OK) /* 0 */
 765                return scnprintf(bf, size, "F");
 766#define P_MODE(n) \
 767        if (mode & n##_OK) { \
 768                printed += scnprintf(bf + printed, size - printed, "%s", #n); \
 769                mode &= ~n##_OK; \
 770        }
 771
 772        P_MODE(R);
 773        P_MODE(W);
 774        P_MODE(X);
 775#undef P_MODE
 776
 777        if (mode)
 778                printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
 779
 780        return printed;
 781}
 782
 783#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
 784
 785static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
 786                                              struct syscall_arg *arg);
 787
 788#define SCA_FILENAME syscall_arg__scnprintf_filename
 789
 790static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
 791                                               struct syscall_arg *arg)
 792{
 793        int printed = 0, flags = arg->val;
 794
 795        if (!(flags & O_CREAT))
 796                arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
 797
 798        if (flags == 0)
 799                return scnprintf(bf, size, "RDONLY");
 800#define P_FLAG(n) \
 801        if (flags & O_##n) { \
 802                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 803                flags &= ~O_##n; \
 804        }
 805
 806        P_FLAG(APPEND);
 807        P_FLAG(ASYNC);
 808        P_FLAG(CLOEXEC);
 809        P_FLAG(CREAT);
 810        P_FLAG(DIRECT);
 811        P_FLAG(DIRECTORY);
 812        P_FLAG(EXCL);
 813        P_FLAG(LARGEFILE);
 814        P_FLAG(NOATIME);
 815        P_FLAG(NOCTTY);
 816#ifdef O_NONBLOCK
 817        P_FLAG(NONBLOCK);
 818#elif O_NDELAY
 819        P_FLAG(NDELAY);
 820#endif
 821#ifdef O_PATH
 822        P_FLAG(PATH);
 823#endif
 824        P_FLAG(RDWR);
 825#ifdef O_DSYNC
 826        if ((flags & O_SYNC) == O_SYNC)
 827                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
 828        else {
 829                P_FLAG(DSYNC);
 830        }
 831#else
 832        P_FLAG(SYNC);
 833#endif
 834        P_FLAG(TRUNC);
 835        P_FLAG(WRONLY);
 836#undef P_FLAG
 837
 838        if (flags)
 839                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 840
 841        return printed;
 842}
 843
 844#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
 845
 846static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
 847                                                struct syscall_arg *arg)
 848{
 849        int printed = 0, flags = arg->val;
 850
 851        if (flags == 0)
 852                return 0;
 853
 854#define P_FLAG(n) \
 855        if (flags & PERF_FLAG_##n) { \
 856                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 857                flags &= ~PERF_FLAG_##n; \
 858        }
 859
 860        P_FLAG(FD_NO_GROUP);
 861        P_FLAG(FD_OUTPUT);
 862        P_FLAG(PID_CGROUP);
 863        P_FLAG(FD_CLOEXEC);
 864#undef P_FLAG
 865
 866        if (flags)
 867                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 868
 869        return printed;
 870}
 871
 872#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
 873
 874static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
 875                                                   struct syscall_arg *arg)
 876{
 877        int printed = 0, flags = arg->val;
 878
 879        if (flags == 0)
 880                return scnprintf(bf, size, "NONE");
 881#define P_FLAG(n) \
 882        if (flags & EFD_##n) { \
 883                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 884                flags &= ~EFD_##n; \
 885        }
 886
 887        P_FLAG(SEMAPHORE);
 888        P_FLAG(CLOEXEC);
 889        P_FLAG(NONBLOCK);
 890#undef P_FLAG
 891
 892        if (flags)
 893                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 894
 895        return printed;
 896}
 897
 898#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
 899
 900static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
 901                                                struct syscall_arg *arg)
 902{
 903        int printed = 0, flags = arg->val;
 904
 905#define P_FLAG(n) \
 906        if (flags & O_##n) { \
 907                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 908                flags &= ~O_##n; \
 909        }
 910
 911        P_FLAG(CLOEXEC);
 912        P_FLAG(NONBLOCK);
 913#undef P_FLAG
 914
 915        if (flags)
 916                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 917
 918        return printed;
 919}
 920
 921#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 922
 923static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
 924{
 925        int sig = arg->val;
 926
 927        switch (sig) {
 928#define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
 929        P_SIGNUM(HUP);
 930        P_SIGNUM(INT);
 931        P_SIGNUM(QUIT);
 932        P_SIGNUM(ILL);
 933        P_SIGNUM(TRAP);
 934        P_SIGNUM(ABRT);
 935        P_SIGNUM(BUS);
 936        P_SIGNUM(FPE);
 937        P_SIGNUM(KILL);
 938        P_SIGNUM(USR1);
 939        P_SIGNUM(SEGV);
 940        P_SIGNUM(USR2);
 941        P_SIGNUM(PIPE);
 942        P_SIGNUM(ALRM);
 943        P_SIGNUM(TERM);
 944        P_SIGNUM(CHLD);
 945        P_SIGNUM(CONT);
 946        P_SIGNUM(STOP);
 947        P_SIGNUM(TSTP);
 948        P_SIGNUM(TTIN);
 949        P_SIGNUM(TTOU);
 950        P_SIGNUM(URG);
 951        P_SIGNUM(XCPU);
 952        P_SIGNUM(XFSZ);
 953        P_SIGNUM(VTALRM);
 954        P_SIGNUM(PROF);
 955        P_SIGNUM(WINCH);
 956        P_SIGNUM(IO);
 957        P_SIGNUM(PWR);
 958        P_SIGNUM(SYS);
 959#ifdef SIGEMT
 960        P_SIGNUM(EMT);
 961#endif
 962#ifdef SIGSTKFLT
 963        P_SIGNUM(STKFLT);
 964#endif
 965#ifdef SIGSWI
 966        P_SIGNUM(SWI);
 967#endif
 968        default: break;
 969        }
 970
 971        return scnprintf(bf, size, "%#x", sig);
 972}
 973
 974#define SCA_SIGNUM syscall_arg__scnprintf_signum
 975
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
#define TCGETS          0x5401

/*
 * Names of terminal ioctl commands, used to pretty-print the 'cmd' argument
 * of ioctl() (see the "ioctl" entry in syscall_fmts).
 *
 * Entries are positional; the explicit [0x27], [0x50] and [0x60] designators
 * skip over gaps, leaving the slots in between NULL.
 * NOTE(review): presumably slot N corresponds to command 0x5401 + N via the
 * offset given to DEFINE_STRARRAY_OFFSET below - confirm against that macro.
 * NOTE(review): "TIOCGDEV||TCGETX" presumably names two commands that share
 * one number - confirm.
 */
static const char *tioctls[] = {
        "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
        "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
        "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
        "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
        "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
        "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
        "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
        "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
        "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
        "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
        "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
        [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
        "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
        "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
        "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
};

/* 0x5401 is the same value as the TCGETS define above. */
static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
#endif /* defined(__i386__) || defined(__x86_64__) */
1002
/*
 * Initializer fragment for a syscall_fmts entry: format argument number 'arg'
 * with SCA_STRARRAY, using strarray__<array> as its parameter.
 * 'name' is not used by the expansion; it only documents the argument's name
 * at the use site.
 */
#define STRARRAY(arg, name, array) \
          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
          .arg_parm      = { [arg] = &strarray__##array, }
1006
/*
 * Per-syscall formatting hints.
 *
 * The table is searched by name with bsearch() (see syscall_fmt__find), so
 * entries MUST stay sorted by .name in strcmp() order.
 */
static struct syscall_fmt {
        const char *name;       /* lookup key, compared with strcmp() */
        const char *alias;      /* NOTE(review): presumably an alternative name to try for this syscall - confirm at the use site */
        /* Per-argument formatter, indexed by argument position; NULL means no specific formatter. */
        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
        /* Per-argument parameter for the formatter at the same index (e.g. a strarray for SCA_STRARRAY). */
        void       *arg_parm[6];
        bool       errmsg;      /* NOTE(review): presumably controls how the return value is printed - confirm at the use site */
        bool       timeout;     /* NOTE(review): presumably marks syscalls whose 0 return means a timeout - confirm */
        bool       hexret;      /* NOTE(review): presumably prints the return value in hexadecimal - confirm */
} syscall_fmts[] = {
        { .name     = "access",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
                             [1] = SCA_ACCMODE,  /* mode */ }, },
        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
        { .name     = "bpf",        .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
        { .name     = "brk",        .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
        { .name     = "chdir",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "chmod",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "chroot",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
        { .name     = "close",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
        { .name     = "connect",    .errmsg = true, },
        { .name     = "creat",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "dup",        .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup2",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup3",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
        { .name     = "eventfd2",   .errmsg = true,
          .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
        { .name     = "faccessat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fadvise64",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fallocate",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchdir",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmod",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmodat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fchown",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchownat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fcntl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_STRARRAY, /* cmd */ },
          .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
        { .name     = "fdatasync",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "flock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_FLOCK, /* cmd */ }, },
        { .name     = "fsetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fstatfs",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fsync",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "ftruncate", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "futex",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
        { .name     = "futimesat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "getdents",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getdents64", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "getxattr",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "inotify_add_watch",          .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "ioctl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
                             [1] = SCA_STRHEXARRAY, /* cmd */
                             [2] = SCA_HEX, /* arg */ },
          .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
#else
                             [2] = SCA_HEX, /* arg */ }, },
#endif
        { .name     = "keyctl",     .errmsg = true, STRARRAY(0, option, keyctl_options), },
        { .name     = "kill",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "lchown",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "lgetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "linkat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "listxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "llistxattr", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "lremovexattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "lseek",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_STRARRAY, /* whence */ },
          .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
        { .name     = "lsetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "lstat",      .errmsg = true, .alias = "newlstat",
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "lsxattr",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "madvise",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX,      /* start */
                             [2] = SCA_MADV_BHV, /* behavior */ }, },
        { .name     = "mkdir",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "mkdirat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "mknod",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "mknodat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "mlock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "mlockall",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "mmap",       .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
                             [2] = SCA_MMAP_PROT, /* prot */
                             [3] = SCA_MMAP_FLAGS, /* flags */
                             [4] = SCA_FD,        /* fd */ }, },
        { .name     = "mprotect",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* start */
                             [2] = SCA_MMAP_PROT, /* prot */ }, },
        { .name     = "mq_unlink", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
        { .name     = "mremap",     .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */
                             [3] = SCA_MREMAP_FLAGS, /* flags */
                             [4] = SCA_HEX, /* new_addr */ }, },
        { .name     = "munlock",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "munmap",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "name_to_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "newfstatat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "open",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
                             [1] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "open_by_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "openat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "perf_event_open", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_INT, /* pid */
                             [2] = SCA_INT, /* cpu */
                             [3] = SCA_FD,  /* group_fd */
                             [4] = SCA_PERF_FLAGS,  /* flags */ }, },
        { .name     = "pipe2",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
        { .name     = "poll",       .errmsg = true, .timeout = true, },
        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
        { .name     = "pread",      .errmsg = true, .alias = "pread64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "preadv",     .errmsg = true, .alias = "pread",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
        { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "pwritev",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "read",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "readlink",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
        { .name     = "readlinkat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "readv",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "recvfrom",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmmsg",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmsg",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "removexattr", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "renameat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "rmdir",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "rt_sigaction", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
        { .name     = "rt_sigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "select",     .errmsg = true, .timeout = true, },
        { .name     = "sendmmsg",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendmsg",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendto",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "setxattr",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "shutdown",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "socket",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "socketpair", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "stat",       .errmsg = true, .alias = "newstat",
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "statfs",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "swapoff",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
        { .name     = "swapon",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
        { .name     = "symlinkat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "tgkill",     .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "tkill",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "truncate",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
        { .name     = "uname",      .errmsg = true, .alias = "newuname", },
        { .name     = "unlinkat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "utime",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "utimensat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "utimes",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "vmsplice",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "write",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "writev",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
};
1294
1295static int syscall_fmt__cmp(const void *name, const void *fmtp)
1296{
1297        const struct syscall_fmt *fmt = fmtp;
1298        return strcmp(name, fmt->name);
1299}
1300
1301static struct syscall_fmt *syscall_fmt__find(const char *name)
1302{
1303        const int nmemb = ARRAY_SIZE(syscall_fmts);
1304        return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1305}
1306
/*
 * Description of one syscall: its argument fields and how each argument
 * should be formatted.
 */
struct syscall {
        struct event_format *tp_format;
        int                 nr_args;    /* number of slots allocated for arg_scnprintf */
        struct format_field *args;      /* argument fields, linked through ->next */
        const char          *name;
        bool                is_exit;
        struct syscall_fmt  *fmt;       /* formatting hints; NULL-checked before use */
        /* Per-argument formatter array, allocated in syscall__set_arg_fmts(). */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        void                **arg_parm; /* set to fmt->arg_parm when fmt is non-NULL */
};
1317
1318static size_t fprintf_duration(unsigned long t, FILE *fp)
1319{
1320        double duration = (double)t / NSEC_PER_MSEC;
1321        size_t printed = fprintf(fp, "(");
1322
1323        if (duration >= 1.0)
1324                printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1325        else if (duration >= 0.01)
1326                printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1327        else
1328                printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1329        return printed + fprintf(fp, "): ");
1330}
1331
/**
 * Per-thread tracing state, kept as the thread's private data
 * (see thread__trace()).
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 */
struct thread_trace {
        u64               entry_time;
        u64               exit_time;
        bool              entry_pending;
        unsigned long     nr_events;    /* bumped on every thread__trace() call */
        unsigned long     pfmaj, pfmin;
        char              *entry_str;   /* base that filename.entry_str_pos is an offset into */
        double            runtime_ms;
        struct {
                unsigned long ptr;
                short int     entry_str_pos;
                bool          pending_open;
                unsigned int  namelen;
                char          *name;
        } filename;
        struct {
                int       max;          /* highest valid index in table; -1 while the table is empty */
                char      **table;      /* fd -> strdup'd pathname, NULL when unknown */
        } paths;

        struct intlist *syscall_stats;
};
1359
1360static struct thread_trace *thread_trace__new(void)
1361{
1362        struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1363
1364        if (ttrace)
1365                ttrace->paths.max = -1;
1366
1367        ttrace->syscall_stats = intlist__new(NULL);
1368
1369        return ttrace;
1370}
1371
1372static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1373{
1374        struct thread_trace *ttrace;
1375
1376        if (thread == NULL)
1377                goto fail;
1378
1379        if (thread__priv(thread) == NULL)
1380                thread__set_priv(thread, thread_trace__new());
1381
1382        if (thread__priv(thread) == NULL)
1383                goto fail;
1384
1385        ttrace = thread__priv(thread);
1386        ++ttrace->nr_events;
1387
1388        return ttrace;
1389fail:
1390        color_fprintf(fp, PERF_COLOR_RED,
1391                      "WARNING: not enough memory, dropping samples!\n");
1392        return NULL;
1393}
1394
/*
 * Bit flags for major/minor page faults.
 * NOTE(review): presumably the values stored in trace->trace_pgfaults - confirm.
 */
#define TRACE_PFMAJ             (1 << 0)
#define TRACE_PFMIN             (1 << 1)

/* NOTE(review): presumably the size of the thread_trace::entry_str buffer - confirm at the allocation site. */
static const size_t trace__entry_str_size = 2048;
1399
/*
 * Global state of one 'perf trace' session: the perf tool callbacks, the
 * syscall table, filters, output stream and option flags.
 */
struct trace {
        struct perf_tool        tool;           /* embedded; trace__tool_process() recovers the trace via container_of() */
        struct {
                int             machine;        /* passed to audit_syscall_to_name() */
                int             open_id;
        }                       audit;
        struct {
                int             max;
                struct syscall  *table;
                struct {
                        struct perf_evsel *sys_enter,
                                          *sys_exit;
                }               events;
        } syscalls;
        struct record_opts      opts;
        struct perf_evlist      *evlist;
        struct machine          *host;          /* created by machine__new_host() in trace__symbols_init() */
        struct thread           *current;
        u64                     base_time;      /* subtracted from timestamps before printing */
        FILE                    *output;        /* where e.g. the "LOST events" message is written */
        unsigned long           nr_events;
        struct strlist          *ev_qualifier;
        struct {
                size_t          nr;
                int             *entries;
        }                       ev_qualifier_ids;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        struct {
                size_t          nr;
                pid_t           *entries;
        }                       filter_pids;
        double                  duration_filter; /* in ms; scaled by NSEC_PER_MSEC when compared */
        double                  runtime_ms;
        struct {
                u64             vfs_getname,
                                proc_getname;   /* bumped before each fd path lookup through /proc */
        } stats;
        bool                    not_ev_qualifier;
        bool                    live;           /* when false, fd paths are not read from /proc */
        bool                    full_time;
        bool                    sched;
        bool                    multiple_threads; /* print the tid (and comm, if show_comm) on each line */
        bool                    summary;
        bool                    summary_only;
        bool                    show_comm;
        bool                    show_tool_stats;
        bool                    trace_syscalls;
        bool                    force;
        bool                    vfs_getname;    /* when false, filename args are printed as raw pointers */
        int                     trace_pgfaults;
};
1452
1453static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1454{
1455        struct thread_trace *ttrace = thread__priv(thread);
1456
1457        if (fd > ttrace->paths.max) {
1458                char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1459
1460                if (npath == NULL)
1461                        return -1;
1462
1463                if (ttrace->paths.max != -1) {
1464                        memset(npath + ttrace->paths.max + 1, 0,
1465                               (fd - ttrace->paths.max) * sizeof(char *));
1466                } else {
1467                        memset(npath, 0, (fd + 1) * sizeof(char *));
1468                }
1469
1470                ttrace->paths.table = npath;
1471                ttrace->paths.max   = fd;
1472        }
1473
1474        ttrace->paths.table[fd] = strdup(pathname);
1475
1476        return ttrace->paths.table[fd] != NULL ? 0 : -1;
1477}
1478
1479static int thread__read_fd_path(struct thread *thread, int fd)
1480{
1481        char linkname[PATH_MAX], pathname[PATH_MAX];
1482        struct stat st;
1483        int ret;
1484
1485        if (thread->pid_ == thread->tid) {
1486                scnprintf(linkname, sizeof(linkname),
1487                          "/proc/%d/fd/%d", thread->pid_, fd);
1488        } else {
1489                scnprintf(linkname, sizeof(linkname),
1490                          "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1491        }
1492
1493        if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1494                return -1;
1495
1496        ret = readlink(linkname, pathname, sizeof(pathname));
1497
1498        if (ret < 0 || ret > st.st_size)
1499                return -1;
1500
1501        pathname[ret] = '\0';
1502        return trace__set_fd_pathname(thread, fd, pathname);
1503}
1504
1505static const char *thread__fd_path(struct thread *thread, int fd,
1506                                   struct trace *trace)
1507{
1508        struct thread_trace *ttrace = thread__priv(thread);
1509
1510        if (ttrace == NULL)
1511                return NULL;
1512
1513        if (fd < 0)
1514                return NULL;
1515
1516        if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1517                if (!trace->live)
1518                        return NULL;
1519                ++trace->stats.proc_getname;
1520                if (thread__read_fd_path(thread, fd))
1521                        return NULL;
1522        }
1523
1524        return ttrace->paths.table[fd];
1525}
1526
1527static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1528                                        struct syscall_arg *arg)
1529{
1530        int fd = arg->val;
1531        size_t printed = scnprintf(bf, size, "%d", fd);
1532        const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1533
1534        if (path)
1535                printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1536
1537        return printed;
1538}
1539
1540static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1541                                              struct syscall_arg *arg)
1542{
1543        int fd = arg->val;
1544        size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1545        struct thread_trace *ttrace = thread__priv(arg->thread);
1546
1547        if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1548                zfree(&ttrace->paths.table[fd]);
1549
1550        return printed;
1551}
1552
1553static void thread__set_filename_pos(struct thread *thread, const char *bf,
1554                                     unsigned long ptr)
1555{
1556        struct thread_trace *ttrace = thread__priv(thread);
1557
1558        ttrace->filename.ptr = ptr;
1559        ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1560}
1561
1562static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1563                                              struct syscall_arg *arg)
1564{
1565        unsigned long ptr = arg->val;
1566
1567        if (!arg->trace->vfs_getname)
1568                return scnprintf(bf, size, "%#x", ptr);
1569
1570        thread__set_filename_pos(arg->thread, bf, ptr);
1571        return 0;
1572}
1573
1574static bool trace__filter_duration(struct trace *trace, double t)
1575{
1576        return t < (trace->duration_filter * NSEC_PER_MSEC);
1577}
1578
1579static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1580{
1581        double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1582
1583        return fprintf(fp, "%10.3f ", ts);
1584}
1585
/*
 * Flags set from the signal handler below and read by the rest of the file.
 * Objects written by an asynchronous signal handler must be of type
 * 'volatile sig_atomic_t'; a plain bool gives no such guarantee and lets the
 * compiler cache the value across a polling loop.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination, and note whether the signal was
 * SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;
        interrupted = sig == SIGINT;
}
1594
1595static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1596                                        u64 duration, u64 tstamp, FILE *fp)
1597{
1598        size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1599        printed += fprintf_duration(duration, fp);
1600
1601        if (trace->multiple_threads) {
1602                if (trace->show_comm)
1603                        printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1604                printed += fprintf(fp, "%d ", thread->tid);
1605        }
1606
1607        return printed;
1608}
1609
1610static int trace__process_event(struct trace *trace, struct machine *machine,
1611                                union perf_event *event, struct perf_sample *sample)
1612{
1613        int ret = 0;
1614
1615        switch (event->header.type) {
1616        case PERF_RECORD_LOST:
1617                color_fprintf(trace->output, PERF_COLOR_RED,
1618                              "LOST %" PRIu64 " events!\n", event->lost.lost);
1619                ret = machine__process_lost_event(machine, event, sample);
1620        default:
1621                ret = machine__process_event(machine, event, sample);
1622                break;
1623        }
1624
1625        return ret;
1626}
1627
1628static int trace__tool_process(struct perf_tool *tool,
1629                               union perf_event *event,
1630                               struct perf_sample *sample,
1631                               struct machine *machine)
1632{
1633        struct trace *trace = container_of(tool, struct trace, tool);
1634        return trace__process_event(trace, machine, event, sample);
1635}
1636
1637static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1638{
1639        int err = symbol__init(NULL);
1640
1641        if (err)
1642                return err;
1643
1644        trace->host = machine__new_host();
1645        if (trace->host == NULL)
1646                return -ENOMEM;
1647
1648        if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1649                return -errno;
1650
1651        err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1652                                            evlist->threads, trace__tool_process, false,
1653                                            trace->opts.proc_map_timeout);
1654        if (err)
1655                symbol__exit();
1656
1657        return err;
1658}
1659
1660static int syscall__set_arg_fmts(struct syscall *sc)
1661{
1662        struct format_field *field;
1663        int idx = 0;
1664
1665        sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1666        if (sc->arg_scnprintf == NULL)
1667                return -1;
1668
1669        if (sc->fmt)
1670                sc->arg_parm = sc->fmt->arg_parm;
1671
1672        for (field = sc->args; field; field = field->next) {
1673                if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1674                        sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1675                else if (field->flags & FIELD_IS_POINTER)
1676                        sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1677                ++idx;
1678        }
1679
1680        return 0;
1681}
1682
1683static int trace__read_syscall_info(struct trace *trace, int id)
1684{
1685        char tp_name[128];
1686        struct syscall *sc;
1687        const char *name = audit_syscall_to_name(id, trace->audit.machine);
1688
1689        if (name == NULL)
1690                return -1;
1691
1692        if (id > trace->syscalls.max) {
1693                struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1694
1695                if (nsyscalls == NULL)
1696                        return -1;
1697
1698                if (trace->syscalls.max != -1) {
1699                        memset(nsyscalls + trace->syscalls.max + 1, 0,
1700                               (id - trace->syscalls.max) * sizeof(*sc));
1701                } else {
1702                        memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1703                }
1704
1705                trace->syscalls.table = nsyscalls;
1706                trace->syscalls.max   = id;
1707        }
1708
1709        sc = trace->syscalls.table + id;
1710        sc->name = name;
1711
1712        sc->fmt  = syscall_fmt__find(sc->name);
1713
1714        snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1715        sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1716
1717        if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1718                snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1719                sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1720        }
1721
1722        if (IS_ERR(sc->tp_format))
1723                return -1;
1724
1725        sc->args = sc->tp_format->format.fields;
1726        sc->nr_args = sc->tp_format->format.nr_fields;
1727        /* drop nr field - not relevant here; does not exist on older kernels */
1728        if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1729                sc->args = sc->args->next;
1730                --sc->nr_args;
1731        }
1732
1733        sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1734
1735        return syscall__set_arg_fmts(sc);
1736}
1737
1738static int trace__validate_ev_qualifier(struct trace *trace)
1739{
1740        int err = 0, i;
1741        struct str_node *pos;
1742
1743        trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1744        trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1745                                                 sizeof(trace->ev_qualifier_ids.entries[0]));
1746
1747        if (trace->ev_qualifier_ids.entries == NULL) {
1748                fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1749                       trace->output);
1750                err = -EINVAL;
1751                goto out;
1752        }
1753
1754        i = 0;
1755
1756        strlist__for_each(pos, trace->ev_qualifier) {
1757                const char *sc = pos->s;
1758                int id = audit_name_to_syscall(sc, trace->audit.machine);
1759
1760                if (id < 0) {
1761                        if (err == 0) {
1762                                fputs("Error:\tInvalid syscall ", trace->output);
1763                                err = -EINVAL;
1764                        } else {
1765                                fputs(", ", trace->output);
1766                        }
1767
1768                        fputs(sc, trace->output);
1769                }
1770
1771                trace->ev_qualifier_ids.entries[i++] = id;
1772        }
1773
1774        if (err < 0) {
1775                fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1776                      "\nHint:\tand: 'man syscalls'\n", trace->output);
1777                zfree(&trace->ev_qualifier_ids.entries);
1778                trace->ev_qualifier_ids.nr = 0;
1779        }
1780out:
1781        return err;
1782}
1783
1784/*
1785 * args is to be interpreted as a series of longs but we need to handle
1786 * 8-byte unaligned accesses. args points to raw_data within the event
1787 * and raw_data is guaranteed to be 8-byte unaligned because it is
1788 * preceded by raw_size which is a u32. So we need to copy args to a temp
1789 * variable to read it. Most notably this avoids extended load instructions
1790 * on unaligned addresses
1791 */
1792
1793static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1794                                      unsigned char *args, struct trace *trace,
1795                                      struct thread *thread)
1796{
1797        size_t printed = 0;
1798        unsigned char *p;
1799        unsigned long val;
1800
1801        if (sc->args != NULL) {
1802                struct format_field *field;
1803                u8 bit = 1;
1804                struct syscall_arg arg = {
1805                        .idx    = 0,
1806                        .mask   = 0,
1807                        .trace  = trace,
1808                        .thread = thread,
1809                };
1810
1811                for (field = sc->args; field;
1812                     field = field->next, ++arg.idx, bit <<= 1) {
1813                        if (arg.mask & bit)
1814                                continue;
1815
1816                        /* special care for unaligned accesses */
1817                        p = args + sizeof(unsigned long) * arg.idx;
1818                        memcpy(&val, p, sizeof(val));
1819
1820                        /*
1821                         * Suppress this argument if its value is zero and
1822                         * and we don't have a string associated in an
1823                         * strarray for it.
1824                         */
1825                        if (val == 0 &&
1826                            !(sc->arg_scnprintf &&
1827                              sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1828                              sc->arg_parm[arg.idx]))
1829                                continue;
1830
1831                        printed += scnprintf(bf + printed, size - printed,
1832                                             "%s%s: ", printed ? ", " : "", field->name);
1833                        if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1834                                arg.val = val;
1835                                if (sc->arg_parm)
1836                                        arg.parm = sc->arg_parm[arg.idx];
1837                                printed += sc->arg_scnprintf[arg.idx](bf + printed,
1838                                                                      size - printed, &arg);
1839                        } else {
1840                                printed += scnprintf(bf + printed, size - printed,
1841                                                     "%ld", val);
1842                        }
1843                }
1844        } else {
1845                int i = 0;
1846
1847                while (i < 6) {
1848                        /* special care for unaligned accesses */
1849                        p = args + sizeof(unsigned long) * i;
1850                        memcpy(&val, p, sizeof(val));
1851                        printed += scnprintf(bf + printed, size - printed,
1852                                             "%sarg%d: %ld",
1853                                             printed ? ", " : "", i, val);
1854                        ++i;
1855                }
1856        }
1857
1858        return printed;
1859}
1860
/*
 * Signature of the per-event sample handlers in this file; values of this
 * type are read back from evsel->handler and invoked with the sample.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1864
1865static struct syscall *trace__syscall_info(struct trace *trace,
1866                                           struct perf_evsel *evsel, int id)
1867{
1868
1869        if (id < 0) {
1870
1871                /*
1872                 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1873                 * before that, leaving at a higher verbosity level till that is
1874                 * explained. Reproduced with plain ftrace with:
1875                 *
1876                 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1877                 * grep "NR -1 " /t/trace_pipe
1878                 *
1879                 * After generating some load on the machine.
1880                 */
1881                if (verbose > 1) {
1882                        static u64 n;
1883                        fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1884                                id, perf_evsel__name(evsel), ++n);
1885                }
1886                return NULL;
1887        }
1888
1889        if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1890            trace__read_syscall_info(trace, id))
1891                goto out_cant_read;
1892
1893        if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1894                goto out_cant_read;
1895
1896        return &trace->syscalls.table[id];
1897
1898out_cant_read:
1899        if (verbose) {
1900                fprintf(trace->output, "Problems reading syscall %d", id);
1901                if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1902                        fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1903                fputs(" information\n", trace->output);
1904        }
1905        return NULL;
1906}
1907
1908static void thread__update_stats(struct thread_trace *ttrace,
1909                                 int id, struct perf_sample *sample)
1910{
1911        struct int_node *inode;
1912        struct stats *stats;
1913        u64 duration = 0;
1914
1915        inode = intlist__findnew(ttrace->syscall_stats, id);
1916        if (inode == NULL)
1917                return;
1918
1919        stats = inode->priv;
1920        if (stats == NULL) {
1921                stats = malloc(sizeof(struct stats));
1922                if (stats == NULL)
1923                        return;
1924                init_stats(stats);
1925                inode->priv = stats;
1926        }
1927
1928        if (ttrace->entry_time && sample->time > ttrace->entry_time)
1929                duration = sample->time - ttrace->entry_time;
1930
1931        update_stats(stats, duration);
1932}
1933
1934static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1935{
1936        struct thread_trace *ttrace;
1937        u64 duration;
1938        size_t printed;
1939
1940        if (trace->current == NULL)
1941                return 0;
1942
1943        ttrace = thread__priv(trace->current);
1944
1945        if (!ttrace->entry_pending)
1946                return 0;
1947
1948        duration = sample->time - ttrace->entry_time;
1949
1950        printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1951        printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1952        ttrace->entry_pending = false;
1953
1954        return printed;
1955}
1956
1957static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1958                            union perf_event *event __maybe_unused,
1959                            struct perf_sample *sample)
1960{
1961        char *msg;
1962        void *args;
1963        size_t printed = 0;
1964        struct thread *thread;
1965        int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1966        struct syscall *sc = trace__syscall_info(trace, evsel, id);
1967        struct thread_trace *ttrace;
1968
1969        if (sc == NULL)
1970                return -1;
1971
1972        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1973        ttrace = thread__trace(thread, trace->output);
1974        if (ttrace == NULL)
1975                goto out_put;
1976
1977        args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1978
1979        if (ttrace->entry_str == NULL) {
1980                ttrace->entry_str = malloc(trace__entry_str_size);
1981                if (!ttrace->entry_str)
1982                        goto out_put;
1983        }
1984
1985        if (!trace->summary_only)
1986                trace__printf_interrupted_entry(trace, sample);
1987
1988        ttrace->entry_time = sample->time;
1989        msg = ttrace->entry_str;
1990        printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1991
1992        printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1993                                           args, trace, thread);
1994
1995        if (sc->is_exit) {
1996                if (!trace->duration_filter && !trace->summary_only) {
1997                        trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1998                        fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1999                }
2000        } else {
2001                ttrace->entry_pending = true;
2002                /* See trace__vfs_getname & trace__sys_exit */
2003                ttrace->filename.pending_open = false;
2004        }
2005
2006        if (trace->current != thread) {
2007                thread__put(trace->current);
2008                trace->current = thread__get(thread);
2009        }
2010        err = 0;
2011out_put:
2012        thread__put(thread);
2013        return err;
2014}
2015
2016static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2017                           union perf_event *event __maybe_unused,
2018                           struct perf_sample *sample)
2019{
2020        long ret;
2021        u64 duration = 0;
2022        struct thread *thread;
2023        int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2024        struct syscall *sc = trace__syscall_info(trace, evsel, id);
2025        struct thread_trace *ttrace;
2026
2027        if (sc == NULL)
2028                return -1;
2029
2030        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2031        ttrace = thread__trace(thread, trace->output);
2032        if (ttrace == NULL)
2033                goto out_put;
2034
2035        if (trace->summary)
2036                thread__update_stats(ttrace, id, sample);
2037
2038        ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2039
2040        if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2041                trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2042                ttrace->filename.pending_open = false;
2043                ++trace->stats.vfs_getname;
2044        }
2045
2046        ttrace->exit_time = sample->time;
2047
2048        if (ttrace->entry_time) {
2049                duration = sample->time - ttrace->entry_time;
2050                if (trace__filter_duration(trace, duration))
2051                        goto out;
2052        } else if (trace->duration_filter)
2053                goto out;
2054
2055        if (trace->summary_only)
2056                goto out;
2057
2058        trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2059
2060        if (ttrace->entry_pending) {
2061                fprintf(trace->output, "%-70s", ttrace->entry_str);
2062        } else {
2063                fprintf(trace->output, " ... [");
2064                color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2065                fprintf(trace->output, "]: %s()", sc->name);
2066        }
2067
2068        if (sc->fmt == NULL) {
2069signed_print:
2070                fprintf(trace->output, ") = %ld", ret);
2071        } else if (ret < 0 && sc->fmt->errmsg) {
2072                char bf[STRERR_BUFSIZE];
2073                const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2074                           *e = audit_errno_to_name(-ret);
2075
2076                fprintf(trace->output, ") = -1 %s %s", e, emsg);
2077        } else if (ret == 0 && sc->fmt->timeout)
2078                fprintf(trace->output, ") = 0 Timeout");
2079        else if (sc->fmt->hexret)
2080                fprintf(trace->output, ") = %#lx", ret);
2081        else
2082                goto signed_print;
2083
2084        fputc('\n', trace->output);
2085out:
2086        ttrace->entry_pending = false;
2087        err = 0;
2088out_put:
2089        thread__put(thread);
2090        return err;
2091}
2092
2093static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2094                              union perf_event *event __maybe_unused,
2095                              struct perf_sample *sample)
2096{
2097        struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2098        struct thread_trace *ttrace;
2099        size_t filename_len, entry_str_len, to_move;
2100        ssize_t remaining_space;
2101        char *pos;
2102        const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2103
2104        if (!thread)
2105                goto out;
2106
2107        ttrace = thread__priv(thread);
2108        if (!ttrace)
2109                goto out;
2110
2111        filename_len = strlen(filename);
2112
2113        if (ttrace->filename.namelen < filename_len) {
2114                char *f = realloc(ttrace->filename.name, filename_len + 1);
2115
2116                if (f == NULL)
2117                                goto out;
2118
2119                ttrace->filename.namelen = filename_len;
2120                ttrace->filename.name = f;
2121        }
2122
2123        strcpy(ttrace->filename.name, filename);
2124        ttrace->filename.pending_open = true;
2125
2126        if (!ttrace->filename.ptr)
2127                goto out;
2128
2129        entry_str_len = strlen(ttrace->entry_str);
2130        remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2131        if (remaining_space <= 0)
2132                goto out;
2133
2134        if (filename_len > (size_t)remaining_space) {
2135                filename += filename_len - remaining_space;
2136                filename_len = remaining_space;
2137        }
2138
2139        to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2140        pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2141        memmove(pos + filename_len, pos, to_move);
2142        memcpy(pos, filename, filename_len);
2143
2144        ttrace->filename.ptr = 0;
2145        ttrace->filename.entry_str_pos = 0;
2146out:
2147        return 0;
2148}
2149
2150static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2151                                     union perf_event *event __maybe_unused,
2152                                     struct perf_sample *sample)
2153{
2154        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2155        double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2156        struct thread *thread = machine__findnew_thread(trace->host,
2157                                                        sample->pid,
2158                                                        sample->tid);
2159        struct thread_trace *ttrace = thread__trace(thread, trace->output);
2160
2161        if (ttrace == NULL)
2162                goto out_dump;
2163
2164        ttrace->runtime_ms += runtime_ms;
2165        trace->runtime_ms += runtime_ms;
2166        thread__put(thread);
2167        return 0;
2168
2169out_dump:
2170        fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2171               evsel->name,
2172               perf_evsel__strval(evsel, sample, "comm"),
2173               (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2174               runtime,
2175               perf_evsel__intval(evsel, sample, "vruntime"));
2176        thread__put(thread);
2177        return 0;
2178}
2179
2180static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2181                                union perf_event *event __maybe_unused,
2182                                struct perf_sample *sample)
2183{
2184        trace__printf_interrupted_entry(trace, sample);
2185        trace__fprintf_tstamp(trace, sample->time, trace->output);
2186
2187        if (trace->trace_syscalls)
2188                fprintf(trace->output, "(         ): ");
2189
2190        fprintf(trace->output, "%s:", evsel->name);
2191
2192        if (evsel->tp_format) {
2193                event_format__fprintf(evsel->tp_format, sample->cpu,
2194                                      sample->raw_data, sample->raw_size,
2195                                      trace->output);
2196        }
2197
2198        fprintf(trace->output, ")\n");
2199        return 0;
2200}
2201
2202static void print_location(FILE *f, struct perf_sample *sample,
2203                           struct addr_location *al,
2204                           bool print_dso, bool print_sym)
2205{
2206
2207        if ((verbose || print_dso) && al->map)
2208                fprintf(f, "%s@", al->map->dso->long_name);
2209
2210        if ((verbose || print_sym) && al->sym)
2211                fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2212                        al->addr - al->sym->start);
2213        else if (al->map)
2214                fprintf(f, "0x%" PRIx64, al->addr);
2215        else
2216                fprintf(f, "0x%" PRIx64, sample->addr);
2217}
2218
2219static int trace__pgfault(struct trace *trace,
2220                          struct perf_evsel *evsel,
2221                          union perf_event *event,
2222                          struct perf_sample *sample)
2223{
2224        struct thread *thread;
2225        u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2226        struct addr_location al;
2227        char map_type = 'd';
2228        struct thread_trace *ttrace;
2229        int err = -1;
2230
2231        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2232        ttrace = thread__trace(thread, trace->output);
2233        if (ttrace == NULL)
2234                goto out_put;
2235
2236        if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2237                ttrace->pfmaj++;
2238        else
2239                ttrace->pfmin++;
2240
2241        if (trace->summary_only)
2242                goto out;
2243
2244        thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2245                              sample->ip, &al);
2246
2247        trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2248
2249        fprintf(trace->output, "%sfault [",
2250                evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2251                "maj" : "min");
2252
2253        print_location(trace->output, sample, &al, false, true);
2254
2255        fprintf(trace->output, "] => ");
2256
2257        thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2258                                   sample->addr, &al);
2259
2260        if (!al.map) {
2261                thread__find_addr_location(thread, cpumode,
2262                                           MAP__FUNCTION, sample->addr, &al);
2263
2264                if (al.map)
2265                        map_type = 'x';
2266                else
2267                        map_type = '?';
2268        }
2269
2270        print_location(trace->output, sample, &al, true, false);
2271
2272        fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2273out:
2274        err = 0;
2275out_put:
2276        thread__put(thread);
2277        return err;
2278}
2279
2280static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2281{
2282        if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2283            (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2284                return false;
2285
2286        if (trace->pid_list || trace->tid_list)
2287                return true;
2288
2289        return false;
2290}
2291
2292static int trace__process_sample(struct perf_tool *tool,
2293                                 union perf_event *event,
2294                                 struct perf_sample *sample,
2295                                 struct perf_evsel *evsel,
2296                                 struct machine *machine __maybe_unused)
2297{
2298        struct trace *trace = container_of(tool, struct trace, tool);
2299        int err = 0;
2300
2301        tracepoint_handler handler = evsel->handler;
2302
2303        if (skip_sample(trace, sample))
2304                return 0;
2305
2306        if (!trace->full_time && trace->base_time == 0)
2307                trace->base_time = sample->time;
2308
2309        if (handler) {
2310                ++trace->nr_events;
2311                handler(trace, evsel, event, sample);
2312        }
2313
2314        return err;
2315}
2316
2317static int parse_target_str(struct trace *trace)
2318{
2319        if (trace->opts.target.pid) {
2320                trace->pid_list = intlist__new(trace->opts.target.pid);
2321                if (trace->pid_list == NULL) {
2322                        pr_err("Error parsing process id string\n");
2323                        return -EINVAL;
2324                }
2325        }
2326
2327        if (trace->opts.target.tid) {
2328                trace->tid_list = intlist__new(trace->opts.target.tid);
2329                if (trace->tid_list == NULL) {
2330                        pr_err("Error parsing thread id string\n");
2331                        return -EINVAL;
2332                }
2333        }
2334
2335        return 0;
2336}
2337
2338static int trace__record(struct trace *trace, int argc, const char **argv)
2339{
2340        unsigned int rec_argc, i, j;
2341        const char **rec_argv;
2342        const char * const record_args[] = {
2343                "record",
2344                "-R",
2345                "-m", "1024",
2346                "-c", "1",
2347        };
2348
2349        const char * const sc_args[] = { "-e", };
2350        unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2351        const char * const majpf_args[] = { "-e", "major-faults" };
2352        unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2353        const char * const minpf_args[] = { "-e", "minor-faults" };
2354        unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2355
2356        /* +1 is for the event string below */
2357        rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2358                majpf_args_nr + minpf_args_nr + argc;
2359        rec_argv = calloc(rec_argc + 1, sizeof(char *));
2360
2361        if (rec_argv == NULL)
2362                return -ENOMEM;
2363
2364        j = 0;
2365        for (i = 0; i < ARRAY_SIZE(record_args); i++)
2366                rec_argv[j++] = record_args[i];
2367
2368        if (trace->trace_syscalls) {
2369                for (i = 0; i < sc_args_nr; i++)
2370                        rec_argv[j++] = sc_args[i];
2371
2372                /* event string may be different for older kernels - e.g., RHEL6 */
2373                if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2374                        rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2375                else if (is_valid_tracepoint("syscalls:sys_enter"))
2376                        rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2377                else {
2378                        pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2379                        return -1;
2380                }
2381        }
2382
2383        if (trace->trace_pgfaults & TRACE_PFMAJ)
2384                for (i = 0; i < majpf_args_nr; i++)
2385                        rec_argv[j++] = majpf_args[i];
2386
2387        if (trace->trace_pgfaults & TRACE_PFMIN)
2388                for (i = 0; i < minpf_args_nr; i++)
2389                        rec_argv[j++] = minpf_args[i];
2390
2391        for (i = 0; i < (unsigned int)argc; i++)
2392                rec_argv[j++] = argv[i];
2393
2394        return cmd_record(j, rec_argv, NULL);
2395}
2396
2397static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2398
2399static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2400{
2401        struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2402
2403        if (IS_ERR(evsel))
2404                return false;
2405
2406        if (perf_evsel__field(evsel, "pathname") == NULL) {
2407                perf_evsel__delete(evsel);
2408                return false;
2409        }
2410
2411        evsel->handler = trace__vfs_getname;
2412        perf_evlist__add(evlist, evsel);
2413        return true;
2414}
2415
2416static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2417                                    u64 config)
2418{
2419        struct perf_evsel *evsel;
2420        struct perf_event_attr attr = {
2421                .type = PERF_TYPE_SOFTWARE,
2422                .mmap_data = 1,
2423        };
2424
2425        attr.config = config;
2426        attr.sample_period = 1;
2427
2428        event_attr_init(&attr);
2429
2430        evsel = perf_evsel__new(&attr);
2431        if (!evsel)
2432                return -ENOMEM;
2433
2434        evsel->handler = trace__pgfault;
2435        perf_evlist__add(evlist, evsel);
2436
2437        return 0;
2438}
2439
2440static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2441{
2442        const u32 type = event->header.type;
2443        struct perf_evsel *evsel;
2444
2445        if (!trace->full_time && trace->base_time == 0)
2446                trace->base_time = sample->time;
2447
2448        if (type != PERF_RECORD_SAMPLE) {
2449                trace__process_event(trace, trace->host, event, sample);
2450                return;
2451        }
2452
2453        evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2454        if (evsel == NULL) {
2455                fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2456                return;
2457        }
2458
2459        if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2460            sample->raw_data == NULL) {
2461                fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2462                       perf_evsel__name(evsel), sample->tid,
2463                       sample->cpu, sample->raw_size);
2464        } else {
2465                tracepoint_handler handler = evsel->handler;
2466                handler(trace, evsel, event, sample);
2467        }
2468}
2469
2470static int trace__add_syscall_newtp(struct trace *trace)
2471{
2472        int ret = -1;
2473        struct perf_evlist *evlist = trace->evlist;
2474        struct perf_evsel *sys_enter, *sys_exit;
2475
2476        sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2477        if (sys_enter == NULL)
2478                goto out;
2479
2480        if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2481                goto out_delete_sys_enter;
2482
2483        sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2484        if (sys_exit == NULL)
2485                goto out_delete_sys_enter;
2486
2487        if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2488                goto out_delete_sys_exit;
2489
2490        perf_evlist__add(evlist, sys_enter);
2491        perf_evlist__add(evlist, sys_exit);
2492
2493        trace->syscalls.events.sys_enter = sys_enter;
2494        trace->syscalls.events.sys_exit  = sys_exit;
2495
2496        ret = 0;
2497out:
2498        return ret;
2499
2500out_delete_sys_exit:
2501        perf_evsel__delete_priv(sys_exit);
2502out_delete_sys_enter:
2503        perf_evsel__delete_priv(sys_enter);
2504        goto out;
2505}
2506
2507static int trace__set_ev_qualifier_filter(struct trace *trace)
2508{
2509        int err = -1;
2510        char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2511                                                trace->ev_qualifier_ids.nr,
2512                                                trace->ev_qualifier_ids.entries);
2513
2514        if (filter == NULL)
2515                goto out_enomem;
2516
2517        if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2518                err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2519
2520        free(filter);
2521out:
2522        return err;
2523out_enomem:
2524        errno = ENOMEM;
2525        goto out;
2526}
2527
2528static int trace__run(struct trace *trace, int argc, const char **argv)
2529{
2530        struct perf_evlist *evlist = trace->evlist;
2531        struct perf_evsel *evsel;
2532        int err = -1, i;
2533        unsigned long before;
2534        const bool forks = argc > 0;
2535        bool draining = false;
2536
2537        trace->live = true;
2538
2539        if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2540                goto out_error_raw_syscalls;
2541
2542        if (trace->trace_syscalls)
2543                trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2544
2545        if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2546            perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2547                goto out_error_mem;
2548        }
2549
2550        if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2551            perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2552                goto out_error_mem;
2553
2554        if (trace->sched &&
2555            perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2556                                   trace__sched_stat_runtime))
2557                goto out_error_sched_stat_runtime;
2558
2559        err = perf_evlist__create_maps(evlist, &trace->opts.target);
2560        if (err < 0) {
2561                fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2562                goto out_delete_evlist;
2563        }
2564
2565        err = trace__symbols_init(trace, evlist);
2566        if (err < 0) {
2567                fprintf(trace->output, "Problems initializing symbol libraries!\n");
2568                goto out_delete_evlist;
2569        }
2570
2571        perf_evlist__config(evlist, &trace->opts);
2572
2573        signal(SIGCHLD, sig_handler);
2574        signal(SIGINT, sig_handler);
2575
2576        if (forks) {
2577                err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2578                                                    argv, false, NULL);
2579                if (err < 0) {
2580                        fprintf(trace->output, "Couldn't run the workload!\n");
2581                        goto out_delete_evlist;
2582                }
2583        }
2584
2585        err = perf_evlist__open(evlist);
2586        if (err < 0)
2587                goto out_error_open;
2588
2589        /*
2590         * Better not use !target__has_task() here because we need to cover the
2591         * case where no threads were specified in the command line, but a
2592         * workload was, and in that case we will fill in the thread_map when
2593         * we fork the workload in perf_evlist__prepare_workload.
2594         */
2595        if (trace->filter_pids.nr > 0)
2596                err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2597        else if (thread_map__pid(evlist->threads, 0) == -1)
2598                err = perf_evlist__set_filter_pid(evlist, getpid());
2599
2600        if (err < 0)
2601                goto out_error_mem;
2602
2603        if (trace->ev_qualifier_ids.nr > 0) {
2604                err = trace__set_ev_qualifier_filter(trace);
2605                if (err < 0)
2606                        goto out_errno;
2607
2608                pr_debug("event qualifier tracepoint filter: %s\n",
2609                         trace->syscalls.events.sys_exit->filter);
2610        }
2611
2612        err = perf_evlist__apply_filters(evlist, &evsel);
2613        if (err < 0)
2614                goto out_error_apply_filters;
2615
2616        err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2617        if (err < 0)
2618                goto out_error_mmap;
2619
2620        if (!target__none(&trace->opts.target))
2621                perf_evlist__enable(evlist);
2622
2623        if (forks)
2624                perf_evlist__start_workload(evlist);
2625
2626        trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2627                                  evlist->threads->nr > 1 ||
2628                                  perf_evlist__first(evlist)->attr.inherit;
2629again:
2630        before = trace->nr_events;
2631
2632        for (i = 0; i < evlist->nr_mmaps; i++) {
2633                union perf_event *event;
2634
2635                while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2636                        struct perf_sample sample;
2637
2638                        ++trace->nr_events;
2639
2640                        err = perf_evlist__parse_sample(evlist, event, &sample);
2641                        if (err) {
2642                                fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2643                                goto next_event;
2644                        }
2645
2646                        trace__handle_event(trace, event, &sample);
2647next_event:
2648                        perf_evlist__mmap_consume(evlist, i);
2649
2650                        if (interrupted)
2651                                goto out_disable;
2652
2653                        if (done && !draining) {
2654                                perf_evlist__disable(evlist);
2655                                draining = true;
2656                        }
2657                }
2658        }
2659
2660        if (trace->nr_events == before) {
2661                int timeout = done ? 100 : -1;
2662
2663                if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2664                        if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2665                                draining = true;
2666
2667                        goto again;
2668                }
2669        } else {
2670                goto again;
2671        }
2672
2673out_disable:
2674        thread__zput(trace->current);
2675
2676        perf_evlist__disable(evlist);
2677
2678        if (!err) {
2679                if (trace->summary)
2680                        trace__fprintf_thread_summary(trace, trace->output);
2681
2682                if (trace->show_tool_stats) {
2683                        fprintf(trace->output, "Stats:\n "
2684                                               " vfs_getname : %" PRIu64 "\n"
2685                                               " proc_getname: %" PRIu64 "\n",
2686                                trace->stats.vfs_getname,
2687                                trace->stats.proc_getname);
2688                }
2689        }
2690
2691out_delete_evlist:
2692        perf_evlist__delete(evlist);
2693        trace->evlist = NULL;
2694        trace->live = false;
2695        return err;
2696{
2697        char errbuf[BUFSIZ];
2698
2699out_error_sched_stat_runtime:
2700        tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2701        goto out_error;
2702
2703out_error_raw_syscalls:
2704        tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2705        goto out_error;
2706
2707out_error_mmap:
2708        perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2709        goto out_error;
2710
2711out_error_open:
2712        perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2713
2714out_error:
2715        fprintf(trace->output, "%s\n", errbuf);
2716        goto out_delete_evlist;
2717
2718out_error_apply_filters:
2719        fprintf(trace->output,
2720                "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2721                evsel->filter, perf_evsel__name(evsel), errno,
2722                strerror_r(errno, errbuf, sizeof(errbuf)));
2723        goto out_delete_evlist;
2724}
2725out_error_mem:
2726        fprintf(trace->output, "Not enough memory to run!\n");
2727        goto out_delete_evlist;
2728
2729out_errno:
2730        fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2731        goto out_delete_evlist;
2732}
2733
2734static int trace__replay(struct trace *trace)
2735{
2736        const struct perf_evsel_str_handler handlers[] = {
2737                { "probe:vfs_getname",       trace__vfs_getname, },
2738        };
2739        struct perf_data_file file = {
2740                .path  = input_name,
2741                .mode  = PERF_DATA_MODE_READ,
2742                .force = trace->force,
2743        };
2744        struct perf_session *session;
2745        struct perf_evsel *evsel;
2746        int err = -1;
2747
2748        trace->tool.sample        = trace__process_sample;
2749        trace->tool.mmap          = perf_event__process_mmap;
2750        trace->tool.mmap2         = perf_event__process_mmap2;
2751        trace->tool.comm          = perf_event__process_comm;
2752        trace->tool.exit          = perf_event__process_exit;
2753        trace->tool.fork          = perf_event__process_fork;
2754        trace->tool.attr          = perf_event__process_attr;
2755        trace->tool.tracing_data = perf_event__process_tracing_data;
2756        trace->tool.build_id      = perf_event__process_build_id;
2757
2758        trace->tool.ordered_events = true;
2759        trace->tool.ordering_requires_timestamps = true;
2760
2761        /* add tid to output */
2762        trace->multiple_threads = true;
2763
2764        session = perf_session__new(&file, false, &trace->tool);
2765        if (session == NULL)
2766                return -1;
2767
2768        if (symbol__init(&session->header.env) < 0)
2769                goto out;
2770
2771        trace->host = &session->machines.host;
2772
2773        err = perf_session__set_tracepoints_handlers(session, handlers);
2774        if (err)
2775                goto out;
2776
2777        evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2778                                                     "raw_syscalls:sys_enter");
2779        /* older kernels have syscalls tp versus raw_syscalls */
2780        if (evsel == NULL)
2781                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2782                                                             "syscalls:sys_enter");
2783
2784        if (evsel &&
2785            (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2786            perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2787                pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2788                goto out;
2789        }
2790
2791        evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2792                                                     "raw_syscalls:sys_exit");
2793        if (evsel == NULL)
2794                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2795                                                             "syscalls:sys_exit");
2796        if (evsel &&
2797            (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2798            perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2799                pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2800                goto out;
2801        }
2802
2803        evlist__for_each(session->evlist, evsel) {
2804                if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2805                    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2806                     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2807                     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2808                        evsel->handler = trace__pgfault;
2809        }
2810
2811        err = parse_target_str(trace);
2812        if (err != 0)
2813                goto out;
2814
2815        setup_pager();
2816
2817        err = perf_session__process_events(session);
2818        if (err)
2819                pr_err("Failed to process events, error %d", err);
2820
2821        else if (trace->summary)
2822                trace__fprintf_thread_summary(trace, trace->output);
2823
2824out:
2825        perf_session__delete(session);
2826
2827        return err;
2828}
2829
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
        return fprintf(fp, "\n Summary of events:\n\n");
}
2838
2839static size_t thread__dump_stats(struct thread_trace *ttrace,
2840                                 struct trace *trace, FILE *fp)
2841{
2842        struct stats *stats;
2843        size_t printed = 0;
2844        struct syscall *sc;
2845        struct int_node *inode = intlist__first(ttrace->syscall_stats);
2846
2847        if (inode == NULL)
2848                return 0;
2849
2850        printed += fprintf(fp, "\n");
2851
2852        printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2853        printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2854        printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2855
2856        /* each int_node is a syscall */
2857        while (inode) {
2858                stats = inode->priv;
2859                if (stats) {
2860                        double min = (double)(stats->min) / NSEC_PER_MSEC;
2861                        double max = (double)(stats->max) / NSEC_PER_MSEC;
2862                        double avg = avg_stats(stats);
2863                        double pct;
2864                        u64 n = (u64) stats->n;
2865
2866                        pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2867                        avg /= NSEC_PER_MSEC;
2868
2869                        sc = &trace->syscalls.table[inode->i];
2870                        printed += fprintf(fp, "   %-15s", sc->name);
2871                        printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2872                                           n, avg * n, min, avg);
2873                        printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2874                }
2875
2876                inode = intlist__next(inode);
2877        }
2878
2879        printed += fprintf(fp, "\n\n");
2880
2881        return printed;
2882}
2883
/*
 * Context handed to the per-thread summary callback through its opaque
 * 'priv' argument.
 */
struct summary_data {
        FILE *fp;               /* stream the summary is written to */
        struct trace *trace;    /* tracer whose threads are being summarized */
        size_t printed;         /* running total of fprintf() return values */
};
2890
2891static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2892{
2893        struct summary_data *data = priv;
2894        FILE *fp = data->fp;
2895        size_t printed = data->printed;
2896        struct trace *trace = data->trace;
2897        struct thread_trace *ttrace = thread__priv(thread);
2898        double ratio;
2899
2900        if (ttrace == NULL)
2901                return 0;
2902
2903        ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2904
2905        printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2906        printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2907        printed += fprintf(fp, "%.1f%%", ratio);
2908        if (ttrace->pfmaj)
2909                printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2910        if (ttrace->pfmin)
2911                printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2912        printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2913        printed += thread__dump_stats(ttrace, trace, fp);
2914
2915        data->printed += printed;
2916
2917        return 0;
2918}
2919
2920static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2921{
2922        struct summary_data data = {
2923                .fp = fp,
2924                .trace = trace
2925        };
2926        data.printed = trace__fprintf_threads_header(fp);
2927
2928        machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2929
2930        return data.printed;
2931}
2932
2933static int trace__set_duration(const struct option *opt, const char *str,
2934                               int unset __maybe_unused)
2935{
2936        struct trace *trace = opt->value;
2937
2938        trace->duration_filter = atof(str);
2939        return 0;
2940}
2941
2942static int trace__set_filter_pids(const struct option *opt, const char *str,
2943                                  int unset __maybe_unused)
2944{
2945        int ret = -1;
2946        size_t i;
2947        struct trace *trace = opt->value;
2948        /*
2949         * FIXME: introduce a intarray class, plain parse csv and create a
2950         * { int nr, int entries[] } struct...
2951         */
2952        struct intlist *list = intlist__new(str);
2953
2954        if (list == NULL)
2955                return -1;
2956
2957        i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2958        trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2959
2960        if (trace->filter_pids.entries == NULL)
2961                goto out;
2962
2963        trace->filter_pids.entries[0] = getpid();
2964
2965        for (i = 1; i < trace->filter_pids.nr; ++i)
2966                trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2967
2968        intlist__delete(list);
2969        ret = 0;
2970out:
2971        return ret;
2972}
2973
2974static int trace__open_output(struct trace *trace, const char *filename)
2975{
2976        struct stat st;
2977
2978        if (!stat(filename, &st) && st.st_size) {
2979                char oldname[PATH_MAX];
2980
2981                scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2982                unlink(oldname);
2983                rename(filename, oldname);
2984        }
2985
2986        trace->output = fopen(filename, "w");
2987
2988        return trace->output == NULL ? -errno : 0;
2989}
2990
2991static int parse_pagefaults(const struct option *opt, const char *str,
2992                            int unset __maybe_unused)
2993{
2994        int *trace_pgfaults = opt->value;
2995
2996        if (strcmp(str, "all") == 0)
2997                *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2998        else if (strcmp(str, "maj") == 0)
2999                *trace_pgfaults |= TRACE_PFMAJ;
3000        else if (strcmp(str, "min") == 0)
3001                *trace_pgfaults |= TRACE_PFMIN;
3002        else
3003                return -1;
3004
3005        return 0;
3006}
3007
3008static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3009{
3010        struct perf_evsel *evsel;
3011
3012        evlist__for_each(evlist, evsel)
3013                evsel->handler = handler;
3014}
3015
3016int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3017{
3018        const char *trace_usage[] = {
3019                "perf trace [<options>] [<command>]",
3020                "perf trace [<options>] -- <command> [<options>]",
3021                "perf trace record [<options>] [<command>]",
3022                "perf trace record [<options>] -- <command> [<options>]",
3023                NULL
3024        };
3025        struct trace trace = {
3026                .audit = {
3027                        .machine = audit_detect_machine(),
3028                        .open_id = audit_name_to_syscall("open", trace.audit.machine),
3029                },
3030                .syscalls = {
3031                        . max = -1,
3032                },
3033                .opts = {
3034                        .target = {
3035                                .uid       = UINT_MAX,
3036                                .uses_mmap = true,
3037                        },
3038                        .user_freq     = UINT_MAX,
3039                        .user_interval = ULLONG_MAX,
3040                        .no_buffering  = true,
3041                        .mmap_pages    = UINT_MAX,
3042                        .proc_map_timeout  = 500,
3043                },
3044                .output = stderr,
3045                .show_comm = true,
3046                .trace_syscalls = true,
3047        };
3048        const char *output_name = NULL;
3049        const char *ev_qualifier_str = NULL;
3050        const struct option trace_options[] = {
3051        OPT_CALLBACK(0, "event", &trace.evlist, "event",
3052                     "event selector. use 'perf list' to list available events",
3053                     parse_events_option),
3054        OPT_BOOLEAN(0, "comm", &trace.show_comm,
3055                    "show the thread COMM next to its id"),
3056        OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3057        OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3058        OPT_STRING('o', "output", &output_name, "file", "output file name"),
3059        OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3060        OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3061                    "trace events on existing process id"),
3062        OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3063                    "trace events on existing thread id"),
3064        OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3065                     "pids to filter (by the kernel)", trace__set_filter_pids),
3066        OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3067                    "system-wide collection from all CPUs"),
3068        OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3069                    "list of cpus to monitor"),
3070        OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3071                    "child tasks do not inherit counters"),
3072        OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3073                     "number of mmap data pages",
3074                     perf_evlist__parse_mmap_pages),
3075        OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3076                   "user to profile"),
3077        OPT_CALLBACK(0, "duration", &trace, "float",
3078                     "show only events with duration > N.M ms",
3079                     trace__set_duration),
3080        OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3081        OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3082        OPT_BOOLEAN('T', "time", &trace.full_time,
3083                    "Show full timestamp, not time relative to first start"),
3084        OPT_BOOLEAN('s', "summary", &trace.summary_only,
3085                    "Show only syscall summary with statistics"),
3086        OPT_BOOLEAN('S', "with-summary", &trace.summary,
3087                    "Show all syscalls and summary with statistics"),
3088        OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3089                     "Trace pagefaults", parse_pagefaults, "maj"),
3090        OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3091        OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3092        OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3093                        "per thread proc mmap processing timeout in ms"),
3094        OPT_END()
3095        };
3096        const char * const trace_subcommands[] = { "record", NULL };
3097        int err;
3098        char bf[BUFSIZ];
3099
3100        signal(SIGSEGV, sighandler_dump_stack);
3101        signal(SIGFPE, sighandler_dump_stack);
3102
3103        trace.evlist = perf_evlist__new();
3104
3105        if (trace.evlist == NULL) {
3106                pr_err("Not enough memory to run!\n");
3107                err = -ENOMEM;
3108                goto out;
3109        }
3110
3111        argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3112                                 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3113
3114        if (trace.trace_pgfaults) {
3115                trace.opts.sample_address = true;
3116                trace.opts.sample_time = true;
3117        }
3118
3119        if (trace.evlist->nr_entries > 0)
3120                evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3121
3122        if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3123                return trace__record(&trace, argc-1, &argv[1]);
3124
3125        /* summary_only implies summary option, but don't overwrite summary if set */
3126        if (trace.summary_only)
3127                trace.summary = trace.summary_only;
3128
3129        if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3130            trace.evlist->nr_entries == 0 /* Was --events used? */) {
3131                pr_err("Please specify something to trace.\n");
3132                return -1;
3133        }
3134
3135        if (output_name != NULL) {
3136                err = trace__open_output(&trace, output_name);
3137                if (err < 0) {
3138                        perror("failed to create output file");
3139                        goto out;
3140                }
3141        }
3142
3143        if (ev_qualifier_str != NULL) {
3144                const char *s = ev_qualifier_str;
3145                struct strlist_config slist_config = {
3146                        .dirname = system_path(STRACE_GROUPS_DIR),
3147                };
3148
3149                trace.not_ev_qualifier = *s == '!';
3150                if (trace.not_ev_qualifier)
3151                        ++s;
3152                trace.ev_qualifier = strlist__new(s, &slist_config);
3153                if (trace.ev_qualifier == NULL) {
3154                        fputs("Not enough memory to parse event qualifier",
3155                              trace.output);
3156                        err = -ENOMEM;
3157                        goto out_close;
3158                }
3159
3160                err = trace__validate_ev_qualifier(&trace);
3161                if (err)
3162                        goto out_close;
3163        }
3164
3165        err = target__validate(&trace.opts.target);
3166        if (err) {
3167                target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3168                fprintf(trace.output, "%s", bf);
3169                goto out_close;
3170        }
3171
3172        err = target__parse_uid(&trace.opts.target);
3173        if (err) {
3174                target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3175                fprintf(trace.output, "%s", bf);
3176                goto out_close;
3177        }
3178
3179        if (!argc && target__none(&trace.opts.target))
3180                trace.opts.target.system_wide = true;
3181
3182        if (input_name)
3183                err = trace__replay(&trace);
3184        else
3185                err = trace__run(&trace, argc, argv);
3186
3187out_close:
3188        if (output_name != NULL)
3189                fclose(trace.output);
3190out:
3191        return err;
3192}
3193