linux/tools/perf/builtin-trace.c
<<
>>
Prefs
   1/*
   2 * builtin-trace.c
   3 *
   4 * Builtin 'trace' command:
   5 *
   6 * Display a continuously updated trace of any workload, CPU, specific PID,
   7 * system wide, etc.  Default format is loosely strace like, but any other
   8 * event may be specified using --event.
   9 *
  10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
  11 *
  12 * Initially based on the 'trace' prototype by Thomas Gleixner:
  13 *
  14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
  15 *
  16 * Released under the GPL v2. (and only v2, not any later version)
  17 */
  18
  19#include <traceevent/event-parse.h>
  20#include "builtin.h"
  21#include "util/color.h"
  22#include "util/debug.h"
  23#include "util/evlist.h"
  24#include "util/exec_cmd.h"
  25#include "util/machine.h"
  26#include "util/session.h"
  27#include "util/thread.h"
  28#include "util/parse-options.h"
  29#include "util/strlist.h"
  30#include "util/intlist.h"
  31#include "util/thread_map.h"
  32#include "util/stat.h"
  33#include "trace-event.h"
  34#include "util/parse-events.h"
  35
  36#include <libaudit.h>
  37#include <stdlib.h>
  38#include <sys/mman.h>
  39#include <linux/futex.h>
  40
/*
 * Fallback definitions for older distros: each constant below is only
 * defined here when the system headers included above did not provide it.
 */
#ifndef MAP_STACK
# define MAP_STACK              0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON          100

#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE         12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE       13
#endif

/* eventfd flags, consumed by the EFD_* beautifier in this file */
#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE          1
#endif

#ifndef EFD_NONBLOCK
# define EFD_NONBLOCK           00004000
#endif

#ifndef EFD_CLOEXEC
# define EFD_CLOEXEC            02000000
#endif

#ifndef O_CLOEXEC
# define O_CLOEXEC              02000000
#endif

/* socket type and socket type flags, consumed by the SOCK_* beautifier */
#ifndef SOCK_DCCP
# define SOCK_DCCP              6
#endif

#ifndef SOCK_CLOEXEC
# define SOCK_CLOEXEC           02000000
#endif

#ifndef SOCK_NONBLOCK
# define SOCK_NONBLOCK          00004000
#endif

#ifndef MSG_CMSG_CLOEXEC
# define MSG_CMSG_CLOEXEC       0x40000000
#endif

/* flag bits consumed by the PERF_FLAG_* beautifier in this file */
#ifndef PERF_FLAG_FD_NO_GROUP
# define PERF_FLAG_FD_NO_GROUP          (1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
# define PERF_FLAG_FD_OUTPUT            (1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
# define PERF_FLAG_PID_CGROUP           (1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
# define PERF_FLAG_FD_CLOEXEC           (1UL << 3) /* O_CLOEXEC */
#endif
 106
 107
/*
 * Accessor for one field of a tracepoint sample's raw payload: where the
 * field lives plus the callback used to read it.  Only one of the two
 * callbacks is meaningful for a given field (they share storage).
 */
struct tp_field {
        int offset; /* byte offset of the field from sample->raw_data */
        union {
                /* reads the field as an unsigned integer, widened to u64 */
                u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
                /* returns the address of the field inside the sample payload */
                void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
        };
};
 115
/*
 * Generates tp_field__u<bits>(): copies a u<bits> value out of the raw
 * sample payload at field->offset and returns it as a u64.  memcpy() is
 * used rather than a pointer cast, so no alignment is assumed for the
 * field inside the payload.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
        u##bits value; \
        memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
        return value;  \
}

/* Native byte order readers for 1, 2, 4 and 8 byte fields. */
TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);
 128
/*
 * Generates tp_field__swapped_u<bits>(): same as tp_field__u<bits>() but the
 * value is byte swapped with bswap_<bits>() before being returned.
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
        u##bits value; \
        memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
        return bswap_##bits(value);\
}

/* No 8-bit variant: a single byte has nothing to swap. */
TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
 140
 141static int tp_field__init_uint(struct tp_field *field,
 142                               struct format_field *format_field,
 143                               bool needs_swap)
 144{
 145        field->offset = format_field->offset;
 146
 147        switch (format_field->size) {
 148        case 1:
 149                field->integer = tp_field__u8;
 150                break;
 151        case 2:
 152                field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
 153                break;
 154        case 4:
 155                field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
 156                break;
 157        case 8:
 158                field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
 159                break;
 160        default:
 161                return -1;
 162        }
 163
 164        return 0;
 165}
 166
 167static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
 168{
 169        return sample->raw_data + field->offset;
 170}
 171
 172static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
 173{
 174        field->offset = format_field->offset;
 175        field->pointer = tp_field__ptr;
 176        return 0;
 177}
 178
/*
 * Per-evsel private data (stored in evsel->priv) for the syscall
 * tracepoints: accessors for the payload fields this file reads.
 */
struct syscall_tp {
        struct tp_field id; /* the "id" field, set up in perf_evsel__init_syscall_tp() */
        union {
                /*
                 * args and ret share storage.
                 * NOTE(review): presumably args is used on the enter
                 * tracepoint and ret on the exit one - confirm at the
                 * call sites that initialize them.
                 */
                struct tp_field args, ret;
        };
};
 185
 186static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 187                                          struct tp_field *field,
 188                                          const char *name)
 189{
 190        struct format_field *format_field = perf_evsel__field(evsel, name);
 191
 192        if (format_field == NULL)
 193                return -1;
 194
 195        return tp_field__init_uint(field, format_field, evsel->needs_swap);
 196}
 197
/*
 * Initialize the struct syscall_tp member called 'name' (found through
 * evsel->priv) from the tracepoint field with that very same name, as an
 * unsigned integer accessor.  Evaluates to the init function's return value.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
        ({ struct syscall_tp *sc = evsel->priv;\
           perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 201
 202static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 203                                         struct tp_field *field,
 204                                         const char *name)
 205{
 206        struct format_field *format_field = perf_evsel__field(evsel, name);
 207
 208        if (format_field == NULL)
 209                return -1;
 210
 211        return tp_field__init_ptr(field, format_field);
 212}
 213
/*
 * Pointer flavour of perf_evsel__init_sc_tp_uint_field(): initialize the
 * struct syscall_tp member called 'name' as a pointer accessor for the
 * tracepoint field with the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
        ({ struct syscall_tp *sc = evsel->priv;\
           perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 217
/*
 * Destroy an evsel together with its private area: evsel->priv is released
 * first, then the evsel itself is deleted.
 */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
        /* must happen before the evsel, which holds the pointer, goes away */
        zfree(&evsel->priv);
        perf_evsel__delete(evsel);
}
 223
 224static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
 225{
 226        evsel->priv = malloc(sizeof(struct syscall_tp));
 227        if (evsel->priv != NULL) {
 228                if (perf_evsel__init_sc_tp_uint_field(evsel, id))
 229                        goto out_delete;
 230
 231                evsel->handler = handler;
 232                return 0;
 233        }
 234
 235        return -ENOMEM;
 236
 237out_delete:
 238        zfree(&evsel->priv);
 239        return -ENOENT;
 240}
 241
 242static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
 243{
 244        struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 245
 246        /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 247        if (evsel == NULL)
 248                evsel = perf_evsel__newtp("syscalls", direction);
 249
 250        if (evsel) {
 251                if (perf_evsel__init_syscall_tp(evsel, handler))
 252                        goto out_delete;
 253        }
 254
 255        return evsel;
 256
 257out_delete:
 258        perf_evsel__delete_priv(evsel);
 259        return NULL;
 260}
 261
/*
 * Read the struct syscall_tp member 'name' of @evsel from @sample through
 * its integer accessor; evaluates to the u64 the accessor returns.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
        ({ struct syscall_tp *fields = evsel->priv; \
           fields->name.integer(&fields->name, sample); })

/*
 * Same as above through the pointer accessor; evaluates to the void pointer
 * the accessor returns.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
        ({ struct syscall_tp *fields = evsel->priv; \
           fields->name.pointer(&fields->name, sample); })
 269
/*
 * Everything a beautifier (the syscall_arg__scnprintf_*() routines) gets to
 * know about the syscall argument it is asked to format.
 */
struct syscall_arg {
        unsigned long val;      /* raw value of the argument */
        struct thread *thread;
        struct trace  *trace;
        void          *parm;    /* beautifier specific data, e.g. a struct strarray */
        u8            idx;      /* position of this argument in the syscall */
        u8            mask;     /* one bit per argument, beautifiers may set bits here */
};
 278
/*
 * Table used to turn an integer argument into a name: value 'v' is looked
 * up as entries[v - offset].
 */
struct strarray {
        int         offset;     /* value that corresponds to entries[0] */
        int         nr_entries; /* number of slots in entries[] */
        const char **entries;
};

/* Define strarray__<array> describing 'array', with entries[0] meaning 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}

/* Same, but entries[0] corresponds to the value 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
        .offset     = off, \
        .nr_entries = ARRAY_SIZE(array), \
        .entries = array, \
}
 295
 296static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
 297                                                const char *intfmt,
 298                                                struct syscall_arg *arg)
 299{
 300        struct strarray *sa = arg->parm;
 301        int idx = arg->val - sa->offset;
 302
 303        if (idx < 0 || idx >= sa->nr_entries)
 304                return scnprintf(bf, size, intfmt, arg->val);
 305
 306        return scnprintf(bf, size, "%s", sa->entries[idx]);
 307}
 308
/*
 * strarray beautifier: prints the name found in the struct strarray passed
 * via arg->parm, falling back to the "%d" format for values without a name.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
                                              struct syscall_arg *arg)
{
        return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
 316
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *        gets rewritten to support all arches.
 */
/*
 * Same as syscall_arg__scnprintf_strarray(), but values without a name are
 * printed with the "%#x" format instead of "%d".
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
                                                 struct syscall_arg *arg)
{
        return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
 330
/*
 * File descriptor beautifier.  Only declared here, so that
 * syscall_arg__scnprintf_fd_at() can call it; the definition is not in this
 * part of the file.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
                                        struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
 335
 336static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
 337                                           struct syscall_arg *arg)
 338{
 339        int fd = arg->val;
 340
 341        if (fd == AT_FDCWD)
 342                return scnprintf(bf, size, "CWD");
 343
 344        return syscall_arg__scnprintf_fd(bf, size, arg);
 345}
 346
 347#define SCA_FDAT syscall_arg__scnprintf_fd_at
 348
/*
 * Beautifier selected through SCA_CLOSE_FD.  Only declared here; the
 * definition is not in this part of the file.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
                                              struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
 353
 354static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
 355                                         struct syscall_arg *arg)
 356{
 357        return scnprintf(bf, size, "%#lx", arg->val);
 358}
 359
 360#define SCA_HEX syscall_arg__scnprintf_hex
 361
 362static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
 363                                         struct syscall_arg *arg)
 364{
 365        return scnprintf(bf, size, "%d", arg->val);
 366}
 367
 368#define SCA_INT syscall_arg__scnprintf_int
 369
 370static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 371                                               struct syscall_arg *arg)
 372{
 373        int printed = 0, prot = arg->val;
 374
 375        if (prot == PROT_NONE)
 376                return scnprintf(bf, size, "NONE");
 377#define P_MMAP_PROT(n) \
 378        if (prot & PROT_##n) { \
 379                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 380                prot &= ~PROT_##n; \
 381        }
 382
 383        P_MMAP_PROT(EXEC);
 384        P_MMAP_PROT(READ);
 385        P_MMAP_PROT(WRITE);
 386#ifdef PROT_SEM
 387        P_MMAP_PROT(SEM);
 388#endif
 389        P_MMAP_PROT(GROWSDOWN);
 390        P_MMAP_PROT(GROWSUP);
 391#undef P_MMAP_PROT
 392
 393        if (prot)
 394                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
 395
 396        return printed;
 397}
 398
 399#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 400
 401static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 402                                                struct syscall_arg *arg)
 403{
 404        int printed = 0, flags = arg->val;
 405
 406#define P_MMAP_FLAG(n) \
 407        if (flags & MAP_##n) { \
 408                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 409                flags &= ~MAP_##n; \
 410        }
 411
 412        P_MMAP_FLAG(SHARED);
 413        P_MMAP_FLAG(PRIVATE);
 414#ifdef MAP_32BIT
 415        P_MMAP_FLAG(32BIT);
 416#endif
 417        P_MMAP_FLAG(ANONYMOUS);
 418        P_MMAP_FLAG(DENYWRITE);
 419        P_MMAP_FLAG(EXECUTABLE);
 420        P_MMAP_FLAG(FILE);
 421        P_MMAP_FLAG(FIXED);
 422        P_MMAP_FLAG(GROWSDOWN);
 423#ifdef MAP_HUGETLB
 424        P_MMAP_FLAG(HUGETLB);
 425#endif
 426        P_MMAP_FLAG(LOCKED);
 427        P_MMAP_FLAG(NONBLOCK);
 428        P_MMAP_FLAG(NORESERVE);
 429        P_MMAP_FLAG(POPULATE);
 430        P_MMAP_FLAG(STACK);
 431#ifdef MAP_UNINITIALIZED
 432        P_MMAP_FLAG(UNINITIALIZED);
 433#endif
 434#undef P_MMAP_FLAG
 435
 436        if (flags)
 437                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 438
 439        return printed;
 440}
 441
 442#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
 443
 444static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
 445                                                  struct syscall_arg *arg)
 446{
 447        int printed = 0, flags = arg->val;
 448
 449#define P_MREMAP_FLAG(n) \
 450        if (flags & MREMAP_##n) { \
 451                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 452                flags &= ~MREMAP_##n; \
 453        }
 454
 455        P_MREMAP_FLAG(MAYMOVE);
 456#ifdef MREMAP_FIXED
 457        P_MREMAP_FLAG(FIXED);
 458#endif
 459#undef P_MREMAP_FLAG
 460
 461        if (flags)
 462                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 463
 464        return printed;
 465}
 466
 467#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
 468
 469static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
 470                                                      struct syscall_arg *arg)
 471{
 472        int behavior = arg->val;
 473
 474        switch (behavior) {
 475#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
 476        P_MADV_BHV(NORMAL);
 477        P_MADV_BHV(RANDOM);
 478        P_MADV_BHV(SEQUENTIAL);
 479        P_MADV_BHV(WILLNEED);
 480        P_MADV_BHV(DONTNEED);
 481        P_MADV_BHV(REMOVE);
 482        P_MADV_BHV(DONTFORK);
 483        P_MADV_BHV(DOFORK);
 484        P_MADV_BHV(HWPOISON);
 485#ifdef MADV_SOFT_OFFLINE
 486        P_MADV_BHV(SOFT_OFFLINE);
 487#endif
 488        P_MADV_BHV(MERGEABLE);
 489        P_MADV_BHV(UNMERGEABLE);
 490#ifdef MADV_HUGEPAGE
 491        P_MADV_BHV(HUGEPAGE);
 492#endif
 493#ifdef MADV_NOHUGEPAGE
 494        P_MADV_BHV(NOHUGEPAGE);
 495#endif
 496#ifdef MADV_DONTDUMP
 497        P_MADV_BHV(DONTDUMP);
 498#endif
 499#ifdef MADV_DODUMP
 500        P_MADV_BHV(DODUMP);
 501#endif
 502#undef P_MADV_PHV
 503        default: break;
 504        }
 505
 506        return scnprintf(bf, size, "%#x", behavior);
 507}
 508
 509#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
 510
 511static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
 512                                           struct syscall_arg *arg)
 513{
 514        int printed = 0, op = arg->val;
 515
 516        if (op == 0)
 517                return scnprintf(bf, size, "NONE");
 518#define P_CMD(cmd) \
 519        if ((op & LOCK_##cmd) == LOCK_##cmd) { \
 520                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
 521                op &= ~LOCK_##cmd; \
 522        }
 523
 524        P_CMD(SH);
 525        P_CMD(EX);
 526        P_CMD(NB);
 527        P_CMD(UN);
 528        P_CMD(MAND);
 529        P_CMD(RW);
 530        P_CMD(READ);
 531        P_CMD(WRITE);
 532#undef P_OP
 533
 534        if (op)
 535                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
 536
 537        return printed;
 538}
 539
 540#define SCA_FLOCK syscall_arg__scnprintf_flock
 541
 542static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
 543{
 544        enum syscall_futex_args {
 545                SCF_UADDR   = (1 << 0),
 546                SCF_OP      = (1 << 1),
 547                SCF_VAL     = (1 << 2),
 548                SCF_TIMEOUT = (1 << 3),
 549                SCF_UADDR2  = (1 << 4),
 550                SCF_VAL3    = (1 << 5),
 551        };
 552        int op = arg->val;
 553        int cmd = op & FUTEX_CMD_MASK;
 554        size_t printed = 0;
 555
 556        switch (cmd) {
 557#define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
 558        P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
 559        P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 560        P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 561        P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
 562        P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
 563        P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
 564        P_FUTEX_OP(WAKE_OP);                                                      break;
 565        P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 566        P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
 567        P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
 568        P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
 569        P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
 570        P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
 571        default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
 572        }
 573
 574        if (op & FUTEX_PRIVATE_FLAG)
 575                printed += scnprintf(bf + printed, size - printed, "|PRIV");
 576
 577        if (op & FUTEX_CLOCK_REALTIME)
 578                printed += scnprintf(bf + printed, size - printed, "|CLKRT");
 579
 580        return printed;
 581}
 582
 583#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
 584
/*
 * Name tables for integer arguments, each paired with the struct strarray
 * (strarray__<table>) that the strarray beautifier looks values up in.
 * Entry N names the value N plus the offset of the strarray, which is zero
 * unless DEFINE_STRARRAY_OFFSET is used.
 */

/* the first entry names the value 1 */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

static const char *keyctl_options[] = {
        "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
        "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
        "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
        "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
        "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);

/* "DATA" and "HOLE" are only present when the headers define SEEK_DATA/SEEK_HOLE */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

static const char *fcntl_cmds[] = {
        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
        "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
        "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
        "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

static const char *rlimit_resources[] = {
        "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
        "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
        "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

static const char *clockid[] = {
        "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
        "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
        "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);
 634
 635static const char *socket_families[] = {
 636        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
 637        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
 638        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
 639        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
 640        "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
 641        "ALG", "NFC", "VSOCK",
 642};
/* strarray__socket_families: lookup descriptor for the table above, offset 0 */
static DEFINE_STRARRAY(socket_families);

/* Low bits of a socket 'type' argument that hold the SOCK_* type proper */
#ifndef SOCK_TYPE_MASK
#define SOCK_TYPE_MASK 0xf
#endif
 648
 649static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
 650                                                      struct syscall_arg *arg)
 651{
 652        size_t printed;
 653        int type = arg->val,
 654            flags = type & ~SOCK_TYPE_MASK;
 655
 656        type &= SOCK_TYPE_MASK;
 657        /*
 658         * Can't use a strarray, MIPS may override for ABI reasons.
 659         */
 660        switch (type) {
 661#define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
 662        P_SK_TYPE(STREAM);
 663        P_SK_TYPE(DGRAM);
 664        P_SK_TYPE(RAW);
 665        P_SK_TYPE(RDM);
 666        P_SK_TYPE(SEQPACKET);
 667        P_SK_TYPE(DCCP);
 668        P_SK_TYPE(PACKET);
 669#undef P_SK_TYPE
 670        default:
 671                printed = scnprintf(bf, size, "%#x", type);
 672        }
 673
 674#define P_SK_FLAG(n) \
 675        if (flags & SOCK_##n) { \
 676                printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
 677                flags &= ~SOCK_##n; \
 678        }
 679
 680        P_SK_FLAG(CLOEXEC);
 681        P_SK_FLAG(NONBLOCK);
 682#undef P_SK_FLAG
 683
 684        if (flags)
 685                printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
 686
 687        return printed;
 688}
 689
 690#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
 691
/*
 * MSG_* flags printed by the msg flags beautifier below; each one is only
 * defined here when the system headers did not provide it.
 */
#ifndef MSG_PROBE
#define MSG_PROBE            0x10
#endif
#ifndef MSG_WAITFORONE
#define MSG_WAITFORONE  0x10000
#endif
#ifndef MSG_SENDPAGE_NOTLAST
#define MSG_SENDPAGE_NOTLAST 0x20000
#endif
#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN         0x20000000
#endif
 704
 705static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
 706                                               struct syscall_arg *arg)
 707{
 708        int printed = 0, flags = arg->val;
 709
 710        if (flags == 0)
 711                return scnprintf(bf, size, "NONE");
 712#define P_MSG_FLAG(n) \
 713        if (flags & MSG_##n) { \
 714                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 715                flags &= ~MSG_##n; \
 716        }
 717
 718        P_MSG_FLAG(OOB);
 719        P_MSG_FLAG(PEEK);
 720        P_MSG_FLAG(DONTROUTE);
 721        P_MSG_FLAG(TRYHARD);
 722        P_MSG_FLAG(CTRUNC);
 723        P_MSG_FLAG(PROBE);
 724        P_MSG_FLAG(TRUNC);
 725        P_MSG_FLAG(DONTWAIT);
 726        P_MSG_FLAG(EOR);
 727        P_MSG_FLAG(WAITALL);
 728        P_MSG_FLAG(FIN);
 729        P_MSG_FLAG(SYN);
 730        P_MSG_FLAG(CONFIRM);
 731        P_MSG_FLAG(RST);
 732        P_MSG_FLAG(ERRQUEUE);
 733        P_MSG_FLAG(NOSIGNAL);
 734        P_MSG_FLAG(MORE);
 735        P_MSG_FLAG(WAITFORONE);
 736        P_MSG_FLAG(SENDPAGE_NOTLAST);
 737        P_MSG_FLAG(FASTOPEN);
 738        P_MSG_FLAG(CMSG_CLOEXEC);
 739#undef P_MSG_FLAG
 740
 741        if (flags)
 742                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 743
 744        return printed;
 745}
 746
 747#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
 748
 749static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
 750                                                 struct syscall_arg *arg)
 751{
 752        size_t printed = 0;
 753        int mode = arg->val;
 754
 755        if (mode == F_OK) /* 0 */
 756                return scnprintf(bf, size, "F");
 757#define P_MODE(n) \
 758        if (mode & n##_OK) { \
 759                printed += scnprintf(bf + printed, size - printed, "%s", #n); \
 760                mode &= ~n##_OK; \
 761        }
 762
 763        P_MODE(R);
 764        P_MODE(W);
 765        P_MODE(X);
 766#undef P_MODE
 767
 768        if (mode)
 769                printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
 770
 771        return printed;
 772}
 773
 774#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
 775
/*
 * Beautifier selected through SCA_FILENAME.  Only declared here, so that
 * the tables below can refer to it; the definition is not in this part of
 * the file.
 */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
                                              struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
 780
 781static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
 782                                               struct syscall_arg *arg)
 783{
 784        int printed = 0, flags = arg->val;
 785
 786        if (!(flags & O_CREAT))
 787                arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
 788
 789        if (flags == 0)
 790                return scnprintf(bf, size, "RDONLY");
 791#define P_FLAG(n) \
 792        if (flags & O_##n) { \
 793                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 794                flags &= ~O_##n; \
 795        }
 796
 797        P_FLAG(APPEND);
 798        P_FLAG(ASYNC);
 799        P_FLAG(CLOEXEC);
 800        P_FLAG(CREAT);
 801        P_FLAG(DIRECT);
 802        P_FLAG(DIRECTORY);
 803        P_FLAG(EXCL);
 804        P_FLAG(LARGEFILE);
 805        P_FLAG(NOATIME);
 806        P_FLAG(NOCTTY);
 807#ifdef O_NONBLOCK
 808        P_FLAG(NONBLOCK);
 809#elif O_NDELAY
 810        P_FLAG(NDELAY);
 811#endif
 812#ifdef O_PATH
 813        P_FLAG(PATH);
 814#endif
 815        P_FLAG(RDWR);
 816#ifdef O_DSYNC
 817        if ((flags & O_SYNC) == O_SYNC)
 818                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
 819        else {
 820                P_FLAG(DSYNC);
 821        }
 822#else
 823        P_FLAG(SYNC);
 824#endif
 825        P_FLAG(TRUNC);
 826        P_FLAG(WRONLY);
 827#undef P_FLAG
 828
 829        if (flags)
 830                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 831
 832        return printed;
 833}
 834
 835#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
 836
 837static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
 838                                                struct syscall_arg *arg)
 839{
 840        int printed = 0, flags = arg->val;
 841
 842        if (flags == 0)
 843                return 0;
 844
 845#define P_FLAG(n) \
 846        if (flags & PERF_FLAG_##n) { \
 847                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 848                flags &= ~PERF_FLAG_##n; \
 849        }
 850
 851        P_FLAG(FD_NO_GROUP);
 852        P_FLAG(FD_OUTPUT);
 853        P_FLAG(PID_CGROUP);
 854        P_FLAG(FD_CLOEXEC);
 855#undef P_FLAG
 856
 857        if (flags)
 858                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 859
 860        return printed;
 861}
 862
 863#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
 864
 865static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
 866                                                   struct syscall_arg *arg)
 867{
 868        int printed = 0, flags = arg->val;
 869
 870        if (flags == 0)
 871                return scnprintf(bf, size, "NONE");
 872#define P_FLAG(n) \
 873        if (flags & EFD_##n) { \
 874                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 875                flags &= ~EFD_##n; \
 876        }
 877
 878        P_FLAG(SEMAPHORE);
 879        P_FLAG(CLOEXEC);
 880        P_FLAG(NONBLOCK);
 881#undef P_FLAG
 882
 883        if (flags)
 884                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 885
 886        return printed;
 887}
 888
 889#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
 890
 891static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
 892                                                struct syscall_arg *arg)
 893{
 894        int printed = 0, flags = arg->val;
 895
 896#define P_FLAG(n) \
 897        if (flags & O_##n) { \
 898                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 899                flags &= ~O_##n; \
 900        }
 901
 902        P_FLAG(CLOEXEC);
 903        P_FLAG(NONBLOCK);
 904#undef P_FLAG
 905
 906        if (flags)
 907                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 908
 909        return printed;
 910}
 911
 912#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 913
 914static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
 915{
 916        int sig = arg->val;
 917
 918        switch (sig) {
 919#define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
 920        P_SIGNUM(HUP);
 921        P_SIGNUM(INT);
 922        P_SIGNUM(QUIT);
 923        P_SIGNUM(ILL);
 924        P_SIGNUM(TRAP);
 925        P_SIGNUM(ABRT);
 926        P_SIGNUM(BUS);
 927        P_SIGNUM(FPE);
 928        P_SIGNUM(KILL);
 929        P_SIGNUM(USR1);
 930        P_SIGNUM(SEGV);
 931        P_SIGNUM(USR2);
 932        P_SIGNUM(PIPE);
 933        P_SIGNUM(ALRM);
 934        P_SIGNUM(TERM);
 935        P_SIGNUM(CHLD);
 936        P_SIGNUM(CONT);
 937        P_SIGNUM(STOP);
 938        P_SIGNUM(TSTP);
 939        P_SIGNUM(TTIN);
 940        P_SIGNUM(TTOU);
 941        P_SIGNUM(URG);
 942        P_SIGNUM(XCPU);
 943        P_SIGNUM(XFSZ);
 944        P_SIGNUM(VTALRM);
 945        P_SIGNUM(PROF);
 946        P_SIGNUM(WINCH);
 947        P_SIGNUM(IO);
 948        P_SIGNUM(PWR);
 949        P_SIGNUM(SYS);
 950#ifdef SIGEMT
 951        P_SIGNUM(EMT);
 952#endif
 953#ifdef SIGSTKFLT
 954        P_SIGNUM(STKFLT);
 955#endif
 956#ifdef SIGSWI
 957        P_SIGNUM(SWI);
 958#endif
 959        default: break;
 960        }
 961
 962        return scnprintf(bf, size, "%#x", sig);
 963}
 964
 965#define SCA_SIGNUM syscall_arg__scnprintf_signum
 966
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
#define TCGETS          0x5401

/*
 * Names of the terminal ioctl commands, stored positionally: entry 0 is
 * "TCGETS" and each following string is the next command number.  The
 * designated initializers ([0x27], [0x50], [0x60]) jump over ranges that
 * have no name, leaving NULL slots in between.
 *
 * NOTE(review): the strarray defined below is given offset 0x5401 (the value
 * of TCGETS above), so presumably index i corresponds to ioctl command
 * 0x5401 + i -- confirm against DEFINE_STRARRAY_OFFSET.
 */
static const char *tioctls[] = {
        "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
        "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
        "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
        "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
        "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
        "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
        "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
        "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
        "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
        "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
        "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
        [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
        "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
        "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
        "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
};

/* Defines strarray__tioctls, referenced by the "ioctl" syscall_fmts entry. */
static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
#endif /* defined(__i386__) || defined(__x86_64__) */
 993
/*
 * Shorthand for a syscall_fmts entry whose argument number 'arg' is printed
 * through SCA_STRARRAY using the table strarray__<array>.  The 'name'
 * parameter is not used by the expansion; it only documents the argument.
 */
#define STRARRAY(arg, name, array) \
          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
          .arg_parm      = { [arg] = &strarray__##array, }

/*
 * Per-syscall formatting overrides.
 *
 * name:          key used by syscall_fmt__find() / syscall_fmt__cmp().
 * alias:         alternative name for the syscall.
 *                NOTE(review): how the alias is consumed is not visible
 *                here -- confirm against the lookup code.
 * arg_scnprintf: optional formatter per argument position (up to 6);
 *                syscall__set_arg_fmts() copies the non-NULL entries.
 * arg_parm:      optional per-argument parameter for the formatter (e.g. the
 *                strarray used by SCA_STRARRAY).
 * errmsg, timeout, hexret: flags on how the return value is shown.
 *                NOTE(review): presumably "decode errno", "0 means timeout"
 *                and "print in hex" respectively -- confirm at the use site.
 *
 * The table MUST stay sorted by .name in strcmp() order, because
 * syscall_fmt__find() looks entries up with bsearch().
 */
static struct syscall_fmt {
        const char *name;
        const char *alias;
        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
        void       *arg_parm[6];
        bool       errmsg;
        bool       timeout;
        bool       hexret;
} syscall_fmts[] = {
        { .name     = "access",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
                             [1] = SCA_ACCMODE,  /* mode */ }, },
        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
        { .name     = "brk",        .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
        { .name     = "chdir",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "chmod",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "chroot",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
        { .name     = "close",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
        { .name     = "connect",    .errmsg = true, },
        { .name     = "creat",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "dup",        .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup2",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "dup3",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
        { .name     = "eventfd2",   .errmsg = true,
          .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
        { .name     = "faccessat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fadvise64",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fallocate",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchdir",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmod",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchmodat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fchown",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fchownat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fcntl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_STRARRAY, /* cmd */ },
          .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
        { .name     = "fdatasync",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "flock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [1] = SCA_FLOCK, /* cmd */ }, },
        { .name     = "fsetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstat",      .errmsg = true, .alias = "newfstat",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat",
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "fstatfs",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "fsync",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "ftruncate", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "futex",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
        { .name     = "futimesat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "getdents",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getdents64", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "getxattr",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "inotify_add_watch",          .errmsg = true,
          .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "ioctl",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
                             [1] = SCA_STRHEXARRAY, /* cmd */
                             [2] = SCA_HEX, /* arg */ },
          .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
#else
                             [2] = SCA_HEX, /* arg */ }, },
#endif
        { .name     = "keyctl",     .errmsg = true, STRARRAY(0, option, keyctl_options), },
        { .name     = "kill",       .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "lchown",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "lgetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "linkat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
        { .name     = "listxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "llistxattr", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "lremovexattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "lseek",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_STRARRAY, /* whence */ },
          .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
        { .name     = "lsetxattr",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "lstat",      .errmsg = true, .alias = "newlstat",
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "lsxattr",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "madvise",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX,      /* start */
                             [2] = SCA_MADV_BHV, /* behavior */ }, },
        { .name     = "mkdir",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "mkdirat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "mknod",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "mknodat",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "mlock",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "mlockall",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "mmap",       .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
                             [2] = SCA_MMAP_PROT, /* prot */
                             [3] = SCA_MMAP_FLAGS, /* flags */
                             [4] = SCA_FD,        /* fd */ }, },
        { .name     = "mprotect",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* start */
                             [2] = SCA_MMAP_PROT, /* prot */ }, },
        { .name     = "mq_unlink", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
        { .name     = "mremap",     .hexret = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */
                             [3] = SCA_MREMAP_FLAGS, /* flags */
                             [4] = SCA_HEX, /* new_addr */ }, },
        { .name     = "munlock",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "munmap",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
        { .name     = "name_to_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "newfstatat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "open",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
                             [1] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "open_by_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "openat",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* filename */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "perf_event_open", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_INT, /* pid */
                             [2] = SCA_INT, /* cpu */
                             [3] = SCA_FD,  /* group_fd */
                             [4] = SCA_PERF_FLAGS,  /* flags */ }, },
        { .name     = "pipe2",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
        { .name     = "poll",       .errmsg = true, .timeout = true, },
        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
        { .name     = "pread",      .errmsg = true, .alias = "pread64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "preadv",     .errmsg = true, .alias = "pread",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
        { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64",
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "pwritev",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "read",       .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "readlink",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
        { .name     = "readlinkat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "readv",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "recvfrom",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmmsg",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "recvmsg",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "removexattr", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "renameat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "rmdir",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "rt_sigaction", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
        { .name     = "rt_sigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "select",     .errmsg = true, .timeout = true, },
        { .name     = "sendmmsg",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendmsg",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [2] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "sendto",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [3] = SCA_MSG_FLAGS, /* flags */ }, },
        { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
        { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "setxattr",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "shutdown",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "socket",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "socketpair", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
                             [1] = SCA_SK_TYPE, /* type */ },
          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
        { .name     = "stat",       .errmsg = true, .alias = "newstat",
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "statfs",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "swapoff",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
        { .name     = "swapon",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
        { .name     = "symlinkat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "tgkill",     .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "tkill",      .errmsg = true,
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "truncate",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
        { .name     = "uname",      .errmsg = true, .alias = "newuname", },
        { .name     = "unlinkat",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [1] = SCA_FILENAME, /* pathname */ }, },
        { .name     = "utime",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "utimensat",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
                             [1] = SCA_FILENAME, /* filename */ }, },
        { .name     = "utimes",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "vmsplice",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "write",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "writev",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
};
1284
1285static int syscall_fmt__cmp(const void *name, const void *fmtp)
1286{
1287        const struct syscall_fmt *fmt = fmtp;
1288        return strcmp(name, fmt->name);
1289}
1290
1291static struct syscall_fmt *syscall_fmt__find(const char *name)
1292{
1293        const int nmemb = ARRAY_SIZE(syscall_fmts);
1294        return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1295}
1296
/*
 * Per-syscall descriptor kept in trace->syscalls.table.
 */
struct syscall {
        struct event_format *tp_format;
        int                 nr_args;       /* sizes the arg_scnprintf array */
        struct format_field *args;         /* argument list, linked through ->next */
        const char          *name;
        bool                is_exit;
        struct syscall_fmt  *fmt;          /* optional formatting override; may be NULL */
        /* per-argument formatters, allocated by syscall__set_arg_fmts() */
        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
        void                **arg_parm;    /* set to fmt->arg_parm when fmt is non-NULL */
};
1307
1308static size_t fprintf_duration(unsigned long t, FILE *fp)
1309{
1310        double duration = (double)t / NSEC_PER_MSEC;
1311        size_t printed = fprintf(fp, "(");
1312
1313        if (duration >= 1.0)
1314                printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1315        else if (duration >= 0.01)
1316                printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1317        else
1318                printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1319        return printed + fprintf(fp, "): ");
1320}
1321
/**
 * Per-thread tracing state, stored as the thread's private data.
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 *                         Stored as an offset from entry_str.
 * paths: fd -> pathname cache; paths.table is indexed by fd and paths.max is
 *        the highest valid index (-1 while the table is empty).
 */
struct thread_trace {
        u64               entry_time;
        u64               exit_time;
        bool              entry_pending;
        unsigned long     nr_events;      /* bumped on every thread__trace() call */
        unsigned long     pfmaj, pfmin;
        char              *entry_str;
        double            runtime_ms;
        struct {
                unsigned long ptr;
                short int     entry_str_pos;
                bool          pending_open;
                unsigned int  namelen;
                char          *name;
        } filename;
        struct {
                int       max;
                char      **table;
        } paths;

        struct intlist *syscall_stats;    /* created in thread_trace__new() */
};
1349
1350static struct thread_trace *thread_trace__new(void)
1351{
1352        struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1353
1354        if (ttrace)
1355                ttrace->paths.max = -1;
1356
1357        ttrace->syscall_stats = intlist__new(NULL);
1358
1359        return ttrace;
1360}
1361
1362static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1363{
1364        struct thread_trace *ttrace;
1365
1366        if (thread == NULL)
1367                goto fail;
1368
1369        if (thread__priv(thread) == NULL)
1370                thread__set_priv(thread, thread_trace__new());
1371
1372        if (thread__priv(thread) == NULL)
1373                goto fail;
1374
1375        ttrace = thread__priv(thread);
1376        ++ttrace->nr_events;
1377
1378        return ttrace;
1379fail:
1380        color_fprintf(fp, PERF_COLOR_RED,
1381                      "WARNING: not enough memory, dropping samples!\n");
1382        return NULL;
1383}
1384
/* NOTE(review): presumably bit flags for trace->trace_pgfaults -- confirm at the use site. */
#define TRACE_PFMAJ             (1 << 0)
#define TRACE_PFMIN             (1 << 1)

static const size_t trace__entry_str_size = 2048;

/*
 * Global state of one 'perf trace' session.
 */
struct trace {
        struct perf_tool        tool;           /* embedded; recovered with container_of() */
        struct {
                int             machine;        /* passed to audit_syscall_to_name() */
                int             open_id;
        }                       audit;
        struct {
                int             max;            /* highest id present in table */
                struct syscall  *table;         /* indexed by syscall id, grown with realloc() */
                struct {
                        struct perf_evsel *sys_enter,
                                          *sys_exit;
                }               events;
        } syscalls;
        struct record_opts      opts;
        struct perf_evlist      *evlist;
        struct machine          *host;          /* set by trace__symbols_init() */
        struct thread           *current;
        u64                     base_time;      /* subtracted from timestamps before printing */
        FILE                    *output;
        unsigned long           nr_events;
        struct strlist          *ev_qualifier;
        struct {
                size_t          nr;
                int             *entries;
        }                       ev_qualifier_ids;
        struct intlist          *tid_list;
        struct intlist          *pid_list;
        struct {
                size_t          nr;
                pid_t           *entries;
        }                       filter_pids;
        double                  duration_filter; /* scaled by NSEC_PER_MSEC when compared */
        double                  runtime_ms;
        struct {
                u64             vfs_getname,
                                proc_getname;   /* bumped per /proc fd path lookup attempt */
        } stats;
        bool                    not_ev_qualifier;
        bool                    live;           /* /proc is consulted for fd paths only when set */
        bool                    full_time;
        bool                    sched;
        bool                    multiple_threads; /* print tid (and comm) in the entry head */
        bool                    summary;
        bool                    summary_only;
        bool                    show_comm;
        bool                    show_tool_stats;
        bool                    trace_syscalls;
        bool                    force;
        bool                    vfs_getname;    /* when false, filename args print as hex */
        int                     trace_pgfaults;
};
1442
1443static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1444{
1445        struct thread_trace *ttrace = thread__priv(thread);
1446
1447        if (fd > ttrace->paths.max) {
1448                char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1449
1450                if (npath == NULL)
1451                        return -1;
1452
1453                if (ttrace->paths.max != -1) {
1454                        memset(npath + ttrace->paths.max + 1, 0,
1455                               (fd - ttrace->paths.max) * sizeof(char *));
1456                } else {
1457                        memset(npath, 0, (fd + 1) * sizeof(char *));
1458                }
1459
1460                ttrace->paths.table = npath;
1461                ttrace->paths.max   = fd;
1462        }
1463
1464        ttrace->paths.table[fd] = strdup(pathname);
1465
1466        return ttrace->paths.table[fd] != NULL ? 0 : -1;
1467}
1468
1469static int thread__read_fd_path(struct thread *thread, int fd)
1470{
1471        char linkname[PATH_MAX], pathname[PATH_MAX];
1472        struct stat st;
1473        int ret;
1474
1475        if (thread->pid_ == thread->tid) {
1476                scnprintf(linkname, sizeof(linkname),
1477                          "/proc/%d/fd/%d", thread->pid_, fd);
1478        } else {
1479                scnprintf(linkname, sizeof(linkname),
1480                          "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1481        }
1482
1483        if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1484                return -1;
1485
1486        ret = readlink(linkname, pathname, sizeof(pathname));
1487
1488        if (ret < 0 || ret > st.st_size)
1489                return -1;
1490
1491        pathname[ret] = '\0';
1492        return trace__set_fd_pathname(thread, fd, pathname);
1493}
1494
1495static const char *thread__fd_path(struct thread *thread, int fd,
1496                                   struct trace *trace)
1497{
1498        struct thread_trace *ttrace = thread__priv(thread);
1499
1500        if (ttrace == NULL)
1501                return NULL;
1502
1503        if (fd < 0)
1504                return NULL;
1505
1506        if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1507                if (!trace->live)
1508                        return NULL;
1509                ++trace->stats.proc_getname;
1510                if (thread__read_fd_path(thread, fd))
1511                        return NULL;
1512        }
1513
1514        return ttrace->paths.table[fd];
1515}
1516
1517static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1518                                        struct syscall_arg *arg)
1519{
1520        int fd = arg->val;
1521        size_t printed = scnprintf(bf, size, "%d", fd);
1522        const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1523
1524        if (path)
1525                printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1526
1527        return printed;
1528}
1529
1530static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1531                                              struct syscall_arg *arg)
1532{
1533        int fd = arg->val;
1534        size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1535        struct thread_trace *ttrace = thread__priv(arg->thread);
1536
1537        if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1538                zfree(&ttrace->paths.table[fd]);
1539
1540        return printed;
1541}
1542
1543static void thread__set_filename_pos(struct thread *thread, const char *bf,
1544                                     unsigned long ptr)
1545{
1546        struct thread_trace *ttrace = thread__priv(thread);
1547
1548        ttrace->filename.ptr = ptr;
1549        ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1550}
1551
1552static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1553                                              struct syscall_arg *arg)
1554{
1555        unsigned long ptr = arg->val;
1556
1557        if (!arg->trace->vfs_getname)
1558                return scnprintf(bf, size, "%#x", ptr);
1559
1560        thread__set_filename_pos(arg->thread, bf, ptr);
1561        return 0;
1562}
1563
1564static bool trace__filter_duration(struct trace *trace, double t)
1565{
1566        return t < (trace->duration_filter * NSEC_PER_MSEC);
1567}
1568
1569static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1570{
1571        double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1572
1573        return fprintf(fp, "%10.3f ", ts);
1574}
1575
/* Set by sig_handler(): 'done' on any handled signal, 'interrupted' only for SIGINT. */
static bool done = false;
static bool interrupted = false;

/*
 * Signal handler: flag that a signal arrived and record whether that signal
 * was SIGINT.
 */
static void sig_handler(int sig)
{
        done = true;

        if (sig == SIGINT)
                interrupted = true;
        else
                interrupted = false;
}
1584
1585static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1586                                        u64 duration, u64 tstamp, FILE *fp)
1587{
1588        size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1589        printed += fprintf_duration(duration, fp);
1590
1591        if (trace->multiple_threads) {
1592                if (trace->show_comm)
1593                        printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1594                printed += fprintf(fp, "%d ", thread->tid);
1595        }
1596
1597        return printed;
1598}
1599
1600static int trace__process_event(struct trace *trace, struct machine *machine,
1601                                union perf_event *event, struct perf_sample *sample)
1602{
1603        int ret = 0;
1604
1605        switch (event->header.type) {
1606        case PERF_RECORD_LOST:
1607                color_fprintf(trace->output, PERF_COLOR_RED,
1608                              "LOST %" PRIu64 " events!\n", event->lost.lost);
1609                ret = machine__process_lost_event(machine, event, sample);
1610        default:
1611                ret = machine__process_event(machine, event, sample);
1612                break;
1613        }
1614
1615        return ret;
1616}
1617
1618static int trace__tool_process(struct perf_tool *tool,
1619                               union perf_event *event,
1620                               struct perf_sample *sample,
1621                               struct machine *machine)
1622{
1623        struct trace *trace = container_of(tool, struct trace, tool);
1624        return trace__process_event(trace, machine, event, sample);
1625}
1626
1627static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1628{
1629        int err = symbol__init(NULL);
1630
1631        if (err)
1632                return err;
1633
1634        trace->host = machine__new_host();
1635        if (trace->host == NULL)
1636                return -ENOMEM;
1637
1638        if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1639                return -errno;
1640
1641        err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1642                                            evlist->threads, trace__tool_process, false,
1643                                            trace->opts.proc_map_timeout);
1644        if (err)
1645                symbol__exit();
1646
1647        return err;
1648}
1649
1650static int syscall__set_arg_fmts(struct syscall *sc)
1651{
1652        struct format_field *field;
1653        int idx = 0;
1654
1655        sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1656        if (sc->arg_scnprintf == NULL)
1657                return -1;
1658
1659        if (sc->fmt)
1660                sc->arg_parm = sc->fmt->arg_parm;
1661
1662        for (field = sc->args; field; field = field->next) {
1663                if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1664                        sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1665                else if (field->flags & FIELD_IS_POINTER)
1666                        sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1667                ++idx;
1668        }
1669
1670        return 0;
1671}
1672
1673static int trace__read_syscall_info(struct trace *trace, int id)
1674{
1675        char tp_name[128];
1676        struct syscall *sc;
1677        const char *name = audit_syscall_to_name(id, trace->audit.machine);
1678
1679        if (name == NULL)
1680                return -1;
1681
1682        if (id > trace->syscalls.max) {
1683                struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1684
1685                if (nsyscalls == NULL)
1686                        return -1;
1687
1688                if (trace->syscalls.max != -1) {
1689                        memset(nsyscalls + trace->syscalls.max + 1, 0,
1690                               (id - trace->syscalls.max) * sizeof(*sc));
1691                } else {
1692                        memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1693                }
1694
1695                trace->syscalls.table = nsyscalls;
1696                trace->syscalls.max   = id;
1697        }
1698
1699        sc = trace->syscalls.table + id;
1700        sc->name = name;
1701
1702        sc->fmt  = syscall_fmt__find(sc->name);
1703
1704        snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1705        sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1706
1707        if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1708                snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1709                sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1710        }
1711
1712        if (sc->tp_format == NULL)
1713                return -1;
1714
1715        sc->args = sc->tp_format->format.fields;
1716        sc->nr_args = sc->tp_format->format.nr_fields;
1717        /* drop nr field - not relevant here; does not exist on older kernels */
1718        if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1719                sc->args = sc->args->next;
1720                --sc->nr_args;
1721        }
1722
1723        sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1724
1725        return syscall__set_arg_fmts(sc);
1726}
1727
1728static int trace__validate_ev_qualifier(struct trace *trace)
1729{
1730        int err = 0, i;
1731        struct str_node *pos;
1732
1733        trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1734        trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1735                                                 sizeof(trace->ev_qualifier_ids.entries[0]));
1736
1737        if (trace->ev_qualifier_ids.entries == NULL) {
1738                fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1739                       trace->output);
1740                err = -EINVAL;
1741                goto out;
1742        }
1743
1744        i = 0;
1745
1746        strlist__for_each(pos, trace->ev_qualifier) {
1747                const char *sc = pos->s;
1748                int id = audit_name_to_syscall(sc, trace->audit.machine);
1749
1750                if (id < 0) {
1751                        if (err == 0) {
1752                                fputs("Error:\tInvalid syscall ", trace->output);
1753                                err = -EINVAL;
1754                        } else {
1755                                fputs(", ", trace->output);
1756                        }
1757
1758                        fputs(sc, trace->output);
1759                }
1760
1761                trace->ev_qualifier_ids.entries[i++] = id;
1762        }
1763
1764        if (err < 0) {
1765                fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1766                      "\nHint:\tand: 'man syscalls'\n", trace->output);
1767                zfree(&trace->ev_qualifier_ids.entries);
1768                trace->ev_qualifier_ids.nr = 0;
1769        }
1770out:
1771        return err;
1772}
1773
1774/*
1775 * args is to be interpreted as a series of longs but we need to handle
1776 * 8-byte unaligned accesses. args points to raw_data within the event
1777 * and raw_data is guaranteed to be 8-byte unaligned because it is
1778 * preceded by raw_size which is a u32. So we need to copy args to a temp
1779 * variable to read it. Most notably this avoids extended load instructions
1780 * on unaligned addresses
1781 */
1782
1783static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1784                                      unsigned char *args, struct trace *trace,
1785                                      struct thread *thread)
1786{
1787        size_t printed = 0;
1788        unsigned char *p;
1789        unsigned long val;
1790
1791        if (sc->args != NULL) {
1792                struct format_field *field;
1793                u8 bit = 1;
1794                struct syscall_arg arg = {
1795                        .idx    = 0,
1796                        .mask   = 0,
1797                        .trace  = trace,
1798                        .thread = thread,
1799                };
1800
1801                for (field = sc->args; field;
1802                     field = field->next, ++arg.idx, bit <<= 1) {
1803                        if (arg.mask & bit)
1804                                continue;
1805
1806                        /* special care for unaligned accesses */
1807                        p = args + sizeof(unsigned long) * arg.idx;
1808                        memcpy(&val, p, sizeof(val));
1809
1810                        /*
1811                         * Suppress this argument if its value is zero and
1812                         * and we don't have a string associated in an
1813                         * strarray for it.
1814                         */
1815                        if (val == 0 &&
1816                            !(sc->arg_scnprintf &&
1817                              sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1818                              sc->arg_parm[arg.idx]))
1819                                continue;
1820
1821                        printed += scnprintf(bf + printed, size - printed,
1822                                             "%s%s: ", printed ? ", " : "", field->name);
1823                        if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1824                                arg.val = val;
1825                                if (sc->arg_parm)
1826                                        arg.parm = sc->arg_parm[arg.idx];
1827                                printed += sc->arg_scnprintf[arg.idx](bf + printed,
1828                                                                      size - printed, &arg);
1829                        } else {
1830                                printed += scnprintf(bf + printed, size - printed,
1831                                                     "%ld", val);
1832                        }
1833                }
1834        } else {
1835                int i = 0;
1836
1837                while (i < 6) {
1838                        /* special care for unaligned accesses */
1839                        p = args + sizeof(unsigned long) * i;
1840                        memcpy(&val, p, sizeof(val));
1841                        printed += scnprintf(bf + printed, size - printed,
1842                                             "%sarg%d: %ld",
1843                                             printed ? ", " : "", i, val);
1844                        ++i;
1845                }
1846        }
1847
1848        return printed;
1849}
1850
/*
 * Signature of the per-event callbacks stored in evsel->handler and
 * invoked with the trace state, the originating evsel, the raw event and
 * its parsed sample.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
                                  union perf_event *event,
                                  struct perf_sample *sample);
1854
1855static struct syscall *trace__syscall_info(struct trace *trace,
1856                                           struct perf_evsel *evsel, int id)
1857{
1858
1859        if (id < 0) {
1860
1861                /*
1862                 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1863                 * before that, leaving at a higher verbosity level till that is
1864                 * explained. Reproduced with plain ftrace with:
1865                 *
1866                 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1867                 * grep "NR -1 " /t/trace_pipe
1868                 *
1869                 * After generating some load on the machine.
1870                 */
1871                if (verbose > 1) {
1872                        static u64 n;
1873                        fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1874                                id, perf_evsel__name(evsel), ++n);
1875                }
1876                return NULL;
1877        }
1878
1879        if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1880            trace__read_syscall_info(trace, id))
1881                goto out_cant_read;
1882
1883        if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1884                goto out_cant_read;
1885
1886        return &trace->syscalls.table[id];
1887
1888out_cant_read:
1889        if (verbose) {
1890                fprintf(trace->output, "Problems reading syscall %d", id);
1891                if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1892                        fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1893                fputs(" information\n", trace->output);
1894        }
1895        return NULL;
1896}
1897
1898static void thread__update_stats(struct thread_trace *ttrace,
1899                                 int id, struct perf_sample *sample)
1900{
1901        struct int_node *inode;
1902        struct stats *stats;
1903        u64 duration = 0;
1904
1905        inode = intlist__findnew(ttrace->syscall_stats, id);
1906        if (inode == NULL)
1907                return;
1908
1909        stats = inode->priv;
1910        if (stats == NULL) {
1911                stats = malloc(sizeof(struct stats));
1912                if (stats == NULL)
1913                        return;
1914                init_stats(stats);
1915                inode->priv = stats;
1916        }
1917
1918        if (ttrace->entry_time && sample->time > ttrace->entry_time)
1919                duration = sample->time - ttrace->entry_time;
1920
1921        update_stats(stats, duration);
1922}
1923
1924static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1925{
1926        struct thread_trace *ttrace;
1927        u64 duration;
1928        size_t printed;
1929
1930        if (trace->current == NULL)
1931                return 0;
1932
1933        ttrace = thread__priv(trace->current);
1934
1935        if (!ttrace->entry_pending)
1936                return 0;
1937
1938        duration = sample->time - ttrace->entry_time;
1939
1940        printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1941        printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1942        ttrace->entry_pending = false;
1943
1944        return printed;
1945}
1946
1947static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1948                            union perf_event *event __maybe_unused,
1949                            struct perf_sample *sample)
1950{
1951        char *msg;
1952        void *args;
1953        size_t printed = 0;
1954        struct thread *thread;
1955        int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1956        struct syscall *sc = trace__syscall_info(trace, evsel, id);
1957        struct thread_trace *ttrace;
1958
1959        if (sc == NULL)
1960                return -1;
1961
1962        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1963        ttrace = thread__trace(thread, trace->output);
1964        if (ttrace == NULL)
1965                goto out_put;
1966
1967        args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1968
1969        if (ttrace->entry_str == NULL) {
1970                ttrace->entry_str = malloc(trace__entry_str_size);
1971                if (!ttrace->entry_str)
1972                        goto out_put;
1973        }
1974
1975        if (!trace->summary_only)
1976                trace__printf_interrupted_entry(trace, sample);
1977
1978        ttrace->entry_time = sample->time;
1979        msg = ttrace->entry_str;
1980        printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1981
1982        printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1983                                           args, trace, thread);
1984
1985        if (sc->is_exit) {
1986                if (!trace->duration_filter && !trace->summary_only) {
1987                        trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1988                        fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1989                }
1990        } else {
1991                ttrace->entry_pending = true;
1992                /* See trace__vfs_getname & trace__sys_exit */
1993                ttrace->filename.pending_open = false;
1994        }
1995
1996        if (trace->current != thread) {
1997                thread__put(trace->current);
1998                trace->current = thread__get(thread);
1999        }
2000        err = 0;
2001out_put:
2002        thread__put(thread);
2003        return err;
2004}
2005
2006static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2007                           union perf_event *event __maybe_unused,
2008                           struct perf_sample *sample)
2009{
2010        long ret;
2011        u64 duration = 0;
2012        struct thread *thread;
2013        int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2014        struct syscall *sc = trace__syscall_info(trace, evsel, id);
2015        struct thread_trace *ttrace;
2016
2017        if (sc == NULL)
2018                return -1;
2019
2020        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2021        ttrace = thread__trace(thread, trace->output);
2022        if (ttrace == NULL)
2023                goto out_put;
2024
2025        if (trace->summary)
2026                thread__update_stats(ttrace, id, sample);
2027
2028        ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2029
2030        if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2031                trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2032                ttrace->filename.pending_open = false;
2033                ++trace->stats.vfs_getname;
2034        }
2035
2036        ttrace->exit_time = sample->time;
2037
2038        if (ttrace->entry_time) {
2039                duration = sample->time - ttrace->entry_time;
2040                if (trace__filter_duration(trace, duration))
2041                        goto out;
2042        } else if (trace->duration_filter)
2043                goto out;
2044
2045        if (trace->summary_only)
2046                goto out;
2047
2048        trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2049
2050        if (ttrace->entry_pending) {
2051                fprintf(trace->output, "%-70s", ttrace->entry_str);
2052        } else {
2053                fprintf(trace->output, " ... [");
2054                color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2055                fprintf(trace->output, "]: %s()", sc->name);
2056        }
2057
2058        if (sc->fmt == NULL) {
2059signed_print:
2060                fprintf(trace->output, ") = %ld", ret);
2061        } else if (ret < 0 && sc->fmt->errmsg) {
2062                char bf[STRERR_BUFSIZE];
2063                const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2064                           *e = audit_errno_to_name(-ret);
2065
2066                fprintf(trace->output, ") = -1 %s %s", e, emsg);
2067        } else if (ret == 0 && sc->fmt->timeout)
2068                fprintf(trace->output, ") = 0 Timeout");
2069        else if (sc->fmt->hexret)
2070                fprintf(trace->output, ") = %#lx", ret);
2071        else
2072                goto signed_print;
2073
2074        fputc('\n', trace->output);
2075out:
2076        ttrace->entry_pending = false;
2077        err = 0;
2078out_put:
2079        thread__put(thread);
2080        return err;
2081}
2082
2083static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2084                              union perf_event *event __maybe_unused,
2085                              struct perf_sample *sample)
2086{
2087        struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2088        struct thread_trace *ttrace;
2089        size_t filename_len, entry_str_len, to_move;
2090        ssize_t remaining_space;
2091        char *pos;
2092        const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2093
2094        if (!thread)
2095                goto out;
2096
2097        ttrace = thread__priv(thread);
2098        if (!ttrace)
2099                goto out;
2100
2101        filename_len = strlen(filename);
2102
2103        if (ttrace->filename.namelen < filename_len) {
2104                char *f = realloc(ttrace->filename.name, filename_len + 1);
2105
2106                if (f == NULL)
2107                                goto out;
2108
2109                ttrace->filename.namelen = filename_len;
2110                ttrace->filename.name = f;
2111        }
2112
2113        strcpy(ttrace->filename.name, filename);
2114        ttrace->filename.pending_open = true;
2115
2116        if (!ttrace->filename.ptr)
2117                goto out;
2118
2119        entry_str_len = strlen(ttrace->entry_str);
2120        remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2121        if (remaining_space <= 0)
2122                goto out;
2123
2124        if (filename_len > (size_t)remaining_space) {
2125                filename += filename_len - remaining_space;
2126                filename_len = remaining_space;
2127        }
2128
2129        to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2130        pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2131        memmove(pos + filename_len, pos, to_move);
2132        memcpy(pos, filename, filename_len);
2133
2134        ttrace->filename.ptr = 0;
2135        ttrace->filename.entry_str_pos = 0;
2136out:
2137        return 0;
2138}
2139
2140static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2141                                     union perf_event *event __maybe_unused,
2142                                     struct perf_sample *sample)
2143{
2144        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2145        double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2146        struct thread *thread = machine__findnew_thread(trace->host,
2147                                                        sample->pid,
2148                                                        sample->tid);
2149        struct thread_trace *ttrace = thread__trace(thread, trace->output);
2150
2151        if (ttrace == NULL)
2152                goto out_dump;
2153
2154        ttrace->runtime_ms += runtime_ms;
2155        trace->runtime_ms += runtime_ms;
2156        thread__put(thread);
2157        return 0;
2158
2159out_dump:
2160        fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2161               evsel->name,
2162               perf_evsel__strval(evsel, sample, "comm"),
2163               (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2164               runtime,
2165               perf_evsel__intval(evsel, sample, "vruntime"));
2166        thread__put(thread);
2167        return 0;
2168}
2169
2170static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2171                                union perf_event *event __maybe_unused,
2172                                struct perf_sample *sample)
2173{
2174        trace__printf_interrupted_entry(trace, sample);
2175        trace__fprintf_tstamp(trace, sample->time, trace->output);
2176
2177        if (trace->trace_syscalls)
2178                fprintf(trace->output, "(         ): ");
2179
2180        fprintf(trace->output, "%s:", evsel->name);
2181
2182        if (evsel->tp_format) {
2183                event_format__fprintf(evsel->tp_format, sample->cpu,
2184                                      sample->raw_data, sample->raw_size,
2185                                      trace->output);
2186        }
2187
2188        fprintf(trace->output, ")\n");
2189        return 0;
2190}
2191
2192static void print_location(FILE *f, struct perf_sample *sample,
2193                           struct addr_location *al,
2194                           bool print_dso, bool print_sym)
2195{
2196
2197        if ((verbose || print_dso) && al->map)
2198                fprintf(f, "%s@", al->map->dso->long_name);
2199
2200        if ((verbose || print_sym) && al->sym)
2201                fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2202                        al->addr - al->sym->start);
2203        else if (al->map)
2204                fprintf(f, "0x%" PRIx64, al->addr);
2205        else
2206                fprintf(f, "0x%" PRIx64, sample->addr);
2207}
2208
2209static int trace__pgfault(struct trace *trace,
2210                          struct perf_evsel *evsel,
2211                          union perf_event *event,
2212                          struct perf_sample *sample)
2213{
2214        struct thread *thread;
2215        u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2216        struct addr_location al;
2217        char map_type = 'd';
2218        struct thread_trace *ttrace;
2219        int err = -1;
2220
2221        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2222        ttrace = thread__trace(thread, trace->output);
2223        if (ttrace == NULL)
2224                goto out_put;
2225
2226        if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2227                ttrace->pfmaj++;
2228        else
2229                ttrace->pfmin++;
2230
2231        if (trace->summary_only)
2232                goto out;
2233
2234        thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2235                              sample->ip, &al);
2236
2237        trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2238
2239        fprintf(trace->output, "%sfault [",
2240                evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2241                "maj" : "min");
2242
2243        print_location(trace->output, sample, &al, false, true);
2244
2245        fprintf(trace->output, "] => ");
2246
2247        thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2248                                   sample->addr, &al);
2249
2250        if (!al.map) {
2251                thread__find_addr_location(thread, cpumode,
2252                                           MAP__FUNCTION, sample->addr, &al);
2253
2254                if (al.map)
2255                        map_type = 'x';
2256                else
2257                        map_type = '?';
2258        }
2259
2260        print_location(trace->output, sample, &al, true, false);
2261
2262        fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2263out:
2264        err = 0;
2265out_put:
2266        thread__put(thread);
2267        return err;
2268}
2269
2270static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2271{
2272        if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2273            (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2274                return false;
2275
2276        if (trace->pid_list || trace->tid_list)
2277                return true;
2278
2279        return false;
2280}
2281
2282static int trace__process_sample(struct perf_tool *tool,
2283                                 union perf_event *event,
2284                                 struct perf_sample *sample,
2285                                 struct perf_evsel *evsel,
2286                                 struct machine *machine __maybe_unused)
2287{
2288        struct trace *trace = container_of(tool, struct trace, tool);
2289        int err = 0;
2290
2291        tracepoint_handler handler = evsel->handler;
2292
2293        if (skip_sample(trace, sample))
2294                return 0;
2295
2296        if (!trace->full_time && trace->base_time == 0)
2297                trace->base_time = sample->time;
2298
2299        if (handler) {
2300                ++trace->nr_events;
2301                handler(trace, evsel, event, sample);
2302        }
2303
2304        return err;
2305}
2306
2307static int parse_target_str(struct trace *trace)
2308{
2309        if (trace->opts.target.pid) {
2310                trace->pid_list = intlist__new(trace->opts.target.pid);
2311                if (trace->pid_list == NULL) {
2312                        pr_err("Error parsing process id string\n");
2313                        return -EINVAL;
2314                }
2315        }
2316
2317        if (trace->opts.target.tid) {
2318                trace->tid_list = intlist__new(trace->opts.target.tid);
2319                if (trace->tid_list == NULL) {
2320                        pr_err("Error parsing thread id string\n");
2321                        return -EINVAL;
2322                }
2323        }
2324
2325        return 0;
2326}
2327
2328static int trace__record(struct trace *trace, int argc, const char **argv)
2329{
2330        unsigned int rec_argc, i, j;
2331        const char **rec_argv;
2332        const char * const record_args[] = {
2333                "record",
2334                "-R",
2335                "-m", "1024",
2336                "-c", "1",
2337        };
2338
2339        const char * const sc_args[] = { "-e", };
2340        unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2341        const char * const majpf_args[] = { "-e", "major-faults" };
2342        unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2343        const char * const minpf_args[] = { "-e", "minor-faults" };
2344        unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2345
2346        /* +1 is for the event string below */
2347        rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2348                majpf_args_nr + minpf_args_nr + argc;
2349        rec_argv = calloc(rec_argc + 1, sizeof(char *));
2350
2351        if (rec_argv == NULL)
2352                return -ENOMEM;
2353
2354        j = 0;
2355        for (i = 0; i < ARRAY_SIZE(record_args); i++)
2356                rec_argv[j++] = record_args[i];
2357
2358        if (trace->trace_syscalls) {
2359                for (i = 0; i < sc_args_nr; i++)
2360                        rec_argv[j++] = sc_args[i];
2361
2362                /* event string may be different for older kernels - e.g., RHEL6 */
2363                if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2364                        rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2365                else if (is_valid_tracepoint("syscalls:sys_enter"))
2366                        rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2367                else {
2368                        pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2369                        return -1;
2370                }
2371        }
2372
2373        if (trace->trace_pgfaults & TRACE_PFMAJ)
2374                for (i = 0; i < majpf_args_nr; i++)
2375                        rec_argv[j++] = majpf_args[i];
2376
2377        if (trace->trace_pgfaults & TRACE_PFMIN)
2378                for (i = 0; i < minpf_args_nr; i++)
2379                        rec_argv[j++] = minpf_args[i];
2380
2381        for (i = 0; i < (unsigned int)argc; i++)
2382                rec_argv[j++] = argv[i];
2383
2384        return cmd_record(j, rec_argv, NULL);
2385}
2386
2387static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2388
2389static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2390{
2391        struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2392        if (evsel == NULL)
2393                return false;
2394
2395        if (perf_evsel__field(evsel, "pathname") == NULL) {
2396                perf_evsel__delete(evsel);
2397                return false;
2398        }
2399
2400        evsel->handler = trace__vfs_getname;
2401        perf_evlist__add(evlist, evsel);
2402        return true;
2403}
2404
2405static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2406                                    u64 config)
2407{
2408        struct perf_evsel *evsel;
2409        struct perf_event_attr attr = {
2410                .type = PERF_TYPE_SOFTWARE,
2411                .mmap_data = 1,
2412        };
2413
2414        attr.config = config;
2415        attr.sample_period = 1;
2416
2417        event_attr_init(&attr);
2418
2419        evsel = perf_evsel__new(&attr);
2420        if (!evsel)
2421                return -ENOMEM;
2422
2423        evsel->handler = trace__pgfault;
2424        perf_evlist__add(evlist, evsel);
2425
2426        return 0;
2427}
2428
2429static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2430{
2431        const u32 type = event->header.type;
2432        struct perf_evsel *evsel;
2433
2434        if (!trace->full_time && trace->base_time == 0)
2435                trace->base_time = sample->time;
2436
2437        if (type != PERF_RECORD_SAMPLE) {
2438                trace__process_event(trace, trace->host, event, sample);
2439                return;
2440        }
2441
2442        evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2443        if (evsel == NULL) {
2444                fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2445                return;
2446        }
2447
2448        if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2449            sample->raw_data == NULL) {
2450                fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2451                       perf_evsel__name(evsel), sample->tid,
2452                       sample->cpu, sample->raw_size);
2453        } else {
2454                tracepoint_handler handler = evsel->handler;
2455                handler(trace, evsel, event, sample);
2456        }
2457}
2458
2459static int trace__add_syscall_newtp(struct trace *trace)
2460{
2461        int ret = -1;
2462        struct perf_evlist *evlist = trace->evlist;
2463        struct perf_evsel *sys_enter, *sys_exit;
2464
2465        sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2466        if (sys_enter == NULL)
2467                goto out;
2468
2469        if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2470                goto out_delete_sys_enter;
2471
2472        sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2473        if (sys_exit == NULL)
2474                goto out_delete_sys_enter;
2475
2476        if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2477                goto out_delete_sys_exit;
2478
2479        perf_evlist__add(evlist, sys_enter);
2480        perf_evlist__add(evlist, sys_exit);
2481
2482        trace->syscalls.events.sys_enter = sys_enter;
2483        trace->syscalls.events.sys_exit  = sys_exit;
2484
2485        ret = 0;
2486out:
2487        return ret;
2488
2489out_delete_sys_exit:
2490        perf_evsel__delete_priv(sys_exit);
2491out_delete_sys_enter:
2492        perf_evsel__delete_priv(sys_enter);
2493        goto out;
2494}
2495
2496static int trace__set_ev_qualifier_filter(struct trace *trace)
2497{
2498        int err = -1;
2499        char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2500                                                trace->ev_qualifier_ids.nr,
2501                                                trace->ev_qualifier_ids.entries);
2502
2503        if (filter == NULL)
2504                goto out_enomem;
2505
2506        if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2507                err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2508
2509        free(filter);
2510out:
2511        return err;
2512out_enomem:
2513        errno = ENOMEM;
2514        goto out;
2515}
2516
2517static int trace__run(struct trace *trace, int argc, const char **argv)
2518{
2519        struct perf_evlist *evlist = trace->evlist;
2520        struct perf_evsel *evsel;
2521        int err = -1, i;
2522        unsigned long before;
2523        const bool forks = argc > 0;
2524        bool draining = false;
2525
2526        trace->live = true;
2527
2528        if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2529                goto out_error_raw_syscalls;
2530
2531        if (trace->trace_syscalls)
2532                trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2533
2534        if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2535            perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2536                goto out_error_mem;
2537        }
2538
2539        if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2540            perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2541                goto out_error_mem;
2542
2543        if (trace->sched &&
2544            perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2545                                   trace__sched_stat_runtime))
2546                goto out_error_sched_stat_runtime;
2547
2548        err = perf_evlist__create_maps(evlist, &trace->opts.target);
2549        if (err < 0) {
2550                fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2551                goto out_delete_evlist;
2552        }
2553
2554        err = trace__symbols_init(trace, evlist);
2555        if (err < 0) {
2556                fprintf(trace->output, "Problems initializing symbol libraries!\n");
2557                goto out_delete_evlist;
2558        }
2559
2560        perf_evlist__config(evlist, &trace->opts);
2561
2562        signal(SIGCHLD, sig_handler);
2563        signal(SIGINT, sig_handler);
2564
2565        if (forks) {
2566                err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2567                                                    argv, false, NULL);
2568                if (err < 0) {
2569                        fprintf(trace->output, "Couldn't run the workload!\n");
2570                        goto out_delete_evlist;
2571                }
2572        }
2573
2574        err = perf_evlist__open(evlist);
2575        if (err < 0)
2576                goto out_error_open;
2577
2578        /*
2579         * Better not use !target__has_task() here because we need to cover the
2580         * case where no threads were specified in the command line, but a
2581         * workload was, and in that case we will fill in the thread_map when
2582         * we fork the workload in perf_evlist__prepare_workload.
2583         */
2584        if (trace->filter_pids.nr > 0)
2585                err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2586        else if (thread_map__pid(evlist->threads, 0) == -1)
2587                err = perf_evlist__set_filter_pid(evlist, getpid());
2588
2589        if (err < 0)
2590                goto out_error_mem;
2591
2592        if (trace->ev_qualifier_ids.nr > 0) {
2593                err = trace__set_ev_qualifier_filter(trace);
2594                if (err < 0)
2595                        goto out_errno;
2596
2597                pr_debug("event qualifier tracepoint filter: %s\n",
2598                         trace->syscalls.events.sys_exit->filter);
2599        }
2600
2601        err = perf_evlist__apply_filters(evlist, &evsel);
2602        if (err < 0)
2603                goto out_error_apply_filters;
2604
2605        err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2606        if (err < 0)
2607                goto out_error_mmap;
2608
2609        if (!target__none(&trace->opts.target))
2610                perf_evlist__enable(evlist);
2611
2612        if (forks)
2613                perf_evlist__start_workload(evlist);
2614
2615        trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2616                                  evlist->threads->nr > 1 ||
2617                                  perf_evlist__first(evlist)->attr.inherit;
2618again:
2619        before = trace->nr_events;
2620
2621        for (i = 0; i < evlist->nr_mmaps; i++) {
2622                union perf_event *event;
2623
2624                while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2625                        struct perf_sample sample;
2626
2627                        ++trace->nr_events;
2628
2629                        err = perf_evlist__parse_sample(evlist, event, &sample);
2630                        if (err) {
2631                                fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2632                                goto next_event;
2633                        }
2634
2635                        trace__handle_event(trace, event, &sample);
2636next_event:
2637                        perf_evlist__mmap_consume(evlist, i);
2638
2639                        if (interrupted)
2640                                goto out_disable;
2641
2642                        if (done && !draining) {
2643                                perf_evlist__disable(evlist);
2644                                draining = true;
2645                        }
2646                }
2647        }
2648
2649        if (trace->nr_events == before) {
2650                int timeout = done ? 100 : -1;
2651
2652                if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2653                        if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2654                                draining = true;
2655
2656                        goto again;
2657                }
2658        } else {
2659                goto again;
2660        }
2661
2662out_disable:
2663        thread__zput(trace->current);
2664
2665        perf_evlist__disable(evlist);
2666
2667        if (!err) {
2668                if (trace->summary)
2669                        trace__fprintf_thread_summary(trace, trace->output);
2670
2671                if (trace->show_tool_stats) {
2672                        fprintf(trace->output, "Stats:\n "
2673                                               " vfs_getname : %" PRIu64 "\n"
2674                                               " proc_getname: %" PRIu64 "\n",
2675                                trace->stats.vfs_getname,
2676                                trace->stats.proc_getname);
2677                }
2678        }
2679
2680out_delete_evlist:
2681        perf_evlist__delete(evlist);
2682        trace->evlist = NULL;
2683        trace->live = false;
2684        return err;
2685{
2686        char errbuf[BUFSIZ];
2687
2688out_error_sched_stat_runtime:
2689        debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2690        goto out_error;
2691
2692out_error_raw_syscalls:
2693        debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2694        goto out_error;
2695
2696out_error_mmap:
2697        perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2698        goto out_error;
2699
2700out_error_open:
2701        perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2702
2703out_error:
2704        fprintf(trace->output, "%s\n", errbuf);
2705        goto out_delete_evlist;
2706
2707out_error_apply_filters:
2708        fprintf(trace->output,
2709                "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2710                evsel->filter, perf_evsel__name(evsel), errno,
2711                strerror_r(errno, errbuf, sizeof(errbuf)));
2712        goto out_delete_evlist;
2713}
2714out_error_mem:
2715        fprintf(trace->output, "Not enough memory to run!\n");
2716        goto out_delete_evlist;
2717
2718out_errno:
2719        fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2720        goto out_delete_evlist;
2721}
2722
2723static int trace__replay(struct trace *trace)
2724{
2725        const struct perf_evsel_str_handler handlers[] = {
2726                { "probe:vfs_getname",       trace__vfs_getname, },
2727        };
2728        struct perf_data_file file = {
2729                .path  = input_name,
2730                .mode  = PERF_DATA_MODE_READ,
2731                .force = trace->force,
2732        };
2733        struct perf_session *session;
2734        struct perf_evsel *evsel;
2735        int err = -1;
2736
2737        trace->tool.sample        = trace__process_sample;
2738        trace->tool.mmap          = perf_event__process_mmap;
2739        trace->tool.mmap2         = perf_event__process_mmap2;
2740        trace->tool.comm          = perf_event__process_comm;
2741        trace->tool.exit          = perf_event__process_exit;
2742        trace->tool.fork          = perf_event__process_fork;
2743        trace->tool.attr          = perf_event__process_attr;
2744        trace->tool.tracing_data = perf_event__process_tracing_data;
2745        trace->tool.build_id      = perf_event__process_build_id;
2746
2747        trace->tool.ordered_events = true;
2748        trace->tool.ordering_requires_timestamps = true;
2749
2750        /* add tid to output */
2751        trace->multiple_threads = true;
2752
2753        session = perf_session__new(&file, false, &trace->tool);
2754        if (session == NULL)
2755                return -1;
2756
2757        if (symbol__init(&session->header.env) < 0)
2758                goto out;
2759
2760        trace->host = &session->machines.host;
2761
2762        err = perf_session__set_tracepoints_handlers(session, handlers);
2763        if (err)
2764                goto out;
2765
2766        evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2767                                                     "raw_syscalls:sys_enter");
2768        /* older kernels have syscalls tp versus raw_syscalls */
2769        if (evsel == NULL)
2770                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2771                                                             "syscalls:sys_enter");
2772
2773        if (evsel &&
2774            (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2775            perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2776                pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2777                goto out;
2778        }
2779
2780        evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2781                                                     "raw_syscalls:sys_exit");
2782        if (evsel == NULL)
2783                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2784                                                             "syscalls:sys_exit");
2785        if (evsel &&
2786            (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2787            perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2788                pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2789                goto out;
2790        }
2791
2792        evlist__for_each(session->evlist, evsel) {
2793                if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2794                    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2795                     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2796                     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2797                        evsel->handler = trace__pgfault;
2798        }
2799
2800        err = parse_target_str(trace);
2801        if (err != 0)
2802                goto out;
2803
2804        setup_pager();
2805
2806        err = perf_session__process_events(session);
2807        if (err)
2808                pr_err("Failed to process events, error %d", err);
2809
2810        else if (trace->summary)
2811                trace__fprintf_thread_summary(trace, trace->output);
2812
2813out:
2814        perf_session__delete(session);
2815
2816        return err;
2817}
2818
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns fprintf()'s result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2827
2828static size_t thread__dump_stats(struct thread_trace *ttrace,
2829                                 struct trace *trace, FILE *fp)
2830{
2831        struct stats *stats;
2832        size_t printed = 0;
2833        struct syscall *sc;
2834        struct int_node *inode = intlist__first(ttrace->syscall_stats);
2835
2836        if (inode == NULL)
2837                return 0;
2838
2839        printed += fprintf(fp, "\n");
2840
2841        printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2842        printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2843        printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2844
2845        /* each int_node is a syscall */
2846        while (inode) {
2847                stats = inode->priv;
2848                if (stats) {
2849                        double min = (double)(stats->min) / NSEC_PER_MSEC;
2850                        double max = (double)(stats->max) / NSEC_PER_MSEC;
2851                        double avg = avg_stats(stats);
2852                        double pct;
2853                        u64 n = (u64) stats->n;
2854
2855                        pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2856                        avg /= NSEC_PER_MSEC;
2857
2858                        sc = &trace->syscalls.table[inode->i];
2859                        printed += fprintf(fp, "   %-15s", sc->name);
2860                        printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2861                                           n, avg * n, min, avg);
2862                        printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2863                }
2864
2865                inode = intlist__next(inode);
2866        }
2867
2868        printed += fprintf(fp, "\n\n");
2869
2870        return printed;
2871}
2872
2873/* struct used to pass data to per-thread function */
2874struct summary_data {
2875        FILE *fp;
2876        struct trace *trace;
2877        size_t printed;
2878};
2879
2880static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2881{
2882        struct summary_data *data = priv;
2883        FILE *fp = data->fp;
2884        size_t printed = data->printed;
2885        struct trace *trace = data->trace;
2886        struct thread_trace *ttrace = thread__priv(thread);
2887        double ratio;
2888
2889        if (ttrace == NULL)
2890                return 0;
2891
2892        ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2893
2894        printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2895        printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2896        printed += fprintf(fp, "%.1f%%", ratio);
2897        if (ttrace->pfmaj)
2898                printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2899        if (ttrace->pfmin)
2900                printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2901        printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2902        printed += thread__dump_stats(ttrace, trace, fp);
2903
2904        data->printed += printed;
2905
2906        return 0;
2907}
2908
2909static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2910{
2911        struct summary_data data = {
2912                .fp = fp,
2913                .trace = trace
2914        };
2915        data.printed = trace__fprintf_threads_header(fp);
2916
2917        machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2918
2919        return data.printed;
2920}
2921
2922static int trace__set_duration(const struct option *opt, const char *str,
2923                               int unset __maybe_unused)
2924{
2925        struct trace *trace = opt->value;
2926
2927        trace->duration_filter = atof(str);
2928        return 0;
2929}
2930
2931static int trace__set_filter_pids(const struct option *opt, const char *str,
2932                                  int unset __maybe_unused)
2933{
2934        int ret = -1;
2935        size_t i;
2936        struct trace *trace = opt->value;
2937        /*
2938         * FIXME: introduce a intarray class, plain parse csv and create a
2939         * { int nr, int entries[] } struct...
2940         */
2941        struct intlist *list = intlist__new(str);
2942
2943        if (list == NULL)
2944                return -1;
2945
2946        i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2947        trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2948
2949        if (trace->filter_pids.entries == NULL)
2950                goto out;
2951
2952        trace->filter_pids.entries[0] = getpid();
2953
2954        for (i = 1; i < trace->filter_pids.nr; ++i)
2955                trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2956
2957        intlist__delete(list);
2958        ret = 0;
2959out:
2960        return ret;
2961}
2962
2963static int trace__open_output(struct trace *trace, const char *filename)
2964{
2965        struct stat st;
2966
2967        if (!stat(filename, &st) && st.st_size) {
2968                char oldname[PATH_MAX];
2969
2970                scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2971                unlink(oldname);
2972                rename(filename, oldname);
2973        }
2974
2975        trace->output = fopen(filename, "w");
2976
2977        return trace->output == NULL ? -errno : 0;
2978}
2979
2980static int parse_pagefaults(const struct option *opt, const char *str,
2981                            int unset __maybe_unused)
2982{
2983        int *trace_pgfaults = opt->value;
2984
2985        if (strcmp(str, "all") == 0)
2986                *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2987        else if (strcmp(str, "maj") == 0)
2988                *trace_pgfaults |= TRACE_PFMAJ;
2989        else if (strcmp(str, "min") == 0)
2990                *trace_pgfaults |= TRACE_PFMIN;
2991        else
2992                return -1;
2993
2994        return 0;
2995}
2996
2997static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2998{
2999        struct perf_evsel *evsel;
3000
3001        evlist__for_each(evlist, evsel)
3002                evsel->handler = handler;
3003}
3004
3005int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3006{
3007        const char *trace_usage[] = {
3008                "perf trace [<options>] [<command>]",
3009                "perf trace [<options>] -- <command> [<options>]",
3010                "perf trace record [<options>] [<command>]",
3011                "perf trace record [<options>] -- <command> [<options>]",
3012                NULL
3013        };
3014        struct trace trace = {
3015                .audit = {
3016                        .machine = audit_detect_machine(),
3017                        .open_id = audit_name_to_syscall("open", trace.audit.machine),
3018                },
3019                .syscalls = {
3020                        . max = -1,
3021                },
3022                .opts = {
3023                        .target = {
3024                                .uid       = UINT_MAX,
3025                                .uses_mmap = true,
3026                        },
3027                        .user_freq     = UINT_MAX,
3028                        .user_interval = ULLONG_MAX,
3029                        .no_buffering  = true,
3030                        .mmap_pages    = UINT_MAX,
3031                        .proc_map_timeout  = 500,
3032                },
3033                .output = stderr,
3034                .show_comm = true,
3035                .trace_syscalls = true,
3036        };
3037        const char *output_name = NULL;
3038        const char *ev_qualifier_str = NULL;
3039        const struct option trace_options[] = {
3040        OPT_CALLBACK(0, "event", &trace.evlist, "event",
3041                     "event selector. use 'perf list' to list available events",
3042                     parse_events_option),
3043        OPT_BOOLEAN(0, "comm", &trace.show_comm,
3044                    "show the thread COMM next to its id"),
3045        OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3046        OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3047        OPT_STRING('o', "output", &output_name, "file", "output file name"),
3048        OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3049        OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3050                    "trace events on existing process id"),
3051        OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3052                    "trace events on existing thread id"),
3053        OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3054                     "pids to filter (by the kernel)", trace__set_filter_pids),
3055        OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3056                    "system-wide collection from all CPUs"),
3057        OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3058                    "list of cpus to monitor"),
3059        OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3060                    "child tasks do not inherit counters"),
3061        OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3062                     "number of mmap data pages",
3063                     perf_evlist__parse_mmap_pages),
3064        OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3065                   "user to profile"),
3066        OPT_CALLBACK(0, "duration", &trace, "float",
3067                     "show only events with duration > N.M ms",
3068                     trace__set_duration),
3069        OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3070        OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3071        OPT_BOOLEAN('T', "time", &trace.full_time,
3072                    "Show full timestamp, not time relative to first start"),
3073        OPT_BOOLEAN('s', "summary", &trace.summary_only,
3074                    "Show only syscall summary with statistics"),
3075        OPT_BOOLEAN('S', "with-summary", &trace.summary,
3076                    "Show all syscalls and summary with statistics"),
3077        OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3078                     "Trace pagefaults", parse_pagefaults, "maj"),
3079        OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3080        OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3081        OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3082                        "per thread proc mmap processing timeout in ms"),
3083        OPT_END()
3084        };
3085        const char * const trace_subcommands[] = { "record", NULL };
3086        int err;
3087        char bf[BUFSIZ];
3088
3089        signal(SIGSEGV, sighandler_dump_stack);
3090        signal(SIGFPE, sighandler_dump_stack);
3091
3092        trace.evlist = perf_evlist__new();
3093
3094        if (trace.evlist == NULL) {
3095                pr_err("Not enough memory to run!\n");
3096                err = -ENOMEM;
3097                goto out;
3098        }
3099
3100        argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3101                                 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3102
3103        if (trace.trace_pgfaults) {
3104                trace.opts.sample_address = true;
3105                trace.opts.sample_time = true;
3106        }
3107
3108        if (trace.evlist->nr_entries > 0)
3109                evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3110
3111        if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3112                return trace__record(&trace, argc-1, &argv[1]);
3113
3114        /* summary_only implies summary option, but don't overwrite summary if set */
3115        if (trace.summary_only)
3116                trace.summary = trace.summary_only;
3117
3118        if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3119            trace.evlist->nr_entries == 0 /* Was --events used? */) {
3120                pr_err("Please specify something to trace.\n");
3121                return -1;
3122        }
3123
3124        if (output_name != NULL) {
3125                err = trace__open_output(&trace, output_name);
3126                if (err < 0) {
3127                        perror("failed to create output file");
3128                        goto out;
3129                }
3130        }
3131
3132        if (ev_qualifier_str != NULL) {
3133                const char *s = ev_qualifier_str;
3134                struct strlist_config slist_config = {
3135                        .dirname = system_path(STRACE_GROUPS_DIR),
3136                };
3137
3138                trace.not_ev_qualifier = *s == '!';
3139                if (trace.not_ev_qualifier)
3140                        ++s;
3141                trace.ev_qualifier = strlist__new(s, &slist_config);
3142                if (trace.ev_qualifier == NULL) {
3143                        fputs("Not enough memory to parse event qualifier",
3144                              trace.output);
3145                        err = -ENOMEM;
3146                        goto out_close;
3147                }
3148
3149                err = trace__validate_ev_qualifier(&trace);
3150                if (err)
3151                        goto out_close;
3152        }
3153
3154        err = target__validate(&trace.opts.target);
3155        if (err) {
3156                target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3157                fprintf(trace.output, "%s", bf);
3158                goto out_close;
3159        }
3160
3161        err = target__parse_uid(&trace.opts.target);
3162        if (err) {
3163                target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3164                fprintf(trace.output, "%s", bf);
3165                goto out_close;
3166        }
3167
3168        if (!argc && target__none(&trace.opts.target))
3169                trace.opts.target.system_wide = true;
3170
3171        if (input_name)
3172                err = trace__replay(&trace);
3173        else
3174                err = trace__run(&trace, argc, argv);
3175
3176out_close:
3177        if (output_name != NULL)
3178                fclose(trace.output);
3179out:
3180        return err;
3181}
3182