linux/tools/perf/builtin-trace.c
   1/*
   2 * builtin-trace.c
   3 *
   4 * Builtin 'trace' command:
   5 *
   6 * Display a continuously updated trace of any workload, CPU, specific PID,
   7 * system wide, etc.  Default format is loosely strace like, but any other
   8 * event may be specified using --event.
   9 *
  10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
  11 *
  12 * Initially based on the 'trace' prototype by Thomas Gleixner:
  13 *
  14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
  15 *
  16 * Released under the GPL v2. (and only v2, not any later version)
  17 */
  18
  19#include <traceevent/event-parse.h>
  20#include <api/fs/tracing_path.h>
  21#include "builtin.h"
  22#include "util/color.h"
  23#include "util/debug.h"
  24#include "util/event.h"
  25#include "util/evlist.h"
  26#include <subcmd/exec-cmd.h>
  27#include "util/machine.h"
  28#include "util/path.h"
  29#include "util/session.h"
  30#include "util/thread.h"
  31#include <subcmd/parse-options.h>
  32#include "util/strlist.h"
  33#include "util/intlist.h"
  34#include "util/thread_map.h"
  35#include "util/stat.h"
  36#include "trace/beauty/beauty.h"
  37#include "trace-event.h"
  38#include "util/parse-events.h"
  39#include "util/bpf-loader.h"
  40#include "callchain.h"
  41#include "print_binary.h"
  42#include "string2.h"
  43#include "syscalltbl.h"
  44#include "rb_resort.h"
  45
  46#include <errno.h>
  47#include <inttypes.h>
  48#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
  49#include <poll.h>
  50#include <signal.h>
  51#include <stdlib.h>
  52#include <string.h>
  53#include <linux/err.h>
  54#include <linux/filter.h>
  55#include <linux/audit.h>
  56#include <linux/kernel.h>
  57#include <linux/random.h>
  58#include <linux/stringify.h>
  59#include <linux/time64.h>
  60
  61#include "sane_ctype.h"
  62
  63#ifndef O_CLOEXEC
  64# define O_CLOEXEC              02000000
  65#endif
  66
  67struct trace {
  68        struct perf_tool        tool;
  69        struct syscalltbl       *sctbl;
  70        struct {
  71                int             max;
  72                struct syscall  *table;
  73                struct {
  74                        struct perf_evsel *sys_enter,
  75                                          *sys_exit;
  76                }               events;
  77        } syscalls;
  78        struct record_opts      opts;
  79        struct perf_evlist      *evlist;
  80        struct machine          *host;
  81        struct thread           *current;
  82        u64                     base_time;
  83        FILE                    *output;
  84        unsigned long           nr_events;
  85        struct strlist          *ev_qualifier;
  86        struct {
  87                size_t          nr;
  88                int             *entries;
  89        }                       ev_qualifier_ids;
  90        struct {
  91                size_t          nr;
  92                pid_t           *entries;
  93        }                       filter_pids;
  94        double                  duration_filter;
  95        double                  runtime_ms;
  96        struct {
  97                u64             vfs_getname,
  98                                proc_getname;
  99        } stats;
 100        unsigned int            max_stack;
 101        unsigned int            min_stack;
 102        bool                    not_ev_qualifier;
 103        bool                    live;
 104        bool                    full_time;
 105        bool                    sched;
 106        bool                    multiple_threads;
 107        bool                    summary;
 108        bool                    summary_only;
 109        bool                    show_comm;
 110        bool                    show_tool_stats;
 111        bool                    trace_syscalls;
 112        bool                    kernel_syscallchains;
 113        bool                    force;
 114        bool                    vfs_getname;
 115        int                     trace_pgfaults;
 116        int                     open_id;
 117};
 118
 119struct tp_field {
 120        int offset;
 121        union {
 122                u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
 123                void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
 124        };
 125};
 126
 127#define TP_UINT_FIELD(bits) \
 128static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
 129{ \
 130        u##bits value; \
 131        memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
 132        return value;  \
 133}
 134
 135TP_UINT_FIELD(8);
 136TP_UINT_FIELD(16);
 137TP_UINT_FIELD(32);
 138TP_UINT_FIELD(64);
 139
 140#define TP_UINT_FIELD__SWAPPED(bits) \
 141static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
 142{ \
 143        u##bits value; \
 144        memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
 145        return bswap_##bits(value);\
 146}
 147
 148TP_UINT_FIELD__SWAPPED(16);
 149TP_UINT_FIELD__SWAPPED(32);
 150TP_UINT_FIELD__SWAPPED(64);
 151
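/*
 * The TP_UINT_FIELD*() helpers above read a fixed-width unsigned integer from
 * sample->raw_data at field->offset; the __SWAPPED variants additionally
 * byte-swap the value for samples recorded with the opposite endianness.
 * tp_field__init_uint() below picks the right accessor from the field size
 * reported by the tracepoint format.
 */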
 152static int tp_field__init_uint(struct tp_field *field,
 153                               struct format_field *format_field,
 154                               bool needs_swap)
 155{
 156        field->offset = format_field->offset;
 157
 158        switch (format_field->size) {
 159        case 1:
 160                field->integer = tp_field__u8;
 161                break;
 162        case 2:
 163                field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
 164                break;
 165        case 4:
 166                field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
 167                break;
 168        case 8:
 169                field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
 170                break;
 171        default:
 172                return -1;
 173        }
 174
 175        return 0;
 176}
 177
 178static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
 179{
 180        return sample->raw_data + field->offset;
 181}
 182
 183static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
 184{
 185        field->offset = format_field->offset;
 186        field->pointer = tp_field__ptr;
 187        return 0;
 188}
 189
 190struct syscall_tp {
 191        struct tp_field id;
 192        union {
 193                struct tp_field args, ret;
 194        };
 195};
 196
 197static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
 198                                          struct tp_field *field,
 199                                          const char *name)
 200{
 201        struct format_field *format_field = perf_evsel__field(evsel, name);
 202
 203        if (format_field == NULL)
 204                return -1;
 205
 206        return tp_field__init_uint(field, format_field, evsel->needs_swap);
 207}
 208
 209#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
 210        ({ struct syscall_tp *sc = evsel->priv;\
 211           perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
 212
 213static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
 214                                         struct tp_field *field,
 215                                         const char *name)
 216{
 217        struct format_field *format_field = perf_evsel__field(evsel, name);
 218
 219        if (format_field == NULL)
 220                return -1;
 221
 222        return tp_field__init_ptr(field, format_field);
 223}
 224
 225#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
 226        ({ struct syscall_tp *sc = evsel->priv;\
 227           perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
 228
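/*
 * Both *_sc_tp_*_field() macros above rely on the struct syscall_tp member
 * and the tracepoint format field sharing a name: #name stringifies the
 * member (e.g. 'id' or 'args') and that string is what perf_evsel__field()
 * looks up in the event's format.
 */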
 229static void perf_evsel__delete_priv(struct perf_evsel *evsel)
 230{
 231        zfree(&evsel->priv);
 232        perf_evsel__delete(evsel);
 233}
 234
 235static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
 236{
 237        evsel->priv = malloc(sizeof(struct syscall_tp));
 238        if (evsel->priv != NULL) {
 239                if (perf_evsel__init_sc_tp_uint_field(evsel, id))
 240                        goto out_delete;
 241
 242                evsel->handler = handler;
 243                return 0;
 244        }
 245
 246        return -ENOMEM;
 247
 248out_delete:
 249        zfree(&evsel->priv);
 250        return -ENOENT;
 251}
 252
 253static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
 254{
 255        struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
 256
  257        /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
 258        if (IS_ERR(evsel))
 259                evsel = perf_evsel__newtp("syscalls", direction);
 260
 261        if (IS_ERR(evsel))
 262                return NULL;
 263
 264        if (perf_evsel__init_syscall_tp(evsel, handler))
 265                goto out_delete;
 266
 267        return evsel;
 268
 269out_delete:
 270        perf_evsel__delete_priv(evsel);
 271        return NULL;
 272}
 273
 274#define perf_evsel__sc_tp_uint(evsel, name, sample) \
 275        ({ struct syscall_tp *fields = evsel->priv; \
 276           fields->name.integer(&fields->name, sample); })
 277
 278#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
 279        ({ struct syscall_tp *fields = evsel->priv; \
 280           fields->name.pointer(&fields->name, sample); })
 281
 282struct strarray {
 283        int         offset;
 284        int         nr_entries;
 285        const char **entries;
 286};
 287
 288#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
 289        .nr_entries = ARRAY_SIZE(array), \
 290        .entries = array, \
 291}
 292
 293#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
 294        .offset     = off, \
 295        .nr_entries = ARRAY_SIZE(array), \
 296        .entries = array, \
 297}
 298
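/*
 * A worked example: DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1), used further
 * below, defines strarray__epoll_ctl_ops with .offset = 1, so an epoll_ctl op
 * of 1 (EPOLL_CTL_ADD) indexes entries[0] and prints as "ADD" via
 * SCA_STRARRAY; values outside the table fall back to the integer format
 * passed to __syscall_arg__scnprintf_strarray() just below.
 */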
 299static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
 300                                                const char *intfmt,
 301                                                struct syscall_arg *arg)
 302{
 303        struct strarray *sa = arg->parm;
 304        int idx = arg->val - sa->offset;
 305
 306        if (idx < 0 || idx >= sa->nr_entries)
 307                return scnprintf(bf, size, intfmt, arg->val);
 308
 309        return scnprintf(bf, size, "%s", sa->entries[idx]);
 310}
 311
 312static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
 313                                              struct syscall_arg *arg)
 314{
 315        return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
 316}
 317
 318#define SCA_STRARRAY syscall_arg__scnprintf_strarray
 319
 320#if defined(__i386__) || defined(__x86_64__)
 321/*
 322 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 323 *        gets rewritten to support all arches.
 324 */
 325static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
 326                                                 struct syscall_arg *arg)
 327{
 328        return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
 329}
 330
 331#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
 332#endif /* defined(__i386__) || defined(__x86_64__) */
 333
 334static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
 335                                        struct syscall_arg *arg);
 336
 337#define SCA_FD syscall_arg__scnprintf_fd
 338
 339#ifndef AT_FDCWD
 340#define AT_FDCWD        -100
 341#endif
 342
 343static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
 344                                           struct syscall_arg *arg)
 345{
 346        int fd = arg->val;
 347
 348        if (fd == AT_FDCWD)
 349                return scnprintf(bf, size, "CWD");
 350
 351        return syscall_arg__scnprintf_fd(bf, size, arg);
 352}
 353
 354#define SCA_FDAT syscall_arg__scnprintf_fd_at
 355
 356static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
 357                                              struct syscall_arg *arg);
 358
 359#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
 360
 361static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
 362                                         struct syscall_arg *arg)
 363{
 364        return scnprintf(bf, size, "%#lx", arg->val);
 365}
 366
 367#define SCA_HEX syscall_arg__scnprintf_hex
 368
 369static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
 370                                         struct syscall_arg *arg)
 371{
 372        return scnprintf(bf, size, "%d", arg->val);
 373}
 374
 375#define SCA_INT syscall_arg__scnprintf_int
 376
 377static const char *bpf_cmd[] = {
 378        "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
 379        "MAP_GET_NEXT_KEY", "PROG_LOAD",
 380};
 381static DEFINE_STRARRAY(bpf_cmd);
 382
 383static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
 384static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
 385
 386static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
 387static DEFINE_STRARRAY(itimers);
 388
 389static const char *keyctl_options[] = {
 390        "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
 391        "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
 392        "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
 393        "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
 394        "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
 395};
 396static DEFINE_STRARRAY(keyctl_options);
 397
 398static const char *whences[] = { "SET", "CUR", "END",
 399#ifdef SEEK_DATA
 400"DATA",
 401#endif
 402#ifdef SEEK_HOLE
 403"HOLE",
 404#endif
 405};
 406static DEFINE_STRARRAY(whences);
 407
 408static const char *fcntl_cmds[] = {
 409        "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
 410        "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
 411        "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
 412        "F_GETOWNER_UIDS",
 413};
 414static DEFINE_STRARRAY(fcntl_cmds);
 415
 416static const char *rlimit_resources[] = {
 417        "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
 418        "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
 419        "RTTIME",
 420};
 421static DEFINE_STRARRAY(rlimit_resources);
 422
 423static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
 424static DEFINE_STRARRAY(sighow);
 425
 426static const char *clockid[] = {
 427        "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
 428        "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
 429        "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
 430};
 431static DEFINE_STRARRAY(clockid);
 432
 433static const char *socket_families[] = {
 434        "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
 435        "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
 436        "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
 437        "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
 438        "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
 439        "ALG", "NFC", "VSOCK",
 440};
 441static DEFINE_STRARRAY(socket_families);
 442
 443static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
 444                                                 struct syscall_arg *arg)
 445{
 446        size_t printed = 0;
 447        int mode = arg->val;
 448
 449        if (mode == F_OK) /* 0 */
 450                return scnprintf(bf, size, "F");
 451#define P_MODE(n) \
 452        if (mode & n##_OK) { \
 453                printed += scnprintf(bf + printed, size - printed, "%s", #n); \
 454                mode &= ~n##_OK; \
 455        }
 456
 457        P_MODE(R);
 458        P_MODE(W);
 459        P_MODE(X);
 460#undef P_MODE
 461
 462        if (mode)
 463                printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
 464
 465        return printed;
 466}
 467
 468#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
 469
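/*
 * So, for access(2), a mode of R_OK|W_OK is rendered as "RW", plain F_OK as
 * "F", and any leftover bits the P_MODE() list above does not know about are
 * appended in hex, e.g. "R|0x8" (illustrative).
 */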
 470static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
 471                                              struct syscall_arg *arg);
 472
 473#define SCA_FILENAME syscall_arg__scnprintf_filename
 474
 475static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
 476                                                struct syscall_arg *arg)
 477{
 478        int printed = 0, flags = arg->val;
 479
 480#define P_FLAG(n) \
 481        if (flags & O_##n) { \
 482                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 483                flags &= ~O_##n; \
 484        }
 485
 486        P_FLAG(CLOEXEC);
 487        P_FLAG(NONBLOCK);
 488#undef P_FLAG
 489
 490        if (flags)
 491                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 492
 493        return printed;
 494}
 495
 496#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 497
 498#if defined(__i386__) || defined(__x86_64__)
 499/*
 500 * FIXME: Make this available to all arches.
 501 */
 502#define TCGETS          0x5401
 503
 504static const char *tioctls[] = {
 505        "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
 506        "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
 507        "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
 508        "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
 509        "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
 510        "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
 511        "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
 512        "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
 513        "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
 514        "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
 515        "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
 516        [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
 517        "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
 518        "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
 519        "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
 520};
 521
 522static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
 523#endif /* defined(__i386__) || defined(__x86_64__) */
 524
 525#ifndef GRND_NONBLOCK
 526#define GRND_NONBLOCK   0x0001
 527#endif
 528#ifndef GRND_RANDOM
 529#define GRND_RANDOM     0x0002
 530#endif
 531
 532static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
 533                                                   struct syscall_arg *arg)
 534{
 535        int printed = 0, flags = arg->val;
 536
 537#define P_FLAG(n) \
 538        if (flags & GRND_##n) { \
 539                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
 540                flags &= ~GRND_##n; \
 541        }
 542
 543        P_FLAG(RANDOM);
 544        P_FLAG(NONBLOCK);
 545#undef P_FLAG
 546
 547        if (flags)
 548                printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
 549
 550        return printed;
 551}
 552
 553#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
 554
 555#define STRARRAY(arg, name, array) \
 556          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
 557          .arg_parm      = { [arg] = &strarray__##array, }
 558
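/*
 * STRARRAY(arg, name, array) fills both per-argument tables of a syscall_fmt
 * entry at once: argument slot 'arg' gets the generic SCA_STRARRAY printer
 * and &strarray__<array> as its parameter, e.g. STRARRAY(0, cmd, bpf_cmd) in
 * the "bpf" entry below.
 */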
 559#include "trace/beauty/eventfd.c"
 560#include "trace/beauty/flock.c"
 561#include "trace/beauty/futex_op.c"
 562#include "trace/beauty/mmap.c"
 563#include "trace/beauty/mode_t.c"
 564#include "trace/beauty/msg_flags.c"
 565#include "trace/beauty/open_flags.c"
 566#include "trace/beauty/perf_event_open.c"
 567#include "trace/beauty/pid.c"
 568#include "trace/beauty/sched_policy.c"
 569#include "trace/beauty/seccomp.c"
 570#include "trace/beauty/signum.c"
 571#include "trace/beauty/socket_type.c"
 572#include "trace/beauty/waitid_options.c"
 573
 574static struct syscall_fmt {
 575        const char *name;
 576        const char *alias;
 577        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
 578        void       *arg_parm[6];
 579        bool       errmsg;
 580        bool       errpid;
 581        bool       timeout;
 582        bool       hexret;
 583} syscall_fmts[] = {
 584        { .name     = "access",     .errmsg = true,
 585          .arg_scnprintf = { [1] = SCA_ACCMODE,  /* mode */ }, },
 586        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
 587        { .name     = "bpf",        .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
 588        { .name     = "brk",        .hexret = true,
 589          .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
 590        { .name     = "chdir",      .errmsg = true, },
 591        { .name     = "chmod",      .errmsg = true, },
 592        { .name     = "chroot",     .errmsg = true, },
 593        { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
 594        { .name     = "clone",      .errpid = true, },
 595        { .name     = "close",      .errmsg = true,
 596          .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
 597        { .name     = "connect",    .errmsg = true, },
 598        { .name     = "creat",      .errmsg = true, },
 599        { .name     = "dup",        .errmsg = true, },
 600        { .name     = "dup2",       .errmsg = true, },
 601        { .name     = "dup3",       .errmsg = true, },
 602        { .name     = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
 603        { .name     = "eventfd2",   .errmsg = true,
 604          .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
 605        { .name     = "faccessat",  .errmsg = true, },
 606        { .name     = "fadvise64",  .errmsg = true, },
 607        { .name     = "fallocate",  .errmsg = true, },
 608        { .name     = "fchdir",     .errmsg = true, },
 609        { .name     = "fchmod",     .errmsg = true, },
 610        { .name     = "fchmodat",   .errmsg = true,
 611          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
 612        { .name     = "fchown",     .errmsg = true, },
 613        { .name     = "fchownat",   .errmsg = true,
 614          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
 615        { .name     = "fcntl",      .errmsg = true,
 616          .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
 617          .arg_parm      = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
 618        { .name     = "fdatasync",  .errmsg = true, },
 619        { .name     = "flock",      .errmsg = true,
 620          .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
 621        { .name     = "fsetxattr",  .errmsg = true, },
 622        { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
 623        { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
 624        { .name     = "fstatfs",    .errmsg = true, },
 625        { .name     = "fsync",    .errmsg = true, },
 626        { .name     = "ftruncate", .errmsg = true, },
 627        { .name     = "futex",      .errmsg = true,
 628          .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
 629        { .name     = "futimesat", .errmsg = true,
 630          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
 631        { .name     = "getdents",   .errmsg = true, },
 632        { .name     = "getdents64", .errmsg = true, },
 633        { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
 634        { .name     = "getpid",     .errpid = true, },
 635        { .name     = "getpgid",    .errpid = true, },
 636        { .name     = "getppid",    .errpid = true, },
 637        { .name     = "getrandom",  .errmsg = true,
 638          .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
 639        { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
 640        { .name     = "getxattr",   .errmsg = true, },
 641        { .name     = "inotify_add_watch",          .errmsg = true, },
 642        { .name     = "ioctl",      .errmsg = true,
 643          .arg_scnprintf = {
 644#if defined(__i386__) || defined(__x86_64__)
 645/*
 646 * FIXME: Make this available to all arches.
 647 */
 648                             [1] = SCA_STRHEXARRAY, /* cmd */
 649                             [2] = SCA_HEX, /* arg */ },
 650          .arg_parm      = { [1] = &strarray__tioctls, /* cmd */ }, },
 651#else
 652                             [2] = SCA_HEX, /* arg */ }, },
 653#endif
 654        { .name     = "keyctl",     .errmsg = true, STRARRAY(0, option, keyctl_options), },
 655        { .name     = "kill",       .errmsg = true,
 656          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
 657        { .name     = "lchown",    .errmsg = true, },
 658        { .name     = "lgetxattr",  .errmsg = true, },
 659        { .name     = "linkat",     .errmsg = true,
 660          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
 661        { .name     = "listxattr",  .errmsg = true, },
 662        { .name     = "llistxattr", .errmsg = true, },
 663        { .name     = "lremovexattr",  .errmsg = true, },
 664        { .name     = "lseek",      .errmsg = true,
 665          .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
 666          .arg_parm      = { [2] = &strarray__whences, /* whence */ }, },
 667        { .name     = "lsetxattr",  .errmsg = true, },
 668        { .name     = "lstat",      .errmsg = true, .alias = "newlstat", },
 669        { .name     = "lsxattr",    .errmsg = true, },
 670        { .name     = "madvise",    .errmsg = true,
 671          .arg_scnprintf = { [0] = SCA_HEX,      /* start */
 672                             [2] = SCA_MADV_BHV, /* behavior */ }, },
 673        { .name     = "mkdir",    .errmsg = true, },
 674        { .name     = "mkdirat",    .errmsg = true,
 675          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
 676        { .name     = "mknod",      .errmsg = true, },
 677        { .name     = "mknodat",    .errmsg = true,
 678          .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
 679        { .name     = "mlock",      .errmsg = true,
 680          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 681        { .name     = "mlockall",   .errmsg = true,
 682          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 683        { .name     = "mmap",       .hexret = true,
 684/* The standard mmap maps to old_mmap on s390x */
 685#if defined(__s390x__)
 686        .alias = "old_mmap",
 687#endif
 688          .arg_scnprintf = { [0] = SCA_HEX,       /* addr */
 689                             [2] = SCA_MMAP_PROT, /* prot */
 690                             [3] = SCA_MMAP_FLAGS, /* flags */ }, },
 691        { .name     = "mprotect",   .errmsg = true,
 692          .arg_scnprintf = { [0] = SCA_HEX, /* start */
 693                             [2] = SCA_MMAP_PROT, /* prot */ }, },
 694        { .name     = "mq_unlink", .errmsg = true,
 695          .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
 696        { .name     = "mremap",     .hexret = true,
 697          .arg_scnprintf = { [0] = SCA_HEX, /* addr */
 698                             [3] = SCA_MREMAP_FLAGS, /* flags */
 699                             [4] = SCA_HEX, /* new_addr */ }, },
 700        { .name     = "munlock",    .errmsg = true,
 701          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 702        { .name     = "munmap",     .errmsg = true,
 703          .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 704        { .name     = "name_to_handle_at", .errmsg = true,
 705          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
 706        { .name     = "newfstatat", .errmsg = true,
 707          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
 708        { .name     = "open",       .errmsg = true,
 709          .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
 710        { .name     = "open_by_handle_at", .errmsg = true,
 711          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
 712                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
 713        { .name     = "openat",     .errmsg = true,
 714          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
 715                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
 716        { .name     = "perf_event_open", .errmsg = true,
 717          .arg_scnprintf = { [2] = SCA_INT, /* cpu */
 718                             [3] = SCA_FD,  /* group_fd */
 719                             [4] = SCA_PERF_FLAGS,  /* flags */ }, },
 720        { .name     = "pipe2",      .errmsg = true,
 721          .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
 722        { .name     = "poll",       .errmsg = true, .timeout = true, },
 723        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
 724        { .name     = "pread",      .errmsg = true, .alias = "pread64", },
 725        { .name     = "preadv",     .errmsg = true, .alias = "pread", },
 726        { .name     = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
 727        { .name     = "pwrite",     .errmsg = true, .alias = "pwrite64", },
 728        { .name     = "pwritev",    .errmsg = true, },
 729        { .name     = "read",       .errmsg = true, },
 730        { .name     = "readlink",   .errmsg = true, },
 731        { .name     = "readlinkat", .errmsg = true,
 732          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
 733        { .name     = "readv",      .errmsg = true, },
 734        { .name     = "recvfrom",   .errmsg = true,
 735          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
 736        { .name     = "recvmmsg",   .errmsg = true,
 737          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
 738        { .name     = "recvmsg",    .errmsg = true,
 739          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
 740        { .name     = "removexattr", .errmsg = true, },
 741        { .name     = "renameat",   .errmsg = true,
 742          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
 743        { .name     = "rmdir",    .errmsg = true, },
 744        { .name     = "rt_sigaction", .errmsg = true,
 745          .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
 746        { .name     = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
 747        { .name     = "rt_sigqueueinfo", .errmsg = true,
 748          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
 749        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
 750          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
 751        { .name     = "sched_getattr",        .errmsg = true, },
 752        { .name     = "sched_setattr",        .errmsg = true, },
 753        { .name     = "sched_setscheduler",   .errmsg = true,
 754          .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
 755        { .name     = "seccomp", .errmsg = true,
 756          .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
 757                             [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
 758        { .name     = "select",     .errmsg = true, .timeout = true, },
 759        { .name     = "sendmmsg",    .errmsg = true,
 760          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
 761        { .name     = "sendmsg",    .errmsg = true,
 762          .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
 763        { .name     = "sendto",     .errmsg = true,
 764          .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
 765        { .name     = "set_tid_address", .errpid = true, },
 766        { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
 767        { .name     = "setpgid",    .errmsg = true, },
 768        { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
 769        { .name     = "setxattr",   .errmsg = true, },
 770        { .name     = "shutdown",   .errmsg = true, },
 771        { .name     = "socket",     .errmsg = true,
 772          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
 773                             [1] = SCA_SK_TYPE, /* type */ },
 774          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
 775        { .name     = "socketpair", .errmsg = true,
 776          .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
 777                             [1] = SCA_SK_TYPE, /* type */ },
 778          .arg_parm      = { [0] = &strarray__socket_families, /* family */ }, },
 779        { .name     = "stat",       .errmsg = true, .alias = "newstat", },
 780        { .name     = "statfs",     .errmsg = true, },
 781        { .name     = "statx",      .errmsg = true,
  782          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
 783                             [2] = SCA_STATX_FLAGS, /* flags */
 784                             [3] = SCA_STATX_MASK, /* mask */ }, },
 785        { .name     = "swapoff",    .errmsg = true,
 786          .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
 787        { .name     = "swapon",     .errmsg = true,
 788          .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
 789        { .name     = "symlinkat",  .errmsg = true,
 790          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
 791        { .name     = "tgkill",     .errmsg = true,
 792          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
 793        { .name     = "tkill",      .errmsg = true,
 794          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
 795        { .name     = "truncate",   .errmsg = true, },
 796        { .name     = "uname",      .errmsg = true, .alias = "newuname", },
 797        { .name     = "unlinkat",   .errmsg = true,
 798          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
 799        { .name     = "utime",  .errmsg = true, },
 800        { .name     = "utimensat",  .errmsg = true,
 801          .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
 802        { .name     = "utimes",  .errmsg = true, },
 803        { .name     = "vmsplice",  .errmsg = true, },
 804        { .name     = "wait4",      .errpid = true,
 805          .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
 806        { .name     = "waitid",     .errpid = true,
 807          .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
 808        { .name     = "write",      .errmsg = true, },
 809        { .name     = "writev",     .errmsg = true, },
 810};
 811
 812static int syscall_fmt__cmp(const void *name, const void *fmtp)
 813{
 814        const struct syscall_fmt *fmt = fmtp;
 815        return strcmp(name, fmt->name);
 816}
 817
 818static struct syscall_fmt *syscall_fmt__find(const char *name)
 819{
 820        const int nmemb = ARRAY_SIZE(syscall_fmts);
 821        return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
 822}
 823
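/*
 * syscall_fmt__find() uses bsearch(), so syscall_fmts[] must be kept sorted
 * by ->name; an entry added out of order would simply never be found.
 */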
 824struct syscall {
 825        struct event_format *tp_format;
 826        int                 nr_args;
 827        struct format_field *args;
 828        const char          *name;
 829        bool                is_exit;
 830        struct syscall_fmt  *fmt;
 831        size_t              (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
 832        void                **arg_parm;
 833};
 834
 835/*
  836 * We need this 'calculated' boolean because in some cases we really don't
  837 * know what the duration of a syscall is, for instance, when we start a
  838 * session and some threads are already waiting for a syscall to finish,
  839 * say 'poll', in which case all we can do is print "( ? )" for the
  840 * duration and for the start timestamp.
 841 */
 842static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
 843{
 844        double duration = (double)t / NSEC_PER_MSEC;
 845        size_t printed = fprintf(fp, "(");
 846
 847        if (!calculated)
 848                printed += fprintf(fp, "     ?   ");
 849        else if (duration >= 1.0)
 850                printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
 851        else if (duration >= 0.01)
 852                printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
 853        else
 854                printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
 855        return printed + fprintf(fp, "): ");
 856}
 857
 858/**
 859 * filename.ptr: The filename char pointer that will be vfs_getname'd
 860 * filename.entry_str_pos: Where to insert the string translated from
 861 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 862 */
 863struct thread_trace {
 864        u64               entry_time;
 865        bool              entry_pending;
 866        unsigned long     nr_events;
 867        unsigned long     pfmaj, pfmin;
 868        char              *entry_str;
 869        double            runtime_ms;
 870        struct {
 871                unsigned long ptr;
 872                short int     entry_str_pos;
 873                bool          pending_open;
 874                unsigned int  namelen;
 875                char          *name;
 876        } filename;
 877        struct {
 878                int       max;
 879                char      **table;
 880        } paths;
 881
 882        struct intlist *syscall_stats;
 883};
 884
 885static struct thread_trace *thread_trace__new(void)
 886{
 887        struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
 888
  889        if (ttrace) {
  890                ttrace->paths.max = -1;
  891                ttrace->syscall_stats = intlist__new(NULL);
  892        }
 893
 894        return ttrace;
 895}
 896
 897static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
 898{
 899        struct thread_trace *ttrace;
 900
 901        if (thread == NULL)
 902                goto fail;
 903
 904        if (thread__priv(thread) == NULL)
 905                thread__set_priv(thread, thread_trace__new());
 906
 907        if (thread__priv(thread) == NULL)
 908                goto fail;
 909
 910        ttrace = thread__priv(thread);
 911        ++ttrace->nr_events;
 912
 913        return ttrace;
 914fail:
 915        color_fprintf(fp, PERF_COLOR_RED,
 916                      "WARNING: not enough memory, dropping samples!\n");
 917        return NULL;
 918}
 919
 920#define TRACE_PFMAJ             (1 << 0)
 921#define TRACE_PFMIN             (1 << 1)
 922
 923static const size_t trace__entry_str_size = 2048;
 924
 925static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
 926{
 927        struct thread_trace *ttrace = thread__priv(thread);
 928
 929        if (fd > ttrace->paths.max) {
 930                char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
 931
 932                if (npath == NULL)
 933                        return -1;
 934
 935                if (ttrace->paths.max != -1) {
 936                        memset(npath + ttrace->paths.max + 1, 0,
 937                               (fd - ttrace->paths.max) * sizeof(char *));
 938                } else {
 939                        memset(npath, 0, (fd + 1) * sizeof(char *));
 940                }
 941
 942                ttrace->paths.table = npath;
 943                ttrace->paths.max   = fd;
 944        }
 945
 946        ttrace->paths.table[fd] = strdup(pathname);
 947
 948        return ttrace->paths.table[fd] != NULL ? 0 : -1;
 949}
 950
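/*
 * ttrace->paths.table is indexed directly by fd and grown on demand above,
 * with the gap between the old and new maximum zeroed so that fds we have not
 * seen yet read back as NULL paths.
 */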
 951static int thread__read_fd_path(struct thread *thread, int fd)
 952{
 953        char linkname[PATH_MAX], pathname[PATH_MAX];
 954        struct stat st;
 955        int ret;
 956
 957        if (thread->pid_ == thread->tid) {
 958                scnprintf(linkname, sizeof(linkname),
 959                          "/proc/%d/fd/%d", thread->pid_, fd);
 960        } else {
 961                scnprintf(linkname, sizeof(linkname),
 962                          "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
 963        }
 964
 965        if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
 966                return -1;
 967
 968        ret = readlink(linkname, pathname, sizeof(pathname));
 969
 970        if (ret < 0 || ret > st.st_size)
 971                return -1;
 972
 973        pathname[ret] = '\0';
 974        return trace__set_fd_pathname(thread, fd, pathname);
 975}
 976
 977static const char *thread__fd_path(struct thread *thread, int fd,
 978                                   struct trace *trace)
 979{
 980        struct thread_trace *ttrace = thread__priv(thread);
 981
 982        if (ttrace == NULL)
 983                return NULL;
 984
 985        if (fd < 0)
 986                return NULL;
 987
  988        if (fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL) {
 989                if (!trace->live)
 990                        return NULL;
 991                ++trace->stats.proc_getname;
 992                if (thread__read_fd_path(thread, fd))
 993                        return NULL;
 994        }
 995
 996        return ttrace->paths.table[fd];
 997}
 998
 999static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1000                                        struct syscall_arg *arg)
1001{
1002        int fd = arg->val;
1003        size_t printed = scnprintf(bf, size, "%d", fd);
1004        const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1005
1006        if (path)
1007                printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1008
1009        return printed;
1010}
1011
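/*
 * With the path lookup above, an fd argument prints as the number optionally
 * followed by the path it maps to, e.g. "3</etc/passwd>" (illustrative); the
 * path comes from the per-thread fd table, which thread__fd_path() fills from
 * /proc when tracing live.
 */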
1012static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1013                                              struct syscall_arg *arg)
1014{
1015        int fd = arg->val;
1016        size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1017        struct thread_trace *ttrace = thread__priv(arg->thread);
1018
1019        if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1020                zfree(&ttrace->paths.table[fd]);
1021
1022        return printed;
1023}
1024
1025static void thread__set_filename_pos(struct thread *thread, const char *bf,
1026                                     unsigned long ptr)
1027{
1028        struct thread_trace *ttrace = thread__priv(thread);
1029
1030        ttrace->filename.ptr = ptr;
1031        ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1032}
1033
1034static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1035                                              struct syscall_arg *arg)
1036{
1037        unsigned long ptr = arg->val;
1038
1039        if (!arg->trace->vfs_getname)
 1040                return scnprintf(bf, size, "%#lx", ptr);
1041
1042        thread__set_filename_pos(arg->thread, bf, ptr);
1043        return 0;
1044}
1045
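/*
 * When the vfs_getname probe is available nothing is printed here: the
 * current position in the entry string is recorded via
 * thread__set_filename_pos() and the filename is spliced in later, when the
 * corresponding getname event arrives (see the filename.entry_str_pos
 * comment above struct thread_trace).
 */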
1046static bool trace__filter_duration(struct trace *trace, double t)
1047{
1048        return t < (trace->duration_filter * NSEC_PER_MSEC);
1049}
1050
1051static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1052{
1053        double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1054
1055        return fprintf(fp, "%10.3f ", ts);
1056}
1057
1058/*
 1059 * We handle tstamp=0 as an undefined tstamp, i.e. like when we are
 1060 * using ttrace->entry_time for a thread that receives a sys_exit without
 1061 * first having received a sys_enter ("poll" issued before the tracing
 1062 * session starts, or the sys_enter was lost to a ring buffer overflow).
1063 */
1064static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1065{
1066        if (tstamp > 0)
1067                return __trace__fprintf_tstamp(trace, tstamp, fp);
1068
1069        return fprintf(fp, "         ? ");
1070}
1071
1072static bool done = false;
1073static bool interrupted = false;
1074
1075static void sig_handler(int sig)
1076{
1077        done = true;
1078        interrupted = sig == SIGINT;
1079}
1080
1081static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1082                                        u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1083{
1084        size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1085        printed += fprintf_duration(duration, duration_calculated, fp);
1086
1087        if (trace->multiple_threads) {
1088                if (trace->show_comm)
1089                        printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1090                printed += fprintf(fp, "%d ", thread->tid);
1091        }
1092
1093        return printed;
1094}
1095
1096static int trace__process_event(struct trace *trace, struct machine *machine,
1097                                union perf_event *event, struct perf_sample *sample)
1098{
1099        int ret = 0;
1100
1101        switch (event->header.type) {
1102        case PERF_RECORD_LOST:
1103                color_fprintf(trace->output, PERF_COLOR_RED,
1104                              "LOST %" PRIu64 " events!\n", event->lost.lost);
1105                ret = machine__process_lost_event(machine, event, sample);
1106                break;
1107        default:
1108                ret = machine__process_event(machine, event, sample);
1109                break;
1110        }
1111
1112        return ret;
1113}
1114
1115static int trace__tool_process(struct perf_tool *tool,
1116                               union perf_event *event,
1117                               struct perf_sample *sample,
1118                               struct machine *machine)
1119{
1120        struct trace *trace = container_of(tool, struct trace, tool);
1121        return trace__process_event(trace, machine, event, sample);
1122}
1123
1124static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1125{
1126        struct machine *machine = vmachine;
1127
1128        if (machine->kptr_restrict_warned)
1129                return NULL;
1130
1131        if (symbol_conf.kptr_restrict) {
1132                pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1133                           "Check /proc/sys/kernel/kptr_restrict.\n\n"
1134                           "Kernel samples will not be resolved.\n");
1135                machine->kptr_restrict_warned = true;
1136                return NULL;
1137        }
1138
1139        return machine__resolve_kernel_addr(vmachine, addrp, modp);
1140}
1141
1142static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1143{
1144        int err = symbol__init(NULL);
1145
1146        if (err)
1147                return err;
1148
1149        trace->host = machine__new_host();
1150        if (trace->host == NULL)
1151                return -ENOMEM;
1152
1153        if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1154                return -errno;
1155
1156        err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1157                                            evlist->threads, trace__tool_process, false,
1158                                            trace->opts.proc_map_timeout);
1159        if (err)
1160                symbol__exit();
1161
1162        return err;
1163}
1164
1165static int syscall__set_arg_fmts(struct syscall *sc)
1166{
1167        struct format_field *field;
1168        int idx = 0, len;
1169
1170        sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1171        if (sc->arg_scnprintf == NULL)
1172                return -1;
1173
1174        if (sc->fmt)
1175                sc->arg_parm = sc->fmt->arg_parm;
1176
1177        for (field = sc->args; field; field = field->next) {
1178                if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1179                        sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1180                else if (strcmp(field->type, "const char *") == 0 &&
1181                         (strcmp(field->name, "filename") == 0 ||
1182                          strcmp(field->name, "path") == 0 ||
1183                          strcmp(field->name, "pathname") == 0))
1184                        sc->arg_scnprintf[idx] = SCA_FILENAME;
1185                else if (field->flags & FIELD_IS_POINTER)
1186                        sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1187                else if (strcmp(field->type, "pid_t") == 0)
1188                        sc->arg_scnprintf[idx] = SCA_PID;
1189                else if (strcmp(field->type, "umode_t") == 0)
1190                        sc->arg_scnprintf[idx] = SCA_MODE_T;
1191                else if ((strcmp(field->type, "int") == 0 ||
1192                          strcmp(field->type, "unsigned int") == 0 ||
1193                          strcmp(field->type, "long") == 0) &&
1194                         (len = strlen(field->name)) >= 2 &&
1195                         strcmp(field->name + len - 2, "fd") == 0) {
1196                        /*
1197                         * /sys/kernel/tracing/events/syscalls/sys_enter*
1198                         * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1199                         * 65 int
1200                         * 23 unsigned int
1201                         * 7 unsigned long
1202                         */
1203                        sc->arg_scnprintf[idx] = SCA_FD;
1204                }
1205                ++idx;
1206        }
1207
1208        return 0;
1209}
1210
1211static int trace__read_syscall_info(struct trace *trace, int id)
1212{
1213        char tp_name[128];
1214        struct syscall *sc;
1215        const char *name = syscalltbl__name(trace->sctbl, id);
1216
1217        if (name == NULL)
1218                return -1;
1219
1220        if (id > trace->syscalls.max) {
1221                struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1222
1223                if (nsyscalls == NULL)
1224                        return -1;
1225
1226                if (trace->syscalls.max != -1) {
1227                        memset(nsyscalls + trace->syscalls.max + 1, 0,
1228                               (id - trace->syscalls.max) * sizeof(*sc));
1229                } else {
1230                        memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1231                }
1232
1233                trace->syscalls.table = nsyscalls;
1234                trace->syscalls.max   = id;
1235        }
1236
1237        sc = trace->syscalls.table + id;
1238        sc->name = name;
1239
1240        sc->fmt  = syscall_fmt__find(sc->name);
1241
1242        snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1243        sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1244
1245        if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1246                snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1247                sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1248        }
1249
1250        if (IS_ERR(sc->tp_format))
1251                return -1;
1252
1253        sc->args = sc->tp_format->format.fields;
1254        sc->nr_args = sc->tp_format->format.nr_fields;
1255        /*
 1256         * The first field, '__syscall_nr' ('nr' on older kernels), holds
 1257         * the syscall number, which is redundant here, so check for it
 1258         * and drop it.
1259         */
1260        if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1261                sc->args = sc->args->next;
1262                --sc->nr_args;
1263        }
1264
1265        sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1266
1267        return syscall__set_arg_fmts(sc);
1268}
1269
1270static int trace__validate_ev_qualifier(struct trace *trace)
1271{
1272        int err = 0, i;
1273        struct str_node *pos;
1274
1275        trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1276        trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1277                                                 sizeof(trace->ev_qualifier_ids.entries[0]));
1278
1279        if (trace->ev_qualifier_ids.entries == NULL) {
1280                fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1281                       trace->output);
1282                err = -EINVAL;
1283                goto out;
1284        }
1285
1286        i = 0;
1287
1288        strlist__for_each_entry(pos, trace->ev_qualifier) {
1289                const char *sc = pos->s;
1290                int id = syscalltbl__id(trace->sctbl, sc);
1291
1292                if (id < 0) {
1293                        if (err == 0) {
1294                                fputs("Error:\tInvalid syscall ", trace->output);
1295                                err = -EINVAL;
1296                        } else {
1297                                fputs(", ", trace->output);
1298                        }
1299
1300                        fputs(sc, trace->output);
1301                }
1302
1303                trace->ev_qualifier_ids.entries[i++] = id;
1304        }
1305
1306        if (err < 0) {
1307                fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1308                      "\nHint:\tand: 'man syscalls'\n", trace->output);
1309                zfree(&trace->ev_qualifier_ids.entries);
1310                trace->ev_qualifier_ids.nr = 0;
1311        }
1312out:
1313        return err;
1314}
1315
1316/*
1317 * args is to be interpreted as a series of longs but we need to handle
 1318 * 8-byte unaligned accesses. args points to raw_data within the event
 1319 * and raw_data is not guaranteed to be 8-byte aligned, because it is
 1320 * preceded by raw_size, which is a u32. So we need to copy args to a temp
 1321 * variable to read it. Most notably this avoids extended load instructions
 1322 * on unaligned addresses.
1323 */
1324
1325static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1326                                      unsigned char *args, struct trace *trace,
1327                                      struct thread *thread)
1328{
1329        size_t printed = 0;
1330        unsigned char *p;
1331        unsigned long val;
1332
1333        if (sc->args != NULL) {
1334                struct format_field *field;
1335                u8 bit = 1;
1336                struct syscall_arg arg = {
1337                        .idx    = 0,
1338                        .mask   = 0,
1339                        .trace  = trace,
1340                        .thread = thread,
1341                };
1342
1343                for (field = sc->args; field;
1344                     field = field->next, ++arg.idx, bit <<= 1) {
1345                        if (arg.mask & bit)
1346                                continue;
1347
1348                        /* special care for unaligned accesses */
1349                        p = args + sizeof(unsigned long) * arg.idx;
1350                        memcpy(&val, p, sizeof(val));
1351
1352                        /*
 1353                         * Suppress this argument if its value is zero and
 1354                         * we don't have a string associated in a strarray
 1355                         * for it.
1356                         */
1357                        if (val == 0 &&
1358                            !(sc->arg_scnprintf &&
1359                              sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1360                              sc->arg_parm[arg.idx]))
1361                                continue;
1362
1363                        printed += scnprintf(bf + printed, size - printed,
1364                                             "%s%s: ", printed ? ", " : "", field->name);
1365                        if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1366                                arg.val = val;
1367                                if (sc->arg_parm)
1368                                        arg.parm = sc->arg_parm[arg.idx];
1369                                printed += sc->arg_scnprintf[arg.idx](bf + printed,
1370                                                                      size - printed, &arg);
1371                        } else {
1372                                printed += scnprintf(bf + printed, size - printed,
1373                                                     "%ld", val);
1374                        }
1375                }
1376        } else if (IS_ERR(sc->tp_format)) {
1377                /*
1378                 * If we managed to read the tracepoint /format file, then we
1379                 * may end up not having any args, like with gettid(), so only
1380                 * print the raw args when we didn't manage to read it.
1381                 */
1382                int i = 0;
1383
1384                while (i < 6) {
1385                        /* special care for unaligned accesses */
1386                        p = args + sizeof(unsigned long) * i;
1387                        memcpy(&val, p, sizeof(val));
1388                        printed += scnprintf(bf + printed, size - printed,
1389                                             "%sarg%d: %ld",
1390                                             printed ? ", " : "", i, val);
1391                        ++i;
1392                }
1393        }
1394
1395        return printed;
1396}
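    /*
     * The string built above is what ends up between the parentheses of the
     * strace-like output line, as a comma separated list of "name: value"
     * pairs, roughly:
     *
     *	fcntl(fd: 3, cmd: GETFL)
     *
     * with zero-valued arguments suppressed unless they map to a strarray
     * entry.
     */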
1397
1398typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1399                                  union perf_event *event,
1400                                  struct perf_sample *sample);
1401
1402static struct syscall *trace__syscall_info(struct trace *trace,
1403                                           struct perf_evsel *evsel, int id)
1404{
1405
1406        if (id < 0) {
1407
1408                /*
1409                 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36; haven't
1410                 * tried before that. Keep this at a higher verbosity level until it
1411                 * is explained. Reproduced with plain ftrace with:
1412                 *
1413                 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1414                 * grep "NR -1 " /t/trace_pipe
1415                 *
1416                 * After generating some load on the machine.
1417                 */
1418                if (verbose > 1) {
1419                        static u64 n;
1420                        fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1421                                id, perf_evsel__name(evsel), ++n);
1422                }
1423                return NULL;
1424        }
1425
1426        if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1427            trace__read_syscall_info(trace, id))
1428                goto out_cant_read;
1429
1430        if (id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)
1431                goto out_cant_read;
1432
1433        return &trace->syscalls.table[id];
1434
1435out_cant_read:
1436        if (verbose > 0) {
1437                fprintf(trace->output, "Problems reading syscall %d", id);
1438                if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1439                        fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1440                fputs(" information\n", trace->output);
1441        }
1442        return NULL;
1443}
1444
1445static void thread__update_stats(struct thread_trace *ttrace,
1446                                 int id, struct perf_sample *sample)
1447{
1448        struct int_node *inode;
1449        struct stats *stats;
1450        u64 duration = 0;
1451
1452        inode = intlist__findnew(ttrace->syscall_stats, id);
1453        if (inode == NULL)
1454                return;
1455
1456        stats = inode->priv;
1457        if (stats == NULL) {
1458                stats = malloc(sizeof(struct stats));
1459                if (stats == NULL)
1460                        return;
1461                init_stats(stats);
1462                inode->priv = stats;
1463        }
1464
1465        if (ttrace->entry_time && sample->time > ttrace->entry_time)
1466                duration = sample->time - ttrace->entry_time;
1467
1468        update_stats(stats, duration);
1469}
1470
1471static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1472{
1473        struct thread_trace *ttrace;
1474        u64 duration;
1475        size_t printed;
1476
1477        if (trace->current == NULL)
1478                return 0;
1479
1480        ttrace = thread__priv(trace->current);
1481
1482        if (!ttrace->entry_pending)
1483                return 0;
1484
1485        duration = sample->time - ttrace->entry_time;
1486
1487        printed  = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
1488        printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1489        ttrace->entry_pending = false;
1490
1491        return printed;
1492}
1493
1494static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1495                            union perf_event *event __maybe_unused,
1496                            struct perf_sample *sample)
1497{
1498        char *msg;
1499        void *args;
1500        size_t printed = 0;
1501        struct thread *thread;
1502        int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1503        struct syscall *sc = trace__syscall_info(trace, evsel, id);
1504        struct thread_trace *ttrace;
1505
1506        if (sc == NULL)
1507                return -1;
1508
1509        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1510        ttrace = thread__trace(thread, trace->output);
1511        if (ttrace == NULL)
1512                goto out_put;
1513
1514        args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1515
1516        if (ttrace->entry_str == NULL) {
1517                ttrace->entry_str = malloc(trace__entry_str_size);
1518                if (!ttrace->entry_str)
1519                        goto out_put;
1520        }
1521
1522        if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
1523                trace__printf_interrupted_entry(trace, sample);
1524
1525        ttrace->entry_time = sample->time;
1526        msg = ttrace->entry_str;
1527        printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1528
1529        printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1530                                           args, trace, thread);
1531
1532        if (sc->is_exit) {
1533                if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
1534                        trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
1535                        fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
1536                }
1537        } else {
1538                ttrace->entry_pending = true;
1539                /* See trace__vfs_getname & trace__sys_exit */
1540                ttrace->filename.pending_open = false;
1541        }
1542
1543        if (trace->current != thread) {
1544                thread__put(trace->current);
1545                trace->current = thread__get(thread);
1546        }
1547        err = 0;
1548out_put:
1549        thread__put(thread);
1550        return err;
1551}
1552
1553static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1554                                    struct perf_sample *sample,
1555                                    struct callchain_cursor *cursor)
1556{
1557        struct addr_location al;
1558
1559        if (machine__resolve(trace->host, &al, sample) < 0 ||
1560            thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1561                return -1;
1562
1563        return 0;
1564}
1565
1566static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1567{
1568        /* TODO: user-configurable print_opts */
1569        const unsigned int print_opts = EVSEL__PRINT_SYM |
1570                                        EVSEL__PRINT_DSO |
1571                                        EVSEL__PRINT_UNKNOWN_AS_ADDR;
1572
1573        return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1574}
1575
1576static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1577                           union perf_event *event __maybe_unused,
1578                           struct perf_sample *sample)
1579{
1580        long ret;
1581        u64 duration = 0;
1582        bool duration_calculated = false;
1583        struct thread *thread;
1584        int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
1585        struct syscall *sc = trace__syscall_info(trace, evsel, id);
1586        struct thread_trace *ttrace;
1587
1588        if (sc == NULL)
1589                return -1;
1590
1591        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1592        ttrace = thread__trace(thread, trace->output);
1593        if (ttrace == NULL)
1594                goto out_put;
1595
1596        if (trace->summary)
1597                thread__update_stats(ttrace, id, sample);
1598
1599        ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1600
1601        if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
1602                trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1603                ttrace->filename.pending_open = false;
1604                ++trace->stats.vfs_getname;
1605        }
1606
1607        if (ttrace->entry_time) {
1608                duration = sample->time - ttrace->entry_time;
1609                if (trace__filter_duration(trace, duration))
1610                        goto out;
1611                duration_calculated = true;
1612        } else if (trace->duration_filter)
1613                goto out;
1614
1615        if (sample->callchain) {
1616                callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1617                if (callchain_ret == 0) {
1618                        if (callchain_cursor.nr < trace->min_stack)
1619                                goto out;
1620                        callchain_ret = 1;
1621                }
1622        }
1623
1624        if (trace->summary_only)
1625                goto out;
1626
1627        trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
1628
1629        if (ttrace->entry_pending) {
1630                fprintf(trace->output, "%-70s", ttrace->entry_str);
1631        } else {
1632                fprintf(trace->output, " ... [");
1633                color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1634                fprintf(trace->output, "]: %s()", sc->name);
1635        }
1636
1637        if (sc->fmt == NULL) {
1638signed_print:
1639                fprintf(trace->output, ") = %ld", ret);
1640        } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
1641                char bf[STRERR_BUFSIZE];
1642                const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
1643                           *e = audit_errno_to_name(-ret);
1644
1645                fprintf(trace->output, ") = -1 %s %s", e, emsg);
1646        } else if (ret == 0 && sc->fmt->timeout)
1647                fprintf(trace->output, ") = 0 Timeout");
1648        else if (sc->fmt->hexret)
1649                fprintf(trace->output, ") = %#lx", ret);
1650        else if (sc->fmt->errpid) {
1651                struct thread *child = machine__find_thread(trace->host, ret, ret);
1652
1653                if (child != NULL) {
1654                        fprintf(trace->output, ") = %ld", ret);
1655                        if (child->comm_set)
1656                                fprintf(trace->output, " (%s)", thread__comm_str(child));
1657                        thread__put(child);
1658                }
1659        } else
1660                goto signed_print;
1661
1662        fputc('\n', trace->output);
1663
1664        if (callchain_ret > 0)
1665                trace__fprintf_callchain(trace, sample);
1666        else if (callchain_ret < 0)
1667                pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1668out:
1669        ttrace->entry_pending = false;
1670        err = 0;
1671out_put:
1672        thread__put(thread);
1673        return err;
1674}
1675
1676static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1677                              union perf_event *event __maybe_unused,
1678                              struct perf_sample *sample)
1679{
1680        struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1681        struct thread_trace *ttrace;
1682        size_t filename_len, entry_str_len, to_move;
1683        ssize_t remaining_space;
1684        char *pos;
1685        const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
1686
1687        if (!thread)
1688                goto out;
1689
1690        ttrace = thread__priv(thread);
1691        if (!ttrace)
1692                goto out_put;
1693
1694        filename_len = strlen(filename);
1695        if (filename_len == 0)
1696                goto out_put;
1697
1698        if (ttrace->filename.namelen < filename_len) {
1699                char *f = realloc(ttrace->filename.name, filename_len + 1);
1700
1701                if (f == NULL)
1702                        goto out_put;
1703
1704                ttrace->filename.namelen = filename_len;
1705                ttrace->filename.name = f;
1706        }
1707
1708        strcpy(ttrace->filename.name, filename);
1709        ttrace->filename.pending_open = true;
1710
1711        if (!ttrace->filename.ptr)
1712                goto out_put;
1713
1714        entry_str_len = strlen(ttrace->entry_str);
1715        remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1716        if (remaining_space <= 0)
1717                goto out_put;
1718
1719        if (filename_len > (size_t)remaining_space) {
1720                filename += filename_len - remaining_space;
1721                filename_len = remaining_space;
1722        }
1723
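            /*
             * Splice the pathname into the still-pending sys_enter line:
             * shift the tail of entry_str (including the terminating NUL) to
             * the right and copy the, possibly truncated, filename into the
             * slot recorded earlier at ttrace->filename.entry_str_pos.
             */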
1724        to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1725        pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1726        memmove(pos + filename_len, pos, to_move);
1727        memcpy(pos, filename, filename_len);
1728
1729        ttrace->filename.ptr = 0;
1730        ttrace->filename.entry_str_pos = 0;
1731out_put:
1732        thread__put(thread);
1733out:
1734        return 0;
1735}
1736
1737static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1738                                     union perf_event *event __maybe_unused,
1739                                     struct perf_sample *sample)
1740{
1741        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1742        double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1743        struct thread *thread = machine__findnew_thread(trace->host,
1744                                                        sample->pid,
1745                                                        sample->tid);
1746        struct thread_trace *ttrace = thread__trace(thread, trace->output);
1747
1748        if (ttrace == NULL)
1749                goto out_dump;
1750
1751        ttrace->runtime_ms += runtime_ms;
1752        trace->runtime_ms += runtime_ms;
1753out_put:
1754        thread__put(thread);
1755        return 0;
1756
1757out_dump:
1758        fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 "\n",
1759               evsel->name,
1760               perf_evsel__strval(evsel, sample, "comm"),
1761               (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1762               runtime,
1763               perf_evsel__intval(evsel, sample, "vruntime"));
1764        goto out_put;
1765}
1766
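    /*
     * Printer callback for print_binary(): emit just the character column,
     * with unprintable bytes shown as '.', ignoring the offset/hex columns,
     * so BPF output events end up as a plain character dump.
     */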
1767static void bpf_output__printer(enum binary_printer_ops op,
1768                                unsigned int val, void *extra)
1769{
1770        FILE *output = extra;
1771        unsigned char ch = (unsigned char)val;
1772
1773        switch (op) {
1774        case BINARY_PRINT_CHAR_DATA:
1775                fprintf(output, "%c", isprint(ch) ? ch : '.');
1776                break;
1777        case BINARY_PRINT_DATA_BEGIN:
1778        case BINARY_PRINT_LINE_BEGIN:
1779        case BINARY_PRINT_ADDR:
1780        case BINARY_PRINT_NUM_DATA:
1781        case BINARY_PRINT_NUM_PAD:
1782        case BINARY_PRINT_SEP:
1783        case BINARY_PRINT_CHAR_PAD:
1784        case BINARY_PRINT_LINE_END:
1785        case BINARY_PRINT_DATA_END:
1786        default:
1787                break;
1788        }
1789}
1790
1791static void bpf_output__fprintf(struct trace *trace,
1792                                struct perf_sample *sample)
1793{
1794        print_binary(sample->raw_data, sample->raw_size, 8,
1795                     bpf_output__printer, trace->output);
1796}
1797
1798static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1799                                union perf_event *event __maybe_unused,
1800                                struct perf_sample *sample)
1801{
1802        int callchain_ret = 0;
1803
1804        if (sample->callchain) {
1805                callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1806                if (callchain_ret == 0) {
1807                        if (callchain_cursor.nr < trace->min_stack)
1808                                goto out;
1809                        callchain_ret = 1;
1810                }
1811        }
1812
1813        trace__printf_interrupted_entry(trace, sample);
1814        trace__fprintf_tstamp(trace, sample->time, trace->output);
1815
1816        if (trace->trace_syscalls)
1817                fprintf(trace->output, "(         ): ");
1818
1819        fprintf(trace->output, "%s:", evsel->name);
1820
1821        if (perf_evsel__is_bpf_output(evsel)) {
1822                bpf_output__fprintf(trace, sample);
1823        } else if (evsel->tp_format) {
1824                event_format__fprintf(evsel->tp_format, sample->cpu,
1825                                      sample->raw_data, sample->raw_size,
1826                                      trace->output);
1827        }
1828
1829        fprintf(trace->output, ")\n");
1830
1831        if (callchain_ret > 0)
1832                trace__fprintf_callchain(trace, sample);
1833        else if (callchain_ret < 0)
1834                pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1835out:
1836        return 0;
1837}
1838
1839static void print_location(FILE *f, struct perf_sample *sample,
1840                           struct addr_location *al,
1841                           bool print_dso, bool print_sym)
1842{
1843
1844        if ((verbose > 0 || print_dso) && al->map)
1845                fprintf(f, "%s@", al->map->dso->long_name);
1846
1847        if ((verbose > 0 || print_sym) && al->sym)
1848                fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1849                        al->addr - al->sym->start);
1850        else if (al->map)
1851                fprintf(f, "0x%" PRIx64, al->addr);
1852        else
1853                fprintf(f, "0x%" PRIx64, sample->addr);
1854}
1855
1856static int trace__pgfault(struct trace *trace,
1857                          struct perf_evsel *evsel,
1858                          union perf_event *event __maybe_unused,
1859                          struct perf_sample *sample)
1860{
1861        struct thread *thread;
1862        struct addr_location al;
1863        char map_type = 'd';
1864        struct thread_trace *ttrace;
1865        int err = -1;
1866        int callchain_ret = 0;
1867
1868        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1869
1870        if (sample->callchain) {
1871                callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1872                if (callchain_ret == 0) {
1873                        if (callchain_cursor.nr < trace->min_stack)
1874                                goto out_put;
1875                        callchain_ret = 1;
1876                }
1877        }
1878
1879        ttrace = thread__trace(thread, trace->output);
1880        if (ttrace == NULL)
1881                goto out_put;
1882
1883        if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1884                ttrace->pfmaj++;
1885        else
1886                ttrace->pfmin++;
1887
1888        if (trace->summary_only)
1889                goto out;
1890
1891        thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
1892                              sample->ip, &al);
1893
1894        trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
1895
1896        fprintf(trace->output, "%sfault [",
1897                evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1898                "maj" : "min");
1899
1900        print_location(trace->output, sample, &al, false, true);
1901
1902        fprintf(trace->output, "] => ");
1903
1904        thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
1905                                   sample->addr, &al);
1906
1907        if (!al.map) {
1908                thread__find_addr_location(thread, sample->cpumode,
1909                                           MAP__FUNCTION, sample->addr, &al);
1910
1911                if (al.map)
1912                        map_type = 'x';
1913                else
1914                        map_type = '?';
1915        }
1916
1917        print_location(trace->output, sample, &al, true, false);
1918
1919        fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1920
1921        if (callchain_ret > 0)
1922                trace__fprintf_callchain(trace, sample);
1923        else if (callchain_ret < 0)
1924                pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1925out:
1926        err = 0;
1927out_put:
1928        thread__put(thread);
1929        return err;
1930}
1931
1932static void trace__set_base_time(struct trace *trace,
1933                                 struct perf_evsel *evsel,
1934                                 struct perf_sample *sample)
1935{
1936        /*
1937         * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1938         * and don't use sample->time unconditionally: we may end up having
1939         * some other event in the future without PERF_SAMPLE_TIME for a good
1940         * reason, i.e. we may not be interested in its timestamps, just in
1941         * the fact that it took place, picking up some piece of information
1942         * when it appears in our event stream (vfs_getname comes to mind).
1943         */
1944        if (trace->base_time == 0 && !trace->full_time &&
1945            (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1946                trace->base_time = sample->time;
1947}
1948
1949static int trace__process_sample(struct perf_tool *tool,
1950                                 union perf_event *event,
1951                                 struct perf_sample *sample,
1952                                 struct perf_evsel *evsel,
1953                                 struct machine *machine __maybe_unused)
1954{
1955        struct trace *trace = container_of(tool, struct trace, tool);
1956        struct thread *thread;
1957        int err = 0;
1958
1959        tracepoint_handler handler = evsel->handler;
1960
1961        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1962        if (thread && thread__is_filtered(thread))
1963                goto out;
1964
1965        trace__set_base_time(trace, evsel, sample);
1966
1967        if (handler) {
1968                ++trace->nr_events;
1969                handler(trace, evsel, event, sample);
1970        }
1971out:
1972        thread__put(thread);
1973        return err;
1974}
1975
1976static int trace__record(struct trace *trace, int argc, const char **argv)
1977{
1978        unsigned int rec_argc, i, j;
1979        const char **rec_argv;
1980        const char * const record_args[] = {
1981                "record",
1982                "-R",
1983                "-m", "1024",
1984                "-c", "1",
1985        };
1986
1987        const char * const sc_args[] = { "-e", };
1988        unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1989        const char * const majpf_args[] = { "-e", "major-faults" };
1990        unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1991        const char * const minpf_args[] = { "-e", "minor-faults" };
1992        unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1993
1994        /* +1 is for the event string below */
1995        rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1996                majpf_args_nr + minpf_args_nr + argc;
1997        rec_argv = calloc(rec_argc + 1, sizeof(char *));
1998
1999        if (rec_argv == NULL)
2000                return -ENOMEM;
2001
2002        j = 0;
2003        for (i = 0; i < ARRAY_SIZE(record_args); i++)
2004                rec_argv[j++] = record_args[i];
2005
2006        if (trace->trace_syscalls) {
2007                for (i = 0; i < sc_args_nr; i++)
2008                        rec_argv[j++] = sc_args[i];
2009
2010                /* event string may be different for older kernels - e.g., RHEL6 */
2011                if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2012                        rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2013                else if (is_valid_tracepoint("syscalls:sys_enter"))
2014                        rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2015                else {
2016                        pr_err("Neither raw_syscalls nor syscalls events exist.\n");
                            free(rec_argv);
2017                        return -1;
2018                }
2019        }
2020
2021        if (trace->trace_pgfaults & TRACE_PFMAJ)
2022                for (i = 0; i < majpf_args_nr; i++)
2023                        rec_argv[j++] = majpf_args[i];
2024
2025        if (trace->trace_pgfaults & TRACE_PFMIN)
2026                for (i = 0; i < minpf_args_nr; i++)
2027                        rec_argv[j++] = minpf_args[i];
2028
2029        for (i = 0; i < (unsigned int)argc; i++)
2030                rec_argv[j++] = argv[i];
2031
2032        return cmd_record(j, rec_argv);
2033}
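    /*
     * For reference, a plain "perf trace record -- <workload>" ends up
     * running roughly:
     *
     *	perf record -R -m 1024 -c 1 \
     *		-e raw_syscalls:sys_enter,raw_syscalls:sys_exit <workload>
     *
     * with the major/minor fault software events appended when page fault
     * tracing was requested.
     */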
2034
2035static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2036
2037static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2038{
2039        struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2040
2041        if (IS_ERR(evsel))
2042                return false;
2043
2044        if (perf_evsel__field(evsel, "pathname") == NULL) {
2045                perf_evsel__delete(evsel);
2046                return false;
2047        }
2048
2049        evsel->handler = trace__vfs_getname;
2050        perf_evlist__add(evlist, evsel);
2051        return true;
2052}
2053
2054static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2055{
2056        struct perf_evsel *evsel;
2057        struct perf_event_attr attr = {
2058                .type = PERF_TYPE_SOFTWARE,
2059                .mmap_data = 1,
2060        };
2061
2062        attr.config = config;
2063        attr.sample_period = 1;
2064
2065        event_attr_init(&attr);
2066
2067        evsel = perf_evsel__new(&attr);
2068        if (evsel)
2069                evsel->handler = trace__pgfault;
2070
2071        return evsel;
2072}
2073
2074static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2075{
2076        const u32 type = event->header.type;
2077        struct perf_evsel *evsel;
2078
2079        if (type != PERF_RECORD_SAMPLE) {
2080                trace__process_event(trace, trace->host, event, sample);
2081                return;
2082        }
2083
2084        evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2085        if (evsel == NULL) {
2086                fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2087                return;
2088        }
2089
2090        trace__set_base_time(trace, evsel, sample);
2091
2092        if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2093            sample->raw_data == NULL) {
2094                fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2095                       perf_evsel__name(evsel), sample->tid,
2096                       sample->cpu, sample->raw_size);
2097        } else {
2098                tracepoint_handler handler = evsel->handler;
2099                handler(trace, evsel, event, sample);
2100        }
2101}
2102
2103static int trace__add_syscall_newtp(struct trace *trace)
2104{
2105        int ret = -1;
2106        struct perf_evlist *evlist = trace->evlist;
2107        struct perf_evsel *sys_enter, *sys_exit;
2108
2109        sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2110        if (sys_enter == NULL)
2111                goto out;
2112
2113        if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2114                goto out_delete_sys_enter;
2115
2116        sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2117        if (sys_exit == NULL)
2118                goto out_delete_sys_enter;
2119
2120        if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2121                goto out_delete_sys_exit;
2122
2123        perf_evlist__add(evlist, sys_enter);
2124        perf_evlist__add(evlist, sys_exit);
2125
2126        if (callchain_param.enabled && !trace->kernel_syscallchains) {
2127                /*
2128                 * We're interested only in the user space callchain
2129                 * leading to the syscall; allow overriding that for
2130                 * debugging reasons using --kernel_syscall_callchains.
2131                 */
2132                sys_exit->attr.exclude_callchain_kernel = 1;
2133        }
2134
2135        trace->syscalls.events.sys_enter = sys_enter;
2136        trace->syscalls.events.sys_exit  = sys_exit;
2137
2138        ret = 0;
2139out:
2140        return ret;
2141
2142out_delete_sys_exit:
2143        perf_evsel__delete_priv(sys_exit);
2144out_delete_sys_enter:
2145        perf_evsel__delete_priv(sys_enter);
2146        goto out;
2147}
2148
2149static int trace__set_ev_qualifier_filter(struct trace *trace)
2150{
2151        int err = -1;
2152        struct perf_evsel *sys_exit;
2153        char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2154                                                trace->ev_qualifier_ids.nr,
2155                                                trace->ev_qualifier_ids.entries);
2156
2157        if (filter == NULL)
2158                goto out_enomem;
2159
2160        if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2161                                          filter)) {
2162                sys_exit = trace->syscalls.events.sys_exit;
2163                err = perf_evsel__append_tp_filter(sys_exit, filter);
2164        }
2165
2166        free(filter);
2167out:
2168        return err;
2169out_enomem:
2170        errno = ENOMEM;
2171        goto out;
2172}
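    /*
     * The tracepoint filter built above looks something like
     *
     *	id == 0 || id == 1 || id == 2
     *
     * or, when the qualifier was negated with '!', "id != ..." terms joined
     * by "&&"; it gets appended to both the sys_enter and sys_exit evsels.
     */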
2173
2174static int trace__run(struct trace *trace, int argc, const char **argv)
2175{
2176        struct perf_evlist *evlist = trace->evlist;
2177        struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
2178        int err = -1, i;
2179        unsigned long before;
2180        const bool forks = argc > 0;
2181        bool draining = false;
2182
2183        trace->live = true;
2184
2185        if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2186                goto out_error_raw_syscalls;
2187
2188        if (trace->trace_syscalls)
2189                trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2190
2191        if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2192                pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2193                if (pgfault_maj == NULL)
2194                        goto out_error_mem;
2195                perf_evlist__add(evlist, pgfault_maj);
2196        }
2197
2198        if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2199                pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2200                if (pgfault_min == NULL)
2201                        goto out_error_mem;
2202                perf_evlist__add(evlist, pgfault_min);
2203        }
2204
2205        if (trace->sched &&
2206            perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2207                                   trace__sched_stat_runtime))
2208                goto out_error_sched_stat_runtime;
2209
2210        err = perf_evlist__create_maps(evlist, &trace->opts.target);
2211        if (err < 0) {
2212                fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2213                goto out_delete_evlist;
2214        }
2215
2216        err = trace__symbols_init(trace, evlist);
2217        if (err < 0) {
2218                fprintf(trace->output, "Problems initializing symbol libraries!\n");
2219                goto out_delete_evlist;
2220        }
2221
2222        perf_evlist__config(evlist, &trace->opts, NULL);
2223
2224        if (callchain_param.enabled) {
2225                bool use_identifier = false;
2226
2227                if (trace->syscalls.events.sys_exit) {
2228                        perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2229                                                     &trace->opts, &callchain_param);
2230                        use_identifier = true;
2231                }
2232
2233                if (pgfault_maj) {
2234                        perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2235                        use_identifier = true;
2236                }
2237
2238                if (pgfault_min) {
2239                        perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2240                        use_identifier = true;
2241                }
2242
2243                if (use_identifier) {
2244                       /*
2245                        * Now we have evsels with different sample_ids, use
2246                        * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2247                        * from a fixed position in each ring buffer record.
2248                        *
2249                        * As of the changeset introducing this comment, this
2250                        * isn't strictly needed, as the fields that can come before
2251                        * PERF_SAMPLE_ID are all used, but we'll probably disable
2252                        * some of those for things like copying the payload of
2253                        * pointer syscall arguments, and for vfs_getname we don't
2254                        * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2255                        * here as a reminder that we need to use PERF_SAMPLE_IDENTIFIER.
2256                        */
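                           /*
                            * With PERF_SAMPLE_IDENTIFIER the id is the first
                            * u64 of every PERF_RECORD_SAMPLE (and the last one
                            * of the other record types), so the sample ->
                            * evsel lookup works without knowing each evsel's
                            * full sample_type layout.
                            */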
2257                        perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2258                        perf_evlist__reset_sample_bit(evlist, ID);
2259                }
2260        }
2261
2262        signal(SIGCHLD, sig_handler);
2263        signal(SIGINT, sig_handler);
2264
2265        if (forks) {
2266                err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2267                                                    argv, false, NULL);
2268                if (err < 0) {
2269                        fprintf(trace->output, "Couldn't run the workload!\n");
2270                        goto out_delete_evlist;
2271                }
2272        }
2273
2274        err = perf_evlist__open(evlist);
2275        if (err < 0)
2276                goto out_error_open;
2277
2278        err = bpf__apply_obj_config();
2279        if (err) {
2280                char errbuf[BUFSIZ];
2281
2282                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2283                pr_err("ERROR: Apply config to BPF failed: %s\n",
2284                         errbuf);
2285                goto out_error_open;
2286        }
2287
2288        /*
2289         * Better not use !target__has_task() here because we need to cover the
2290         * case where no threads were specified in the command line, but a
2291         * workload was, and in that case we will fill in the thread_map when
2292         * we fork the workload in perf_evlist__prepare_workload.
2293         */
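            /*
             * When the thread_map is still the dummy one (pid == -1, i.e. no
             * workload and no -p/-t), at least filter out perf trace's own
             * pid so it does not trace itself.
             */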
2294        if (trace->filter_pids.nr > 0)
2295                err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2296        else if (thread_map__pid(evlist->threads, 0) == -1)
2297                err = perf_evlist__set_filter_pid(evlist, getpid());
2298
2299        if (err < 0)
2300                goto out_error_mem;
2301
2302        if (trace->ev_qualifier_ids.nr > 0) {
2303                err = trace__set_ev_qualifier_filter(trace);
2304                if (err < 0)
2305                        goto out_errno;
2306
2307                pr_debug("event qualifier tracepoint filter: %s\n",
2308                         trace->syscalls.events.sys_exit->filter);
2309        }
2310
2311        err = perf_evlist__apply_filters(evlist, &evsel);
2312        if (err < 0)
2313                goto out_error_apply_filters;
2314
2315        err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2316        if (err < 0)
2317                goto out_error_mmap;
2318
2319        if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
2320                perf_evlist__enable(evlist);
2321
2322        if (forks)
2323                perf_evlist__start_workload(evlist);
2324
2325        if (trace->opts.initial_delay) {
2326                usleep(trace->opts.initial_delay * 1000);
2327                perf_evlist__enable(evlist);
2328        }
2329
2330        trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2331                                  evlist->threads->nr > 1 ||
2332                                  perf_evlist__first(evlist)->attr.inherit;
2333again:
2334        before = trace->nr_events;
2335
2336        for (i = 0; i < evlist->nr_mmaps; i++) {
2337                union perf_event *event;
2338
2339                while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2340                        struct perf_sample sample;
2341
2342                        ++trace->nr_events;
2343
2344                        err = perf_evlist__parse_sample(evlist, event, &sample);
2345                        if (err) {
2346                                fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2347                                goto next_event;
2348                        }
2349
2350                        trace__handle_event(trace, event, &sample);
2351next_event:
2352                        perf_evlist__mmap_consume(evlist, i);
2353
2354                        if (interrupted)
2355                                goto out_disable;
2356
2357                        if (done && !draining) {
2358                                perf_evlist__disable(evlist);
2359                                draining = true;
2360                        }
2361                }
2362        }
2363
2364        if (trace->nr_events == before) {
2365                int timeout = done ? 100 : -1;
2366
2367                if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2368                        if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2369                                draining = true;
2370
2371                        goto again;
2372                }
2373        } else {
2374                goto again;
2375        }
2376
2377out_disable:
2378        thread__zput(trace->current);
2379
2380        perf_evlist__disable(evlist);
2381
2382        if (!err) {
2383                if (trace->summary)
2384                        trace__fprintf_thread_summary(trace, trace->output);
2385
2386                if (trace->show_tool_stats) {
2387                        fprintf(trace->output, "Stats:\n "
2388                                               " vfs_getname : %" PRIu64 "\n"
2389                                               " proc_getname: %" PRIu64 "\n",
2390                                trace->stats.vfs_getname,
2391                                trace->stats.proc_getname);
2392                }
2393        }
2394
2395out_delete_evlist:
2396        perf_evlist__delete(evlist);
2397        trace->evlist = NULL;
2398        trace->live = false;
2399        return err;
2400{
2401        char errbuf[BUFSIZ];
2402
2403out_error_sched_stat_runtime:
2404        tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2405        goto out_error;
2406
2407out_error_raw_syscalls:
2408        tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2409        goto out_error;
2410
2411out_error_mmap:
2412        perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2413        goto out_error;
2414
2415out_error_open:
2416        perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2417
2418out_error:
2419        fprintf(trace->output, "%s\n", errbuf);
2420        goto out_delete_evlist;
2421
2422out_error_apply_filters:
2423        fprintf(trace->output,
2424                "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2425                evsel->filter, perf_evsel__name(evsel), errno,
2426                str_error_r(errno, errbuf, sizeof(errbuf)));
2427        goto out_delete_evlist;
2428}
2429out_error_mem:
2430        fprintf(trace->output, "Not enough memory to run!\n");
2431        goto out_delete_evlist;
2432
2433out_errno:
2434        fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2435        goto out_delete_evlist;
2436}
2437
2438static int trace__replay(struct trace *trace)
2439{
2440        const struct perf_evsel_str_handler handlers[] = {
2441                { "probe:vfs_getname",       trace__vfs_getname, },
2442        };
2443        struct perf_data_file file = {
2444                .path  = input_name,
2445                .mode  = PERF_DATA_MODE_READ,
2446                .force = trace->force,
2447        };
2448        struct perf_session *session;
2449        struct perf_evsel *evsel;
2450        int err = -1;
2451
2452        trace->tool.sample        = trace__process_sample;
2453        trace->tool.mmap          = perf_event__process_mmap;
2454        trace->tool.mmap2         = perf_event__process_mmap2;
2455        trace->tool.comm          = perf_event__process_comm;
2456        trace->tool.exit          = perf_event__process_exit;
2457        trace->tool.fork          = perf_event__process_fork;
2458        trace->tool.attr          = perf_event__process_attr;
2459        trace->tool.tracing_data  = perf_event__process_tracing_data;
2460        trace->tool.build_id      = perf_event__process_build_id;
2461        trace->tool.namespaces    = perf_event__process_namespaces;
2462
2463        trace->tool.ordered_events = true;
2464        trace->tool.ordering_requires_timestamps = true;
2465
2466        /* add tid to output */
2467        trace->multiple_threads = true;
2468
2469        session = perf_session__new(&file, false, &trace->tool);
2470        if (session == NULL)
2471                return -1;
2472
2473        if (trace->opts.target.pid)
2474                symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2475
2476        if (trace->opts.target.tid)
2477                symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2478
2479        if (symbol__init(&session->header.env) < 0)
2480                goto out;
2481
2482        trace->host = &session->machines.host;
2483
2484        err = perf_session__set_tracepoints_handlers(session, handlers);
2485        if (err)
2486                goto out;
2487
2488        evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2489                                                     "raw_syscalls:sys_enter");
2490        /* older kernels have syscalls tp versus raw_syscalls */
2491        if (evsel == NULL)
2492                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2493                                                             "syscalls:sys_enter");
2494
2495        if (evsel &&
2496            (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2497            perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2498                pr_err("Error initializing raw_syscalls:sys_enter event\n");
2499                goto out;
2500        }
2501
2502        evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2503                                                     "raw_syscalls:sys_exit");
2504        if (evsel == NULL)
2505                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2506                                                             "syscalls:sys_exit");
2507        if (evsel &&
2508            (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2509            perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2510                pr_err("Error initializing raw_syscalls:sys_exit event\n");
2511                goto out;
2512        }
2513
2514        evlist__for_each_entry(session->evlist, evsel) {
2515                if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2516                    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2517                     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2518                     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2519                        evsel->handler = trace__pgfault;
2520        }
2521
2522        setup_pager();
2523
2524        err = perf_session__process_events(session);
2525        if (err)
2526                pr_err("Failed to process events, error %d\n", err);
2527
2528        else if (trace->summary)
2529                trace__fprintf_thread_summary(trace, trace->output);
2530
2531out:
2532        perf_session__delete(session);
2533
2534        return err;
2535}
2536
2537static size_t trace__fprintf_threads_header(FILE *fp)
2538{
2539        size_t printed;
2540
2541        printed  = fprintf(fp, "\n Summary of events:\n\n");
2542
2543        return printed;
2544}
2545
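    /*
     * Sort the per-syscall stats by total time spent: msecs below is the
     * call count times the mean duration, converted to milliseconds.
     */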
2546DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2547        struct stats    *stats;
2548        double          msecs;
2549        int             syscall;
2550)
2551{
2552        struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2553        struct stats *stats = source->priv;
2554
2555        entry->syscall = source->i;
2556        entry->stats   = stats;
2557        entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2558}
2559
2560static size_t thread__dump_stats(struct thread_trace *ttrace,
2561                                 struct trace *trace, FILE *fp)
2562{
2563        size_t printed = 0;
2564        struct syscall *sc;
2565        struct rb_node *nd;
2566        DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
2567
2568        if (syscall_stats == NULL)
2569                return 0;
2570
2571        printed += fprintf(fp, "\n");
2572
2573        printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2574        printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2575        printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2576
2577        resort_rb__for_each_entry(nd, syscall_stats) {
2578                struct stats *stats = syscall_stats_entry->stats;
2579                if (stats) {
2580                        double min = (double)(stats->min) / NSEC_PER_MSEC;
2581                        double max = (double)(stats->max) / NSEC_PER_MSEC;
2582                        double avg = avg_stats(stats);
2583                        double pct;
2584                        u64 n = (u64) stats->n;
2585
2586                        pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2587                        avg /= NSEC_PER_MSEC;
2588
2589                        sc = &trace->syscalls.table[syscall_stats_entry->syscall];
2590                        printed += fprintf(fp, "   %-15s", sc->name);
2591                        printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2592                                           n, syscall_stats_entry->msecs, min, avg);
2593                        printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2594                }
2595        }
2596
2597        resort_rb__delete(syscall_stats);
2598        printed += fprintf(fp, "\n\n");
2599
2600        return printed;
2601}
2602
2603static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2604{
2605        size_t printed = 0;
2606        struct thread_trace *ttrace = thread__priv(thread);
2607        double ratio;
2608
2609        if (ttrace == NULL)
2610                return 0;
2611
2612        ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2613
2614        printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2615        printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2616        printed += fprintf(fp, "%.1f%%", ratio);
2617        if (ttrace->pfmaj)
2618                printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2619        if (ttrace->pfmin)
2620                printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2621        if (trace->sched)
2622                printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2623        else if (fputc('\n', fp) != EOF)
2624                ++printed;
2625
2626        printed += thread__dump_stats(ttrace, trace, fp);
2627
2628        return printed;
2629}
2630
2631static unsigned long thread__nr_events(struct thread_trace *ttrace)
2632{
2633        return ttrace ? ttrace->nr_events : 0;
2634}
2635
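    /*
     * Resort the machine's threads by per-thread event count for the summary
     * below.
     */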
2636DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2637        struct thread *thread;
2638)
2639{
2640        entry->thread = rb_entry(nd, struct thread, rb_node);
2641}
2642
2643static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2644{
2645        DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2646        size_t printed = trace__fprintf_threads_header(fp);
2647        struct rb_node *nd;
2648
2649        if (threads == NULL) {
2650                fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2651                return 0;
2652        }
2653
2654        resort_rb__for_each_entry(nd, threads)
2655                printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
2656
2657        resort_rb__delete(threads);
2658
2659        return printed;
2660}
2661
2662static int trace__set_duration(const struct option *opt, const char *str,
2663                               int unset __maybe_unused)
2664{
2665        struct trace *trace = opt->value;
2666
2667        trace->duration_filter = atof(str);
2668        return 0;
2669}
2670
2671static int trace__set_filter_pids(const struct option *opt, const char *str,
2672                                  int unset __maybe_unused)
2673{
2674        int ret = -1;
2675        size_t i;
2676        struct trace *trace = opt->value;
2677        /*
2678         * FIXME: introduce an intarray class, plainly parse the csv and create
2679         * a { int nr, int entries[] } struct...
2680         */
2681        struct intlist *list = intlist__new(str);
2682
2683        if (list == NULL)
2684                return -1;
2685
2686        i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2687        trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2688
2689        if (trace->filter_pids.entries == NULL)
2690                goto out;
2691
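            /*
             * Slot 0 always holds perf trace's own pid, filtered out in
             * addition to the user supplied ones.
             */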
2692        trace->filter_pids.entries[0] = getpid();
2693
2694        for (i = 1; i < trace->filter_pids.nr; ++i)
2695                trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2696
2697        intlist__delete(list);
2698        ret = 0;
2699out:
2700        return ret;
2701}
2702
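    /*
     * Open the -o/--output file, first rotating a pre-existing non-empty
     * file of the same name to "<name>.old".
     */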
2703static int trace__open_output(struct trace *trace, const char *filename)
2704{
2705        struct stat st;
2706
2707        if (!stat(filename, &st) && st.st_size) {
2708                char oldname[PATH_MAX];
2709
2710                scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2711                unlink(oldname);
2712                rename(filename, oldname);
2713        }
2714
2715        trace->output = fopen(filename, "w");
2716
2717        return trace->output == NULL ? -errno : 0;
2718}
2719
2720static int parse_pagefaults(const struct option *opt, const char *str,
2721                            int unset __maybe_unused)
2722{
2723        int *trace_pgfaults = opt->value;
2724
2725        if (strcmp(str, "all") == 0)
2726                *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2727        else if (strcmp(str, "maj") == 0)
2728                *trace_pgfaults |= TRACE_PFMAJ;
2729        else if (strcmp(str, "min") == 0)
2730                *trace_pgfaults |= TRACE_PFMIN;
2731        else
2732                return -1;
2733
2734        return 0;
2735}
2736
2737static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2738{
2739        struct perf_evsel *evsel;
2740
2741        evlist__for_each_entry(evlist, evsel)
2742                evsel->handler = handler;
2743}
2744
2745/*
2746 * XXX: Hackish, just splitting the combined -e+--event list into syscalls
2747 * (raw_syscalls:{sys_{enter,exit}}) + events (tracepoints, HW, SW, etc.) to
2748 * use the existing facilities unchanged (trace->ev_qualifier + parse_options()).
2749 *
2750 * It'd be better to introduce a parse_options() variant that would return a
2751 * list with the terms it didn't match to an event...
2752 */
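    /*
     * e.g. "-e close,sched:sched_switch" is split into "close", which
     * resolves via syscalltbl__id() and becomes part of trace->ev_qualifier,
     * and "sched:sched_switch", which is handed to parse_events_option() and
     * ends up in trace->evlist.
     */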
2753static int trace__parse_events_option(const struct option *opt, const char *str,
2754                                      int unset __maybe_unused)
2755{
2756        struct trace *trace = (struct trace *)opt->value;
2757        const char *s = str;
2758        char *sep = NULL, *lists[2] = { NULL, NULL, };
2759        int len = strlen(str), err = -1, list;
2760        char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2761        char group_name[PATH_MAX];
2762
2763        if (strace_groups_dir == NULL)
2764                return -1;
2765
2766        if (*s == '!') {
2767                ++s;
2768                trace->not_ev_qualifier = true;
2769        }
2770
2771        while (1) {
2772                if ((sep = strchr(s, ',')) != NULL)
2773                        *sep = '\0';
2774
2775                list = 0;
2776                if (syscalltbl__id(trace->sctbl, s) >= 0) {
2777                        list = 1;
2778                } else {
2779                        path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2780                        if (access(group_name, R_OK) == 0)
2781                                list = 1;
2782                }
2783
2784                if (lists[list]) {
2785                        sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2786                } else {
2787                        lists[list] = malloc(len);
2788                        if (lists[list] == NULL)
2789                                goto out;
2790                        strcpy(lists[list], s);
2791                }
2792
2793                if (!sep)
2794                        break;
2795
2796                *sep = ',';
2797                s = sep + 1;
2798        }
2799
2800        if (lists[1] != NULL) {
2801                struct strlist_config slist_config = {
2802                        .dirname = strace_groups_dir,
2803                };
2804
2805                trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2806                if (trace->ev_qualifier == NULL) {
2807                        fputs("Not enough memory to parse event qualifier\n", trace->output);
2808                        goto out;
2809                }
2810
2811                if (trace__validate_ev_qualifier(trace))
2812                        goto out;
2813        }
2814
2815        err = 0;
2816
2817        if (lists[0]) {
2818                struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2819                                               "event selector. use 'perf list' to list available events",
2820                                               parse_events_option);
2821                err = parse_events_option(&o, lists[0], 0);
2822        }
2823out:
2824        if (sep)
2825                *sep = ',';
2826
2827        return err;
2828}
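
    /*
     * Editorial note, not part of the original source: an example of the split
     * performed above (hypothetical invocation):
     *
     *   perf trace -e open,close,sched:sched_switch
     *
     * "open" and "close" resolve via syscalltbl__id() and land in
     * trace->ev_qualifier (lists[1]), while "sched:sched_switch" is forwarded
     * to parse_events_option() (lists[0]), as if given to a plain --event.
     */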
2829
2830int cmd_trace(int argc, const char **argv)
2831{
2832        const char *trace_usage[] = {
2833                "perf trace [<options>] [<command>]",
2834                "perf trace [<options>] -- <command> [<options>]",
2835                "perf trace record [<options>] [<command>]",
2836                "perf trace record [<options>] -- <command> [<options>]",
2837                NULL
2838        };
2839        struct trace trace = {
2840                .syscalls = {
2841                        .max = -1,
2842                },
2843                .opts = {
2844                        .target = {
2845                                .uid       = UINT_MAX,
2846                                .uses_mmap = true,
2847                        },
2848                        .user_freq     = UINT_MAX,
2849                        .user_interval = ULLONG_MAX,
2850                        .no_buffering  = true,
2851                        .mmap_pages    = UINT_MAX,
2852                        .proc_map_timeout  = 500,
2853                },
2854                .output = stderr,
2855                .show_comm = true,
2856                .trace_syscalls = true,
2857                .kernel_syscallchains = false,
2858                .max_stack = UINT_MAX,
2859        };
2860        const char *output_name = NULL;
2861        const struct option trace_options[] = {
2862        OPT_CALLBACK('e', "event", &trace, "event",
2863                     "event/syscall selector. use 'perf list' to list available events",
2864                     trace__parse_events_option),
2865        OPT_BOOLEAN(0, "comm", &trace.show_comm,
2866                    "show the thread COMM next to its id"),
2867        OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2868        OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2869                     trace__parse_events_option),
2870        OPT_STRING('o', "output", &output_name, "file", "output file name"),
2871        OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2872        OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2873                    "trace events on existing process id"),
2874        OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2875                    "trace events on existing thread id"),
2876        OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2877                     "pids to filter (by the kernel)", trace__set_filter_pids),
2878        OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2879                    "system-wide collection from all CPUs"),
2880        OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2881                    "list of cpus to monitor"),
2882        OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2883                    "child tasks do not inherit counters"),
2884        OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2885                     "number of mmap data pages",
2886                     perf_evlist__parse_mmap_pages),
2887        OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2888                   "user to profile"),
2889        OPT_CALLBACK(0, "duration", &trace, "float",
2890                     "show only events with duration > N.M ms",
2891                     trace__set_duration),
2892        OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2893        OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2894        OPT_BOOLEAN('T', "time", &trace.full_time,
2895                    "Show full timestamp, not time relative to first start"),
2896        OPT_BOOLEAN('s', "summary", &trace.summary_only,
2897                    "Show only syscall summary with statistics"),
2898        OPT_BOOLEAN('S', "with-summary", &trace.summary,
2899                    "Show all syscalls and summary with statistics"),
2900        OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2901                     "Trace pagefaults", parse_pagefaults, "maj"),
2902        OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2903        OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2904        OPT_CALLBACK(0, "call-graph", &trace.opts,
2905                     "record_mode[,record_size]", record_callchain_help,
2906                     &record_parse_callchain_opt),
2907        OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2908                    "Show the kernel callchains on the syscall exit path"),
2909        OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2910                     "Set the minimum stack depth when parsing the callchain, "
2911                     "anything below the specified depth will be ignored."),
2912        OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2913                     "Set the maximum stack depth when parsing the callchain, "
2914                     "anything beyond the specified depth will be ignored. "
2915                     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2916        OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2917                        "per thread proc mmap processing timeout in ms"),
2918        OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2919                     "ms to wait before starting measurement after program "
2920                     "start"),
2921        OPT_END()
2922        };
2923        bool __maybe_unused max_stack_user_set = true;
2924        bool mmap_pages_user_set = true;
2925        const char * const trace_subcommands[] = { "record", NULL };
2926        int err;
2927        char bf[BUFSIZ];
2928
2929        signal(SIGSEGV, sighandler_dump_stack);
2930        signal(SIGFPE, sighandler_dump_stack);
2931
2932        trace.evlist = perf_evlist__new();
2933        trace.sctbl = syscalltbl__new();
2934
2935        if (trace.evlist == NULL || trace.sctbl == NULL) {
2936                pr_err("Not enough memory to run!\n");
2937                err = -ENOMEM;
2938                goto out;
2939        }
2940
2941        argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2942                                 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2943
2944        err = bpf__setup_stdout(trace.evlist);
2945        if (err) {
2946                bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2947                pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2948                goto out;
2949        }
2950
2951        err = -1;
2952
2953        if (trace.trace_pgfaults) {
2954                trace.opts.sample_address = true;
2955                trace.opts.sample_time = true;
2956        }
2957
2958        if (trace.opts.mmap_pages == UINT_MAX)
2959                mmap_pages_user_set = false;
2960
2961        if (trace.max_stack == UINT_MAX) {
2962                trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
2963                max_stack_user_set = false;
2964        }
2965
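            /*
             * Editorial note, not part of the original source: when --min-stack
             * or --max-stack is given without an explicit --call-graph, and
             * syscalls are being traced, DWARF unwinding is selected here
             * (only when perf was built with DWARF unwind support), e.g.
             * (hypothetical invocation):
             *
             *   perf trace --max-stack 8 sleep 1
             */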
2966#ifdef HAVE_DWARF_UNWIND_SUPPORT
2967        if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
2968                record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2969#endif
2970
2971        if (callchain_param.enabled) {
2972                if (!mmap_pages_user_set && geteuid() == 0)
2973                        trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2974
2975                symbol_conf.use_callchain = true;
2976        }
2977
2978        if (trace.evlist->nr_entries > 0)
2979                evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2980
2981        if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2982                return trace__record(&trace, argc-1, &argv[1]);
2983
2984        /* summary_only implies summary option, but don't overwrite summary if set */
2985        if (trace.summary_only)
2986                trace.summary = trace.summary_only;
2987
2988        if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2989            trace.evlist->nr_entries == 0 /* Was -e/--event used? */) {
2990                pr_err("Please specify something to trace.\n");
2991                return -1;
2992        }
2993
2994        if (!trace.trace_syscalls && trace.ev_qualifier) {
2995                pr_err("The -e option can't be used with --no-syscalls.\n");
2996                goto out;
2997        }
2998
2999        if (output_name != NULL) {
3000                err = trace__open_output(&trace, output_name);
3001                if (err < 0) {
3002                        perror("failed to create output file");
3003                        goto out;
3004                }
3005        }
3006
3007        trace.open_id = syscalltbl__id(trace.sctbl, "open");
3008
3009        err = target__validate(&trace.opts.target);
3010        if (err) {
3011                target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3012                fprintf(trace.output, "%s", bf);
3013                goto out_close;
3014        }
3015
3016        err = target__parse_uid(&trace.opts.target);
3017        if (err) {
3018                target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3019                fprintf(trace.output, "%s", bf);
3020                goto out_close;
3021        }
3022
3023        if (!argc && target__none(&trace.opts.target))
3024                trace.opts.target.system_wide = true;
3025
3026        if (input_name)
3027                err = trace__replay(&trace);
3028        else
3029                err = trace__run(&trace, argc, argv);
3030
3031out_close:
3032        if (output_name != NULL)
3033                fclose(trace.output);
3034out:
3035        return err;
3036}
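
    /*
     * Editorial note, not part of the original source listing: a few
     * invocations exercising the option handling above (workloads and pids
     * are hypothetical):
     *
     *   perf trace ls                            # strace-like view of 'ls'
     *   perf trace -s sleep 1                    # only the per-syscall summary
     *   perf trace -e open,sched:sched_switch -p 1234
     *   perf trace --no-syscalls -F maj -a       # page faults only, system wide
     *   perf trace record -- ls ; perf trace -i perf.data
     */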
3037