linux/tools/perf/builtin-trace.c
<<
>>
Prefs
   1#include "builtin.h"
   2#include "util/color.h"
   3#include "util/evlist.h"
   4#include "util/machine.h"
   5#include "util/thread.h"
   6#include "util/parse-options.h"
   7#include "util/thread_map.h"
   8#include "event-parse.h"
   9
  10#include <libaudit.h>
  11#include <stdlib.h>
  12
  13static struct syscall_fmt {
  14        const char *name;
  15        const char *alias;
  16        bool       errmsg;
  17        bool       timeout;
  18} syscall_fmts[] = {
  19        { .name     = "access",     .errmsg = true, },
  20        { .name     = "arch_prctl", .errmsg = true, .alias = "prctl", },
  21        { .name     = "fstat",      .errmsg = true, .alias = "newfstat", },
  22        { .name     = "fstatat",    .errmsg = true, .alias = "newfstatat", },
  23        { .name     = "futex",      .errmsg = true, },
  24        { .name     = "open",       .errmsg = true, },
  25        { .name     = "poll",       .errmsg = true, .timeout = true, },
  26        { .name     = "ppoll",      .errmsg = true, .timeout = true, },
  27        { .name     = "read",       .errmsg = true, },
  28        { .name     = "recvfrom",   .errmsg = true, },
  29        { .name     = "select",     .errmsg = true, .timeout = true, },
  30        { .name     = "socket",     .errmsg = true, },
  31        { .name     = "stat",       .errmsg = true, .alias = "newstat", },
  32};
  33
  34static int syscall_fmt__cmp(const void *name, const void *fmtp)
  35{
  36        const struct syscall_fmt *fmt = fmtp;
  37        return strcmp(name, fmt->name);
  38}
  39
  40static struct syscall_fmt *syscall_fmt__find(const char *name)
  41{
  42        const int nmemb = ARRAY_SIZE(syscall_fmts);
  43        return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
  44}
  45
  46struct syscall {
  47        struct event_format *tp_format;
  48        const char          *name;
  49        struct syscall_fmt  *fmt;
  50};
  51
  52static size_t fprintf_duration(unsigned long t, FILE *fp)
  53{
  54        double duration = (double)t / NSEC_PER_MSEC;
  55        size_t printed = fprintf(fp, "(");
  56
  57        if (duration >= 1.0)
  58                printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
  59        else if (duration >= 0.01)
  60                printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
  61        else
  62                printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
  63        return printed + fprintf(stdout, "): ");
  64}
  65
  66struct thread_trace {
  67        u64               entry_time;
  68        u64               exit_time;
  69        bool              entry_pending;
  70        unsigned long     nr_events;
  71        char              *entry_str;
  72        double            runtime_ms;
  73};
  74
  75static struct thread_trace *thread_trace__new(void)
  76{
  77        return zalloc(sizeof(struct thread_trace));
  78}
  79
  80static struct thread_trace *thread__trace(struct thread *thread)
  81{
  82        struct thread_trace *ttrace;
  83
  84        if (thread == NULL)
  85                goto fail;
  86
  87        if (thread->priv == NULL)
  88                thread->priv = thread_trace__new();
  89                
  90        if (thread->priv == NULL)
  91                goto fail;
  92
  93        ttrace = thread->priv;
  94        ++ttrace->nr_events;
  95
  96        return ttrace;
  97fail:
  98        color_fprintf(stdout, PERF_COLOR_RED,
  99                      "WARNING: not enough memory, dropping samples!\n");
 100        return NULL;
 101}
 102
 103struct trace {
 104        int                     audit_machine;
 105        struct {
 106                int             max;
 107                struct syscall  *table;
 108        } syscalls;
 109        struct perf_record_opts opts;
 110        struct machine          host;
 111        u64                     base_time;
 112        unsigned long           nr_events;
 113        bool                    sched;
 114        bool                    multiple_threads;
 115        double                  duration_filter;
 116        double                  runtime_ms;
 117};
 118
 119static bool trace__filter_duration(struct trace *trace, double t)
 120{
 121        return t < (trace->duration_filter * NSEC_PER_MSEC);
 122}
 123
 124static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
 125{
 126        double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
 127
 128        return fprintf(fp, "%10.3f ", ts);
 129}
 130
 131static bool done = false;
 132
 133static void sig_handler(int sig __maybe_unused)
 134{
 135        done = true;
 136}
 137
 138static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
 139                                        u64 duration, u64 tstamp, FILE *fp)
 140{
 141        size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
 142        printed += fprintf_duration(duration, fp);
 143
 144        if (trace->multiple_threads)
 145                printed += fprintf(fp, "%d ", thread->pid);
 146
 147        return printed;
 148}
 149
 150static int trace__process_event(struct machine *machine, union perf_event *event)
 151{
 152        int ret = 0;
 153
 154        switch (event->header.type) {
 155        case PERF_RECORD_LOST:
 156                color_fprintf(stdout, PERF_COLOR_RED,
 157                              "LOST %" PRIu64 " events!\n", event->lost.lost);
 158                ret = machine__process_lost_event(machine, event);
 159        default:
 160                ret = machine__process_event(machine, event);
 161                break;
 162        }
 163
 164        return ret;
 165}
 166
 167static int trace__tool_process(struct perf_tool *tool __maybe_unused,
 168                               union perf_event *event,
 169                               struct perf_sample *sample __maybe_unused,
 170                               struct machine *machine)
 171{
 172        return trace__process_event(machine, event);
 173}
 174
 175static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 176{
 177        int err = symbol__init();
 178
 179        if (err)
 180                return err;
 181
 182        machine__init(&trace->host, "", HOST_KERNEL_ID);
 183        machine__create_kernel_maps(&trace->host);
 184
 185        if (perf_target__has_task(&trace->opts.target)) {
 186                err = perf_event__synthesize_thread_map(NULL, evlist->threads,
 187                                                        trace__tool_process,
 188                                                        &trace->host);
 189        } else {
 190                err = perf_event__synthesize_threads(NULL, trace__tool_process,
 191                                                     &trace->host);
 192        }
 193
 194        if (err)
 195                symbol__exit();
 196
 197        return err;
 198}
 199
 200static int trace__read_syscall_info(struct trace *trace, int id)
 201{
 202        char tp_name[128];
 203        struct syscall *sc;
 204        const char *name = audit_syscall_to_name(id, trace->audit_machine);
 205
 206        if (name == NULL)
 207                return -1;
 208
 209        if (id > trace->syscalls.max) {
 210                struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
 211
 212                if (nsyscalls == NULL)
 213                        return -1;
 214
 215                if (trace->syscalls.max != -1) {
 216                        memset(nsyscalls + trace->syscalls.max + 1, 0,
 217                               (id - trace->syscalls.max) * sizeof(*sc));
 218                } else {
 219                        memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
 220                }
 221
 222                trace->syscalls.table = nsyscalls;
 223                trace->syscalls.max   = id;
 224        }
 225
 226        sc = trace->syscalls.table + id;
 227        sc->name = name;
 228        sc->fmt  = syscall_fmt__find(sc->name);
 229
 230        snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
 231        sc->tp_format = event_format__new("syscalls", tp_name);
 232
 233        if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
 234                snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
 235                sc->tp_format = event_format__new("syscalls", tp_name);
 236        }
 237
 238        return sc->tp_format != NULL ? 0 : -1;
 239}
 240
 241static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 242                                      unsigned long *args)
 243{
 244        int i = 0;
 245        size_t printed = 0;
 246
 247        if (sc->tp_format != NULL) {
 248                struct format_field *field;
 249
 250                for (field = sc->tp_format->format.fields->next; field; field = field->next) {
 251                        printed += scnprintf(bf + printed, size - printed,
 252                                             "%s%s: %ld", printed ? ", " : "",
 253                                             field->name, args[i++]);
 254                }
 255        } else {
 256                while (i < 6) {
 257                        printed += scnprintf(bf + printed, size - printed,
 258                                             "%sarg%d: %ld",
 259                                             printed ? ", " : "", i, args[i]);
 260                        ++i;
 261                }
 262        }
 263
 264        return printed;
 265}
 266
 267typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
 268                                  struct perf_sample *sample);
 269
 270static struct syscall *trace__syscall_info(struct trace *trace,
 271                                           struct perf_evsel *evsel,
 272                                           struct perf_sample *sample)
 273{
 274        int id = perf_evsel__intval(evsel, sample, "id");
 275
 276        if (id < 0) {
 277                printf("Invalid syscall %d id, skipping...\n", id);
 278                return NULL;
 279        }
 280
 281        if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
 282            trace__read_syscall_info(trace, id))
 283                goto out_cant_read;
 284
 285        if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
 286                goto out_cant_read;
 287
 288        return &trace->syscalls.table[id];
 289
 290out_cant_read:
 291        printf("Problems reading syscall %d", id);
 292        if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
 293                printf("(%s)", trace->syscalls.table[id].name);
 294        puts(" information");
 295        return NULL;
 296}
 297
 298static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 299                            struct perf_sample *sample)
 300{
 301        char *msg;
 302        void *args;
 303        size_t printed = 0;
 304        struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
 305        struct syscall *sc = trace__syscall_info(trace, evsel, sample);
 306        struct thread_trace *ttrace = thread__trace(thread);
 307
 308        if (ttrace == NULL || sc == NULL)
 309                return -1;
 310
 311        args = perf_evsel__rawptr(evsel, sample, "args");
 312        if (args == NULL) {
 313                printf("Problems reading syscall arguments\n");
 314                return -1;
 315        }
 316
 317        ttrace = thread->priv;
 318
 319        if (ttrace->entry_str == NULL) {
 320                ttrace->entry_str = malloc(1024);
 321                if (!ttrace->entry_str)
 322                        return -1;
 323        }
 324
 325        ttrace->entry_time = sample->time;
 326        msg = ttrace->entry_str;
 327        printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
 328
 329        printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,  args);
 330
 331        if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
 332                if (!trace->duration_filter) {
 333                        trace__fprintf_entry_head(trace, thread, 1, sample->time, stdout);
 334                        printf("%-70s\n", ttrace->entry_str);
 335                }
 336        } else
 337                ttrace->entry_pending = true;
 338
 339        return 0;
 340}
 341
 342static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 343                           struct perf_sample *sample)
 344{
 345        int ret;
 346        u64 duration = 0;
 347        struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
 348        struct thread_trace *ttrace = thread__trace(thread);
 349        struct syscall *sc = trace__syscall_info(trace, evsel, sample);
 350
 351        if (ttrace == NULL || sc == NULL)
 352                return -1;
 353
 354        ret = perf_evsel__intval(evsel, sample, "ret");
 355
 356        ttrace = thread->priv;
 357
 358        ttrace->exit_time = sample->time;
 359
 360        if (ttrace->entry_time) {
 361                duration = sample->time - ttrace->entry_time;
 362                if (trace__filter_duration(trace, duration))
 363                        goto out;
 364        } else if (trace->duration_filter)
 365                goto out;
 366
 367        trace__fprintf_entry_head(trace, thread, duration, sample->time, stdout);
 368
 369        if (ttrace->entry_pending) {
 370                printf("%-70s", ttrace->entry_str);
 371        } else {
 372                printf(" ... [");
 373                color_fprintf(stdout, PERF_COLOR_YELLOW, "continued");
 374                printf("]: %s()", sc->name);
 375        }
 376
 377        if (ret < 0 && sc->fmt && sc->fmt->errmsg) {
 378                char bf[256];
 379                const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
 380                           *e = audit_errno_to_name(-ret);
 381
 382                printf(") = -1 %s %s", e, emsg);
 383        } else if (ret == 0 && sc->fmt && sc->fmt->timeout)
 384                printf(") = 0 Timeout");
 385        else
 386                printf(") = %d", ret);
 387
 388        putchar('\n');
 389out:
 390        ttrace->entry_pending = false;
 391
 392        return 0;
 393}
 394
 395static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
 396                                     struct perf_sample *sample)
 397{
 398        u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
 399        double runtime_ms = (double)runtime / NSEC_PER_MSEC;
 400        struct thread *thread = machine__findnew_thread(&trace->host, sample->tid);
 401        struct thread_trace *ttrace = thread__trace(thread);
 402
 403        if (ttrace == NULL)
 404                goto out_dump;
 405
 406        ttrace->runtime_ms += runtime_ms;
 407        trace->runtime_ms += runtime_ms;
 408        return 0;
 409
 410out_dump:
 411        printf("%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
 412               evsel->name,
 413               perf_evsel__strval(evsel, sample, "comm"),
 414               (pid_t)perf_evsel__intval(evsel, sample, "pid"),
 415               runtime,
 416               perf_evsel__intval(evsel, sample, "vruntime"));
 417        return 0;
 418}
 419
 420static int trace__run(struct trace *trace, int argc, const char **argv)
 421{
 422        struct perf_evlist *evlist = perf_evlist__new();
 423        struct perf_evsel *evsel;
 424        int err = -1, i;
 425        unsigned long before;
 426        const bool forks = argc > 0;
 427
 428        if (evlist == NULL) {
 429                printf("Not enough memory to run!\n");
 430                goto out;
 431        }
 432
 433        if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) ||
 434            perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) {
 435                printf("Couldn't read the raw_syscalls tracepoints information!\n");
 436                goto out_delete_evlist;
 437        }
 438
 439        if (trace->sched &&
 440            perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
 441                                   trace__sched_stat_runtime)) {
 442                printf("Couldn't read the sched_stat_runtime tracepoint information!\n");
 443                goto out_delete_evlist;
 444        }
 445
 446        err = perf_evlist__create_maps(evlist, &trace->opts.target);
 447        if (err < 0) {
 448                printf("Problems parsing the target to trace, check your options!\n");
 449                goto out_delete_evlist;
 450        }
 451
 452        err = trace__symbols_init(trace, evlist);
 453        if (err < 0) {
 454                printf("Problems initializing symbol libraries!\n");
 455                goto out_delete_maps;
 456        }
 457
 458        perf_evlist__config(evlist, &trace->opts);
 459
 460        signal(SIGCHLD, sig_handler);
 461        signal(SIGINT, sig_handler);
 462
 463        if (forks) {
 464                err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
 465                                                    argv, false, false);
 466                if (err < 0) {
 467                        printf("Couldn't run the workload!\n");
 468                        goto out_delete_maps;
 469                }
 470        }
 471
 472        err = perf_evlist__open(evlist);
 473        if (err < 0) {
 474                printf("Couldn't create the events: %s\n", strerror(errno));
 475                goto out_delete_maps;
 476        }
 477
 478        err = perf_evlist__mmap(evlist, UINT_MAX, false);
 479        if (err < 0) {
 480                printf("Couldn't mmap the events: %s\n", strerror(errno));
 481                goto out_close_evlist;
 482        }
 483
 484        perf_evlist__enable(evlist);
 485
 486        if (forks)
 487                perf_evlist__start_workload(evlist);
 488
 489        trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
 490again:
 491        before = trace->nr_events;
 492
 493        for (i = 0; i < evlist->nr_mmaps; i++) {
 494                union perf_event *event;
 495
 496                while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
 497                        const u32 type = event->header.type;
 498                        tracepoint_handler handler;
 499                        struct perf_sample sample;
 500
 501                        ++trace->nr_events;
 502
 503                        err = perf_evlist__parse_sample(evlist, event, &sample);
 504                        if (err) {
 505                                printf("Can't parse sample, err = %d, skipping...\n", err);
 506                                continue;
 507                        }
 508
 509                        if (trace->base_time == 0)
 510                                trace->base_time = sample.time;
 511
 512                        if (type != PERF_RECORD_SAMPLE) {
 513                                trace__process_event(&trace->host, event);
 514                                continue;
 515                        }
 516
 517                        evsel = perf_evlist__id2evsel(evlist, sample.id);
 518                        if (evsel == NULL) {
 519                                printf("Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
 520                                continue;
 521                        }
 522
 523                        if (sample.raw_data == NULL) {
 524                                printf("%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
 525                                       perf_evsel__name(evsel), sample.tid,
 526                                       sample.cpu, sample.raw_size);
 527                                continue;
 528                        }
 529
 530                        handler = evsel->handler.func;
 531                        handler(trace, evsel, &sample);
 532                }
 533        }
 534
 535        if (trace->nr_events == before) {
 536                if (done)
 537                        goto out_unmap_evlist;
 538
 539                poll(evlist->pollfd, evlist->nr_fds, -1);
 540        }
 541
 542        if (done)
 543                perf_evlist__disable(evlist);
 544
 545        goto again;
 546
 547out_unmap_evlist:
 548        perf_evlist__munmap(evlist);
 549out_close_evlist:
 550        perf_evlist__close(evlist);
 551out_delete_maps:
 552        perf_evlist__delete_maps(evlist);
 553out_delete_evlist:
 554        perf_evlist__delete(evlist);
 555out:
 556        return err;
 557}
 558
 559static size_t trace__fprintf_threads_header(FILE *fp)
 560{
 561        size_t printed;
 562
 563        printed  = fprintf(fp, "\n _____________________________________________________________________\n");
 564        printed += fprintf(fp," __)    Summary of events    (__\n\n");
 565        printed += fprintf(fp,"              [ task - pid ]     [ events ] [ ratio ]  [ runtime ]\n");
 566        printed += fprintf(fp," _____________________________________________________________________\n\n");
 567
 568        return printed;
 569}
 570
 571static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
 572{
 573        size_t printed = trace__fprintf_threads_header(fp);
 574        struct rb_node *nd;
 575
 576        for (nd = rb_first(&trace->host.threads); nd; nd = rb_next(nd)) {
 577                struct thread *thread = rb_entry(nd, struct thread, rb_node);
 578                struct thread_trace *ttrace = thread->priv;
 579                const char *color;
 580                double ratio;
 581
 582                if (ttrace == NULL)
 583                        continue;
 584
 585                ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
 586
 587                color = PERF_COLOR_NORMAL;
 588                if (ratio > 50.0)
 589                        color = PERF_COLOR_RED;
 590                else if (ratio > 25.0)
 591                        color = PERF_COLOR_GREEN;
 592                else if (ratio > 5.0)
 593                        color = PERF_COLOR_YELLOW;
 594
 595                printed += color_fprintf(fp, color, "%20s", thread->comm);
 596                printed += fprintf(fp, " - %-5d :%11lu   [", thread->pid, ttrace->nr_events);
 597                printed += color_fprintf(fp, color, "%5.1f%%", ratio);
 598                printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
 599        }
 600
 601        return printed;
 602}
 603
 604static int trace__set_duration(const struct option *opt, const char *str,
 605                               int unset __maybe_unused)
 606{
 607        struct trace *trace = opt->value;
 608
 609        trace->duration_filter = atof(str);
 610        return 0;
 611}
 612
 613int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 614{
 615        const char * const trace_usage[] = {
 616                "perf trace [<options>] [<command>]",
 617                "perf trace [<options>] -- <command> [<options>]",
 618                NULL
 619        };
 620        struct trace trace = {
 621                .audit_machine = audit_detect_machine(),
 622                .syscalls = {
 623                        . max = -1,
 624                },
 625                .opts = {
 626                        .target = {
 627                                .uid       = UINT_MAX,
 628                                .uses_mmap = true,
 629                        },
 630                        .user_freq     = UINT_MAX,
 631                        .user_interval = ULLONG_MAX,
 632                        .no_delay      = true,
 633                        .mmap_pages    = 1024,
 634                },
 635        };
 636        const struct option trace_options[] = {
 637        OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
 638                    "trace events on existing process id"),
 639        OPT_STRING(0, "tid", &trace.opts.target.tid, "tid",
 640                    "trace events on existing thread id"),
 641        OPT_BOOLEAN(0, "all-cpus", &trace.opts.target.system_wide,
 642                    "system-wide collection from all CPUs"),
 643        OPT_STRING(0, "cpu", &trace.opts.target.cpu_list, "cpu",
 644                    "list of cpus to monitor"),
 645        OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
 646                    "child tasks do not inherit counters"),
 647        OPT_UINTEGER(0, "mmap-pages", &trace.opts.mmap_pages,
 648                     "number of mmap data pages"),
 649        OPT_STRING(0, "uid", &trace.opts.target.uid_str, "user",
 650                   "user to profile"),
 651        OPT_CALLBACK(0, "duration", &trace, "float",
 652                     "show only events with duration > N.M ms",
 653                     trace__set_duration),
 654        OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
 655        OPT_END()
 656        };
 657        int err;
 658        char bf[BUFSIZ];
 659
 660        argc = parse_options(argc, argv, trace_options, trace_usage, 0);
 661
 662        err = perf_target__validate(&trace.opts.target);
 663        if (err) {
 664                perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
 665                printf("%s", bf);
 666                return err;
 667        }
 668
 669        err = perf_target__parse_uid(&trace.opts.target);
 670        if (err) {
 671                perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
 672                printf("%s", bf);
 673                return err;
 674        }
 675
 676        if (!argc && perf_target__none(&trace.opts.target))
 677                trace.opts.target.system_wide = true;
 678
 679        err = trace__run(&trace, argc, argv);
 680
 681        if (trace.sched && !err)
 682                trace__fprintf_thread_summary(&trace, stdout);
 683
 684        return err;
 685}
 686