linux/tools/perf/builtin-record.c
<<
>>
Prefs
   1/*
   2 * builtin-record.c
   3 *
   4 * Builtin record command: Record the profile of a workload
   5 * (or a CPU, or a PID) into the perf.data output file - for
   6 * later analysis via perf report.
   7 */
   8#include "builtin.h"
   9
  10#include "perf.h"
  11
  12#include "util/build-id.h"
  13#include "util/util.h"
  14#include "util/parse-options.h"
  15#include "util/parse-events.h"
  16
  17#include "util/callchain.h"
  18#include "util/cgroup.h"
  19#include "util/header.h"
  20#include "util/event.h"
  21#include "util/evlist.h"
  22#include "util/evsel.h"
  23#include "util/debug.h"
  24#include "util/session.h"
  25#include "util/tool.h"
  26#include "util/symbol.h"
  27#include "util/cpumap.h"
  28#include "util/thread_map.h"
  29#include "util/data.h"
  30#include "util/perf_regs.h"
  31#include "util/auxtrace.h"
  32#include "util/parse-branch-options.h"
  33#include "util/parse-regs-options.h"
  34
  35#include <unistd.h>
  36#include <sched.h>
  37#include <sys/mman.h>
  38
  39
/*
 * State of one "perf record" run.  The perf_tool is embedded so that tool
 * callbacks can get back to the enclosing record with container_of().
 */
struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        /* running byte total, bumped by record__write() */
        u64                     bytes_written;
        struct perf_data_file   file;
        struct auxtrace_record  *itr;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        /* argv[0] as handed to __cmd_record() */
        const char              *progname;
        /* when non-zero, __cmd_record() switches perf to SCHED_FIFO at this priority */
        int                     realtime_prio;
        bool                    no_buildid;
        bool                    no_buildid_cache;
        /*
         * Bumped for every non-empty buffer read while recording; reset and
         * recounted per sample event when the build-ids are processed.
         */
        long                    samples;
};
  54
  55static int record__write(struct record *rec, void *bf, size_t size)
  56{
  57        if (perf_data_file__write(rec->session->file, bf, size) < 0) {
  58                pr_err("failed to write perf data, error: %m\n");
  59                return -1;
  60        }
  61
  62        rec->bytes_written += size;
  63        return 0;
  64}
  65
  66static int process_synthesized_event(struct perf_tool *tool,
  67                                     union perf_event *event,
  68                                     struct perf_sample *sample __maybe_unused,
  69                                     struct machine *machine __maybe_unused)
  70{
  71        struct record *rec = container_of(tool, struct record, tool);
  72        return record__write(rec, event, event->header.size);
  73}
  74
  75static int record__mmap_read(struct record *rec, int idx)
  76{
  77        struct perf_mmap *md = &rec->evlist->mmap[idx];
  78        u64 head = perf_mmap__read_head(md);
  79        u64 old = md->prev;
  80        unsigned char *data = md->base + page_size;
  81        unsigned long size;
  82        void *buf;
  83        int rc = 0;
  84
  85        if (old == head)
  86                return 0;
  87
  88        rec->samples++;
  89
  90        size = head - old;
  91
  92        if ((old & md->mask) + size != (head & md->mask)) {
  93                buf = &data[old & md->mask];
  94                size = md->mask + 1 - (old & md->mask);
  95                old += size;
  96
  97                if (record__write(rec, buf, size) < 0) {
  98                        rc = -1;
  99                        goto out;
 100                }
 101        }
 102
 103        buf = &data[old & md->mask];
 104        size = head - old;
 105        old += size;
 106
 107        if (record__write(rec, buf, size) < 0) {
 108                rc = -1;
 109                goto out;
 110        }
 111
 112        md->prev = old;
 113        perf_evlist__mmap_consume(rec->evlist, idx);
 114out:
 115        return rc;
 116}
 117
/* Flags shared between the signal handlers and the recording loop. */
/* set by the signal handlers; makes the recording loop wind down */
static volatile int done;
/* last non-SIGCHLD signal seen (or the workload's terminating signal); -1 if none */
static volatile int signr = -1;
/* set on SIGCHLD and when the workload failed to exec */
static volatile int child_finished;
static volatile int auxtrace_snapshot_enabled;
static volatile int auxtrace_snapshot_err;
static volatile int auxtrace_record__snapshot_started;
 124
 125static void sig_handler(int sig)
 126{
 127        if (sig == SIGCHLD)
 128                child_finished = 1;
 129        else
 130                signr = sig;
 131
 132        done = 1;
 133}
 134
 135static void record__sig_exit(void)
 136{
 137        if (signr == -1)
 138                return;
 139
 140        signal(signr, SIG_DFL);
 141        raise(signr);
 142}
 143
 144#ifdef HAVE_AUXTRACE_SUPPORT
 145
 146static int record__process_auxtrace(struct perf_tool *tool,
 147                                    union perf_event *event, void *data1,
 148                                    size_t len1, void *data2, size_t len2)
 149{
 150        struct record *rec = container_of(tool, struct record, tool);
 151        struct perf_data_file *file = &rec->file;
 152        size_t padding;
 153        u8 pad[8] = {0};
 154
 155        if (!perf_data_file__is_pipe(file)) {
 156                off_t file_offset;
 157                int fd = perf_data_file__fd(file);
 158                int err;
 159
 160                file_offset = lseek(fd, 0, SEEK_CUR);
 161                if (file_offset == -1)
 162                        return -1;
 163                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
 164                                                     event, file_offset);
 165                if (err)
 166                        return err;
 167        }
 168
 169        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
 170        padding = (len1 + len2) & 7;
 171        if (padding)
 172                padding = 8 - padding;
 173
 174        record__write(rec, event, event->header.size);
 175        record__write(rec, data1, len1);
 176        if (len2)
 177                record__write(rec, data2, len2);
 178        record__write(rec, &pad, padding);
 179
 180        return 0;
 181}
 182
 183static int record__auxtrace_mmap_read(struct record *rec,
 184                                      struct auxtrace_mmap *mm)
 185{
 186        int ret;
 187
 188        ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
 189                                  record__process_auxtrace);
 190        if (ret < 0)
 191                return ret;
 192
 193        if (ret)
 194                rec->samples++;
 195
 196        return 0;
 197}
 198
 199static int record__auxtrace_mmap_read_snapshot(struct record *rec,
 200                                               struct auxtrace_mmap *mm)
 201{
 202        int ret;
 203
 204        ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
 205                                           record__process_auxtrace,
 206                                           rec->opts.auxtrace_snapshot_size);
 207        if (ret < 0)
 208                return ret;
 209
 210        if (ret)
 211                rec->samples++;
 212
 213        return 0;
 214}
 215
 216static int record__auxtrace_read_snapshot_all(struct record *rec)
 217{
 218        int i;
 219        int rc = 0;
 220
 221        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
 222                struct auxtrace_mmap *mm =
 223                                &rec->evlist->mmap[i].auxtrace_mmap;
 224
 225                if (!mm->base)
 226                        continue;
 227
 228                if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
 229                        rc = -1;
 230                        goto out;
 231                }
 232        }
 233out:
 234        return rc;
 235}
 236
 237static void record__read_auxtrace_snapshot(struct record *rec)
 238{
 239        pr_debug("Recording AUX area tracing snapshot\n");
 240        if (record__auxtrace_read_snapshot_all(rec) < 0) {
 241                auxtrace_snapshot_err = -1;
 242        } else {
 243                auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
 244                if (!auxtrace_snapshot_err)
 245                        auxtrace_snapshot_enabled = 1;
 246        }
 247}
 248
 249#else
 250
/* Stub for builds without HAVE_AUXTRACE_SUPPORT: nothing to read, always 0. */
static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct auxtrace_mmap *mm __maybe_unused)
{
        return 0;
}
 257
/* Stub for builds without HAVE_AUXTRACE_SUPPORT: does nothing. */
static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}
 262
/* Stub for builds without HAVE_AUXTRACE_SUPPORT: always returns 0. */
static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
        return 0;
}
 268
 269#endif
 270
 271static int record__open(struct record *rec)
 272{
 273        char msg[512];
 274        struct perf_evsel *pos;
 275        struct perf_evlist *evlist = rec->evlist;
 276        struct perf_session *session = rec->session;
 277        struct record_opts *opts = &rec->opts;
 278        int rc = 0;
 279
 280        perf_evlist__config(evlist, opts);
 281
 282        evlist__for_each(evlist, pos) {
 283try_again:
 284                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
 285                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
 286                                if (verbose)
 287                                        ui__warning("%s\n", msg);
 288                                goto try_again;
 289                        }
 290
 291                        rc = -errno;
 292                        perf_evsel__open_strerror(pos, &opts->target,
 293                                                  errno, msg, sizeof(msg));
 294                        ui__error("%s\n", msg);
 295                        goto out;
 296                }
 297        }
 298
 299        if (perf_evlist__apply_filters(evlist, &pos)) {
 300                error("failed to set filter \"%s\" on event %s with %d (%s)\n",
 301                        pos->filter, perf_evsel__name(pos), errno,
 302                        strerror_r(errno, msg, sizeof(msg)));
 303                rc = -1;
 304                goto out;
 305        }
 306
 307        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
 308                                 opts->auxtrace_mmap_pages,
 309                                 opts->auxtrace_snapshot_mode) < 0) {
 310                if (errno == EPERM) {
 311                        pr_err("Permission error mapping pages.\n"
 312                               "Consider increasing "
 313                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
 314                               "or try again with a smaller value of -m/--mmap_pages.\n"
 315                               "(current value: %u,%u)\n",
 316                               opts->mmap_pages, opts->auxtrace_mmap_pages);
 317                        rc = -errno;
 318                } else {
 319                        pr_err("failed to mmap with %d (%s)\n", errno,
 320                                strerror_r(errno, msg, sizeof(msg)));
 321                        rc = -errno;
 322                }
 323                goto out;
 324        }
 325
 326        session->evlist = evlist;
 327        perf_session__set_id_hdr_size(session);
 328out:
 329        return rc;
 330}
 331
 332static int process_sample_event(struct perf_tool *tool,
 333                                union perf_event *event,
 334                                struct perf_sample *sample,
 335                                struct perf_evsel *evsel,
 336                                struct machine *machine)
 337{
 338        struct record *rec = container_of(tool, struct record, tool);
 339
 340        rec->samples++;
 341
 342        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
 343}
 344
 345static int process_buildids(struct record *rec)
 346{
 347        struct perf_data_file *file  = &rec->file;
 348        struct perf_session *session = rec->session;
 349
 350        if (file->size == 0)
 351                return 0;
 352
 353        /*
 354         * During this process, it'll load kernel map and replace the
 355         * dso->long_name to a real pathname it found.  In this case
 356         * we prefer the vmlinux path like
 357         *   /lib/modules/3.16.4/build/vmlinux
 358         *
 359         * rather than build-id path (in debug directory).
 360         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
 361         */
 362        symbol_conf.ignore_vmlinux_buildid = true;
 363
 364        return perf_session__process_events(session);
 365}
 366
 367static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
 368{
 369        int err;
 370        struct perf_tool *tool = data;
 371        /*
 372         *As for guest kernel when processing subcommand record&report,
 373         *we arrange module mmap prior to guest kernel mmap and trigger
 374         *a preload dso because default guest module symbols are loaded
 375         *from guest kallsyms instead of /lib/modules/XXX/XXX. This
 376         *method is used to avoid symbol missing when the first addr is
 377         *in module instead of in guest kernel.
 378         */
 379        err = perf_event__synthesize_modules(tool, process_synthesized_event,
 380                                             machine);
 381        if (err < 0)
 382                pr_err("Couldn't record guest kernel [%d]'s reference"
 383                       " relocation symbol.\n", machine->pid);
 384
 385        /*
 386         * We use _stext for guest kernel because guest kernel's /proc/kallsyms
 387         * have no _text sometimes.
 388         */
 389        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 390                                                 machine);
 391        if (err < 0)
 392                pr_err("Couldn't record guest kernel [%d]'s reference"
 393                       " relocation symbol.\n", machine->pid);
 394}
 395
/*
 * Header-only PERF_RECORD_FINISHED_ROUND event; record__mmap_read_all()
 * appends it after every pass that wrote data.
 */
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};
 400
 401static int record__mmap_read_all(struct record *rec)
 402{
 403        u64 bytes_written = rec->bytes_written;
 404        int i;
 405        int rc = 0;
 406
 407        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
 408                struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
 409
 410                if (rec->evlist->mmap[i].base) {
 411                        if (record__mmap_read(rec, i) != 0) {
 412                                rc = -1;
 413                                goto out;
 414                        }
 415                }
 416
 417                if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
 418                    record__auxtrace_mmap_read(rec, mm) != 0) {
 419                        rc = -1;
 420                        goto out;
 421                }
 422        }
 423
 424        /*
 425         * Mark the round finished in case we wrote
 426         * at least one event.
 427         */
 428        if (bytes_written != rec->bytes_written)
 429                rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
 430
 431out:
 432        return rc;
 433}
 434
 435static void record__init_features(struct record *rec)
 436{
 437        struct perf_session *session = rec->session;
 438        int feat;
 439
 440        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
 441                perf_header__set_feat(&session->header, feat);
 442
 443        if (rec->no_buildid)
 444                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
 445
 446        if (!have_tracepoints(&rec->evlist->entries))
 447                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
 448
 449        if (!rec->opts.branch_stack)
 450                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
 451
 452        if (!rec->opts.full_auxtrace)
 453                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
 454}
 455
 456static volatile int workload_exec_errno;
 457
 458/*
 459 * perf_evlist__prepare_workload will send a SIGUSR1
 460 * if the fork fails, since we asked by setting its
 461 * want_signal to true.
 462 */
 463static void workload_exec_failed_signal(int signo __maybe_unused,
 464                                        siginfo_t *info,
 465                                        void *ucontext __maybe_unused)
 466{
 467        workload_exec_errno = info->si_value.sival_int;
 468        done = 1;
 469        child_finished = 1;
 470}
 471
/* Forward declaration: installed for SIGUSR2 by __cmd_record() in AUX snapshot mode. */
static void snapshot_sig_handler(int sig);
 473
 474static int __cmd_record(struct record *rec, int argc, const char **argv)
 475{
 476        int err;
 477        int status = 0;
 478        unsigned long waking = 0;
 479        const bool forks = argc > 0;
 480        struct machine *machine;
 481        struct perf_tool *tool = &rec->tool;
 482        struct record_opts *opts = &rec->opts;
 483        struct perf_data_file *file = &rec->file;
 484        struct perf_session *session;
 485        bool disabled = false, draining = false;
 486        int fd;
 487
 488        rec->progname = argv[0];
 489
 490        atexit(record__sig_exit);
 491        signal(SIGCHLD, sig_handler);
 492        signal(SIGINT, sig_handler);
 493        signal(SIGTERM, sig_handler);
 494        if (rec->opts.auxtrace_snapshot_mode)
 495                signal(SIGUSR2, snapshot_sig_handler);
 496        else
 497                signal(SIGUSR2, SIG_IGN);
 498
 499        session = perf_session__new(file, false, tool);
 500        if (session == NULL) {
 501                pr_err("Perf session creation failed.\n");
 502                return -1;
 503        }
 504
 505        fd = perf_data_file__fd(file);
 506        rec->session = session;
 507
 508        record__init_features(rec);
 509
 510        if (forks) {
 511                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
 512                                                    argv, file->is_pipe,
 513                                                    workload_exec_failed_signal);
 514                if (err < 0) {
 515                        pr_err("Couldn't run the workload!\n");
 516                        status = err;
 517                        goto out_delete_session;
 518                }
 519        }
 520
 521        if (record__open(rec) != 0) {
 522                err = -1;
 523                goto out_child;
 524        }
 525
 526        /*
 527         * Normally perf_session__new would do this, but it doesn't have the
 528         * evlist.
 529         */
 530        if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
 531                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
 532                rec->tool.ordered_events = false;
 533        }
 534
 535        if (!rec->evlist->nr_groups)
 536                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
 537
 538        if (file->is_pipe) {
 539                err = perf_header__write_pipe(fd);
 540                if (err < 0)
 541                        goto out_child;
 542        } else {
 543                err = perf_session__write_header(session, rec->evlist, fd, false);
 544                if (err < 0)
 545                        goto out_child;
 546        }
 547
 548        if (!rec->no_buildid
 549            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
 550                pr_err("Couldn't generate buildids. "
 551                       "Use --no-buildid to profile anyway.\n");
 552                err = -1;
 553                goto out_child;
 554        }
 555
 556        machine = &session->machines.host;
 557
 558        if (file->is_pipe) {
 559                err = perf_event__synthesize_attrs(tool, session,
 560                                                   process_synthesized_event);
 561                if (err < 0) {
 562                        pr_err("Couldn't synthesize attrs.\n");
 563                        goto out_child;
 564                }
 565
 566                if (have_tracepoints(&rec->evlist->entries)) {
 567                        /*
 568                         * FIXME err <= 0 here actually means that
 569                         * there were no tracepoints so its not really
 570                         * an error, just that we don't need to
 571                         * synthesize anything.  We really have to
 572                         * return this more properly and also
 573                         * propagate errors that now are calling die()
 574                         */
 575                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
 576                                                                  process_synthesized_event);
 577                        if (err <= 0) {
 578                                pr_err("Couldn't record tracing data.\n");
 579                                goto out_child;
 580                        }
 581                        rec->bytes_written += err;
 582                }
 583        }
 584
 585        if (rec->opts.full_auxtrace) {
 586                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
 587                                        session, process_synthesized_event);
 588                if (err)
 589                        goto out_delete_session;
 590        }
 591
 592        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 593                                                 machine);
 594        if (err < 0)
 595                pr_err("Couldn't record kernel reference relocation symbol\n"
 596                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
 597                       "Check /proc/kallsyms permission or run as root.\n");
 598
 599        err = perf_event__synthesize_modules(tool, process_synthesized_event,
 600                                             machine);
 601        if (err < 0)
 602                pr_err("Couldn't record kernel module information.\n"
 603                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
 604                       "Check /proc/modules permission or run as root.\n");
 605
 606        if (perf_guest) {
 607                machines__process_guests(&session->machines,
 608                                         perf_event__synthesize_guest_os, tool);
 609        }
 610
 611        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
 612                                            process_synthesized_event, opts->sample_address,
 613                                            opts->proc_map_timeout);
 614        if (err != 0)
 615                goto out_child;
 616
 617        if (rec->realtime_prio) {
 618                struct sched_param param;
 619
 620                param.sched_priority = rec->realtime_prio;
 621                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
 622                        pr_err("Could not set realtime priority.\n");
 623                        err = -1;
 624                        goto out_child;
 625                }
 626        }
 627
 628        /*
 629         * When perf is starting the traced process, all the events
 630         * (apart from group members) have enable_on_exec=1 set,
 631         * so don't spoil it by prematurely enabling them.
 632         */
 633        if (!target__none(&opts->target) && !opts->initial_delay)
 634                perf_evlist__enable(rec->evlist);
 635
 636        /*
 637         * Let the child rip
 638         */
 639        if (forks)
 640                perf_evlist__start_workload(rec->evlist);
 641
 642        if (opts->initial_delay) {
 643                usleep(opts->initial_delay * 1000);
 644                perf_evlist__enable(rec->evlist);
 645        }
 646
 647        auxtrace_snapshot_enabled = 1;
 648        for (;;) {
 649                int hits = rec->samples;
 650
 651                if (record__mmap_read_all(rec) < 0) {
 652                        auxtrace_snapshot_enabled = 0;
 653                        err = -1;
 654                        goto out_child;
 655                }
 656
 657                if (auxtrace_record__snapshot_started) {
 658                        auxtrace_record__snapshot_started = 0;
 659                        if (!auxtrace_snapshot_err)
 660                                record__read_auxtrace_snapshot(rec);
 661                        if (auxtrace_snapshot_err) {
 662                                pr_err("AUX area tracing snapshot failed\n");
 663                                err = -1;
 664                                goto out_child;
 665                        }
 666                }
 667
 668                if (hits == rec->samples) {
 669                        if (done || draining)
 670                                break;
 671                        err = perf_evlist__poll(rec->evlist, -1);
 672                        /*
 673                         * Propagate error, only if there's any. Ignore positive
 674                         * number of returned events and interrupt error.
 675                         */
 676                        if (err > 0 || (err < 0 && errno == EINTR))
 677                                err = 0;
 678                        waking++;
 679
 680                        if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
 681                                draining = true;
 682                }
 683
 684                /*
 685                 * When perf is starting the traced process, at the end events
 686                 * die with the process and we wait for that. Thus no need to
 687                 * disable events in this case.
 688                 */
 689                if (done && !disabled && !target__none(&opts->target)) {
 690                        auxtrace_snapshot_enabled = 0;
 691                        perf_evlist__disable(rec->evlist);
 692                        disabled = true;
 693                }
 694        }
 695        auxtrace_snapshot_enabled = 0;
 696
 697        if (forks && workload_exec_errno) {
 698                char msg[STRERR_BUFSIZE];
 699                const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
 700                pr_err("Workload failed: %s\n", emsg);
 701                err = -1;
 702                goto out_child;
 703        }
 704
 705        if (!quiet)
 706                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
 707
 708out_child:
 709        if (forks) {
 710                int exit_status;
 711
 712                if (!child_finished)
 713                        kill(rec->evlist->workload.pid, SIGTERM);
 714
 715                wait(&exit_status);
 716
 717                if (err < 0)
 718                        status = err;
 719                else if (WIFEXITED(exit_status))
 720                        status = WEXITSTATUS(exit_status);
 721                else if (WIFSIGNALED(exit_status))
 722                        signr = WTERMSIG(exit_status);
 723        } else
 724                status = err;
 725
 726        /* this will be recalculated during process_buildids() */
 727        rec->samples = 0;
 728
 729        if (!err && !file->is_pipe) {
 730                rec->session->header.data_size += rec->bytes_written;
 731                file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
 732
 733                if (!rec->no_buildid) {
 734                        process_buildids(rec);
 735                        /*
 736                         * We take all buildids when the file contains
 737                         * AUX area tracing data because we do not decode the
 738                         * trace because it would take too long.
 739                         */
 740                        if (rec->opts.full_auxtrace)
 741                                dsos__hit_all(rec->session);
 742                }
 743                perf_session__write_header(rec->session, rec->evlist, fd, true);
 744        }
 745
 746        if (!err && !quiet) {
 747                char samples[128];
 748
 749                if (rec->samples && !rec->opts.full_auxtrace)
 750                        scnprintf(samples, sizeof(samples),
 751                                  " (%" PRIu64 " samples)", rec->samples);
 752                else
 753                        samples[0] = '\0';
 754
 755                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n",
 756                        perf_data_file__size(file) / 1024.0 / 1024.0,
 757                        file->path, samples);
 758        }
 759
 760out_delete_session:
 761        perf_session__delete(session);
 762        return status;
 763}
 764
 765static void callchain_debug(void)
 766{
 767        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
 768
 769        pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
 770
 771        if (callchain_param.record_mode == CALLCHAIN_DWARF)
 772                pr_debug("callchain: stack dump size %d\n",
 773                         callchain_param.dump_size);
 774}
 775
 776int record_parse_callchain_opt(const struct option *opt,
 777                               const char *arg,
 778                               int unset)
 779{
 780        int ret;
 781        struct record_opts *record = (struct record_opts *)opt->value;
 782
 783        record->callgraph_set = true;
 784        callchain_param.enabled = !unset;
 785
 786        /* --no-call-graph */
 787        if (unset) {
 788                callchain_param.record_mode = CALLCHAIN_NONE;
 789                pr_debug("callchain: disabled\n");
 790                return 0;
 791        }
 792
 793        ret = parse_callchain_record_opt(arg, &callchain_param);
 794        if (!ret)
 795                callchain_debug();
 796
 797        return ret;
 798}
 799
 800int record_callchain_opt(const struct option *opt,
 801                         const char *arg __maybe_unused,
 802                         int unset __maybe_unused)
 803{
 804        struct record_opts *record = (struct record_opts *)opt->value;
 805
 806        record->callgraph_set = true;
 807        callchain_param.enabled = true;
 808
 809        if (callchain_param.record_mode == CALLCHAIN_NONE)
 810                callchain_param.record_mode = CALLCHAIN_FP;
 811
 812        callchain_debug();
 813        return 0;
 814}
 815
 816static int perf_record_config(const char *var, const char *value, void *cb)
 817{
 818        if (!strcmp(var, "record.call-graph"))
 819                var = "call-graph.record-mode"; /* fall-through */
 820
 821        return perf_default_config(var, value, cb);
 822}
 823
/* One entry of the clock name -> clock id lookup table used by parse_clockid(). */
struct clockid_map {
        const char *name;
        int clockid;
};

/* Build a table entry mapping name n to clock id c. */
#define CLOCKID_MAP(n, c)       \
        { .name = n, .clockid = (c), }

/* Table terminator: an entry whose name is NULL. */
#define CLOCKID_END     { .name = NULL, }
 833
 834
/*
 * Provide the clock ids that the system headers may not define; we need to
 * build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif
 847
/*
 * Clock names accepted by parse_clockid() (compared case-insensitively,
 * with an optional "CLOCK_" prefix).  Terminated by CLOCKID_END.
 */
static const struct clockid_map clockids[] = {
        /* available for all events, NMI safe */
        CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

        /* available for some events */
        CLOCKID_MAP("realtime", CLOCK_REALTIME),
        CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
        CLOCKID_MAP("tai", CLOCK_TAI),

        /* short aliases, available for the lazy */
        CLOCKID_MAP("mono", CLOCK_MONOTONIC),
        CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
        CLOCKID_MAP("real", CLOCK_REALTIME),
        CLOCKID_MAP("boot", CLOCK_BOOTTIME),

        CLOCKID_END,
};
 866
 867static int parse_clockid(const struct option *opt, const char *str, int unset)
 868{
 869        struct record_opts *opts = (struct record_opts *)opt->value;
 870        const struct clockid_map *cm;
 871        const char *ostr = str;
 872
 873        if (unset) {
 874                opts->use_clockid = 0;
 875                return 0;
 876        }
 877
 878        /* no arg passed */
 879        if (!str)
 880                return 0;
 881
 882        /* no setting it twice */
 883        if (opts->use_clockid)
 884                return -1;
 885
 886        opts->use_clockid = true;
 887
 888        /* if its a number, we're done */
 889        if (sscanf(str, "%d", &opts->clockid) == 1)
 890                return 0;
 891
 892        /* allow a "CLOCK_" prefix to the name */
 893        if (!strncasecmp(str, "CLOCK_", 6))
 894                str += 6;
 895
 896        for (cm = clockids; cm->name; cm++) {
 897                if (!strcasecmp(str, cm->name)) {
 898                        opts->clockid = cm->clockid;
 899                        return 0;
 900                }
 901        }
 902
 903        opts->use_clockid = false;
 904        ui__warning("unknown clockid %s, check man page\n", ostr);
 905        return -1;
 906}
 907
 908static int record__parse_mmap_pages(const struct option *opt,
 909                                    const char *str,
 910                                    int unset __maybe_unused)
 911{
 912        struct record_opts *opts = opt->value;
 913        char *s, *p;
 914        unsigned int mmap_pages;
 915        int ret;
 916
 917        if (!str)
 918                return -EINVAL;
 919
 920        s = strdup(str);
 921        if (!s)
 922                return -ENOMEM;
 923
 924        p = strchr(s, ',');
 925        if (p)
 926                *p = '\0';
 927
 928        if (*s) {
 929                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
 930                if (ret)
 931                        goto out_free;
 932                opts->mmap_pages = mmap_pages;
 933        }
 934
 935        if (!p) {
 936                ret = 0;
 937                goto out_free;
 938        }
 939
 940        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
 941        if (ret)
 942                goto out_free;
 943
 944        opts->auxtrace_mmap_pages = mmap_pages;
 945
 946out_free:
 947        free(s);
 948        return ret;
 949}
 950
 951static const char * const __record_usage[] = {
 952        "perf record [<options>] [<command>]",
 953        "perf record [<options>] -- <command> [<options>]",
 954        NULL
 955};
 956const char * const *record_usage = __record_usage;
 957
 958/*
 959 * XXX Ideally would be local to cmd_record() and passed to a record__new
 960 * because we need to have access to it in record__exit, that is called
 961 * after cmd_record() exits, but since record_options need to be accessible to
 962 * builtin-script, leave it here.
 963 *
 * At least we don't touch it in all the other functions here directly.
 965 *
 966 * Just say no to tons of global variables, sigh.
 967 */
 968static struct record record = {
 969        .opts = {
 970                .sample_time         = true,
 971                .mmap_pages          = UINT_MAX,
 972                .user_freq           = UINT_MAX,
 973                .user_interval       = ULLONG_MAX,
 974                .freq                = 4000,
 975                .target              = {
 976                        .uses_mmap   = true,
 977                        .default_per_cpu = true,
 978                },
 979                .proc_map_timeout     = 500,
 980        },
 981        .tool = {
 982                .sample         = process_sample_event,
 983                .fork           = perf_event__process_fork,
 984                .exit           = perf_event__process_exit,
 985                .comm           = perf_event__process_comm,
 986                .mmap           = perf_event__process_mmap,
 987                .mmap2          = perf_event__process_mmap2,
 988                .ordered_events = true,
 989        },
 990};
 991
 992#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "
 993
 994#ifdef HAVE_DWARF_UNWIND_SUPPORT
 995const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr";
 996#else
 997const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr";
 998#endif
 999
1000/*
1001 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1002 * with it and switch to use the library functions in perf_evlist that came
1003 * from builtin-record.c, i.e. use record_opts,
1004 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1005 * using pipes, etc.
1006 */
1007struct option __record_options[] = {
1008        OPT_CALLBACK('e', "event", &record.evlist, "event",
1009                     "event selector. use 'perf list' to list available events",
1010                     parse_events_option),
1011        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1012                     "event filter", parse_filter),
1013        OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1014                           NULL, "don't record events from perf itself",
1015                           exclude_perf),
1016        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1017                    "record events on existing process id"),
1018        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1019                    "record events on existing thread id"),
1020        OPT_INTEGER('r', "realtime", &record.realtime_prio,
1021                    "collect data with this RT SCHED_FIFO priority"),
1022        OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1023                    "collect data without buffering"),
1024        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1025                    "collect raw sample records from all opened counters"),
1026        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1027                            "system-wide collection from all CPUs"),
1028        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1029                    "list of cpus to monitor"),
1030        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1031        OPT_STRING('o', "output", &record.file.path, "file",
1032                    "output file name"),
1033        OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1034                        &record.opts.no_inherit_set,
1035                        "child tasks do not inherit counters"),
1036        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1037        OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1038                     "number of mmap data pages and AUX area tracing mmap pages",
1039                     record__parse_mmap_pages),
1040        OPT_BOOLEAN(0, "group", &record.opts.group,
1041                    "put the counters into a counter group"),
1042        OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
1043                           NULL, "enables call-graph recording" ,
1044                           &record_callchain_opt),
1045        OPT_CALLBACK(0, "call-graph", &record.opts,
1046                     "mode[,dump_size]", record_callchain_help,
1047                     &record_parse_callchain_opt),
1048        OPT_INCR('v', "verbose", &verbose,
1049                    "be more verbose (show counter open errors, etc)"),
1050        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1051        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1052                    "per thread counts"),
1053        OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1054        OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1055                        &record.opts.sample_time_set,
1056                        "Record the sample timestamps"),
1057        OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1058        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1059                    "don't sample"),
1060        OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
1061                    "do not update the buildid cache"),
1062        OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
1063                    "do not collect buildids in perf.data"),
1064        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1065                     "monitor event in cgroup name only",
1066                     parse_cgroups),
1067        OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1068                  "ms to wait before starting measurement after program start"),
1069        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1070                   "user to profile"),
1071
1072        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1073                     "branch any", "sample any taken branches",
1074                     parse_branch_stack),
1075
1076        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1077                     "branch filter mask", "branch stack filter modes",
1078                     parse_branch_stack),
1079        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1080                    "sample by weight (on special events only)"),
1081        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1082                    "sample transaction flags (special events only)"),
1083        OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1084                    "use per-thread mmaps"),
1085        OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1086                    "sample selected machine registers on interrupt,"
1087                    " use -I ? to list register names", parse_regs),
1088        OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1089                    "Record running/enabled time of read (:S) events"),
1090        OPT_CALLBACK('k', "clockid", &record.opts,
1091        "clockid", "clockid to use for events, see clock_gettime()",
1092        parse_clockid),
1093        OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1094                          "opts", "AUX area tracing Snapshot Mode", ""),
1095        OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1096                        "per thread proc mmap processing timeout in ms"),
1097        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1098                    "Record context switch events"),
1099        OPT_END()
1100};
1101
1102struct option *record_options = __record_options;
1103
1104int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
1105{
1106        int err;
1107        struct record *rec = &record;
1108        char errbuf[BUFSIZ];
1109
1110        rec->evlist = perf_evlist__new();
1111        if (rec->evlist == NULL)
1112                return -ENOMEM;
1113
1114        perf_config(perf_record_config, rec);
1115
1116        argc = parse_options(argc, argv, record_options, record_usage,
1117                            PARSE_OPT_STOP_AT_NON_OPTION);
1118        if (!argc && target__none(&rec->opts.target))
1119                usage_with_options(record_usage, record_options);
1120
1121        if (nr_cgroups && !rec->opts.target.system_wide) {
1122                ui__error("cgroup monitoring only available in"
1123                          " system-wide mode\n");
1124                usage_with_options(record_usage, record_options);
1125        }
1126        if (rec->opts.record_switch_events &&
1127            !perf_can_record_switch_events()) {
1128                ui__error("kernel does not support recording context switch events (--switch-events option)\n");
1129                usage_with_options(record_usage, record_options);
1130        }
1131
1132        if (!rec->itr) {
1133                rec->itr = auxtrace_record__init(rec->evlist, &err);
1134                if (err)
1135                        return err;
1136        }
1137
1138        err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1139                                              rec->opts.auxtrace_snapshot_opts);
1140        if (err)
1141                return err;
1142
1143        err = -ENOMEM;
1144
1145        symbol__init(NULL);
1146
1147        if (symbol_conf.kptr_restrict)
1148                pr_warning(
1149"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1150"check /proc/sys/kernel/kptr_restrict.\n\n"
1151"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1152"file is not found in the buildid cache or in the vmlinux path.\n\n"
1153"Samples in kernel modules won't be resolved at all.\n\n"
1154"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1155"even with a suitable vmlinux or kallsyms file.\n\n");
1156
1157        if (rec->no_buildid_cache || rec->no_buildid)
1158                disable_buildid_cache();
1159
1160        if (rec->evlist->nr_entries == 0 &&
1161            perf_evlist__add_default(rec->evlist) < 0) {
1162                pr_err("Not enough memory for event selector list\n");
1163                goto out_symbol_exit;
1164        }
1165
1166        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1167                rec->opts.no_inherit = true;
1168
1169        err = target__validate(&rec->opts.target);
1170        if (err) {
1171                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1172                ui__warning("%s", errbuf);
1173        }
1174
1175        err = target__parse_uid(&rec->opts.target);
1176        if (err) {
1177                int saved_errno = errno;
1178
1179                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1180                ui__error("%s", errbuf);
1181
1182                err = -saved_errno;
1183                goto out_symbol_exit;
1184        }
1185
1186        err = -ENOMEM;
1187        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1188                usage_with_options(record_usage, record_options);
1189
1190        err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1191        if (err)
1192                goto out_symbol_exit;
1193
1194        if (record_opts__config(&rec->opts)) {
1195                err = -EINVAL;
1196                goto out_symbol_exit;
1197        }
1198
1199        err = __cmd_record(&record, argc, argv);
1200out_symbol_exit:
1201        perf_evlist__delete(rec->evlist);
1202        symbol__exit();
1203        auxtrace_record__free(rec->itr);
1204        return err;
1205}
1206
1207static void snapshot_sig_handler(int sig __maybe_unused)
1208{
1209        if (!auxtrace_snapshot_enabled)
1210                return;
1211        auxtrace_snapshot_enabled = 0;
1212        auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
1213        auxtrace_record__snapshot_started = 1;
1214}
1215