linux/tools/perf/builtin-record.c
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "asm/bug.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#include <asm/bug.h>
#include <linux/time64.h>

struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        u64                     bytes_written;
        struct perf_data_file   file;
        struct auxtrace_record  *itr;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
        int                     realtime_prio;
        bool                    no_buildid;
        bool                    no_buildid_set;
        bool                    no_buildid_cache;
        bool                    no_buildid_cache_set;
        bool                    buildid_all;
        bool                    timestamp_filename;
        bool                    switch_output;
        unsigned long long      samples;
};

static int record__write(struct record *rec, void *bf, size_t size)
{
        if (perf_data_file__write(rec->session->file, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        rec->bytes_written += size;
        return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, event, event->header.size);
}

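/*
 * Find the valid data range [*start, *end) in a backward (overwrite)
 * ring buffer.  When the kernel stops writing, the records still intact
 * sit at increasing offsets starting at 'head', so walk the event
 * headers forward from there until we either wrap a full buffer size
 * (then back off one record) or hit a zero-sized header marking unused
 * space.
 */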
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
        struct perf_event_header *pheader;
        u64 evt_head = head;
        int size = mask + 1;

        pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
        pheader = (struct perf_event_header *)(buf + (head & mask));
        *start = head;
        while (true) {
                if (evt_head - head >= (unsigned int)size) {
                        pr_debug("Finished reading backward ring buffer: rewind\n");
                        if (evt_head - head > (unsigned int)size)
                                evt_head -= pheader->size;
                        *end = evt_head;
                        return 0;
                }

                pheader = (struct perf_event_header *)(buf + (evt_head & mask));

                if (pheader->size == 0) {
                        pr_debug("Finished reading backward ring buffer: get start\n");
                        *end = evt_head;
                        return 0;
                }

                evt_head += pheader->size;
                pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
        }
        WARN_ONCE(1, "Shouldn't get here\n");
        return -1;
}

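/*
 * In forward mode the valid range is simply [old, head); only backward
 * (overwrite) buffers need the scan above to recover their boundaries.
 */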
static int
rb_find_range(void *data, int mask, u64 head, u64 old,
              u64 *start, u64 *end, bool backward)
{
        if (!backward) {
                *start = old;
                *end = head;
                return 0;
        }

        return backward_rb_find_range(data, mask, head, start, end);
}

static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
                  bool overwrite, bool backward)
{
        u64 head = perf_mmap__read_head(md);
        u64 old = md->prev;
        u64 end = head, start = old;
        unsigned char *data = md->base + page_size;
        unsigned long size;
        void *buf;
        int rc = 0;

        if (rb_find_range(data, md->mask, head,
                          old, &start, &end, backward))
                return -1;

        if (start == end)
                return 0;

        rec->samples++;

        size = end - start;
        if (size > (unsigned long)(md->mask) + 1) {
                WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

                md->prev = head;
                perf_mmap__consume(md, overwrite || backward);
                return 0;
        }

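        /*
         * Data wraps around the end of the ring buffer: copy the tail
         * part first, then fall through to copy the rest from the
         * beginning of the buffer.
         */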
        if ((start & md->mask) + size != (end & md->mask)) {
                buf = &data[start & md->mask];
                size = md->mask + 1 - (start & md->mask);
                start += size;

                if (record__write(rec, buf, size) < 0) {
                        rc = -1;
                        goto out;
                }
        }

        buf = &data[start & md->mask];
        size = end - start;
        start += size;

        if (record__write(rec, buf, size) < 0) {
                rc = -1;
                goto out;
        }

        md->prev = head;
        perf_mmap__consume(md, overwrite || backward);
out:
        return rc;
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

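/*
 * The triggers below coordinate the SIGUSR2 handler with the main event
 * loop: the handler only flips trigger state, and the loop acts on it
 * (taking an AUX snapshot or switching the output file) at a safe point.
 */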
static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;
        else
                signr = sig;

        done = 1;
}

static void record__sig_exit(void)
{
        if (signr == -1)
                return;

        signal(signr, SIG_DFL);
        raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

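/*
 * Write one AUX area trace record: the synthesized event header first,
 * then the trace bytes themselves.  data1/data2 are the two chunks of
 * the AUX ring buffer when the data wraps, and the payload is padded
 * out to an 8 byte boundary (the size in the event already includes
 * that padding).
 */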
static int record__process_auxtrace(struct perf_tool *tool,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
{
        struct record *rec = container_of(tool, struct record, tool);
        struct perf_data_file *file = &rec->file;
        size_t padding;
        u8 pad[8] = {0};

        if (!perf_data_file__is_pipe(file)) {
                off_t file_offset;
                int fd = perf_data_file__fd(file);
                int err;

                file_offset = lseek(fd, 0, SEEK_CUR);
                if (file_offset == -1)
                        return -1;
                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
                                                     event, file_offset);
                if (err)
                        return err;
        }

        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
        padding = (len1 + len2) & 7;
        if (padding)
                padding = 8 - padding;

        record__write(rec, event, event->header.size);
        record__write(rec, data1, len1);
        if (len2)
                record__write(rec, data2, len2);
        record__write(rec, &pad, padding);

        return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
                                      struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
                                  record__process_auxtrace);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
                                               struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
                                           record__process_auxtrace,
                                           rec->opts.auxtrace_snapshot_size);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm =
                                &rec->evlist->mmap[i].auxtrace_mmap;

                if (!mm->base)
                        continue;

                if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }
out:
        return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
        pr_debug("Recording AUX area tracing snapshot\n");
        if (record__auxtrace_read_snapshot_all(rec) < 0) {
                trigger_error(&auxtrace_snapshot_trigger);
        } else {
                if (auxtrace_record__snapshot_finish(rec->itr))
                        trigger_error(&auxtrace_snapshot_trigger);
                else
                        trigger_ready(&auxtrace_snapshot_trigger);
        }
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct auxtrace_mmap *mm __maybe_unused)
{
        return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
        return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
                               struct perf_evlist *evlist)
{
        struct record_opts *opts = &rec->opts;
        char msg[512];

        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %u,%u)\n",
                               opts->mmap_pages, opts->auxtrace_mmap_pages);
                        return -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                                str_error_r(errno, msg, sizeof(msg)));
                        if (errno)
                                return -errno;
                        else
                                return -EINVAL;
                }
        }
        return 0;
}

static int record__mmap(struct record *rec)
{
        return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
        char msg[512];
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct record_opts *opts = &rec->opts;
        struct perf_evsel_config_term *err_term;
        int rc = 0;

        perf_evlist__config(evlist, opts, &callchain_param);

        evlist__for_each_entry(evlist, pos) {
try_again:
                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
                                if (verbose)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }

                        rc = -errno;
                        perf_evsel__open_strerror(pos, &opts->target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);
                        goto out;
                }
        }

        if (perf_evlist__apply_filters(evlist, &pos)) {
                error("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, perf_evsel__name(pos), errno,
                        str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
                error("failed to set config \"%s\" on event %s with %d (%s)\n",
                      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
                      str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        rc = record__mmap(rec);
        if (rc)
                goto out;

        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
out:
        return rc;
}

static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct perf_evsel *evsel,
                                struct machine *machine)
{
        struct record *rec = container_of(tool, struct record, tool);

        rec->samples++;

        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
        struct perf_data_file *file  = &rec->file;
        struct perf_session *session = rec->session;

        if (file->size == 0)
                return 0;

        /*
         * During this process, it'll load the kernel map and replace
         * dso->long_name with a real pathname it found.  In this case
         * we prefer the vmlinux path like
         *   /lib/modules/3.16.4/build/vmlinux
         *
         * rather than the build-id path (in the debug directory):
         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
         */
        symbol_conf.ignore_vmlinux_buildid = true;

        /*
         * If --buildid-all is given, it marks all DSOs regardless of hits,
         * so there is no need to process samples.
         */
        if (rec->buildid_all)
                rec->tool.sample = NULL;

        return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;
        /*
         * For the guest kernel, when processing the record & report
         * subcommands, we arrange the module mmap prior to the guest
         * kernel mmap and trigger a DSO preload, because by default
         * guest module symbols are loaded from guest kallsyms instead
         * of /lib/modules/XXX/XXX.  This avoids missing symbols when
         * the first address is in a module instead of in the guest
         * kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest kernel's
         * /proc/kallsyms sometimes has no _text.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

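/*
 * PERF_RECORD_FINISHED_ROUND tells the consumer (e.g. perf report) that
 * every event written before it has been flushed from all ring buffers,
 * so events queued up to this point can safely be sorted by timestamp.
 */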
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

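/*
 * Drain every mmap'ed ring buffer of the evlist, forward or backward.
 * Backward buffers are only read once the main loop has toggled them to
 * BKW_MMAP_DATA_PENDING, and are flipped back to BKW_MMAP_EMPTY when
 * drained.
 */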
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
                                    bool backward)
{
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;
        struct perf_mmap *maps;

        if (!evlist)
                return 0;

        maps = backward ? evlist->backward_mmap : evlist->mmap;
        if (!maps)
                return 0;

        if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
                return 0;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

                if (maps[i].base) {
                        if (record__mmap_read(rec, &maps[i],
                                              evlist->overwrite, backward) != 0) {
                                rc = -1;
                                goto out;
                        }
                }

                if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
                    record__auxtrace_mmap_read(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }

        /*
         * Mark the round finished in case we wrote
         * at least one event.
         */
        if (bytes_written != rec->bytes_written)
                rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

        if (backward)
                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
        return rc;
}

static int record__mmap_read_all(struct record *rec)
{
        int err;

        err = record__mmap_read_evlist(rec, rec->evlist, false);
        if (err)
                return err;

        return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
        struct perf_session *session = rec->session;
        int feat;

        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&rec->evlist->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

        perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
        struct perf_data_file *file = &rec->file;
        int fd = perf_data_file__fd(file);

        if (file->is_pipe)
                return;

        rec->session->header.data_size += rec->bytes_written;
        file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

        if (!rec->no_buildid) {
                process_buildids(rec);

                if (rec->buildid_all)
                        dsos__hit_all(rec->session);
        }
        perf_session__write_header(rec->session, rec->evlist, fd, true);

        return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
        struct {
                struct thread_map map;
                struct thread_map_data map_data;
        } thread_map;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        thread_map.map.nr = 1;
        thread_map.map.map[0].pid = rec->evlist->workload.pid;
        thread_map.map.map[0].comm = NULL;
        return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
                                                 process_synthesized_event,
                                                 &rec->session->machines.host,
                                                 rec->opts.sample_address,
                                                 rec->opts.proc_map_timeout);
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
        struct perf_data_file *file = &rec->file;
        int fd, err;

        /* Same size as a real timestamp, e.g. "2015122520103046" */
        char timestamp[] = "InvalidTimestamp";

        record__synthesize(rec, true);
        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

        rec->samples = 0;
        record__finish_output(rec);
        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
        if (err) {
                pr_err("Failed to get current timestamp\n");
                return -EINVAL;
        }

        fd = perf_data_file__switch(file, timestamp,
                                    rec->session->header.data_offset,
                                    at_exit);
        if (fd >= 0 && !at_exit) {
                rec->bytes_written = 0;
                rec->session->header.data_size = 0;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                        file->path, timestamp);

        /* Output tracking events */
        if (!at_exit) {
                record__synthesize(rec, false);

                /*
                 * In 'perf record --switch-output' without -a,
                 * record__synthesize() in record__switch_output() won't
                 * generate tracking events because there's no thread_map
                 * in the evlist, so the newly created perf.data wouldn't
                 * contain map and comm information.
                 * Create a fake thread_map and directly call
                 * perf_event__synthesize_thread_map() for those events.
                 */
                if (target__none(&rec->opts.target))
                        record__synthesize_workload(rec, false);
        }
        return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
                                        siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
        workload_exec_errno = info->si_value.sival_int;
        done = 1;
        child_finished = 1;
}

static void snapshot_sig_handler(int sig);

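/*
 * Weak default: architectures that can convert hardware time stamps
 * (e.g. x86 TSC, see util/tsc.h above) override this to synthesize a
 * time conversion event; everywhere else it is a no-op.
 */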
int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
                            struct perf_tool *tool __maybe_unused,
                            perf_event__handler_t process __maybe_unused,
                            struct machine *machine __maybe_unused)
{
        return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
        if (evlist) {
                if (evlist->mmap && evlist->mmap[0].base)
                        return evlist->mmap[0].base;
                if (evlist->backward_mmap && evlist->backward_mmap[0].base)
                        return evlist->backward_mmap[0].base;
        }
        return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
        const struct perf_event_mmap_page *pc;

        pc = perf_evlist__pick_pc(rec->evlist);
        if (pc)
                return pc;
        return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
        struct perf_session *session = rec->session;
        struct machine *machine = &session->machines.host;
        struct perf_data_file *file = &rec->file;
        struct record_opts *opts = &rec->opts;
        struct perf_tool *tool = &rec->tool;
        int fd = perf_data_file__fd(file);
        int err = 0;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        if (file->is_pipe) {
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out;
                }

                if (have_tracepoints(&rec->evlist->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints, so it's not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out;
                        }
                        rec->bytes_written += err;
                }
        }

        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
                                          process_synthesized_event, machine);
        if (err)
                goto out;

        if (rec->opts.full_auxtrace) {
                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                        session, process_synthesized_event);
                if (err)
                        goto out;
        }

        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/modules permission or run as root.\n");

        if (perf_guest) {
                machines__process_guests(&session->machines,
                                         perf_event__synthesize_guest_os, tool);
        }

        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
                                            process_synthesized_event, opts->sample_address,
                                            opts->proc_map_timeout);
out:
        return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
        int err;
        int status = 0;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct record_opts *opts = &rec->opts;
        struct perf_data_file *file = &rec->file;
        struct perf_session *session;
        bool disabled = false, draining = false;
        int fd;

        rec->progname = argv[0];

        atexit(record__sig_exit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);

        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
                signal(SIGUSR2, snapshot_sig_handler);
                if (rec->opts.auxtrace_snapshot_mode)
                        trigger_on(&auxtrace_snapshot_trigger);
                if (rec->switch_output)
                        trigger_on(&switch_output_trigger);
        } else {
                signal(SIGUSR2, SIG_IGN);
        }

        session = perf_session__new(file, false, tool);
        if (session == NULL) {
                pr_err("Perf session creation failed.\n");
                return -1;
        }

        fd = perf_data_file__fd(file);
        rec->session = session;

        record__init_features(rec);

        if (forks) {
                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
                                                    argv, file->is_pipe,
                                                    workload_exec_failed_signal);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        status = err;
                        goto out_delete_session;
                }
        }

        if (record__open(rec) != 0) {
                err = -1;
                goto out_child;
        }

        err = bpf__apply_obj_config();
        if (err) {
                char errbuf[BUFSIZ];

                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Apply config to BPF failed: %s\n",
                       errbuf);
                goto out_child;
        }

        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
         */
        if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
                rec->tool.ordered_events = false;
        }

        if (!rec->evlist->nr_groups)
                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

        if (file->is_pipe) {
                err = perf_header__write_pipe(fd);
                if (err < 0)
                        goto out_child;
        } else {
                err = perf_session__write_header(session, rec->evlist, fd, false);
                if (err < 0)
                        goto out_child;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_child;
        }

        machine = &session->machines.host;

        err = record__synthesize(rec, false);
        if (err < 0)
                goto out_child;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_child;
                }
        }

        /*
         * When perf is starting the traced process, all the events
         * (apart from group members) have enable_on_exec=1 set,
         * so don't spoil it by prematurely enabling them.
         */
        if (!target__none(&opts->target) && !opts->initial_delay)
                perf_evlist__enable(rec->evlist);

        /*
         * Let the child rip
         */
        if (forks) {
                union perf_event *event;

                event = malloc(sizeof(event->comm) + machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Some H/W events are generated before the COMM event,
                 * which is emitted during exec(), so perf script
                 * cannot see a correct process name for those events.
                 * Synthesize the COMM event here to prevent that.
                 */
                perf_event__synthesize_comm(tool, event,
                                            rec->evlist->workload.pid,
                                            process_synthesized_event,
                                            machine);
                free(event);

                perf_evlist__start_workload(rec->evlist);
        }

        if (opts->initial_delay) {
                usleep(opts->initial_delay * USEC_PER_MSEC);
                perf_evlist__enable(rec->evlist);
        }

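        /*
         * Main event loop: drain the ring buffers, service snapshot and
         * switch-output requests, and poll until the workload exits or
         * recording is interrupted.
         */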
        trigger_ready(&auxtrace_snapshot_trigger);
        trigger_ready(&switch_output_trigger);
        for (;;) {
                unsigned long long hits = rec->samples;

                /*
                 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
                 * here: when done == true and hits != rec->samples in
                 * the previous round.
                 *
                 * perf_evlist__toggle_bkw_mmap ensures we never
                 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
                 */
                if (trigger_is_hit(&switch_output_trigger) || done || draining)
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

                if (record__mmap_read_all(rec) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
                        trigger_error(&switch_output_trigger);
                        err = -1;
                        goto out_child;
                }

                if (auxtrace_record__snapshot_started) {
                        auxtrace_record__snapshot_started = 0;
                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
                                record__read_auxtrace_snapshot(rec);
                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                                pr_err("AUX area tracing snapshot failed\n");
                                err = -1;
                                goto out_child;
                        }
                }

                if (trigger_is_hit(&switch_output_trigger)) {
                        /*
                         * If switch_output_trigger is hit, the data in
                         * the overwritable ring buffer should have been
                         * collected, so bkw_mmap_state should be set to
                         * BKW_MMAP_EMPTY.
                         *
                         * If SIGUSR2 is raised after or during
                         * record__mmap_read_all(), it didn't collect
                         * data from the overwritable ring buffer.
                         * Read again.
                         */
                        if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
                                continue;
                        trigger_ready(&switch_output_trigger);

                        /*
                         * Reenable events in overwrite ring buffer after
                         * record__mmap_read_all(): we should have collected
                         * data from it.
                         */
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

                        if (!quiet)
                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
                                        waking);
                        waking = 0;
                        fd = record__switch_output(rec, false);
                        if (fd < 0) {
                                pr_err("Failed to switch to new file\n");
                                trigger_error(&switch_output_trigger);
                                err = fd;
                                goto out_child;
                        }
                }

                if (hits == rec->samples) {
                        if (done || draining)
                                break;
                        err = perf_evlist__poll(rec->evlist, -1);
                        /*
                         * Propagate error, only if there's any. Ignore positive
                         * number of returned events and interrupt error.
                         */
                        if (err > 0 || (err < 0 && errno == EINTR))
                                err = 0;
                        waking++;

                        if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
                                draining = true;
                }

                /*
                 * When perf is starting the traced process, at the end events
                 * die with the process and we wait for that. Thus no need to
                 * disable events in this case.
                 */
                if (done && !disabled && !target__none(&opts->target)) {
                        trigger_off(&auxtrace_snapshot_trigger);
                        perf_evlist__disable(rec->evlist);
                        disabled = true;
                }
        }
        trigger_off(&auxtrace_snapshot_trigger);
        trigger_off(&switch_output_trigger);

        if (forks && workload_exec_errno) {
                char msg[STRERR_BUFSIZE];
                const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
                pr_err("Workload failed: %s\n", emsg);
                err = -1;
                goto out_child;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

out_child:
        if (forks) {
                int exit_status;

                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&exit_status);

                if (err < 0)
                        status = err;
                else if (WIFEXITED(exit_status))
                        status = WEXITSTATUS(exit_status);
                else if (WIFSIGNALED(exit_status))
                        signr = WTERMSIG(exit_status);
        } else
                status = err;

        record__synthesize(rec, true);
        /* this will be recalculated during process_buildids() */
        rec->samples = 0;

        if (!err) {
                if (!rec->timestamp_filename) {
                        record__finish_output(rec);
                } else {
                        fd = record__switch_output(rec, true);
                        if (fd < 0) {
                                status = fd;
                                goto out_delete_session;
                        }
                }
        }

        if (!err && !quiet) {
                char samples[128];
                const char *postfix = rec->timestamp_filename ?
                                        ".<timestamp>" : "";

                if (rec->samples && !rec->opts.full_auxtrace)
                        scnprintf(samples, sizeof(samples),
                                  " (%" PRIu64 " samples)", rec->samples);
                else
                        samples[0] = '\0';

                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
                        perf_data_file__size(file) / 1024.0 / 1024.0,
                        file->path, postfix, samples);
        }

out_delete_session:
        perf_session__delete(session);
        return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

        pr_debug("callchain: type %s\n", str[callchain->record_mode]);

        if (callchain->record_mode == CALLCHAIN_DWARF)
                pr_debug("callchain: stack dump size %d\n",
                         callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
                                 struct callchain_param *callchain,
                                 const char *arg, bool unset)
{
        int ret;
        callchain->enabled = !unset;

        /* --no-call-graph */
        if (unset) {
                callchain->record_mode = CALLCHAIN_NONE;
                pr_debug("callchain: disabled\n");
                return 0;
        }

        ret = parse_callchain_record_opt(arg, callchain);
        if (!ret) {
                /* Enable data address sampling for DWARF unwind. */
                if (callchain->record_mode == CALLCHAIN_DWARF)
                        record->sample_address = true;
                callchain_debug(callchain);
        }

        return ret;
}

int record_parse_callchain_opt(const struct option *opt,
                               const char *arg,
                               int unset)
{
        return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
                         const char *arg __maybe_unused,
                         int unset __maybe_unused)
{
        struct callchain_param *callchain = opt->value;

        callchain->enabled = true;

        if (callchain->record_mode == CALLCHAIN_NONE)
                callchain->record_mode = CALLCHAIN_FP;

        callchain_debug(callchain);
        return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
        struct record *rec = cb;

        if (!strcmp(var, "record.build-id")) {
                if (!strcmp(value, "cache"))
                        rec->no_buildid_cache = false;
                else if (!strcmp(value, "no-cache"))
                        rec->no_buildid_cache = true;
                else if (!strcmp(value, "skip"))
                        rec->no_buildid = true;
                else
                        return -1;
                return 0;
        }
        if (!strcmp(var, "record.call-graph"))
                var = "call-graph.record-mode"; /* fall-through */

        return perf_default_config(var, value, cb);
}

struct clockid_map {
        const char *name;
        int clockid;
};

#define CLOCKID_MAP(n, c)       \
        { .name = n, .clockid = (c), }

#define CLOCKID_END     { .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
        /* available for all events, NMI safe */
        CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

        /* available for some events */
        CLOCKID_MAP("realtime", CLOCK_REALTIME),
        CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
        CLOCKID_MAP("tai", CLOCK_TAI),

        /* available for the lazy */
        CLOCKID_MAP("mono", CLOCK_MONOTONIC),
        CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
        CLOCKID_MAP("real", CLOCK_REALTIME),
        CLOCKID_MAP("boot", CLOCK_BOOTTIME),

        CLOCKID_END,
};

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;
        const struct clockid_map *cm;
        const char *ostr = str;

        if (unset) {
                opts->use_clockid = 0;
                return 0;
        }

        /* no arg passed */
        if (!str)
                return 0;

        /* no setting it twice */
        if (opts->use_clockid)
                return -1;

        opts->use_clockid = true;

        /* if it's a number, we're done */
        if (sscanf(str, "%d", &opts->clockid) == 1)
                return 0;

        /* allow a "CLOCK_" prefix to the name */
        if (!strncasecmp(str, "CLOCK_", 6))
                str += 6;

        for (cm = clockids; cm->name; cm++) {
                if (!strcasecmp(str, cm->name)) {
                        opts->clockid = cm->clockid;
                        return 0;
                }
        }

        opts->use_clockid = false;
        ui__warning("unknown clockid %s, check man page\n", ostr);
        return -1;
}

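/*
 * Parse the -m/--mmap-pages argument "pages[,pages]": the first value
 * sizes the regular data mmaps, the optional second one the AUX area
 * tracing mmaps.
 */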
static int record__parse_mmap_pages(const struct option *opt,
                                    const char *str,
                                    int unset __maybe_unused)
{
        struct record_opts *opts = opt->value;
        char *s, *p;
        unsigned int mmap_pages;
        int ret;

        if (!str)
                return -EINVAL;

        s = strdup(str);
        if (!s)
                return -ENOMEM;

        p = strchr(s, ',');
        if (p)
                *p = '\0';

        if (*s) {
                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
                if (ret)
                        goto out_free;
                opts->mmap_pages = mmap_pages;
        }

        if (!p) {
                ret = 0;
                goto out_free;
        }

        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
        if (ret)
                goto out_free;

        opts->auxtrace_mmap_pages = mmap_pages;

out_free:
        free(s);
        return ret;
}

static const char * const __record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * record__new because we need access to it in record__exit, which is
 * called after cmd_record() exits, but since record_options needs to be
 * accessible to builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
1361static struct record record = {
1362        .opts = {
1363                .sample_time         = true,
1364                .mmap_pages          = UINT_MAX,
1365                .user_freq           = UINT_MAX,
1366                .user_interval       = ULLONG_MAX,
1367                .freq                = 4000,
1368                .target              = {
1369                        .uses_mmap   = true,
1370                        .default_per_cpu = true,
1371                },
1372                .proc_map_timeout     = 500,
1373        },
1374        .tool = {
1375                .sample         = process_sample_event,
1376                .fork           = perf_event__process_fork,
1377                .exit           = perf_event__process_exit,
1378                .comm           = perf_event__process_comm,
1379                .mmap           = perf_event__process_mmap,
1380                .mmap2          = perf_event__process_mmap2,
1381                .ordered_events = true,
1382        },
1383};
1384
1385const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1386        "\n\t\t\t\tDefault: fp";
1387
1388static bool dry_run;
1389
1390/*
1391 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1392 * with it and switch to use the library functions in perf_evlist that came
1393 * from builtin-record.c, i.e. use record_opts,
1394 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1395 * using pipes, etc.
1396 */
1397struct option __record_options[] = {
1398        OPT_CALLBACK('e', "event", &record.evlist, "event",
1399                     "event selector. use 'perf list' to list available events",
1400                     parse_events_option),
1401        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1402                     "event filter", parse_filter),
1403        OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1404                           NULL, "don't record events from perf itself",
1405                           exclude_perf),
1406        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1407                    "record events on existing process id"),
1408        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1409                    "record events on existing thread id"),
1410        OPT_INTEGER('r', "realtime", &record.realtime_prio,
1411                    "collect data with this RT SCHED_FIFO priority"),
1412        OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1413                    "collect data without buffering"),
1414        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1415                    "collect raw sample records from all opened counters"),
1416        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1417                            "system-wide collection from all CPUs"),
1418        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1419                    "list of cpus to monitor"),
1420        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1421        OPT_STRING('o', "output", &record.file.path, "file",
1422                    "output file name"),
1423        OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1424                        &record.opts.no_inherit_set,
1425                        "child tasks do not inherit counters"),
1426        OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1427                    "synthesize non-sample events at the end of output"),
1428        OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1429        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1430        OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1431                     "number of mmap data pages and AUX area tracing mmap pages",
1432                     record__parse_mmap_pages),
1433        OPT_BOOLEAN(0, "group", &record.opts.group,
1434                    "put the counters into a counter group"),
1435        OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1436                           NULL, "enables call-graph recording" ,
1437                           &record_callchain_opt),
1438        OPT_CALLBACK(0, "call-graph", &record.opts,
1439                     "record_mode[,record_size]", record_callchain_help,
1440                     &record_parse_callchain_opt),
1441        OPT_INCR('v', "verbose", &verbose,
1442                    "be more verbose (show counter open errors, etc)"),
1443        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
        OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
        OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
                        &record.opts.sample_time_set,
                        "Record the sample timestamps"),
        OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
                    "don't sample"),
        OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
                        &record.no_buildid_cache_set,
                        "do not update the buildid cache"),
        OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
                        &record.no_buildid_set,
                        "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
                  "ms to wait before starting measurement after program start"),
        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
                   "user to profile"),

        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
                     "branch any", "sample any taken branches",
                     parse_branch_stack),

        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
                     "branch filter mask", "branch stack filter modes",
                     parse_branch_stack),
        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
                    "sample by weight (on special events only)"),
        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
                    "sample transaction flags (special events only)"),
        OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
                    "use per-thread mmaps"),
        OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
                    "sample selected machine registers on interrupt,"
                    " use -I ? to list register names", parse_regs),
        OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
                    "Record running/enabled time of read (:S) events"),
        OPT_CALLBACK('k', "clockid", &record.opts,
                     "clockid", "clockid to use for events, see clock_gettime()",
                     parse_clockid),
        OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
                          "opts", "AUX area tracing Snapshot Mode", ""),
        OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
                        "per thread proc mmap processing timeout in ms"),
        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
                    "Record context switch events"),
        OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
                         "Configure all used events to run in kernel space.",
                         PARSE_OPT_EXCLUSIVE),
        OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
                         "Configure all used events to run in user space.",
                         PARSE_OPT_EXCLUSIVE),
        OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
                   "clang binary to use for compiling BPF scriptlets"),
        OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
                   "options passed to clang when compiling BPF scriptlets"),
        OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
                   "file", "vmlinux pathname"),
        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
                    "Record build-id of all DSOs regardless of hits"),
        OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
                    "append timestamp to output filename"),
        OPT_BOOLEAN(0, "switch-output", &record.switch_output,
                    "Switch output when receiving SIGUSR2"),
        OPT_BOOLEAN(0, "dry-run", &dry_run,
                    "Parse options then exit"),
        OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
        int err;
        struct record *rec = &record;
        char errbuf[BUFSIZ];

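        /*
         * Without libbpf in the build, keep the BPF-related options in
         * the table but mark them as not built in, so using them points
         * the user at the missing build flag instead of failing silently.
         */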
#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
        set_nobuild('\0', "clang-path", true);
        set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

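        /*
         * BPF prologue generation requires both DWARF and libbpf
         * support; report the most specific feature missing from this
         * build as the reason for disabling --vmlinux.
         */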
#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
        set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

        rec->evlist = perf_evlist__new();
        if (rec->evlist == NULL)
                return -ENOMEM;

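        /*
         * Apply 'record.*' settings from the perf config file first,
         * so that command line options parsed below take precedence.
         */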
        perf_config(perf_record_config, rec);

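        /*
         * Stop at the first non-option argument: anything after it is
         * the workload command line that __cmd_record() will run.
         */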
        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
        if (!argc && target__none(&rec->opts.target))
                usage_with_options(record_usage, record_options);

        if (nr_cgroups && !rec->opts.target.system_wide) {
                usage_with_options_msg(record_usage, record_options,
                        "cgroup monitoring only available in system-wide mode");
        }
        if (rec->opts.record_switch_events &&
            !perf_can_record_switch_events()) {
                ui__error("kernel does not support recording context switch events\n");
                parse_options_usage(record_usage, record_options, "switch-events", 0);
                return -EINVAL;
        }

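        /*
         * Every output switch starts a new data file, so unique,
         * timestamped file names are implied.
         */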
        if (rec->switch_output)
                rec->timestamp_filename = true;

        if (!rec->itr) {
                rec->itr = auxtrace_record__init(rec->evlist, &err);
                if (err)
                        goto out;
        }

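        /*
         * Validate the -S/--snapshot options against the AUX area
         * tracing support that was just initialized.
         */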
        err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
                                              rec->opts.auxtrace_snapshot_opts);
        if (err)
                goto out;

        /*
         * Allow aliases to facilitate the lookup of symbols for address
         * filters. Refer to auxtrace_parse_filters().
         */
        symbol_conf.allow_aliases = true;

        symbol__init(NULL);

        err = auxtrace_parse_filters(rec->evlist);
        if (err)
                goto out;

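        /* In dry-run mode, stop once options are parsed and validated. */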
        if (dry_run)
                goto out;

        err = bpf__setup_stdout(rec->evlist);
        if (err) {
                bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Setup BPF stdout failed: %s\n", errbuf);
                goto out;
        }

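        /*
         * Default error code for the failure paths below that 'goto out'
         * without setting a more specific one (e.g. a failed
         * perf_evlist__add_default()).
         */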
        err = -ENOMEM;

        if (symbol_conf.kptr_restrict)
                pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

        if (rec->no_buildid_cache || rec->no_buildid) {
                disable_buildid_cache();
        } else if (rec->switch_output) {
                /*
                 * In 'perf record --switch-output', disable buildid
                 * generation by default to reduce the data file
                 * switching overhead. Buildids are still generated if
                 * they are explicitly requested, using
                 *
                 *  perf record --switch-output --no-no-buildid \
                 *              --no-no-buildid-cache
                 *
                 * The following code is equivalent to:
                 *
                 * if ((rec->no_buildid || !rec->no_buildid_set) &&
                 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
                 *         disable_buildid_cache();
                 */
                bool disable = true;

                if (rec->no_buildid_set && !rec->no_buildid)
                        disable = false;
                if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
                        disable = false;
                if (disable) {
                        rec->no_buildid = true;
                        rec->no_buildid_cache = true;
                        disable_buildid_cache();
                }
        }

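        /*
         * In overwrite mode only the final content of the ring buffer
         * is kept, so the matching non-sample events need to be
         * synthesized at the end of the output too.
         */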
        if (record.opts.overwrite)
                record.opts.tail_synthesize = true;

        if (rec->evlist->nr_entries == 0 &&
            perf_evlist__add_default(rec->evlist) < 0) {
                pr_err("Not enough memory for event selector list\n");
                goto out;
        }

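        /*
         * When attaching to an existing thread, do not inherit counters
         * to its children unless the user asked for that explicitly.
         */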
        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
                rec->opts.no_inherit = true;

        err = target__validate(&rec->opts.target);
        if (err) {
                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__warning("%s", errbuf);
        }

        err = target__parse_uid(&rec->opts.target);
        if (err) {
                int saved_errno = errno;

                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__error("%s", errbuf);

                err = -saved_errno;
                goto out;
        }

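        /*
         * Build the CPU and thread maps for the validated target; a
         * failure here means the target specification is unusable, so
         * show the usage message.
         */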
        err = -ENOMEM;
        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
                usage_with_options(record_usage, record_options);

        err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
        if (err)
                goto out;

        /*
         * We take all buildids when the file contains AUX area tracing
         * data, because we do not decode the trace: decoding it would
         * take too long.
         */
        if (rec->opts.full_auxtrace)
                rec->buildid_all = true;

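        /*
         * Fill in the remaining defaults (e.g. sample frequency vs.
         * period) and sanity-check the resulting combination.
         */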
        if (record_opts__config(&rec->opts)) {
                err = -EINVAL;
                goto out;
        }

        err = __cmd_record(&record, argc, argv);
out:
        perf_evlist__delete(rec->evlist);
        symbol__exit();
        auxtrace_record__free(rec->itr);
        return err;
}

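/*
 * SIGUSR2 lands here during recording: kick whichever trigger is armed
 * (AUX area snapshot and/or output switching) so the main record loop
 * acts on it at a safe point.
 */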
static void snapshot_sig_handler(int sig __maybe_unused)
{
        if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
                trigger_hit(&auxtrace_snapshot_trigger);
                auxtrace_record__snapshot_started = 1;
                if (auxtrace_record__snapshot_start(record.itr))
                        trigger_error(&auxtrace_snapshot_trigger);
        }

        if (trigger_is_ready(&switch_output_trigger))
                trigger_hit(&switch_output_trigger);
}