linux/tools/perf/builtin-record.c
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>

struct switch_output {
        bool             enabled;
        bool             signal;
        unsigned long    size;
        unsigned long    time;
        const char      *str;
        bool             set;
};

struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        u64                     bytes_written;
        struct perf_data_file   file;
        struct auxtrace_record  *itr;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
        int                     realtime_prio;
        bool                    no_buildid;
        bool                    no_buildid_set;
        bool                    no_buildid_cache;
        bool                    no_buildid_cache_set;
        bool                    buildid_all;
        bool                    timestamp_filename;
        struct switch_output    switch_output;
        unsigned long long      samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

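/*
 * Predicates checking whether output switching was requested via
 * SIGUSR2 ("signal"), a size threshold, or a time threshold, and
 * whether the trigger is currently ready to fire.
 */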
static bool switch_output_signal(struct record *rec)
{
        return rec->switch_output.signal &&
               trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
        return rec->switch_output.size &&
               trigger_is_ready(&switch_output_trigger) &&
               (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
        return rec->switch_output.time &&
               trigger_is_ready(&switch_output_trigger);
}

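/*
 * Write one block of trace data to the output file, account for it in
 * bytes_written, and fire the switch-output trigger once the size
 * threshold is crossed.
 */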
static int record__write(struct record *rec, void *bf, size_t size)
{
        if (perf_data_file__write(rec->session->file, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        rec->bytes_written += size;

        if (switch_output_size(rec))
                trigger_hit(&switch_output_trigger);

        return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, event, event->header.size);
}

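/*
 * Walk a backward ring buffer from 'head' to find the valid
 * [*start, *end) byte range: advance event by event until either the
 * buffer size is exhausted (rewinding one event if we overshot) or a
 * zero-sized header marks the end of valid data.
 */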
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
        struct perf_event_header *pheader;
        u64 evt_head = head;
        int size = mask + 1;

        pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
        pheader = (struct perf_event_header *)(buf + (head & mask));
        *start = head;
        while (true) {
                if (evt_head - head >= (unsigned int)size) {
                        pr_debug("Finished reading backward ring buffer: rewind\n");
                        if (evt_head - head > (unsigned int)size)
                                evt_head -= pheader->size;
                        *end = evt_head;
                        return 0;
                }

                pheader = (struct perf_event_header *)(buf + (evt_head & mask));

                if (pheader->size == 0) {
                        pr_debug("Finished reading backward ring buffer: get start\n");
                        *end = evt_head;
                        return 0;
                }

                evt_head += pheader->size;
                pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
        }
        WARN_ONCE(1, "Shouldn't get here\n");
        return -1;
}

static int
rb_find_range(void *data, int mask, u64 head, u64 old,
              u64 *start, u64 *end, bool backward)
{
        if (!backward) {
                *start = old;
                *end = head;
                return 0;
        }

        return backward_rb_find_range(data, mask, head, start, end);
}

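/*
 * Copy the data between 'start' and 'end' out of one mmap'ed ring
 * buffer into the output file.  A range that wraps past the end of
 * the buffer is written in two chunks; if the reader fell too far
 * behind, the buffer content is dropped with a one-time warning.
 */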
static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
                  bool overwrite, bool backward)
{
        u64 head = perf_mmap__read_head(md);
        u64 old = md->prev;
        u64 end = head, start = old;
        unsigned char *data = md->base + page_size;
        unsigned long size;
        void *buf;
        int rc = 0;

        if (rb_find_range(data, md->mask, head,
                          old, &start, &end, backward))
                return -1;

        if (start == end)
                return 0;

        rec->samples++;

        size = end - start;
        if (size > (unsigned long)(md->mask) + 1) {
                WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

                md->prev = head;
                perf_mmap__consume(md, overwrite || backward);
                return 0;
        }

        if ((start & md->mask) + size != (end & md->mask)) {
                buf = &data[start & md->mask];
                size = md->mask + 1 - (start & md->mask);
                start += size;

                if (record__write(rec, buf, size) < 0) {
                        rc = -1;
                        goto out;
                }
        }

        buf = &data[start & md->mask];
        size = end - start;
        start += size;

        if (record__write(rec, buf, size) < 0) {
                rc = -1;
                goto out;
        }

        md->prev = head;
        perf_mmap__consume(md, overwrite || backward);
out:
        return rc;
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;
        else
                signr = sig;

        done = 1;
}

static void sigsegv_handler(int sig)
{
        perf_hooks__recover();
        sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
        if (signr == -1)
                return;

        signal(signr, SIG_DFL);
        raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

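/*
 * Write an AUX area tracing event and its data (possibly split in two
 * chunks by the ring buffer wrap) to the output, padding the payload
 * to an 8-byte boundary and, for non-pipe output, recording its file
 * offset in the auxtrace index.
 */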
static int record__process_auxtrace(struct perf_tool *tool,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
{
        struct record *rec = container_of(tool, struct record, tool);
        struct perf_data_file *file = &rec->file;
        size_t padding;
        u8 pad[8] = {0};

        if (!perf_data_file__is_pipe(file)) {
                off_t file_offset;
                int fd = perf_data_file__fd(file);
                int err;

                file_offset = lseek(fd, 0, SEEK_CUR);
                if (file_offset == -1)
                        return -1;
                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
                                                     event, file_offset);
                if (err)
                        return err;
        }

        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
        padding = (len1 + len2) & 7;
        if (padding)
                padding = 8 - padding;

        record__write(rec, event, event->header.size);
        record__write(rec, data1, len1);
        if (len2)
                record__write(rec, data2, len2);
        record__write(rec, &pad, padding);

        return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
                                      struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
                                  record__process_auxtrace);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
                                               struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
                                           record__process_auxtrace,
                                           rec->opts.auxtrace_snapshot_size);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm =
                                &rec->evlist->mmap[i].auxtrace_mmap;

                if (!mm->base)
                        continue;

                if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }
out:
        return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
        pr_debug("Recording AUX area tracing snapshot\n");
        if (record__auxtrace_read_snapshot_all(rec) < 0) {
                trigger_error(&auxtrace_snapshot_trigger);
        } else {
                if (auxtrace_record__snapshot_finish(rec->itr))
                        trigger_error(&auxtrace_snapshot_trigger);
                else
                        trigger_ready(&auxtrace_snapshot_trigger);
        }
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct auxtrace_mmap *mm __maybe_unused)
{
        return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
        return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
                               struct perf_evlist *evlist)
{
        struct record_opts *opts = &rec->opts;
        char msg[512];

        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap-pages.\n"
                               "(current value: %u,%u)\n",
                               opts->mmap_pages, opts->auxtrace_mmap_pages);
                        return -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                                str_error_r(errno, msg, sizeof(msg)));
                        if (errno)
                                return -errno;
                        else
                                return -EINVAL;
                }
        }
        return 0;
}

static int record__mmap(struct record *rec)
{
        return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
        char msg[BUFSIZ];
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct record_opts *opts = &rec->opts;
        struct perf_evsel_config_term *err_term;
        int rc = 0;

        perf_evlist__config(evlist, opts, &callchain_param);

        evlist__for_each_entry(evlist, pos) {
try_again:
                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
                                if (verbose > 0)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }

                        rc = -errno;
                        perf_evsel__open_strerror(pos, &opts->target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);
                        goto out;
                }
        }

        if (perf_evlist__apply_filters(evlist, &pos)) {
                pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, perf_evsel__name(pos), errno,
                        str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
                pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
                      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
                      str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        rc = record__mmap(rec);
        if (rc)
                goto out;

        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
out:
        return rc;
}

static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct perf_evsel *evsel,
                                struct machine *machine)
{
        struct record *rec = container_of(tool, struct record, tool);

        rec->samples++;

        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
        struct perf_data_file *file  = &rec->file;
        struct perf_session *session = rec->session;

        if (file->size == 0)
                return 0;

        /*
         * During this process, it'll load the kernel map and replace
         * dso->long_name with the real pathname it found.  In this
         * case we prefer the vmlinux path like
         *   /lib/modules/3.16.4/build/vmlinux
         *
         * rather than the build-id path (in the debug directory).
         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
         */
        symbol_conf.ignore_vmlinux_buildid = true;

        /*
         * If --buildid-all is given, it marks all DSOs regardless of
         * hits, so there is no need to process samples.
         */
        if (rec->buildid_all)
                rec->tool.sample = NULL;

        return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;
        /*
         * For a guest kernel, when processing the record & report
         * subcommands, we arrange the module mmaps prior to the guest
         * kernel mmap and trigger a preload of each module DSO,
         * because by default guest module symbols are loaded from
         * guest kallsyms instead of /lib/modules/XXX/XXX.  This avoids
         * missing symbols when the first address falls in a module
         * rather than in the guest kernel itself.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest
         * kernel's /proc/kallsyms sometimes has no _text.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

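/*
 * Drain either the regular or the backward mmaps of the evlist,
 * including any AUX area buffers, and emit a PERF_RECORD_FINISHED_ROUND
 * event if anything was written in this pass.
 */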
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
                                    bool backward)
{
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;
        struct perf_mmap *maps;

        if (!evlist)
                return 0;

        maps = backward ? evlist->backward_mmap : evlist->mmap;
        if (!maps)
                return 0;

        if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
                return 0;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

                if (maps[i].base) {
                        if (record__mmap_read(rec, &maps[i],
                                              evlist->overwrite, backward) != 0) {
                                rc = -1;
                                goto out;
                        }
                }

                if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
                    record__auxtrace_mmap_read(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }

        /*
         * Mark the round finished in case we wrote
         * at least one event.
         */
        if (bytes_written != rec->bytes_written)
                rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

        if (backward)
                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
        return rc;
}

static int record__mmap_read_all(struct record *rec)
{
        int err;

        err = record__mmap_read_evlist(rec, rec->evlist, false);
        if (err)
                return err;

        return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
        struct perf_session *session = rec->session;
        int feat;

        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&rec->evlist->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

        perf_header__clear_feat(&session->header, HEADER_STAT);
}

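/*
 * Finalize a non-pipe output file: account the data size, process
 * build-ids unless disabled, and rewrite the header in place.
 */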
static void
record__finish_output(struct record *rec)
{
        struct perf_data_file *file = &rec->file;
        int fd = perf_data_file__fd(file);

        if (file->is_pipe)
                return;

        rec->session->header.data_size += rec->bytes_written;
        file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

        if (!rec->no_buildid) {
                process_buildids(rec);

                if (rec->buildid_all)
                        dsos__hit_all(rec->session);
        }
        perf_session__write_header(rec->session, rec->evlist, fd, true);
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
        int err;
        struct thread_map *thread_map;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
        if (thread_map == NULL)
                return -1;

        err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
                                                 process_synthesized_event,
                                                 &rec->session->machines.host,
                                                 rec->opts.sample_address,
                                                 rec->opts.proc_map_timeout);
        thread_map__put(thread_map);
        return err;
}

static int record__synthesize(struct record *rec, bool tail);

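/*
 * Rotate the output: finalize the current perf.data, switch the data
 * file to a timestamped name and, unless we are exiting, reset the
 * write accounting and re-synthesize tracking events for the new file.
 */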
static int
record__switch_output(struct record *rec, bool at_exit)
{
        struct perf_data_file *file = &rec->file;
        int fd, err;

        /* Sized to match a timestamp like "2015122520103046" */
        char timestamp[] = "InvalidTimestamp";

        record__synthesize(rec, true);
        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

        rec->samples = 0;
        record__finish_output(rec);
        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
        if (err) {
                pr_err("Failed to get current timestamp\n");
                return -EINVAL;
        }

        fd = perf_data_file__switch(file, timestamp,
                                    rec->session->header.data_offset,
                                    at_exit);
        if (fd >= 0 && !at_exit) {
                rec->bytes_written = 0;
                rec->session->header.data_size = 0;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                        file->path, timestamp);

        /* Output tracking events */
        if (!at_exit) {
                record__synthesize(rec, false);

                /*
                 * In 'perf record --switch-output' without -a,
                 * record__synthesize() in record__switch_output() won't
                 * generate tracking events because there's no
                 * thread_map in the evlist, so the newly created
                 * perf.data would not contain map and comm information.
                 * Create a fake thread_map and directly call
                 * perf_event__synthesize_thread_map() for those events.
                 */
                if (target__none(&rec->opts.target))
                        record__synthesize_workload(rec, false);
        }
        return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
                                        siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
        workload_exec_errno = info->si_value.sival_int;
        done = 1;
        child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
                            struct perf_tool *tool __maybe_unused,
                            perf_event__handler_t process __maybe_unused,
                            struct machine *machine __maybe_unused)
{
        return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
        if (evlist) {
                if (evlist->mmap && evlist->mmap[0].base)
                        return evlist->mmap[0].base;
                if (evlist->backward_mmap && evlist->backward_mmap[0].base)
                        return evlist->backward_mmap[0].base;
        }
        return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
        const struct perf_event_mmap_page *pc;

        pc = perf_evlist__pick_pc(rec->evlist);
        if (pc)
                return pc;
        return NULL;
}

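/*
 * Synthesize the non-sample events that readers of the file need:
 * attrs and tracing data for pipe output, time conversion and auxtrace
 * info, kernel and module mmaps, guest machines and the threads of the
 * recording target.
 */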
static int record__synthesize(struct record *rec, bool tail)
{
        struct perf_session *session = rec->session;
        struct machine *machine = &session->machines.host;
        struct perf_data_file *file = &rec->file;
        struct record_opts *opts = &rec->opts;
        struct perf_tool *tool = &rec->tool;
        int fd = perf_data_file__fd(file);
        int err = 0;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        if (file->is_pipe) {
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out;
                }

                if (have_tracepoints(&rec->evlist->entries)) {
                        /*
                         * FIXME: err <= 0 here actually means that
                         * there were no tracepoints, so it's not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out;
                        }
                        rec->bytes_written += err;
                }
        }

        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
                                          process_synthesized_event, machine);
        if (err)
                goto out;

        if (rec->opts.full_auxtrace) {
                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                        session, process_synthesized_event);
                if (err)
                        goto out;
        }

        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/modules permission or run as root.\n");

        if (perf_guest) {
                machines__process_guests(&session->machines,
                                         perf_event__synthesize_guest_os, tool);
        }

        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
                                            process_synthesized_event, opts->sample_address,
                                            opts->proc_map_timeout);
out:
        return err;
}

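/*
 * The record session proper: set up signals, the session and the
 * workload, write the header, synthesize startup events, then loop
 * draining the ring buffers until done, handling auxtrace snapshots
 * and output switching along the way.
 */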
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
        int err;
        int status = 0;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct record_opts *opts = &rec->opts;
        struct perf_data_file *file = &rec->file;
        struct perf_session *session;
        bool disabled = false, draining = false;
        int fd;

        rec->progname = argv[0];

        atexit(record__sig_exit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);
        signal(SIGSEGV, sigsegv_handler);

        if (rec->opts.record_namespaces)
                tool->namespace_events = true;

        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
                signal(SIGUSR2, snapshot_sig_handler);
                if (rec->opts.auxtrace_snapshot_mode)
                        trigger_on(&auxtrace_snapshot_trigger);
                if (rec->switch_output.enabled)
                        trigger_on(&switch_output_trigger);
        } else {
                signal(SIGUSR2, SIG_IGN);
        }

        session = perf_session__new(file, false, tool);
        if (session == NULL) {
                pr_err("Perf session creation failed.\n");
                return -1;
        }

        fd = perf_data_file__fd(file);
        rec->session = session;

        record__init_features(rec);

        if (forks) {
                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
                                                    argv, file->is_pipe,
                                                    workload_exec_failed_signal);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        status = err;
                        goto out_delete_session;
                }
        }

        if (record__open(rec) != 0) {
                err = -1;
                goto out_child;
        }

        err = bpf__apply_obj_config();
        if (err) {
                char errbuf[BUFSIZ];

                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Apply config to BPF failed: %s\n",
                         errbuf);
                goto out_child;
        }

        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
         */
        if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
                rec->tool.ordered_events = false;
        }

        if (!rec->evlist->nr_groups)
                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

        if (file->is_pipe) {
                err = perf_header__write_pipe(fd);
                if (err < 0)
                        goto out_child;
        } else {
                err = perf_session__write_header(session, rec->evlist, fd, false);
                if (err < 0)
                        goto out_child;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_child;
        }

        machine = &session->machines.host;

        err = record__synthesize(rec, false);
        if (err < 0)
                goto out_child;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_child;
                }
        }

        /*
         * When perf is starting the traced process, all the events
         * (apart from group members) have enable_on_exec=1 set,
         * so don't spoil it by prematurely enabling them.
         */
        if (!target__none(&opts->target) && !opts->initial_delay)
                perf_evlist__enable(rec->evlist);

        /*
         * Let the child rip
         */
        if (forks) {
                union perf_event *event;
                pid_t tgid;

                event = malloc(sizeof(event->comm) + machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Some H/W events are generated before the COMM event,
                 * which is emitted during exec(), so perf script cannot
                 * see a correct process name for those events.
                 * Synthesize a COMM event to prevent that.
                 */
                tgid = perf_event__synthesize_comm(tool, event,
                                                   rec->evlist->workload.pid,
                                                   process_synthesized_event,
                                                   machine);
                free(event);

                if (tgid == -1)
                        goto out_child;

                event = malloc(sizeof(event->namespaces) +
                               (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
                               machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Synthesize NAMESPACES event for the command specified.
                 */
                perf_event__synthesize_namespaces(tool, event,
                                                  rec->evlist->workload.pid,
                                                  tgid, process_synthesized_event,
                                                  machine);
                free(event);

                perf_evlist__start_workload(rec->evlist);
        }

        if (opts->initial_delay) {
                usleep(opts->initial_delay * USEC_PER_MSEC);
                perf_evlist__enable(rec->evlist);
        }

        trigger_ready(&auxtrace_snapshot_trigger);
        trigger_ready(&switch_output_trigger);
        perf_hooks__invoke_record_start();
        for (;;) {
                unsigned long long hits = rec->samples;

                /*
                 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
                 * here: when done == true and hits != rec->samples in
                 * the previous round.
                 *
                 * perf_evlist__toggle_bkw_mmap() ensures we never
                 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
                 */
                if (trigger_is_hit(&switch_output_trigger) || done || draining)
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

                if (record__mmap_read_all(rec) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
                        trigger_error(&switch_output_trigger);
                        err = -1;
                        goto out_child;
                }

                if (auxtrace_record__snapshot_started) {
                        auxtrace_record__snapshot_started = 0;
                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
                                record__read_auxtrace_snapshot(rec);
                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                                pr_err("AUX area tracing snapshot failed\n");
                                err = -1;
                                goto out_child;
                        }
                }

                if (trigger_is_hit(&switch_output_trigger)) {
                        /*
                         * If switch_output_trigger is hit, the data in
                         * the overwritable ring buffer should have been
                         * collected, so bkw_mmap_state should be set to
                         * BKW_MMAP_EMPTY.
                         *
                         * If SIGUSR2 was raised after or during
                         * record__mmap_read_all(), it didn't collect
                         * data from the overwritable ring buffer.
                         * Read again.
                         */
                        if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
                                continue;
                        trigger_ready(&switch_output_trigger);

                        /*
                         * Reenable events in overwrite ring buffer after
                         * record__mmap_read_all(): we should have collected
                         * data from it.
                         */
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

                        if (!quiet)
                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
                                        waking);
                        waking = 0;
                        fd = record__switch_output(rec, false);
                        if (fd < 0) {
                                pr_err("Failed to switch to new file\n");
                                trigger_error(&switch_output_trigger);
                                err = fd;
                                goto out_child;
                        }

                        /* re-arm the alarm */
                        if (rec->switch_output.time)
                                alarm(rec->switch_output.time);
                }

                if (hits == rec->samples) {
                        if (done || draining)
                                break;
                        err = perf_evlist__poll(rec->evlist, -1);
                        /*
                         * Propagate the error only if there is one.
                         * Ignore a positive number of returned events
                         * and interrupted poll (EINTR).
                         */
                        if (err > 0 || (err < 0 && errno == EINTR))
                                err = 0;
                        waking++;

                        if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
                                draining = true;
                }

                /*
                 * When perf is starting the traced process, the events
                 * die with the process at the end and we wait for that,
                 * so there is no need to disable them in this case.
                 */
                if (done && !disabled && !target__none(&opts->target)) {
                        trigger_off(&auxtrace_snapshot_trigger);
                        perf_evlist__disable(rec->evlist);
                        disabled = true;
                }
        }
        trigger_off(&auxtrace_snapshot_trigger);
        trigger_off(&switch_output_trigger);

        if (forks && workload_exec_errno) {
                char msg[STRERR_BUFSIZE];
                const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
                pr_err("Workload failed: %s\n", emsg);
                err = -1;
                goto out_child;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

out_child:
        if (forks) {
                int exit_status;

                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&exit_status);

                if (err < 0)
                        status = err;
                else if (WIFEXITED(exit_status))
                        status = WEXITSTATUS(exit_status);
                else if (WIFSIGNALED(exit_status))
                        signr = WTERMSIG(exit_status);
        } else
                status = err;

        record__synthesize(rec, true);
        /* this will be recalculated during process_buildids() */
        rec->samples = 0;

        if (!err) {
                if (!rec->timestamp_filename) {
                        record__finish_output(rec);
                } else {
                        fd = record__switch_output(rec, true);
                        if (fd < 0) {
                                status = fd;
                                goto out_delete_session;
                        }
                }
        }

        perf_hooks__invoke_record_end();

        if (!err && !quiet) {
                char samples[128];
                const char *postfix = rec->timestamp_filename ?
                                        ".<timestamp>" : "";

                if (rec->samples && !rec->opts.full_auxtrace)
                        scnprintf(samples, sizeof(samples),
                                  " (%" PRIu64 " samples)", rec->samples);
                else
                        samples[0] = '\0';

                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
                        perf_data_file__size(file) / 1024.0 / 1024.0,
                        file->path, postfix, samples);
        }

out_delete_session:
        perf_session__delete(session);
        return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

        pr_debug("callchain: type %s\n", str[callchain->record_mode]);

        if (callchain->record_mode == CALLCHAIN_DWARF)
                pr_debug("callchain: stack dump size %d\n",
                         callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
                                 struct callchain_param *callchain,
                                 const char *arg, bool unset)
{
        int ret;
        callchain->enabled = !unset;

        /* --no-call-graph */
        if (unset) {
                callchain->record_mode = CALLCHAIN_NONE;
                pr_debug("callchain: disabled\n");
                return 0;
        }

        ret = parse_callchain_record_opt(arg, callchain);
        if (!ret) {
                /* Enable data address sampling for DWARF unwind. */
                if (callchain->record_mode == CALLCHAIN_DWARF)
                        record->sample_address = true;
                callchain_debug(callchain);
        }

        return ret;
}

int record_parse_callchain_opt(const struct option *opt,
                               const char *arg,
                               int unset)
{
        return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
                         const char *arg __maybe_unused,
                         int unset __maybe_unused)
{
        struct callchain_param *callchain = opt->value;

        callchain->enabled = true;

        if (callchain->record_mode == CALLCHAIN_NONE)
                callchain->record_mode = CALLCHAIN_FP;

        callchain_debug(callchain);
        return 0;
}

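/*
 * Handle 'record.*' entries from perfconfig: 'record.build-id' selects
 * build-id cache/skip behaviour, and 'record.call-graph' is remapped
 * onto 'call-graph.record-mode'.
 */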
static int perf_record_config(const char *var, const char *value, void *cb)
{
        struct record *rec = cb;

        if (!strcmp(var, "record.build-id")) {
                if (!strcmp(value, "cache"))
                        rec->no_buildid_cache = false;
                else if (!strcmp(value, "no-cache"))
                        rec->no_buildid_cache = true;
                else if (!strcmp(value, "skip"))
                        rec->no_buildid = true;
                else
                        return -1;
                return 0;
        }
        if (!strcmp(var, "record.call-graph"))
                var = "call-graph.record-mode"; /* fall-through */

        return perf_default_config(var, value, cb);
}

struct clockid_map {
        const char *name;
        int clockid;
};

#define CLOCKID_MAP(n, c)       \
        { .name = n, .clockid = (c), }

#define CLOCKID_END     { .name = NULL, }

/*
 * Add the missing ones; we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
        /* available for all events, NMI safe */
        CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

        /* available for some events */
        CLOCKID_MAP("realtime", CLOCK_REALTIME),
        CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
        CLOCKID_MAP("tai", CLOCK_TAI),

        /* available for the lazy */
        CLOCKID_MAP("mono", CLOCK_MONOTONIC),
        CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
        CLOCKID_MAP("real", CLOCK_REALTIME),
        CLOCKID_MAP("boot", CLOCK_BOOTTIME),

        CLOCKID_END,
};

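/*
 * Parse the clockid option: accept a raw clockid number or one of the
 * names in clockids[], with an optional "CLOCK_" prefix.
 */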
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;
        const struct clockid_map *cm;
        const char *ostr = str;

        if (unset) {
                opts->use_clockid = 0;
                return 0;
        }

        /* no arg passed */
        if (!str)
                return 0;

        /* no setting it twice */
        if (opts->use_clockid)
                return -1;

        opts->use_clockid = true;

        /* if it's a number, we're done */
        if (sscanf(str, "%d", &opts->clockid) == 1)
                return 0;

        /* allow a "CLOCK_" prefix to the name */
        if (!strncasecmp(str, "CLOCK_", 6))
                str += 6;

        for (cm = clockids; cm->name; cm++) {
                if (!strcasecmp(str, cm->name)) {
                        opts->clockid = cm->clockid;
                        return 0;
                }
        }

        opts->use_clockid = false;
        ui__warning("unknown clockid %s, check man page\n", ostr);
        return -1;
}

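/*
 * Parse -m/--mmap-pages: one value for the data mmaps, optionally
 * followed by ",pages" for the AUX area tracing mmaps.
 */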
static int record__parse_mmap_pages(const struct option *opt,
                                    const char *str,
                                    int unset __maybe_unused)
{
        struct record_opts *opts = opt->value;
        char *s, *p;
        unsigned int mmap_pages;
        int ret;

        if (!str)
                return -EINVAL;

        s = strdup(str);
        if (!s)
                return -ENOMEM;

        p = strchr(s, ',');
        if (p)
                *p = '\0';

        if (*s) {
                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
                if (ret)
                        goto out_free;
                opts->mmap_pages = mmap_pages;
        }

        if (!p) {
                ret = 0;
                goto out_free;
        }

        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
        if (ret)
                goto out_free;

        opts->auxtrace_mmap_pages = mmap_pages;

out_free:
        free(s);
        return ret;
}

static void switch_output_size_warn(struct record *rec)
{
        u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
        struct switch_output *s = &rec->switch_output;

        wakeup_size /= 2;

        if (s->size < wakeup_size) {
                char buf[100];

                unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
                pr_warning("WARNING: switch-output data size lower than "
                           "wakeup kernel buffer size (%s), "
                           "expect bigger perf.data sizes\n", buf);
        }
}

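/*
 * Parse the --switch-output argument: "signal", a size tag (B/K/M/G)
 * or a time tag (s/m/h/d).  Any of these enables timestamped output
 * file names.
 */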
static int switch_output_setup(struct record *rec)
{
        struct switch_output *s = &rec->switch_output;
        static struct parse_tag tags_size[] = {
                { .tag  = 'B', .mult = 1       },
                { .tag  = 'K', .mult = 1 << 10 },
                { .tag  = 'M', .mult = 1 << 20 },
                { .tag  = 'G', .mult = 1 << 30 },
                { .tag  = 0 },
        };
        static struct parse_tag tags_time[] = {
                { .tag  = 's', .mult = 1        },
                { .tag  = 'm', .mult = 60       },
                { .tag  = 'h', .mult = 60*60    },
                { .tag  = 'd', .mult = 60*60*24 },
                { .tag  = 0 },
        };
        unsigned long val;

        if (!s->set)
                return 0;

        if (!strcmp(s->str, "signal")) {
                s->signal = true;
                pr_debug("switch-output with SIGUSR2 signal\n");
                goto enabled;
        }

        val = parse_tag_value(s->str, tags_size);
        if (val != (unsigned long) -1) {
                s->size = val;
                pr_debug("switch-output with %s size threshold\n", s->str);
                goto enabled;
        }

        val = parse_tag_value(s->str, tags_time);
        if (val != (unsigned long) -1) {
                s->time = val;
                pr_debug("switch-output with %s time threshold (%lu seconds)\n",
                         s->str, s->time);
                goto enabled;
        }

        return -1;

enabled:
        rec->timestamp_filename = true;
        s->enabled              = true;

        if (s->size && !rec->opts.no_buffering)
                switch_output_size_warn(rec);

        return 0;
}

static const char * const __record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * record__new, because we need access to it in record__exit, which is
 * called after cmd_record() exits; but since record_options needs to be
 * accessible to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
1513static struct record record = {
1514        .opts = {
1515                .sample_time         = true,
1516                .mmap_pages          = UINT_MAX,
1517                .user_freq           = UINT_MAX,
1518                .user_interval       = ULLONG_MAX,
1519                .freq                = 4000,
1520                .target              = {
1521                        .uses_mmap   = true,
1522                        .default_per_cpu = true,
1523                },
1524                .proc_map_timeout     = 500,
1525        },
1526        .tool = {
1527                .sample         = process_sample_event,
1528                .fork           = perf_event__process_fork,
1529                .exit           = perf_event__process_exit,
1530                .comm           = perf_event__process_comm,
1531                .namespaces     = perf_event__process_namespaces,
1532                .mmap           = perf_event__process_mmap,
1533                .mmap2          = perf_event__process_mmap2,
1534                .ordered_events = true,
1535        },
1536};
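
    /*
     * Roughly what a bare 'perf record' starts from: timestamped samples
     * at a 4000 Hz target frequency, per-cpu mmaps for the default
     * target, and UINT_MAX/ULLONG_MAX as "not set by the user" sentinels
     * for mmap_pages, user_freq and user_interval, left for later setup
     * (record_opts__config() runs near the end of cmd_record()).
     */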
1537
1538const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1539        "\n\t\t\t\tDefault: fp";
1540
1541static bool dry_run;
1542
1543/*
1544 * XXX Will stay a global variable until we fix builtin-script.c to stop messing
1545 * with it and switch to using the library functions in perf_evlist that came
1546 * from builtin-record.c, i.e. use record_opts,
1547 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
1548 * using pipes, etc.
1549 */
1550static struct option __record_options[] = {
1551        OPT_CALLBACK('e', "event", &record.evlist, "event",
1552                     "event selector. use 'perf list' to list available events",
1553                     parse_events_option),
1554        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1555                     "event filter", parse_filter),
1556        OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1557                           NULL, "don't record events from perf itself",
1558                           exclude_perf),
1559        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1560                    "record events on existing process id"),
1561        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1562                    "record events on existing thread id"),
1563        OPT_INTEGER('r', "realtime", &record.realtime_prio,
1564                    "collect data with this RT SCHED_FIFO priority"),
1565        OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1566                    "collect data without buffering"),
1567        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1568                    "collect raw sample records from all opened counters"),
1569        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1570                            "system-wide collection from all CPUs"),
1571        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1572                    "list of cpus to monitor"),
1573        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1574        OPT_STRING('o', "output", &record.file.path, "file",
1575                    "output file name"),
1576        OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1577                        &record.opts.no_inherit_set,
1578                        "child tasks do not inherit counters"),
1579        OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1580                    "synthesize non-sample events at the end of output"),
1581        OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1582        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1583        OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1584                     "number of mmap data pages and AUX area tracing mmap pages",
1585                     record__parse_mmap_pages),
1586        OPT_BOOLEAN(0, "group", &record.opts.group,
1587                    "put the counters into a counter group"),
1588        OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1589                           NULL, "enables call-graph recording",
1590                           &record_callchain_opt),
1591        OPT_CALLBACK(0, "call-graph", &record.opts,
1592                     "record_mode[,record_size]", record_callchain_help,
1593                     &record_parse_callchain_opt),
1594        OPT_INCR('v', "verbose", &verbose,
1595                    "be more verbose (show counter open errors, etc)"),
1596        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any messages"),
1597        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1598                    "per thread counts"),
1599        OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1600        OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1601        OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1602                        &record.opts.sample_time_set,
1603                        "Record the sample timestamps"),
1604        OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1605        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1606                    "don't sample"),
1607        OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1608                        &record.no_buildid_cache_set,
1609                        "do not update the buildid cache"),
1610        OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1611                        &record.no_buildid_set,
1612                        "do not collect buildids in perf.data"),
1613        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1614                     "monitor event in cgroup name only",
1615                     parse_cgroups),
1616        OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1617                  "ms to wait before starting measurement after program start"),
1618        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1619                   "user to profile"),
1620
1621        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1622                     "branch any", "sample any taken branches",
1623                     parse_branch_stack),
1624
1625        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1626                     "branch filter mask", "branch stack filter modes",
1627                     parse_branch_stack),
1628        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1629                    "sample by weight (on special events only)"),
1630        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1631                    "sample transaction flags (special events only)"),
1632        OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1633                    "use per-thread mmaps"),
1634        OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1635                    "sample selected machine registers on interrupt,"
1636                    " use -I ? to list register names", parse_regs),
1637        OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1638                    "Record running/enabled time of read (:S) events"),
1639        OPT_CALLBACK('k', "clockid", &record.opts,
1640                     "clockid", "clockid to use for events, see clock_gettime()",
1641                     parse_clockid),
1642        OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1643                          "opts", "AUX area tracing Snapshot Mode", ""),
1644        OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1645                        "per thread proc mmap processing timeout in ms"),
1646        OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1647                    "Record namespaces events"),
1648        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1649                    "Record context switch events"),
1650        OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1651                         "Configure all used events to run in kernel space.",
1652                         PARSE_OPT_EXCLUSIVE),
1653        OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1654                         "Configure all used events to run in user space.",
1655                         PARSE_OPT_EXCLUSIVE),
1656        OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1657                   "clang binary to use for compiling BPF scriptlets"),
1658        OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1659                   "options passed to clang when compiling BPF scriptlets"),
1660        OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1661                   "file", "vmlinux pathname"),
1662        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1663                    "Record build-id of all DSOs regardless of hits"),
1664        OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1665                    "append timestamp to output filename"),
1666        OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1667                          &record.switch_output.set, "signal,size,time",
1668                          "Switch output when receiving SIGUSR2 or when crossing the size/time threshold",
1669                          "signal"),
1670        OPT_BOOLEAN(0, "dry-run", &dry_run,
1671                    "Parse options then exit"),
1672        OPT_END()
1673};
1674
1675struct option *record_options = __record_options;
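
    /*
     * Illustrative invocations exercising the options above:
     *
     *   perf record -F 99 -g -- ./workload   # sample at 99 Hz with call graphs
     *   perf record -a --switch-output=1G    # system wide, switch output at 1G
     *   perf record -p 1234 -o pid.data      # existing pid, custom output file
     */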
1676
1677int cmd_record(int argc, const char **argv)
1678{
1679        int err;
1680        struct record *rec = &record;
1681        char errbuf[BUFSIZ];
1682
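            /*
             * If perf was built without a feature, stub out the options
             * that depend on it so that using them can report the missing
             * build flag (the REASON string) instead of being silently
             * ignored.
             */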
1683#ifndef HAVE_LIBBPF_SUPPORT
1684# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1685        set_nobuild('\0', "clang-path", true);
1686        set_nobuild('\0', "clang-opt", true);
1687# undef set_nobuild
1688#endif
1689
1690#ifndef HAVE_BPF_PROLOGUE
1691# if !defined (HAVE_DWARF_SUPPORT)
1692#  define REASON  "NO_DWARF=1"
1693# elif !defined (HAVE_LIBBPF_SUPPORT)
1694#  define REASON  "NO_LIBBPF=1"
1695# else
1696#  define REASON  "this architecture doesn't support BPF prologue"
1697# endif
1698# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1699        set_nobuild('\0', "vmlinux", true);
1700# undef set_nobuild
1701# undef REASON
1702#endif
1703
1704        rec->evlist = perf_evlist__new();
1705        if (rec->evlist == NULL)
1706                return -ENOMEM;
1707
1708        err = perf_config(perf_record_config, rec);
1709        if (err)
1710                return err;
1711
1712        argc = parse_options(argc, argv, record_options, record_usage,
1713                            PARSE_OPT_STOP_AT_NON_OPTION);
1714        if (quiet)
1715                perf_quiet_option();
1716
1717        /* Make system wide (-a) the default target. */
1718        if (!argc && target__none(&rec->opts.target))
1719                rec->opts.target.system_wide = true;
1720
1721        if (nr_cgroups && !rec->opts.target.system_wide) {
1722                usage_with_options_msg(record_usage, record_options,
1723                        "cgroup monitoring only available in system-wide mode");
1725        }
1726        if (rec->opts.record_switch_events &&
1727            !perf_can_record_switch_events()) {
1728                ui__error("kernel does not support recording context switch events\n");
1729                parse_options_usage(record_usage, record_options, "switch-events", 0);
1730                return -EINVAL;
1731        }
1732
1733        if (switch_output_setup(rec)) {
1734                parse_options_usage(record_usage, record_options, "switch-output", 0);
1735                return -EINVAL;
1736        }
1737
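            /*
             * Time based switch-output: arm a one shot alarm(2); SIGALRM
             * arrives after switch_output.time seconds and
             * alarm_sig_handler() (at the bottom of this file) hits
             * switch_output_trigger so the recording loop can switch the
             * output file.
             */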
1738        if (rec->switch_output.time) {
1739                signal(SIGALRM, alarm_sig_handler);
1740                alarm(rec->switch_output.time);
1741        }
1742
1743        if (!rec->itr) {
1744                rec->itr = auxtrace_record__init(rec->evlist, &err);
1745                if (err)
1746                        goto out;
1747        }
1748
1749        err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1750                                              rec->opts.auxtrace_snapshot_opts);
1751        if (err)
1752                goto out;
1753
1754        /*
1755         * Allow aliases to facilitate the lookup of symbols for address
1756         * filters. Refer to auxtrace_parse_filters().
1757         */
1758        symbol_conf.allow_aliases = true;
1759
1760        symbol__init(NULL);
1761
1762        err = auxtrace_parse_filters(rec->evlist);
1763        if (err)
1764                goto out;
1765
1766        if (dry_run)
1767                goto out;
1768
1769        err = bpf__setup_stdout(rec->evlist);
1770        if (err) {
1771                bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1772                pr_err("ERROR: Setting up BPF stdout failed: %s\n",
1773                       errbuf);
1774                goto out;
1775        }
1776
1777        err = -ENOMEM;
1778
1779        if (symbol_conf.kptr_restrict)
1780                pr_warning(
1781"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1782"check /proc/sys/kernel/kptr_restrict.\n\n"
1783"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1784"file is not found in the buildid cache or in the vmlinux path.\n\n"
1785"Samples in kernel modules won't be resolved at all.\n\n"
1786"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1787"even with a suitable vmlinux or kallsyms file.\n\n");
1788
1789        if (rec->no_buildid_cache || rec->no_buildid) {
1790                disable_buildid_cache();
1791        } else if (rec->switch_output.enabled) {
1792                /*
1793                 * In 'perf record --switch-output', disable buildid
1794                 * generation by default to reduce data file switching
1795                 * overhead. Still generate build-ids if they are explicitly
1796                 * required, using
1797                 *
1798                 *  perf record --switch-output --no-no-buildid \
1799                 *              --no-no-buildid-cache
1800                 *
1801                 * The following code is equivalent to:
1802                 *
1803                 * if ((rec->no_buildid || !rec->no_buildid_set) &&
1804                 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1805                 *         disable_buildid_cache();
1806                 */
1807                bool disable = true;
1808
1809                if (rec->no_buildid_set && !rec->no_buildid)
1810                        disable = false;
1811                if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1812                        disable = false;
1813                if (disable) {
1814                        rec->no_buildid = true;
1815                        rec->no_buildid_cache = true;
1816                        disable_buildid_cache();
1817                }
1818        }
1819
1820        if (record.opts.overwrite)
1821                record.opts.tail_synthesize = true;
1822
1823        if (rec->evlist->nr_entries == 0 &&
1824            perf_evlist__add_default(rec->evlist) < 0) {
1825                pr_err("Not enough memory for event selector list\n");
1826                goto out;
1827        }
1828
1829        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1830                rec->opts.no_inherit = true;
1831
1832        err = target__validate(&rec->opts.target);
1833        if (err) {
1834                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1835                ui__warning("%s", errbuf);
1836        }
1837
1838        err = target__parse_uid(&rec->opts.target);
1839        if (err) {
1840                int saved_errno = errno;
1841
1842                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1843                ui__error("%s", errbuf);
1844
1845                err = -saved_errno;
1846                goto out;
1847        }
1848
1849        /* Enable ignoring missing threads when the -u option is given. */
1850        rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;
1851
1852        err = -ENOMEM;
1853        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1854                usage_with_options(record_usage, record_options);
1855
1856        err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1857        if (err)
1858                goto out;
1859
1860        /*
1861         * We take all build-ids when the file contains
1862         * AUX area tracing data because we do not decode the
1863         * trace; decoding it would take too long.
1864         */
1865        if (rec->opts.full_auxtrace)
1866                rec->buildid_all = true;
1867
1868        if (record_opts__config(&rec->opts)) {
1869                err = -EINVAL;
1870                goto out;
1871        }
1872
1873        err = __cmd_record(&record, argc, argv);
1874out:
1875        perf_evlist__delete(rec->evlist);
1876        symbol__exit();
1877        auxtrace_record__free(rec->itr);
1878        return err;
1879}
1880
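    /*
     * SIGUSR2 handler: start an AUX area snapshot if the snapshot trigger
     * is armed and, when --switch-output=signal is in effect, ask the
     * recording loop to switch the output file.
     */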
1881static void snapshot_sig_handler(int sig __maybe_unused)
1882{
1883        struct record *rec = &record;
1884
1885        if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1886                trigger_hit(&auxtrace_snapshot_trigger);
1887                auxtrace_record__snapshot_started = 1;
1888                if (auxtrace_record__snapshot_start(record.itr))
1889                        trigger_error(&auxtrace_snapshot_trigger);
1890        }
1891
1892        if (switch_output_signal(rec))
1893                trigger_hit(&switch_output_trigger);
1894}
1895
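    /*
     * SIGALRM handler for the time based --switch-output mode, armed in
     * cmd_record() above.
     */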
1896static void alarm_sig_handler(int sig __maybe_unused)
1897{
1898        struct record *rec = &record;
1899
1900        if (switch_output_time(rec))
1901                trigger_hit(&switch_output_trigger);
1902}
1903