linux/tools/perf/builtin-record.c
// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>

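/*
 * Parsed --switch-output settings: rotate the output file on SIGUSR2
 * ("signal"), when the written size crosses a threshold, or on a timer.
 */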
struct switch_output {
        bool             enabled;
        bool             signal;
        unsigned long    size;
        unsigned long    time;
        const char      *str;
        bool             set;
};

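/*
 * Per-invocation state of the record command: the tool callbacks, the
 * parsed options, the output file, the event list and the session, plus
 * bookkeeping such as bytes_written used by the switch-output logic.
 */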
struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        u64                     bytes_written;
        struct perf_data        data;
        struct auxtrace_record  *itr;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
        int                     realtime_prio;
        bool                    no_buildid;
        bool                    no_buildid_set;
        bool                    no_buildid_cache;
        bool                    no_buildid_cache_set;
        bool                    buildid_all;
        bool                    timestamp_filename;
        bool                    timestamp_boundary;
        struct switch_output    switch_output;
        unsigned long long      samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

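/*
 * The helpers below report their condition only while the corresponding
 * trigger is ready; the triggers themselves are flipped from the signal
 * and alarm handlers and polled in the __cmd_record() main loop.
 */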
static bool switch_output_signal(struct record *rec)
{
        return rec->switch_output.signal &&
               trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
        return rec->switch_output.size &&
               trigger_is_ready(&switch_output_trigger) &&
               (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
        return rec->switch_output.time &&
               trigger_is_ready(&switch_output_trigger);
}

static int record__write(struct record *rec, void *bf, size_t size)
{
        if (perf_data__write(rec->session->data, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        rec->bytes_written += size;

        if (switch_output_size(rec))
                trigger_hit(&switch_output_trigger);

        return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, event, event->header.size);
}

static int record__pushfn(void *to, void *bf, size_t size)
{
        struct record *rec = to;

        rec->samples++;
        return record__write(rec, bf, size);
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;
        else
                signr = sig;

        done = 1;
}

static void sigsegv_handler(int sig)
{
        perf_hooks__recover();
        sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
        if (signr == -1)
                return;

        signal(signr, SIG_DFL);
        raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

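/*
 * Write one AUX area trace chunk: the PERF_RECORD_AUXTRACE header, then
 * up to two data fragments (two when the ring buffer wrapped), padded
 * to an 8-byte boundary as event->auxtrace.size expects.
 */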
static int record__process_auxtrace(struct perf_tool *tool,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
{
        struct record *rec = container_of(tool, struct record, tool);
        struct perf_data *data = &rec->data;
        size_t padding;
        u8 pad[8] = {0};

        if (!perf_data__is_pipe(data)) {
                off_t file_offset;
                int fd = perf_data__fd(data);
                int err;

                file_offset = lseek(fd, 0, SEEK_CUR);
                if (file_offset == -1)
                        return -1;
                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
                                                     event, file_offset);
                if (err)
                        return err;
        }

        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
        padding = (len1 + len2) & 7;
        if (padding)
                padding = 8 - padding;

        record__write(rec, event, event->header.size);
        record__write(rec, data1, len1);
        if (len2)
                record__write(rec, data2, len2);
        record__write(rec, &pad, padding);

        return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
                                      struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
                                  record__process_auxtrace);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
                                               struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
                                           record__process_auxtrace,
                                           rec->opts.auxtrace_snapshot_size);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm =
                                &rec->evlist->mmap[i].auxtrace_mmap;

                if (!mm->base)
                        continue;

                if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }
out:
        return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
        pr_debug("Recording AUX area tracing snapshot\n");
        if (record__auxtrace_read_snapshot_all(rec) < 0) {
                trigger_error(&auxtrace_snapshot_trigger);
        } else {
                if (auxtrace_record__snapshot_finish(rec->itr))
                        trigger_error(&auxtrace_snapshot_trigger);
                else
                        trigger_ready(&auxtrace_snapshot_trigger);
        }
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct auxtrace_mmap *mm __maybe_unused)
{
        return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
        return 0;
}

#endif

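/*
 * Map the per-cpu ring buffers, sized by -m/--mmap-pages; EPERM here
 * usually means the locked-memory budget in perf_event_mlock_kb was
 * exceeded, hence the dedicated hint.
 */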
static int record__mmap_evlist(struct record *rec,
                               struct perf_evlist *evlist)
{
        struct record_opts *opts = &rec->opts;
        char msg[512];

        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %u,%u)\n",
                               opts->mmap_pages, opts->auxtrace_mmap_pages);
                        return -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                                str_error_r(errno, msg, sizeof(msg)));
                        if (errno)
                                return -errno;
                        else
                                return -EINVAL;
                }
        }
        return 0;
}

static int record__mmap(struct record *rec)
{
        return record__mmap_evlist(rec, rec->evlist);
}

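/*
 * Open all events in the evlist, letting perf_evsel__fallback() retry
 * with a more portable attribute when the exact one cannot be opened,
 * then apply filters and driver configs and mmap the ring buffers.
 */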
static int record__open(struct record *rec)
{
        char msg[BUFSIZ];
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct record_opts *opts = &rec->opts;
        struct perf_evsel_config_term *err_term;
        int rc = 0;

        /*
         * For initial_delay we need to add a dummy event so that we can track
         * PERF_RECORD_MMAP while we wait for the initial delay to enable the
         * real events, the ones requested by the user.
         */
        if (opts->initial_delay) {
                if (perf_evlist__add_dummy(evlist))
                        return -ENOMEM;

                pos = perf_evlist__first(evlist);
                pos->tracking = 0;
                pos = perf_evlist__last(evlist);
                pos->tracking = 1;
                pos->attr.enable_on_exec = 1;
        }

        perf_evlist__config(evlist, opts, &callchain_param);

        evlist__for_each_entry(evlist, pos) {
try_again:
                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
                                if (verbose > 0)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }

                        rc = -errno;
                        perf_evsel__open_strerror(pos, &opts->target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);
                        goto out;
                }

                pos->supported = true;
        }

        if (perf_evlist__apply_filters(evlist, &pos)) {
                pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, perf_evsel__name(pos), errno,
                        str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
                pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
                      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
                      str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        rc = record__mmap(rec);
        if (rc)
                goto out;

        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
out:
        return rc;
}

static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct perf_evsel *evsel,
                                struct machine *machine)
{
        struct record *rec = container_of(tool, struct record, tool);

        if (rec->evlist->first_sample_time == 0)
                rec->evlist->first_sample_time = sample->time;

        rec->evlist->last_sample_time = sample->time;

        if (rec->buildid_all)
                return 0;

        rec->samples++;
        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

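/*
 * Post-process the freshly written data file to collect build-ids for
 * the DSOs that were hit, so that later analysis can still match
 * symbols after binaries are updated.
 */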
static int process_buildids(struct record *rec)
{
        struct perf_data *data = &rec->data;
        struct perf_session *session = rec->session;

        if (data->size == 0)
                return 0;

        /*
         * During this process, it'll load the kernel map and replace
         * dso->long_name with a real pathname it found.  In this case
         * we prefer a vmlinux path like
         *   /lib/modules/3.16.4/build/vmlinux
         *
         * rather than the build-id path (in the debug directory):
         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
         */
        symbol_conf.ignore_vmlinux_buildid = true;

        /*
         * If --buildid-all is given, it marks all DSOs regardless of hits,
         * so there is no need to process samples. But if timestamp_boundary
         * is enabled, we still need to walk all samples to get the
         * timestamps of the first/last samples.
         */
        if (rec->buildid_all && !rec->timestamp_boundary)
                rec->tool.sample = NULL;

        return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;
        /*
         * For a guest kernel, when processing the record & report
         * subcommands, we arrange the module mmaps prior to the guest
         * kernel mmap and trigger a DSO preload, because default guest
         * module symbols are loaded from guest kallsyms instead of
         * /lib/modules/XXX/XXX. This avoids missing symbols when the
         * first address is in a module instead of in the guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest kernel's
         * /proc/kallsyms sometimes has no _text.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

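/*
 * Drain every mapped ring buffer (the overwritable ones only when their
 * data is pending), then emit PERF_RECORD_FINISHED_ROUND if anything was
 * written so the ordered-events code can flush what it has queued.
 */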
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
                                    bool overwrite)
{
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;
        struct perf_mmap *maps;

        if (!evlist)
                return 0;

        maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
        if (!maps)
                return 0;

        if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
                return 0;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

                if (maps[i].base) {
                        if (perf_mmap__push(&maps[i], overwrite, rec, record__pushfn) != 0) {
                                rc = -1;
                                goto out;
                        }
                }

                if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
                    record__auxtrace_mmap_read(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }

        /*
         * Mark the round finished in case we wrote
         * at least one event.
         */
        if (bytes_written != rec->bytes_written)
                rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

        if (overwrite)
                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
        return rc;
}

static int record__mmap_read_all(struct record *rec)
{
        int err;

        err = record__mmap_read_evlist(rec, rec->evlist, false);
        if (err)
                return err;

        return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
        struct perf_session *session = rec->session;
        int feat;

        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&rec->evlist->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

        perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
        struct perf_data *data = &rec->data;
        int fd = perf_data__fd(data);

        if (data->is_pipe)
                return;

        rec->session->header.data_size += rec->bytes_written;
        data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);

        if (!rec->no_buildid) {
                process_buildids(rec);

                if (rec->buildid_all)
                        dsos__hit_all(rec->session);
        }
        perf_session__write_header(rec->session, rec->evlist, fd, true);

        return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
        int err;
        struct thread_map *thread_map;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
        if (thread_map == NULL)
                return -1;

        err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
                                                 process_synthesized_event,
                                                 &rec->session->machines.host,
                                                 rec->opts.sample_address,
                                                 rec->opts.proc_map_timeout);
        thread_map__put(thread_map);
        return err;
}

static int record__synthesize(struct record *rec, bool tail);

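/*
 * Rotate the output: finalize the current file and switch perf_data to a
 * timestamp-suffixed one. With at_exit=false this also resets the write
 * counters and re-synthesizes the tracking events for the new file.
 */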
static int
record__switch_output(struct record *rec, bool at_exit)
{
        struct perf_data *data = &rec->data;
        int fd, err;

        /* Same size: "2015122520103046" */
        char timestamp[] = "InvalidTimestamp";

        record__synthesize(rec, true);
        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

        rec->samples = 0;
        record__finish_output(rec);
        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
        if (err) {
                pr_err("Failed to get current timestamp\n");
                return -EINVAL;
        }

        fd = perf_data__switch(data, timestamp,
                                    rec->session->header.data_offset,
                                    at_exit);
        if (fd >= 0 && !at_exit) {
                rec->bytes_written = 0;
                rec->session->header.data_size = 0;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                        data->file.path, timestamp);

        /* Output tracking events */
        if (!at_exit) {
                record__synthesize(rec, false);

                /*
                 * In 'perf record --switch-output' without -a,
                 * record__synthesize() in record__switch_output() won't
                 * generate tracking events because there's no thread_map
                 * in the evlist, so the newly created perf.data would
                 * lack mmap and comm information.
                 * Create a fake thread_map and call
                 * perf_event__synthesize_thread_map() directly for those
                 * events.
                 */
                if (target__none(&rec->opts.target))
                        record__synthesize_workload(rec, false);
        }
        return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload() will send a SIGUSR1
 * if the fork fails, since we asked for it by setting
 * its want_signal parameter to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
                                        siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
        workload_exec_errno = info->si_value.sival_int;
        done = 1;
        child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
                            struct perf_tool *tool __maybe_unused,
                            perf_event__handler_t process __maybe_unused,
                            struct machine *machine __maybe_unused)
{
        return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
        if (evlist) {
                if (evlist->mmap && evlist->mmap[0].base)
                        return evlist->mmap[0].base;
                if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
                        return evlist->overwrite_mmap[0].base;
        }
        return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
        const struct perf_event_mmap_page *pc;

        pc = perf_evlist__pick_pc(rec->evlist);
        if (pc)
                return pc;
        return NULL;
}

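/*
 * Emit the synthetic (non-sample) events that later analysis needs:
 * features/attrs/tracing data when piping, the time conversion and
 * auxtrace info, kernel and module mmaps, and the thread/cpu maps.
 */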
static int record__synthesize(struct record *rec, bool tail)
{
        struct perf_session *session = rec->session;
        struct machine *machine = &session->machines.host;
        struct perf_data *data = &rec->data;
        struct record_opts *opts = &rec->opts;
        struct perf_tool *tool = &rec->tool;
        int fd = perf_data__fd(data);
        int err = 0;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        if (data->is_pipe) {
                err = perf_event__synthesize_features(
                        tool, session, rec->evlist, process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize features.\n");
                        return err;
                }

                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out;
                }

                if (have_tracepoints(&rec->evlist->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints, so it's not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really should
                         * report this more properly and also
                         * propagate the errors that now end up calling die().
                         */
                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out;
                        }
                        rec->bytes_written += err;
                }
        }

        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
                                          process_synthesized_event, machine);
        if (err)
                goto out;

        if (rec->opts.full_auxtrace) {
                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                        session, process_synthesized_event);
                if (err)
                        goto out;
        }

        if (!perf_evlist__exclude_kernel(rec->evlist)) {
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine);
                WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
                                   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                                   "Check /proc/kallsyms permission or run as root.\n");

                err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                                     machine);
                WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
                                   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                                   "Check /proc/modules permission or run as root.\n");
        }

        if (perf_guest) {
                machines__process_guests(&session->machines,
                                         perf_event__synthesize_guest_os, tool);
        }

        err = perf_event__synthesize_extra_attr(&rec->tool,
                                                rec->evlist,
                                                process_synthesized_event,
                                                data->is_pipe);
        if (err)
                goto out;

        err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
                                                 process_synthesized_event,
                                                NULL);
        if (err < 0) {
                pr_err("Couldn't synthesize thread map.\n");
                return err;
        }

        err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
                                             process_synthesized_event, NULL);
        if (err < 0) {
                pr_err("Couldn't synthesize cpu map.\n");
                return err;
        }

        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
                                            process_synthesized_event, opts->sample_address,
                                            opts->proc_map_timeout, 1);
out:
        return err;
}

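/*
 * The main record loop: set up signals and the session, fork the
 * workload if one was given, open and enable the events, then keep
 * draining the ring buffers until done, rotating output on demand.
 */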
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
        int err;
        int status = 0;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct record_opts *opts = &rec->opts;
        struct perf_data *data = &rec->data;
        struct perf_session *session;
        bool disabled = false, draining = false;
        int fd;

        rec->progname = argv[0];

        atexit(record__sig_exit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);
        signal(SIGSEGV, sigsegv_handler);

        if (rec->opts.record_namespaces)
                tool->namespace_events = true;

        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
                signal(SIGUSR2, snapshot_sig_handler);
                if (rec->opts.auxtrace_snapshot_mode)
                        trigger_on(&auxtrace_snapshot_trigger);
                if (rec->switch_output.enabled)
                        trigger_on(&switch_output_trigger);
        } else {
                signal(SIGUSR2, SIG_IGN);
        }

        session = perf_session__new(data, false, tool);
        if (session == NULL) {
                pr_err("Perf session creation failed.\n");
                return -1;
        }

        fd = perf_data__fd(data);
        rec->session = session;

        record__init_features(rec);

        if (forks) {
                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
                                                    argv, data->is_pipe,
                                                    workload_exec_failed_signal);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        status = err;
                        goto out_delete_session;
                }
        }

        /*
         * If we have just a single event and are sending data
         * through a pipe, we need to force the id allocation,
         * because we synthesize the event name through the pipe
         * and need the id for that.
         */
        if (data->is_pipe && rec->evlist->nr_entries == 1)
                rec->opts.sample_id = true;

        if (record__open(rec) != 0) {
                err = -1;
                goto out_child;
        }

        err = bpf__apply_obj_config();
        if (err) {
                char errbuf[BUFSIZ];

                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Apply config to BPF failed: %s\n",
                         errbuf);
                goto out_child;
        }

        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
         */
        if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
                rec->tool.ordered_events = false;
        }

        if (!rec->evlist->nr_groups)
                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

        if (data->is_pipe) {
                err = perf_header__write_pipe(fd);
                if (err < 0)
                        goto out_child;
        } else {
                err = perf_session__write_header(session, rec->evlist, fd, false);
                if (err < 0)
                        goto out_child;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_child;
        }

        machine = &session->machines.host;

        err = record__synthesize(rec, false);
        if (err < 0)
                goto out_child;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_child;
                }
        }

        /*
         * When perf is starting the traced process, all the events
         * (apart from group members) have enable_on_exec=1 set,
         * so don't spoil it by prematurely enabling them.
         */
        if (!target__none(&opts->target) && !opts->initial_delay)
                perf_evlist__enable(rec->evlist);

        /*
         * Let the child rip
         */
        if (forks) {
                union perf_event *event;
                pid_t tgid;

                event = malloc(sizeof(event->comm) + machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Some H/W events are generated before COMM event
                 * which is emitted during exec(), so perf script
                 * cannot see a correct process name for those events.
                 * Synthesize COMM event to prevent it.
                 */
                tgid = perf_event__synthesize_comm(tool, event,
                                                   rec->evlist->workload.pid,
                                                   process_synthesized_event,
                                                   machine);
                free(event);

                if (tgid == -1)
                        goto out_child;

                event = malloc(sizeof(event->namespaces) +
                               (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
                               machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Synthesize NAMESPACES event for the command specified.
                 */
                perf_event__synthesize_namespaces(tool, event,
                                                  rec->evlist->workload.pid,
                                                  tgid, process_synthesized_event,
                                                  machine);
                free(event);

                perf_evlist__start_workload(rec->evlist);
        }

        if (opts->initial_delay) {
                usleep(opts->initial_delay * USEC_PER_MSEC);
                perf_evlist__enable(rec->evlist);
        }

        trigger_ready(&auxtrace_snapshot_trigger);
        trigger_ready(&switch_output_trigger);
        perf_hooks__invoke_record_start();
        for (;;) {
                unsigned long long hits = rec->samples;

                /*
                 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
                 * here: when done == true and hits != rec->samples
                 * in the previous round.
                 *
                 * perf_evlist__toggle_bkw_mmap() ensures we never
                 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
                 */
                if (trigger_is_hit(&switch_output_trigger) || done || draining)
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

                if (record__mmap_read_all(rec) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
                        trigger_error(&switch_output_trigger);
                        err = -1;
                        goto out_child;
                }

                if (auxtrace_record__snapshot_started) {
                        auxtrace_record__snapshot_started = 0;
                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
                                record__read_auxtrace_snapshot(rec);
                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                                pr_err("AUX area tracing snapshot failed\n");
                                err = -1;
                                goto out_child;
                        }
                }

                if (trigger_is_hit(&switch_output_trigger)) {
                        /*
                         * If switch_output_trigger is hit, the data in the
                         * overwritable ring buffer should have been collected,
                         * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
                         *
                         * If SIGUSR2 was raised after or during
                         * record__mmap_read_all(), it didn't collect data
                         * from the overwritable ring buffer. Read again.
                         */
                        if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
                                continue;
                        trigger_ready(&switch_output_trigger);

                        /*
                         * Re-enable events in the overwrite ring buffer after
                         * record__mmap_read_all(): we should have collected
                         * its data by now.
                         */
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

                        if (!quiet)
                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
                                        waking);
                        waking = 0;
                        fd = record__switch_output(rec, false);
                        if (fd < 0) {
                                pr_err("Failed to switch to new file\n");
                                trigger_error(&switch_output_trigger);
                                err = fd;
                                goto out_child;
                        }

                        /* re-arm the alarm */
                        if (rec->switch_output.time)
                                alarm(rec->switch_output.time);
                }

                if (hits == rec->samples) {
                        if (done || draining)
                                break;
                        err = perf_evlist__poll(rec->evlist, -1);
                        /*
                         * Propagate the error only if there is one; ignore a
                         * positive number of returned events and interrupted
                         * polls (EINTR).
                         */
                        if (err > 0 || (err < 0 && errno == EINTR))
                                err = 0;
                        waking++;

                        if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
                                draining = true;
                }

                /*
                 * When perf is starting the traced process, the events die
                 * with the process at the end and we wait for that. Thus
                 * there is no need to disable the events in this case.
                 */
                if (done && !disabled && !target__none(&opts->target)) {
                        trigger_off(&auxtrace_snapshot_trigger);
                        perf_evlist__disable(rec->evlist);
                        disabled = true;
                }
        }
        trigger_off(&auxtrace_snapshot_trigger);
        trigger_off(&switch_output_trigger);

        if (forks && workload_exec_errno) {
                char msg[STRERR_BUFSIZE];
                const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
                pr_err("Workload failed: %s\n", emsg);
                err = -1;
                goto out_child;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

out_child:
        if (forks) {
                int exit_status;

                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&exit_status);

                if (err < 0)
                        status = err;
                else if (WIFEXITED(exit_status))
                        status = WEXITSTATUS(exit_status);
                else if (WIFSIGNALED(exit_status))
                        signr = WTERMSIG(exit_status);
        } else
                status = err;

        record__synthesize(rec, true);
        /* this will be recalculated during process_buildids() */
        rec->samples = 0;

        if (!err) {
                if (!rec->timestamp_filename) {
                        record__finish_output(rec);
                } else {
                        fd = record__switch_output(rec, true);
                        if (fd < 0) {
                                status = fd;
                                goto out_delete_session;
                        }
                }
        }

        perf_hooks__invoke_record_end();

        if (!err && !quiet) {
                char samples[128];
                const char *postfix = rec->timestamp_filename ?
                                        ".<timestamp>" : "";

                if (rec->samples && !rec->opts.full_auxtrace)
                        scnprintf(samples, sizeof(samples),
                                  " (%" PRIu64 " samples)", rec->samples);
                else
                        samples[0] = '\0';

                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
                        perf_data__size(data) / 1024.0 / 1024.0,
                        data->file.path, postfix, samples);
        }

out_delete_session:
        perf_session__delete(session);
        return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

        pr_debug("callchain: type %s\n", str[callchain->record_mode]);

        if (callchain->record_mode == CALLCHAIN_DWARF)
                pr_debug("callchain: stack dump size %d\n",
                         callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
                                 struct callchain_param *callchain,
                                 const char *arg, bool unset)
{
        int ret;
        callchain->enabled = !unset;

        /* --no-call-graph */
        if (unset) {
                callchain->record_mode = CALLCHAIN_NONE;
                pr_debug("callchain: disabled\n");
                return 0;
        }

        ret = parse_callchain_record_opt(arg, callchain);
        if (!ret) {
                /* Enable data address sampling for DWARF unwind. */
                if (callchain->record_mode == CALLCHAIN_DWARF)
                        record->sample_address = true;
                callchain_debug(callchain);
        }

        return ret;
}

int record_parse_callchain_opt(const struct option *opt,
                               const char *arg,
                               int unset)
{
        return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
                         const char *arg __maybe_unused,
                         int unset __maybe_unused)
{
        struct callchain_param *callchain = opt->value;

        callchain->enabled = true;

        if (callchain->record_mode == CALLCHAIN_NONE)
                callchain->record_mode = CALLCHAIN_FP;

        callchain_debug(callchain);
        return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
        struct record *rec = cb;

        if (!strcmp(var, "record.build-id")) {
                if (!strcmp(value, "cache"))
                        rec->no_buildid_cache = false;
                else if (!strcmp(value, "no-cache"))
                        rec->no_buildid_cache = true;
                else if (!strcmp(value, "skip"))
                        rec->no_buildid = true;
                else
                        return -1;
                return 0;
        }
        if (!strcmp(var, "record.call-graph"))
                var = "call-graph.record-mode"; /* fall-through */

        return perf_default_config(var, value, cb);
}

struct clockid_map {
        const char *name;
        int clockid;
};

#define CLOCKID_MAP(n, c)       \
        { .name = n, .clockid = (c), }

#define CLOCKID_END     { .name = NULL, }


/*
 * Define the clockids missing from older system headers; we need to
 * build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
        /* available for all events, NMI safe */
        CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

        /* available for some events */
        CLOCKID_MAP("realtime", CLOCK_REALTIME),
        CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
        CLOCKID_MAP("tai", CLOCK_TAI),

        /* available for the lazy */
        CLOCKID_MAP("mono", CLOCK_MONOTONIC),
        CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
        CLOCKID_MAP("real", CLOCK_REALTIME),
        CLOCKID_MAP("boot", CLOCK_BOOTTIME),

        CLOCKID_END,
};

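/*
 * -k/--clockid argument parser: accepts a raw clockid number or one of
 * the names above, with an optional "CLOCK_" prefix, case-insensitive.
 */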
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;
        const struct clockid_map *cm;
        const char *ostr = str;

        if (unset) {
                opts->use_clockid = 0;
                return 0;
        }

        /* no arg passed */
        if (!str)
                return 0;

        /* no setting it twice */
        if (opts->use_clockid)
                return -1;

        opts->use_clockid = true;

        /* if it's a number, we're done */
        if (sscanf(str, "%d", &opts->clockid) == 1)
                return 0;

        /* allow a "CLOCK_" prefix to the name */
        if (!strncasecmp(str, "CLOCK_", 6))
                str += 6;

        for (cm = clockids; cm->name; cm++) {
                if (!strcasecmp(str, cm->name)) {
                        opts->clockid = cm->clockid;
                        return 0;
                }
        }

        opts->use_clockid = false;
        ui__warning("unknown clockid %s, check man page\n", ostr);
        return -1;
}

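/*
 * Parse the -m/--mmap-pages value, which is "pages" or "pages,pages",
 * where the part after the comma sizes the AUX area tracing mmap.
 */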
static int record__parse_mmap_pages(const struct option *opt,
                                    const char *str,
                                    int unset __maybe_unused)
{
        struct record_opts *opts = opt->value;
        char *s, *p;
        unsigned int mmap_pages;
        int ret;

        if (!str)
                return -EINVAL;

        s = strdup(str);
        if (!s)
                return -ENOMEM;

        p = strchr(s, ',');
        if (p)
                *p = '\0';

        if (*s) {
                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
                if (ret)
                        goto out_free;
                opts->mmap_pages = mmap_pages;
        }

        if (!p) {
                ret = 0;
                goto out_free;
        }

        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
        if (ret)
                goto out_free;

        opts->auxtrace_mmap_pages = mmap_pages;

out_free:
        free(s);
        return ret;
}

static void switch_output_size_warn(struct record *rec)
{
        u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
        struct switch_output *s = &rec->switch_output;

        wakeup_size /= 2;

        if (s->size < wakeup_size) {
                char buf[100];

                unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
                pr_warning("WARNING: switch-output data size lower than "
                           "wakeup kernel buffer size (%s) "
                           "expect bigger perf.data sizes\n", buf);
        }
}

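/*
 * Parse the --switch-output argument: "signal", a size with a B/K/M/G
 * suffix, or a time with a s/m/h/d suffix. Any of these also turns on
 * timestamped output file names.
 */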
static int switch_output_setup(struct record *rec)
{
        struct switch_output *s = &rec->switch_output;
        static struct parse_tag tags_size[] = {
                { .tag  = 'B', .mult = 1       },
                { .tag  = 'K', .mult = 1 << 10 },
                { .tag  = 'M', .mult = 1 << 20 },
                { .tag  = 'G', .mult = 1 << 30 },
                { .tag  = 0 },
        };
        static struct parse_tag tags_time[] = {
                { .tag  = 's', .mult = 1        },
                { .tag  = 'm', .mult = 60       },
                { .tag  = 'h', .mult = 60*60    },
                { .tag  = 'd', .mult = 60*60*24 },
                { .tag  = 0 },
        };
        unsigned long val;

        if (!s->set)
                return 0;

        if (!strcmp(s->str, "signal")) {
                s->signal = true;
                pr_debug("switch-output with SIGUSR2 signal\n");
                goto enabled;
        }

        val = parse_tag_value(s->str, tags_size);
        if (val != (unsigned long) -1) {
                s->size = val;
                pr_debug("switch-output with %s size threshold\n", s->str);
                goto enabled;
        }

        val = parse_tag_value(s->str, tags_time);
        if (val != (unsigned long) -1) {
                s->time = val;
                pr_debug("switch-output with %s time threshold (%lu seconds)\n",
                         s->str, s->time);
                goto enabled;
        }

        return -1;

enabled:
        rec->timestamp_filename = true;
        s->enabled              = true;

        if (s->size && !rec->opts.no_buffering)
                switch_output_size_warn(rec);

        return 0;
}

static const char * const __record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * record__new, because we need access to it in record__exit, which is
 * called after cmd_record() exits; but since record_options needs to be
 * accessible to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
1485static struct record record = {
1486        .opts = {
1487                .sample_time         = true,
1488                .mmap_pages          = UINT_MAX,
1489                .user_freq           = UINT_MAX,
1490                .user_interval       = ULLONG_MAX,
1491                .freq                = 4000,
1492                .target              = {
1493                        .uses_mmap   = true,
1494                        .default_per_cpu = true,
1495                },
1496                .proc_map_timeout     = 500,
1497        },
1498        .tool = {
1499                .sample         = process_sample_event,
1500                .fork           = perf_event__process_fork,
1501                .exit           = perf_event__process_exit,
1502                .comm           = perf_event__process_comm,
1503                .namespaces     = perf_event__process_namespaces,
1504                .mmap           = perf_event__process_mmap,
1505                .mmap2          = perf_event__process_mmap2,
1506                .ordered_events = true,
1507        },
1508};
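
    /*
     * Note on the defaults above (explanatory, not in the source):
     * user_freq = UINT_MAX and user_interval = ULLONG_MAX are "not set by
     * the user" sentinels for -F/-c, in which case sampling falls back to
     * the 4000 Hz default above; mmap_pages = UINT_MAX likewise means
     * "let perf pick a suitable ring-buffer size".
     */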
1509
1510const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1511        "\n\t\t\t\tDefault: fp";
1512
1513static bool dry_run;
1514
1515/*
1516 * XXX This will stay a global variable until we fix builtin-script.c to stop
1517 * messing with it and switch to using the library functions in perf_evlist
1518 * that came from builtin-record.c, i.e. use record_opts,
1519 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
1520 * using pipes, etc.
1521 */
1522static struct option __record_options[] = {
1523        OPT_CALLBACK('e', "event", &record.evlist, "event",
1524                     "event selector. use 'perf list' to list available events",
1525                     parse_events_option),
1526        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1527                     "event filter", parse_filter),
1528        OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1529                           NULL, "don't record events from perf itself",
1530                           exclude_perf),
1531        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1532                    "record events on existing process id"),
1533        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1534                    "record events on existing thread id"),
1535        OPT_INTEGER('r', "realtime", &record.realtime_prio,
1536                    "collect data with this RT SCHED_FIFO priority"),
1537        OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1538                    "collect data without buffering"),
1539        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1540                    "collect raw sample records from all opened counters"),
1541        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1542                    "system-wide collection from all CPUs"),
1543        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1544                    "list of cpus to monitor"),
1545        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1546        OPT_STRING('o', "output", &record.data.file.path, "file",
1547                    "output file name"),
1548        OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1549                        &record.opts.no_inherit_set,
1550                        "child tasks do not inherit counters"),
1551        OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1552                    "synthesize non-sample events at the end of output"),
1553        OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1554        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1555        OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1556                     "number of mmap data pages and AUX area tracing mmap pages",
1557                     record__parse_mmap_pages),
1558        OPT_BOOLEAN(0, "group", &record.opts.group,
1559                    "put the counters into a counter group"),
1560        OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1561                           NULL, "enables call-graph recording",
1562                           &record_callchain_opt),
1563        OPT_CALLBACK(0, "call-graph", &record.opts,
1564                     "record_mode[,record_size]", record_callchain_help,
1565                     &record_parse_callchain_opt),
1566        OPT_INCR('v', "verbose", &verbose,
1567                    "be more verbose (show counter open errors, etc)"),
1568        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1569        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1570                    "per thread counts"),
1571        OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1572        OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1573                    "Record the sample physical addresses"),
1574        OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1575        OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1576                        &record.opts.sample_time_set,
1577                        "Record the sample timestamps"),
1578        OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1579                        "Record the sample period"),
1580        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1581                    "don't sample"),
1582        OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1583                        &record.no_buildid_cache_set,
1584                        "do not update the buildid cache"),
1585        OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1586                        &record.no_buildid_set,
1587                        "do not collect buildids in perf.data"),
1588        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1589                     "monitor event in cgroup name only",
1590                     parse_cgroups),
1591        OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1592                  "ms to wait before starting measurement after program start"),
1593        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1594                   "user to profile"),
1595
1596        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1597                     "branch any", "sample any taken branches",
1598                     parse_branch_stack),
1599
1600        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1601                     "branch filter mask", "branch stack filter modes",
1602                     parse_branch_stack),
1603        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1604                    "sample by weight (on special events only)"),
1605        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1606                    "sample transaction flags (special events only)"),
1607        OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1608                    "use per-thread mmaps"),
1609        OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1610                    "sample selected machine registers on interrupt,"
1611                    " use -I ? to list register names", parse_regs),
1612        OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1613                    "sample selected machine registers on interrupt,"
1614                    " use '--user-regs=?' to list register names", parse_regs),
1615        OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1616                    "Record running/enabled time of read (:S) events"),
1617        OPT_CALLBACK('k', "clockid", &record.opts,
1618                     "clockid", "clockid to use for events, see clock_gettime()",
1619                     parse_clockid),
1620        OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1621                          "opts", "AUX area tracing Snapshot Mode", ""),
1622        OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1623                        "per thread proc mmap processing timeout in ms"),
1624        OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1625                    "Record namespaces events"),
1626        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1627                    "Record context switch events"),
1628        OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1629                         "Configure all used events to run in kernel space.",
1630                         PARSE_OPT_EXCLUSIVE),
1631        OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1632                         "Configure all used events to run in user space.",
1633                         PARSE_OPT_EXCLUSIVE),
1634        OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1635                   "clang binary to use for compiling BPF scriptlets"),
1636        OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1637                   "options passed to clang when compiling BPF scriptlets"),
1638        OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1639                   "file", "vmlinux pathname"),
1640        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1641                    "Record build-id of all DSOs regardless of hits"),
1642        OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1643                    "append timestamp to output filename"),
1644        OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
1645                    "Record timestamp boundary (time of first/last samples)"),
1646        OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1647                          &record.switch_output.set, "signal,size,time",
1648                          "Switch output when receiving SIGUSR2 or crossing a size or time threshold",
1649                          "signal"),
1650        OPT_BOOLEAN(0, "dry-run", &dry_run,
1651                    "Parse options then exit"),
1652        OPT_END()
1653};
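
    /*
     * Sketch of extending this table (hypothetical, not in the source):
     * a new flag would follow the existing pattern, e.g.
     *
     *   OPT_BOOLEAN(0, "example-flag", &record.opts.example_flag,
     *               "help text shown by 'perf record -h'"),
     *
     * placed before OPT_END(), with an example_flag field added to
     * struct record_opts.
     */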
1654
1655struct option *record_options = __record_options;
1656
1657int cmd_record(int argc, const char **argv)
1658{
1659        int err;
1660        struct record *rec = &record;
1661        char errbuf[BUFSIZ];
1662
1663#ifndef HAVE_LIBBPF_SUPPORT
1664# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1665        set_nobuild('\0', "clang-path", true);
1666        set_nobuild('\0', "clang-opt", true);
1667# undef set_nobuild
1668#endif
1669
1670#ifndef HAVE_BPF_PROLOGUE
1671# if !defined (HAVE_DWARF_SUPPORT)
1672#  define REASON  "NO_DWARF=1"
1673# elif !defined (HAVE_LIBBPF_SUPPORT)
1674#  define REASON  "NO_LIBBPF=1"
1675# else
1676#  define REASON  "this architecture doesn't support BPF prologue"
1677# endif
1678# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1679        set_nobuild('\0', "vmlinux", true);
1680# undef set_nobuild
1681# undef REASON
1682#endif
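
    /*
     * Rough note (an assumption about the subcmd internals): the
     * set_option_nobuild() calls above mark an option as unavailable, so
     * selecting it at run time fails with a message naming the missing
     * build-time dependency (e.g. NO_LIBBPF=1) rather than being silently
     * accepted.
     */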
1683
1684        rec->evlist = perf_evlist__new();
1685        if (rec->evlist == NULL)
1686                return -ENOMEM;
1687
1688        err = perf_config(perf_record_config, rec);
1689        if (err)
1690                return err;
1691
1692        argc = parse_options(argc, argv, record_options, record_usage,
1693                            PARSE_OPT_STOP_AT_NON_OPTION);
1694        if (quiet)
1695                perf_quiet_option();
1696
1697        /* Make system wide (-a) the default target. */
1698        if (!argc && target__none(&rec->opts.target))
1699                rec->opts.target.system_wide = true;
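            /*
             * That is, "perf record" with no command and no -p/-t/-C/-u
             * target behaves like "perf record -a".
             */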
1700
1701        if (nr_cgroups && !rec->opts.target.system_wide) {
1702                usage_with_options_msg(record_usage, record_options,
1703                        "cgroup monitoring only available in system-wide mode");
1704        }
1705
1706        if (rec->opts.record_switch_events &&
1707            !perf_can_record_switch_events()) {
1708                ui__error("kernel does not support recording context switch events\n");
1709                parse_options_usage(record_usage, record_options, "switch-events", 0);
1710                return -EINVAL;
1711        }
1712
1713        if (switch_output_setup(rec)) {
1714                parse_options_usage(record_usage, record_options, "switch-output", 0);
1715                return -EINVAL;
1716        }
1717
1718        if (rec->switch_output.time) {
1719                signal(SIGALRM, alarm_sig_handler);
1720                alarm(rec->switch_output.time);
1721        }
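
            /*
             * Time-based flow (summary, see the handlers at the bottom of
             * this file): after switch_output.time seconds SIGALRM fires,
             * alarm_sig_handler() hits switch_output_trigger, and the
             * record loop switches the output file and re-arms the alarm.
             */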
1722
1723        if (!rec->itr) {
1724                rec->itr = auxtrace_record__init(rec->evlist, &err);
1725                if (err)
1726                        goto out;
1727        }
1728
1729        err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1730                                              rec->opts.auxtrace_snapshot_opts);
1731        if (err)
1732                goto out;
1733
1734        /*
1735         * Allow aliases to facilitate the lookup of symbols for address
1736         * filters. Refer to auxtrace_parse_filters().
1737         */
1738        symbol_conf.allow_aliases = true;
1739
1740        symbol__init(NULL);
1741
1742        err = auxtrace_parse_filters(rec->evlist);
1743        if (err)
1744                goto out;
1745
1746        if (dry_run)
1747                goto out;
1748
1749        err = bpf__setup_stdout(rec->evlist);
1750        if (err) {
1751                bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1752                pr_err("ERROR: Setup BPF stdout failed: %s\n",
1753                         errbuf);
1754                goto out;
1755        }
1756
1757        err = -ENOMEM;
1758
1759        if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
1760                pr_warning(
1761"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1762"check /proc/sys/kernel/kptr_restrict.\n\n"
1763"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1764"file is not found in the buildid cache or in the vmlinux path.\n\n"
1765"Samples in kernel modules won't be resolved at all.\n\n"
1766"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1767"even with a suitable vmlinux or kallsyms file.\n\n");
1768
1769        if (rec->no_buildid_cache || rec->no_buildid) {
1770                disable_buildid_cache();
1771        } else if (rec->switch_output.enabled) {
1772                /*
1773                 * In 'perf record --switch-output', disable buildid
1774                 * generation by default to reduce data file switching
1775                 * overhead. Still generate buildids if they are explicitly
1776                 * required, using
1777                 *
1778                 *  perf record --switch-output --no-no-buildid \
1779                 *              --no-no-buildid-cache
1780                 *
1781                 * The following code is equivalent to:
1782                 *
1783                 * if ((rec->no_buildid || !rec->no_buildid_set) &&
1784                 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1785                 *         disable_buildid_cache();
1786                 */
1787                bool disable = true;
1788
1789                if (rec->no_buildid_set && !rec->no_buildid)
1790                        disable = false;
1791                if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1792                        disable = false;
1793                if (disable) {
1794                        rec->no_buildid = true;
1795                        rec->no_buildid_cache = true;
1796                        disable_buildid_cache();
1797                }
1798        }
1799
1800        if (record.opts.overwrite)
1801                record.opts.tail_synthesize = true;
1802
1803        if (rec->evlist->nr_entries == 0 &&
1804            __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
1805                pr_err("Not enough memory for event selector list\n");
1806                goto out;
1807        }
1808
1809        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1810                rec->opts.no_inherit = true;
1811
1812        err = target__validate(&rec->opts.target);
1813        if (err) {
1814                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1815                ui__warning("%s", errbuf);
1816        }
1817
1818        err = target__parse_uid(&rec->opts.target);
1819        if (err) {
1820                int saved_errno = errno;
1821
1822                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1823                ui__error("%s", errbuf);
1824
1825                err = -saved_errno;
1826                goto out;
1827        }
1828
1829        /* Enable ignoring missing threads when the -u or -p option is given. */
1830        rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
1831
1832        err = -ENOMEM;
1833        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1834                usage_with_options(record_usage, record_options);
1835
1836        err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1837        if (err)
1838                goto out;
1839
1840        /*
1841         * We take all buildids when the file contains AUX area tracing
1842         * data, because we do not decode the trace; decoding it here
1843         * would take too long.
1844         */
1845        if (rec->opts.full_auxtrace)
1846                rec->buildid_all = true;
1847
1848        if (record_opts__config(&rec->opts)) {
1849                err = -EINVAL;
1850                goto out;
1851        }
1852
1853        err = __cmd_record(&record, argc, argv);
1854out:
1855        perf_evlist__delete(rec->evlist);
1856        symbol__exit();
1857        auxtrace_record__free(rec->itr);
1858        return err;
1859}
1860
1861static void snapshot_sig_handler(int sig __maybe_unused)
1862{
1863        struct record *rec = &record;
1864
1865        if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1866                trigger_hit(&auxtrace_snapshot_trigger);
1867                auxtrace_record__snapshot_started = 1;
1868                if (auxtrace_record__snapshot_start(record.itr))
1869                        trigger_error(&auxtrace_snapshot_trigger);
1870        }
1871
1872        if (switch_output_signal(rec))
1873                trigger_hit(&switch_output_trigger);
1874}
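
    /*
     * Note: SIGUSR2 does double duty here: one delivery can both start an
     * AUX area snapshot and request an output-file switch, depending on
     * which of the two triggers are ready at the time.
     */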
1875
1876static void alarm_sig_handler(int sig __maybe_unused)
1877{
1878        struct record *rec = &record;
1879
1880        if (switch_output_time(rec))
1881                trigger_hit(&switch_output_trigger);
1882}
1883