linux/tools/perf/builtin-record.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * builtin-record.c
   4 *
   5 * Builtin record command: Record the profile of a workload
   6 * (or a CPU, or a PID) into the perf.data output file - for
   7 * later analysis via perf report.
   8 */
   9#include "builtin.h"
  10
  11#include "perf.h"
  12
  13#include "util/build-id.h"
  14#include "util/util.h"
  15#include <subcmd/parse-options.h>
  16#include "util/parse-events.h"
  17#include "util/config.h"
  18
  19#include "util/callchain.h"
  20#include "util/cgroup.h"
  21#include "util/header.h"
  22#include "util/event.h"
  23#include "util/evlist.h"
  24#include "util/evsel.h"
  25#include "util/debug.h"
  26#include "util/drv_configs.h"
  27#include "util/session.h"
  28#include "util/tool.h"
  29#include "util/symbol.h"
  30#include "util/cpumap.h"
  31#include "util/thread_map.h"
  32#include "util/data.h"
  33#include "util/perf_regs.h"
  34#include "util/auxtrace.h"
  35#include "util/tsc.h"
  36#include "util/parse-branch-options.h"
  37#include "util/parse-regs-options.h"
  38#include "util/llvm-utils.h"
  39#include "util/bpf-loader.h"
  40#include "util/trigger.h"
  41#include "util/perf-hooks.h"
  42#include "util/time-utils.h"
  43#include "util/units.h"
  44#include "asm/bug.h"
  45
  46#include <errno.h>
  47#include <inttypes.h>
  48#include <poll.h>
  49#include <unistd.h>
  50#include <sched.h>
  51#include <signal.h>
  52#include <sys/mman.h>
  53#include <sys/wait.h>
  54#include <asm/bug.h>
  55#include <linux/time64.h>
  56
  57struct switch_output {
  58        bool             enabled;
  59        bool             signal;
  60        unsigned long    size;
  61        unsigned long    time;
  62        const char      *str;
  63        bool             set;
  64};
  65
  66struct record {
  67        struct perf_tool        tool;
  68        struct record_opts      opts;
  69        u64                     bytes_written;
  70        struct perf_data_file   file;
  71        struct auxtrace_record  *itr;
  72        struct perf_evlist      *evlist;
  73        struct perf_session     *session;
  74        const char              *progname;
  75        int                     realtime_prio;
  76        bool                    no_buildid;
  77        bool                    no_buildid_set;
  78        bool                    no_buildid_cache;
  79        bool                    no_buildid_cache_set;
  80        bool                    buildid_all;
  81        bool                    timestamp_filename;
  82        struct switch_output    switch_output;
  83        unsigned long long      samples;
  84};
  85
  86static volatile int auxtrace_record__snapshot_started;
  87static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
  88static DEFINE_TRIGGER(switch_output_trigger);
  89
  90static bool switch_output_signal(struct record *rec)
  91{
  92        return rec->switch_output.signal &&
  93               trigger_is_ready(&switch_output_trigger);
  94}
  95
  96static bool switch_output_size(struct record *rec)
  97{
  98        return rec->switch_output.size &&
  99               trigger_is_ready(&switch_output_trigger) &&
 100               (rec->bytes_written >= rec->switch_output.size);
 101}
 102
 103static bool switch_output_time(struct record *rec)
 104{
 105        return rec->switch_output.time &&
 106               trigger_is_ready(&switch_output_trigger);
 107}
 108
 109static int record__write(struct record *rec, void *bf, size_t size)
 110{
 111        if (perf_data_file__write(rec->session->file, bf, size) < 0) {
 112                pr_err("failed to write perf data, error: %m\n");
 113                return -1;
 114        }
 115
 116        rec->bytes_written += size;
 117
 118        if (switch_output_size(rec))
 119                trigger_hit(&switch_output_trigger);
 120
 121        return 0;
 122}
 123
 124static int process_synthesized_event(struct perf_tool *tool,
 125                                     union perf_event *event,
 126                                     struct perf_sample *sample __maybe_unused,
 127                                     struct machine *machine __maybe_unused)
 128{
 129        struct record *rec = container_of(tool, struct record, tool);
 130        return record__write(rec, event, event->header.size);
 131}
 132
 133static int
 134backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
 135{
 136        struct perf_event_header *pheader;
 137        u64 evt_head = head;
 138        int size = mask + 1;
 139
 140        pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
 141        pheader = (struct perf_event_header *)(buf + (head & mask));
 142        *start = head;
 143        while (true) {
 144                if (evt_head - head >= (unsigned int)size) {
 145                        pr_debug("Finished reading backward ring buffer: rewind\n");
 146                        if (evt_head - head > (unsigned int)size)
 147                                evt_head -= pheader->size;
 148                        *end = evt_head;
 149                        return 0;
 150                }
 151
 152                pheader = (struct perf_event_header *)(buf + (evt_head & mask));
 153
 154                if (pheader->size == 0) {
 155                        pr_debug("Finished reading backward ring buffer: get start\n");
 156                        *end = evt_head;
 157                        return 0;
 158                }
 159
 160                evt_head += pheader->size;
 161                pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
 162        }
 163        WARN_ONCE(1, "Shouldn't get here\n");
 164        return -1;
 165}
 166
 167static int
 168rb_find_range(void *data, int mask, u64 head, u64 old,
 169              u64 *start, u64 *end, bool backward)
 170{
 171        if (!backward) {
 172                *start = old;
 173                *end = head;
 174                return 0;
 175        }
 176
 177        return backward_rb_find_range(data, mask, head, start, end);
 178}
 179
 180static int
 181record__mmap_read(struct record *rec, struct perf_mmap *md,
 182                  bool overwrite, bool backward)
 183{
 184        u64 head = perf_mmap__read_head(md);
 185        u64 old = md->prev;
 186        u64 end = head, start = old;
 187        unsigned char *data = md->base + page_size;
 188        unsigned long size;
 189        void *buf;
 190        int rc = 0;
 191
 192        if (rb_find_range(data, md->mask, head,
 193                          old, &start, &end, backward))
 194                return -1;
 195
 196        if (start == end)
 197                return 0;
 198
 199        rec->samples++;
 200
 201        size = end - start;
 202        if (size > (unsigned long)(md->mask) + 1) {
 203                WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
 204
 205                md->prev = head;
 206                perf_mmap__consume(md, overwrite || backward);
 207                return 0;
 208        }
 209
 210        if ((start & md->mask) + size != (end & md->mask)) {
 211                buf = &data[start & md->mask];
 212                size = md->mask + 1 - (start & md->mask);
 213                start += size;
 214
 215                if (record__write(rec, buf, size) < 0) {
 216                        rc = -1;
 217                        goto out;
 218                }
 219        }
 220
 221        buf = &data[start & md->mask];
 222        size = end - start;
 223        start += size;
 224
 225        if (record__write(rec, buf, size) < 0) {
 226                rc = -1;
 227                goto out;
 228        }
 229
 230        md->prev = head;
 231        perf_mmap__consume(md, overwrite || backward);
 232out:
 233        return rc;
 234}
 235
 236static volatile int done;
 237static volatile int signr = -1;
 238static volatile int child_finished;
 239
 240static void sig_handler(int sig)
 241{
 242        if (sig == SIGCHLD)
 243                child_finished = 1;
 244        else
 245                signr = sig;
 246
 247        done = 1;
 248}
 249
 250static void sigsegv_handler(int sig)
 251{
 252        perf_hooks__recover();
 253        sighandler_dump_stack(sig);
 254}
 255
 256static void record__sig_exit(void)
 257{
 258        if (signr == -1)
 259                return;
 260
 261        signal(signr, SIG_DFL);
 262        raise(signr);
 263}
 264
 265#ifdef HAVE_AUXTRACE_SUPPORT
 266
 267static int record__process_auxtrace(struct perf_tool *tool,
 268                                    union perf_event *event, void *data1,
 269                                    size_t len1, void *data2, size_t len2)
 270{
 271        struct record *rec = container_of(tool, struct record, tool);
 272        struct perf_data_file *file = &rec->file;
 273        size_t padding;
 274        u8 pad[8] = {0};
 275
 276        if (!perf_data_file__is_pipe(file)) {
 277                off_t file_offset;
 278                int fd = perf_data_file__fd(file);
 279                int err;
 280
 281                file_offset = lseek(fd, 0, SEEK_CUR);
 282                if (file_offset == -1)
 283                        return -1;
 284                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
 285                                                     event, file_offset);
 286                if (err)
 287                        return err;
 288        }
 289
 290        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
 291        padding = (len1 + len2) & 7;
 292        if (padding)
 293                padding = 8 - padding;
 294
 295        record__write(rec, event, event->header.size);
 296        record__write(rec, data1, len1);
 297        if (len2)
 298                record__write(rec, data2, len2);
 299        record__write(rec, &pad, padding);
 300
 301        return 0;
 302}
 303
 304static int record__auxtrace_mmap_read(struct record *rec,
 305                                      struct auxtrace_mmap *mm)
 306{
 307        int ret;
 308
 309        ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
 310                                  record__process_auxtrace);
 311        if (ret < 0)
 312                return ret;
 313
 314        if (ret)
 315                rec->samples++;
 316
 317        return 0;
 318}
 319
 320static int record__auxtrace_mmap_read_snapshot(struct record *rec,
 321                                               struct auxtrace_mmap *mm)
 322{
 323        int ret;
 324
 325        ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
 326                                           record__process_auxtrace,
 327                                           rec->opts.auxtrace_snapshot_size);
 328        if (ret < 0)
 329                return ret;
 330
 331        if (ret)
 332                rec->samples++;
 333
 334        return 0;
 335}
 336
 337static int record__auxtrace_read_snapshot_all(struct record *rec)
 338{
 339        int i;
 340        int rc = 0;
 341
 342        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
 343                struct auxtrace_mmap *mm =
 344                                &rec->evlist->mmap[i].auxtrace_mmap;
 345
 346                if (!mm->base)
 347                        continue;
 348
 349                if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
 350                        rc = -1;
 351                        goto out;
 352                }
 353        }
 354out:
 355        return rc;
 356}
 357
 358static void record__read_auxtrace_snapshot(struct record *rec)
 359{
 360        pr_debug("Recording AUX area tracing snapshot\n");
 361        if (record__auxtrace_read_snapshot_all(rec) < 0) {
 362                trigger_error(&auxtrace_snapshot_trigger);
 363        } else {
 364                if (auxtrace_record__snapshot_finish(rec->itr))
 365                        trigger_error(&auxtrace_snapshot_trigger);
 366                else
 367                        trigger_ready(&auxtrace_snapshot_trigger);
 368        }
 369}
 370
 371#else
 372
 373static inline
 374int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
 375                               struct auxtrace_mmap *mm __maybe_unused)
 376{
 377        return 0;
 378}
 379
 380static inline
 381void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
 382{
 383}
 384
 385static inline
 386int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
 387{
 388        return 0;
 389}
 390
 391#endif
 392
 393static int record__mmap_evlist(struct record *rec,
 394                               struct perf_evlist *evlist)
 395{
 396        struct record_opts *opts = &rec->opts;
 397        char msg[512];
 398
 399        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
 400                                 opts->auxtrace_mmap_pages,
 401                                 opts->auxtrace_snapshot_mode) < 0) {
 402                if (errno == EPERM) {
 403                        pr_err("Permission error mapping pages.\n"
 404                               "Consider increasing "
 405                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
 406                               "or try again with a smaller value of -m/--mmap_pages.\n"
 407                               "(current value: %u,%u)\n",
 408                               opts->mmap_pages, opts->auxtrace_mmap_pages);
 409                        return -errno;
 410                } else {
 411                        pr_err("failed to mmap with %d (%s)\n", errno,
 412                                str_error_r(errno, msg, sizeof(msg)));
 413                        if (errno)
 414                                return -errno;
 415                        else
 416                                return -EINVAL;
 417                }
 418        }
 419        return 0;
 420}
 421
 422static int record__mmap(struct record *rec)
 423{
 424        return record__mmap_evlist(rec, rec->evlist);
 425}
 426
 427static int record__open(struct record *rec)
 428{
 429        char msg[BUFSIZ];
 430        struct perf_evsel *pos;
 431        struct perf_evlist *evlist = rec->evlist;
 432        struct perf_session *session = rec->session;
 433        struct record_opts *opts = &rec->opts;
 434        struct perf_evsel_config_term *err_term;
 435        int rc = 0;
 436
 437        perf_evlist__config(evlist, opts, &callchain_param);
 438
 439        evlist__for_each_entry(evlist, pos) {
 440try_again:
 441                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
 442                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
 443                                if (verbose > 0)
 444                                        ui__warning("%s\n", msg);
 445                                goto try_again;
 446                        }
 447
 448                        rc = -errno;
 449                        perf_evsel__open_strerror(pos, &opts->target,
 450                                                  errno, msg, sizeof(msg));
 451                        ui__error("%s\n", msg);
 452                        goto out;
 453                }
 454        }
 455
 456        if (perf_evlist__apply_filters(evlist, &pos)) {
 457                pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
 458                        pos->filter, perf_evsel__name(pos), errno,
 459                        str_error_r(errno, msg, sizeof(msg)));
 460                rc = -1;
 461                goto out;
 462        }
 463
 464        if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
 465                pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
 466                      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
 467                      str_error_r(errno, msg, sizeof(msg)));
 468                rc = -1;
 469                goto out;
 470        }
 471
 472        rc = record__mmap(rec);
 473        if (rc)
 474                goto out;
 475
 476        session->evlist = evlist;
 477        perf_session__set_id_hdr_size(session);
 478out:
 479        return rc;
 480}
 481
 482static int process_sample_event(struct perf_tool *tool,
 483                                union perf_event *event,
 484                                struct perf_sample *sample,
 485                                struct perf_evsel *evsel,
 486                                struct machine *machine)
 487{
 488        struct record *rec = container_of(tool, struct record, tool);
 489
 490        rec->samples++;
 491
 492        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
 493}
 494
 495static int process_buildids(struct record *rec)
 496{
 497        struct perf_data_file *file  = &rec->file;
 498        struct perf_session *session = rec->session;
 499
 500        if (file->size == 0)
 501                return 0;
 502
 503        /*
 504         * During this process, it'll load kernel map and replace the
 505         * dso->long_name to a real pathname it found.  In this case
 506         * we prefer the vmlinux path like
 507         *   /lib/modules/3.16.4/build/vmlinux
 508         *
 509         * rather than build-id path (in debug directory).
 510         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
 511         */
 512        symbol_conf.ignore_vmlinux_buildid = true;
 513
 514        /*
 515         * If --buildid-all is given, it marks all DSO regardless of hits,
 516         * so no need to process samples.
 517         */
 518        if (rec->buildid_all)
 519                rec->tool.sample = NULL;
 520
 521        return perf_session__process_events(session);
 522}
 523
 524static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
 525{
 526        int err;
 527        struct perf_tool *tool = data;
 528        /*
 529         *As for guest kernel when processing subcommand record&report,
 530         *we arrange module mmap prior to guest kernel mmap and trigger
 531         *a preload dso because default guest module symbols are loaded
 532         *from guest kallsyms instead of /lib/modules/XXX/XXX. This
 533         *method is used to avoid symbol missing when the first addr is
 534         *in module instead of in guest kernel.
 535         */
 536        err = perf_event__synthesize_modules(tool, process_synthesized_event,
 537                                             machine);
 538        if (err < 0)
 539                pr_err("Couldn't record guest kernel [%d]'s reference"
 540                       " relocation symbol.\n", machine->pid);
 541
 542        /*
 543         * We use _stext for guest kernel because guest kernel's /proc/kallsyms
 544         * have no _text sometimes.
 545         */
 546        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 547                                                 machine);
 548        if (err < 0)
 549                pr_err("Couldn't record guest kernel [%d]'s reference"
 550                       " relocation symbol.\n", machine->pid);
 551}
 552
 553static struct perf_event_header finished_round_event = {
 554        .size = sizeof(struct perf_event_header),
 555        .type = PERF_RECORD_FINISHED_ROUND,
 556};
 557
 558static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
 559                                    bool backward)
 560{
 561        u64 bytes_written = rec->bytes_written;
 562        int i;
 563        int rc = 0;
 564        struct perf_mmap *maps;
 565
 566        if (!evlist)
 567                return 0;
 568
 569        maps = backward ? evlist->backward_mmap : evlist->mmap;
 570        if (!maps)
 571                return 0;
 572
 573        if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
 574                return 0;
 575
 576        for (i = 0; i < evlist->nr_mmaps; i++) {
 577                struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
 578
 579                if (maps[i].base) {
 580                        if (record__mmap_read(rec, &maps[i],
 581                                              evlist->overwrite, backward) != 0) {
 582                                rc = -1;
 583                                goto out;
 584                        }
 585                }
 586
 587                if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
 588                    record__auxtrace_mmap_read(rec, mm) != 0) {
 589                        rc = -1;
 590                        goto out;
 591                }
 592        }
 593
 594        /*
 595         * Mark the round finished in case we wrote
 596         * at least one event.
 597         */
 598        if (bytes_written != rec->bytes_written)
 599                rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
 600
 601        if (backward)
 602                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
 603out:
 604        return rc;
 605}
 606
 607static int record__mmap_read_all(struct record *rec)
 608{
 609        int err;
 610
 611        err = record__mmap_read_evlist(rec, rec->evlist, false);
 612        if (err)
 613                return err;
 614
 615        return record__mmap_read_evlist(rec, rec->evlist, true);
 616}
 617
 618static void record__init_features(struct record *rec)
 619{
 620        struct perf_session *session = rec->session;
 621        int feat;
 622
 623        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
 624                perf_header__set_feat(&session->header, feat);
 625
 626        if (rec->no_buildid)
 627                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
 628
 629        if (!have_tracepoints(&rec->evlist->entries))
 630                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
 631
 632        if (!rec->opts.branch_stack)
 633                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
 634
 635        if (!rec->opts.full_auxtrace)
 636                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
 637
 638        perf_header__clear_feat(&session->header, HEADER_STAT);
 639}
 640
 641static void
 642record__finish_output(struct record *rec)
 643{
 644        struct perf_data_file *file = &rec->file;
 645        int fd = perf_data_file__fd(file);
 646
 647        if (file->is_pipe)
 648                return;
 649
 650        rec->session->header.data_size += rec->bytes_written;
 651        file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
 652
 653        if (!rec->no_buildid) {
 654                process_buildids(rec);
 655
 656                if (rec->buildid_all)
 657                        dsos__hit_all(rec->session);
 658        }
 659        perf_session__write_header(rec->session, rec->evlist, fd, true);
 660
 661        return;
 662}
 663
 664static int record__synthesize_workload(struct record *rec, bool tail)
 665{
 666        int err;
 667        struct thread_map *thread_map;
 668
 669        if (rec->opts.tail_synthesize != tail)
 670                return 0;
 671
 672        thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
 673        if (thread_map == NULL)
 674                return -1;
 675
 676        err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
 677                                                 process_synthesized_event,
 678                                                 &rec->session->machines.host,
 679                                                 rec->opts.sample_address,
 680                                                 rec->opts.proc_map_timeout);
 681        thread_map__put(thread_map);
 682        return err;
 683}
 684
 685static int record__synthesize(struct record *rec, bool tail);
 686
 687static int
 688record__switch_output(struct record *rec, bool at_exit)
 689{
 690        struct perf_data_file *file = &rec->file;
 691        int fd, err;
 692
 693        /* Same Size:      "2015122520103046"*/
 694        char timestamp[] = "InvalidTimestamp";
 695
 696        record__synthesize(rec, true);
 697        if (target__none(&rec->opts.target))
 698                record__synthesize_workload(rec, true);
 699
 700        rec->samples = 0;
 701        record__finish_output(rec);
 702        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
 703        if (err) {
 704                pr_err("Failed to get current timestamp\n");
 705                return -EINVAL;
 706        }
 707
 708        fd = perf_data_file__switch(file, timestamp,
 709                                    rec->session->header.data_offset,
 710                                    at_exit);
 711        if (fd >= 0 && !at_exit) {
 712                rec->bytes_written = 0;
 713                rec->session->header.data_size = 0;
 714        }
 715
 716        if (!quiet)
 717                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
 718                        file->path, timestamp);
 719
 720        /* Output tracking events */
 721        if (!at_exit) {
 722                record__synthesize(rec, false);
 723
 724                /*
 725                 * In 'perf record --switch-output' without -a,
 726                 * record__synthesize() in record__switch_output() won't
 727                 * generate tracking events because there's no thread_map
 728                 * in evlist. Which causes newly created perf.data doesn't
 729                 * contain map and comm information.
 730                 * Create a fake thread_map and directly call
 731                 * perf_event__synthesize_thread_map() for those events.
 732                 */
 733                if (target__none(&rec->opts.target))
 734                        record__synthesize_workload(rec, false);
 735        }
 736        return fd;
 737}
 738
 739static volatile int workload_exec_errno;
 740
 741/*
 742 * perf_evlist__prepare_workload will send a SIGUSR1
 743 * if the fork fails, since we asked by setting its
 744 * want_signal to true.
 745 */
 746static void workload_exec_failed_signal(int signo __maybe_unused,
 747                                        siginfo_t *info,
 748                                        void *ucontext __maybe_unused)
 749{
 750        workload_exec_errno = info->si_value.sival_int;
 751        done = 1;
 752        child_finished = 1;
 753}
 754
 755static void snapshot_sig_handler(int sig);
 756static void alarm_sig_handler(int sig);
 757
 758int __weak
 759perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
 760                            struct perf_tool *tool __maybe_unused,
 761                            perf_event__handler_t process __maybe_unused,
 762                            struct machine *machine __maybe_unused)
 763{
 764        return 0;
 765}
 766
 767static const struct perf_event_mmap_page *
 768perf_evlist__pick_pc(struct perf_evlist *evlist)
 769{
 770        if (evlist) {
 771                if (evlist->mmap && evlist->mmap[0].base)
 772                        return evlist->mmap[0].base;
 773                if (evlist->backward_mmap && evlist->backward_mmap[0].base)
 774                        return evlist->backward_mmap[0].base;
 775        }
 776        return NULL;
 777}
 778
 779static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
 780{
 781        const struct perf_event_mmap_page *pc;
 782
 783        pc = perf_evlist__pick_pc(rec->evlist);
 784        if (pc)
 785                return pc;
 786        return NULL;
 787}
 788
 789static int record__synthesize(struct record *rec, bool tail)
 790{
 791        struct perf_session *session = rec->session;
 792        struct machine *machine = &session->machines.host;
 793        struct perf_data_file *file = &rec->file;
 794        struct record_opts *opts = &rec->opts;
 795        struct perf_tool *tool = &rec->tool;
 796        int fd = perf_data_file__fd(file);
 797        int err = 0;
 798
 799        if (rec->opts.tail_synthesize != tail)
 800                return 0;
 801
 802        if (file->is_pipe) {
 803                err = perf_event__synthesize_features(
 804                        tool, session, rec->evlist, process_synthesized_event);
 805                if (err < 0) {
 806                        pr_err("Couldn't synthesize features.\n");
 807                        return err;
 808                }
 809
 810                err = perf_event__synthesize_attrs(tool, session,
 811                                                   process_synthesized_event);
 812                if (err < 0) {
 813                        pr_err("Couldn't synthesize attrs.\n");
 814                        goto out;
 815                }
 816
 817                if (have_tracepoints(&rec->evlist->entries)) {
 818                        /*
 819                         * FIXME err <= 0 here actually means that
 820                         * there were no tracepoints so its not really
 821                         * an error, just that we don't need to
 822                         * synthesize anything.  We really have to
 823                         * return this more properly and also
 824                         * propagate errors that now are calling die()
 825                         */
 826                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
 827                                                                  process_synthesized_event);
 828                        if (err <= 0) {
 829                                pr_err("Couldn't record tracing data.\n");
 830                                goto out;
 831                        }
 832                        rec->bytes_written += err;
 833                }
 834        }
 835
 836        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
 837                                          process_synthesized_event, machine);
 838        if (err)
 839                goto out;
 840
 841        if (rec->opts.full_auxtrace) {
 842                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
 843                                        session, process_synthesized_event);
 844                if (err)
 845                        goto out;
 846        }
 847
 848        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 849                                                 machine);
 850        WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
 851                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
 852                           "Check /proc/kallsyms permission or run as root.\n");
 853
 854        err = perf_event__synthesize_modules(tool, process_synthesized_event,
 855                                             machine);
 856        WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
 857                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
 858                           "Check /proc/modules permission or run as root.\n");
 859
 860        if (perf_guest) {
 861                machines__process_guests(&session->machines,
 862                                         perf_event__synthesize_guest_os, tool);
 863        }
 864
 865        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
 866                                            process_synthesized_event, opts->sample_address,
 867                                            opts->proc_map_timeout);
 868out:
 869        return err;
 870}
 871
 872static int __cmd_record(struct record *rec, int argc, const char **argv)
 873{
 874        int err;
 875        int status = 0;
 876        unsigned long waking = 0;
 877        const bool forks = argc > 0;
 878        struct machine *machine;
 879        struct perf_tool *tool = &rec->tool;
 880        struct record_opts *opts = &rec->opts;
 881        struct perf_data_file *file = &rec->file;
 882        struct perf_session *session;
 883        bool disabled = false, draining = false;
 884        int fd;
 885
 886        rec->progname = argv[0];
 887
 888        atexit(record__sig_exit);
 889        signal(SIGCHLD, sig_handler);
 890        signal(SIGINT, sig_handler);
 891        signal(SIGTERM, sig_handler);
 892        signal(SIGSEGV, sigsegv_handler);
 893
 894        if (rec->opts.record_namespaces)
 895                tool->namespace_events = true;
 896
 897        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
 898                signal(SIGUSR2, snapshot_sig_handler);
 899                if (rec->opts.auxtrace_snapshot_mode)
 900                        trigger_on(&auxtrace_snapshot_trigger);
 901                if (rec->switch_output.enabled)
 902                        trigger_on(&switch_output_trigger);
 903        } else {
 904                signal(SIGUSR2, SIG_IGN);
 905        }
 906
 907        session = perf_session__new(file, false, tool);
 908        if (session == NULL) {
 909                pr_err("Perf session creation failed.\n");
 910                return -1;
 911        }
 912
 913        fd = perf_data_file__fd(file);
 914        rec->session = session;
 915
 916        record__init_features(rec);
 917
 918        if (forks) {
 919                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
 920                                                    argv, file->is_pipe,
 921                                                    workload_exec_failed_signal);
 922                if (err < 0) {
 923                        pr_err("Couldn't run the workload!\n");
 924                        status = err;
 925                        goto out_delete_session;
 926                }
 927        }
 928
 929        if (record__open(rec) != 0) {
 930                err = -1;
 931                goto out_child;
 932        }
 933
 934        err = bpf__apply_obj_config();
 935        if (err) {
 936                char errbuf[BUFSIZ];
 937
 938                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
 939                pr_err("ERROR: Apply config to BPF failed: %s\n",
 940                         errbuf);
 941                goto out_child;
 942        }
 943
 944        /*
 945         * Normally perf_session__new would do this, but it doesn't have the
 946         * evlist.
 947         */
 948        if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
 949                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
 950                rec->tool.ordered_events = false;
 951        }
 952
 953        if (!rec->evlist->nr_groups)
 954                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
 955
 956        if (file->is_pipe) {
 957                err = perf_header__write_pipe(fd);
 958                if (err < 0)
 959                        goto out_child;
 960        } else {
 961                err = perf_session__write_header(session, rec->evlist, fd, false);
 962                if (err < 0)
 963                        goto out_child;
 964        }
 965
 966        if (!rec->no_buildid
 967            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
 968                pr_err("Couldn't generate buildids. "
 969                       "Use --no-buildid to profile anyway.\n");
 970                err = -1;
 971                goto out_child;
 972        }
 973
 974        machine = &session->machines.host;
 975
 976        err = record__synthesize(rec, false);
 977        if (err < 0)
 978                goto out_child;
 979
 980        if (rec->realtime_prio) {
 981                struct sched_param param;
 982
 983                param.sched_priority = rec->realtime_prio;
 984                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
 985                        pr_err("Could not set realtime priority.\n");
 986                        err = -1;
 987                        goto out_child;
 988                }
 989        }
 990
 991        /*
 992         * When perf is starting the traced process, all the events
 993         * (apart from group members) have enable_on_exec=1 set,
 994         * so don't spoil it by prematurely enabling them.
 995         */
 996        if (!target__none(&opts->target) && !opts->initial_delay)
 997                perf_evlist__enable(rec->evlist);
 998
 999        /*
1000         * Let the child rip
1001         */
1002        if (forks) {
1003                union perf_event *event;
1004                pid_t tgid;
1005
1006                event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1007                if (event == NULL) {
1008                        err = -ENOMEM;
1009                        goto out_child;
1010                }
1011
1012                /*
1013                 * Some H/W events are generated before COMM event
1014                 * which is emitted during exec(), so perf script
1015                 * cannot see a correct process name for those events.
1016                 * Synthesize COMM event to prevent it.
1017                 */
1018                tgid = perf_event__synthesize_comm(tool, event,
1019                                                   rec->evlist->workload.pid,
1020                                                   process_synthesized_event,
1021                                                   machine);
1022                free(event);
1023
1024                if (tgid == -1)
1025                        goto out_child;
1026
1027                event = malloc(sizeof(event->namespaces) +
1028                               (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1029                               machine->id_hdr_size);
1030                if (event == NULL) {
1031                        err = -ENOMEM;
1032                        goto out_child;
1033                }
1034
1035                /*
1036                 * Synthesize NAMESPACES event for the command specified.
1037                 */
1038                perf_event__synthesize_namespaces(tool, event,
1039                                                  rec->evlist->workload.pid,
1040                                                  tgid, process_synthesized_event,
1041                                                  machine);
1042                free(event);
1043
1044                perf_evlist__start_workload(rec->evlist);
1045        }
1046
1047        if (opts->initial_delay) {
1048                usleep(opts->initial_delay * USEC_PER_MSEC);
1049                perf_evlist__enable(rec->evlist);
1050        }
1051
1052        trigger_ready(&auxtrace_snapshot_trigger);
1053        trigger_ready(&switch_output_trigger);
1054        perf_hooks__invoke_record_start();
1055        for (;;) {
1056                unsigned long long hits = rec->samples;
1057
1058                /*
1059                 * rec->evlist->bkw_mmap_state is possible to be
1060                 * BKW_MMAP_EMPTY here: when done == true and
1061                 * hits != rec->samples in previous round.
1062                 *
1063                 * perf_evlist__toggle_bkw_mmap ensure we never
1064                 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1065                 */
1066                if (trigger_is_hit(&switch_output_trigger) || done || draining)
1067                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1068
1069                if (record__mmap_read_all(rec) < 0) {
1070                        trigger_error(&auxtrace_snapshot_trigger);
1071                        trigger_error(&switch_output_trigger);
1072                        err = -1;
1073                        goto out_child;
1074                }
1075
1076                if (auxtrace_record__snapshot_started) {
1077                        auxtrace_record__snapshot_started = 0;
1078                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
1079                                record__read_auxtrace_snapshot(rec);
1080                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1081                                pr_err("AUX area tracing snapshot failed\n");
1082                                err = -1;
1083                                goto out_child;
1084                        }
1085                }
1086
1087                if (trigger_is_hit(&switch_output_trigger)) {
1088                        /*
1089                         * If switch_output_trigger is hit, the data in
1090                         * overwritable ring buffer should have been collected,
1091                         * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1092                         *
1093                         * If SIGUSR2 raise after or during record__mmap_read_all(),
1094                         * record__mmap_read_all() didn't collect data from
1095                         * overwritable ring buffer. Read again.
1096                         */
1097                        if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1098                                continue;
1099                        trigger_ready(&switch_output_trigger);
1100
1101                        /*
1102                         * Reenable events in overwrite ring buffer after
1103                         * record__mmap_read_all(): we should have collected
1104                         * data from it.
1105                         */
1106                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1107
1108                        if (!quiet)
1109                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1110                                        waking);
1111                        waking = 0;
1112                        fd = record__switch_output(rec, false);
1113                        if (fd < 0) {
1114                                pr_err("Failed to switch to new file\n");
1115                                trigger_error(&switch_output_trigger);
1116                                err = fd;
1117                                goto out_child;
1118                        }
1119
1120                        /* re-arm the alarm */
1121                        if (rec->switch_output.time)
1122                                alarm(rec->switch_output.time);
1123                }
1124
1125                if (hits == rec->samples) {
1126                        if (done || draining)
1127                                break;
1128                        err = perf_evlist__poll(rec->evlist, -1);
1129                        /*
1130                         * Propagate error, only if there's any. Ignore positive
1131                         * number of returned events and interrupt error.
1132                         */
1133                        if (err > 0 || (err < 0 && errno == EINTR))
1134                                err = 0;
1135                        waking++;
1136
1137                        if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1138                                draining = true;
1139                }
1140
1141                /*
1142                 * When perf is starting the traced process, at the end events
1143                 * die with the process and we wait for that. Thus no need to
1144                 * disable events in this case.
1145                 */
1146                if (done && !disabled && !target__none(&opts->target)) {
1147                        trigger_off(&auxtrace_snapshot_trigger);
1148                        perf_evlist__disable(rec->evlist);
1149                        disabled = true;
1150                }
1151        }
1152        trigger_off(&auxtrace_snapshot_trigger);
1153        trigger_off(&switch_output_trigger);
1154
1155        if (forks && workload_exec_errno) {
1156                char msg[STRERR_BUFSIZE];
1157                const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1158                pr_err("Workload failed: %s\n", emsg);
1159                err = -1;
1160                goto out_child;
1161        }
1162
1163        if (!quiet)
1164                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1165
1166        if (target__none(&rec->opts.target))
1167                record__synthesize_workload(rec, true);
1168
1169out_child:
1170        if (forks) {
1171                int exit_status;
1172
1173                if (!child_finished)
1174                        kill(rec->evlist->workload.pid, SIGTERM);
1175
1176                wait(&exit_status);
1177
1178                if (err < 0)
1179                        status = err;
1180                else if (WIFEXITED(exit_status))
1181                        status = WEXITSTATUS(exit_status);
1182                else if (WIFSIGNALED(exit_status))
1183                        signr = WTERMSIG(exit_status);
1184        } else
1185                status = err;
1186
1187        record__synthesize(rec, true);
1188        /* this will be recalculated during process_buildids() */
1189        rec->samples = 0;
1190
1191        if (!err) {
1192                if (!rec->timestamp_filename) {
1193                        record__finish_output(rec);
1194                } else {
1195                        fd = record__switch_output(rec, true);
1196                        if (fd < 0) {
1197                                status = fd;
1198                                goto out_delete_session;
1199                        }
1200                }
1201        }
1202
1203        perf_hooks__invoke_record_end();
1204
1205        if (!err && !quiet) {
1206                char samples[128];
1207                const char *postfix = rec->timestamp_filename ?
1208                                        ".<timestamp>" : "";
1209
1210                if (rec->samples && !rec->opts.full_auxtrace)
1211                        scnprintf(samples, sizeof(samples),
1212                                  " (%" PRIu64 " samples)", rec->samples);
1213                else
1214                        samples[0] = '\0';
1215
1216                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1217                        perf_data_file__size(file) / 1024.0 / 1024.0,
1218                        file->path, postfix, samples);
1219        }
1220
1221out_delete_session:
1222        perf_session__delete(session);
1223        return status;
1224}
1225
1226static void callchain_debug(struct callchain_param *callchain)
1227{
1228        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1229
1230        pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1231
1232        if (callchain->record_mode == CALLCHAIN_DWARF)
1233                pr_debug("callchain: stack dump size %d\n",
1234                         callchain->dump_size);
1235}
1236
1237int record_opts__parse_callchain(struct record_opts *record,
1238                                 struct callchain_param *callchain,
1239                                 const char *arg, bool unset)
1240{
1241        int ret;
1242        callchain->enabled = !unset;
1243
1244        /* --no-call-graph */
1245        if (unset) {
1246                callchain->record_mode = CALLCHAIN_NONE;
1247                pr_debug("callchain: disabled\n");
1248                return 0;
1249        }
1250
1251        ret = parse_callchain_record_opt(arg, callchain);
1252        if (!ret) {
1253                /* Enable data address sampling for DWARF unwind. */
1254                if (callchain->record_mode == CALLCHAIN_DWARF)
1255                        record->sample_address = true;
1256                callchain_debug(callchain);
1257        }
1258
1259        return ret;
1260}
1261
1262int record_parse_callchain_opt(const struct option *opt,
1263                               const char *arg,
1264                               int unset)
1265{
1266        return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1267}
1268
1269int record_callchain_opt(const struct option *opt,
1270                         const char *arg __maybe_unused,
1271                         int unset __maybe_unused)
1272{
1273        struct callchain_param *callchain = opt->value;
1274
1275        callchain->enabled = true;
1276
1277        if (callchain->record_mode == CALLCHAIN_NONE)
1278                callchain->record_mode = CALLCHAIN_FP;
1279
1280        callchain_debug(callchain);
1281        return 0;
1282}
1283
1284static int perf_record_config(const char *var, const char *value, void *cb)
1285{
1286        struct record *rec = cb;
1287
1288        if (!strcmp(var, "record.build-id")) {
1289                if (!strcmp(value, "cache"))
1290                        rec->no_buildid_cache = false;
1291                else if (!strcmp(value, "no-cache"))
1292                        rec->no_buildid_cache = true;
1293                else if (!strcmp(value, "skip"))
1294                        rec->no_buildid = true;
1295                else
1296                        return -1;
1297                return 0;
1298        }
1299        if (!strcmp(var, "record.call-graph"))
1300                var = "call-graph.record-mode"; /* fall-through */
1301
1302        return perf_default_config(var, value, cb);
1303}
1304
/* One entry of the clock name -> clock id lookup table. */
struct clockid_map {
        const char *name;
        int clockid;
};

/* Build a table entry mapping the string @label to the clock id @id. */
#define CLOCKID_MAP(label, id)  \
        { .name = label, .clockid = (id), }

/* Table terminator: an entry whose name is NULL. */
#define CLOCKID_END     { .name = NULL, }

/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif

#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif

#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

/* Clock names understood by parse_clockid(), ended by CLOCKID_END. */
static const struct clockid_map clockids[] = {
        /* available for all events, NMI safe */
        CLOCKID_MAP("monotonic",        CLOCK_MONOTONIC),
        CLOCKID_MAP("monotonic_raw",    CLOCK_MONOTONIC_RAW),

        /* available for some events */
        CLOCKID_MAP("realtime",         CLOCK_REALTIME),
        CLOCKID_MAP("boottime",         CLOCK_BOOTTIME),
        CLOCKID_MAP("tai",              CLOCK_TAI),

        /* available for the lazy */
        CLOCKID_MAP("mono",             CLOCK_MONOTONIC),
        CLOCKID_MAP("raw",              CLOCK_MONOTONIC_RAW),
        CLOCKID_MAP("real",             CLOCK_REALTIME),
        CLOCKID_MAP("boot",             CLOCK_BOOTTIME),

        CLOCKID_END,
};
1347
1348static int parse_clockid(const struct option *opt, const char *str, int unset)
1349{
1350        struct record_opts *opts = (struct record_opts *)opt->value;
1351        const struct clockid_map *cm;
1352        const char *ostr = str;
1353
1354        if (unset) {
1355                opts->use_clockid = 0;
1356                return 0;
1357        }
1358
1359        /* no arg passed */
1360        if (!str)
1361                return 0;
1362
1363        /* no setting it twice */
1364        if (opts->use_clockid)
1365                return -1;
1366
1367        opts->use_clockid = true;
1368
1369        /* if its a number, we're done */
1370        if (sscanf(str, "%d", &opts->clockid) == 1)
1371                return 0;
1372
1373        /* allow a "CLOCK_" prefix to the name */
1374        if (!strncasecmp(str, "CLOCK_", 6))
1375                str += 6;
1376
1377        for (cm = clockids; cm->name; cm++) {
1378                if (!strcasecmp(str, cm->name)) {
1379                        opts->clockid = cm->clockid;
1380                        return 0;
1381                }
1382        }
1383
1384        opts->use_clockid = false;
1385        ui__warning("unknown clockid %s, check man page\n", ostr);
1386        return -1;
1387}
1388
1389static int record__parse_mmap_pages(const struct option *opt,
1390                                    const char *str,
1391                                    int unset __maybe_unused)
1392{
1393        struct record_opts *opts = opt->value;
1394        char *s, *p;
1395        unsigned int mmap_pages;
1396        int ret;
1397
1398        if (!str)
1399                return -EINVAL;
1400
1401        s = strdup(str);
1402        if (!s)
1403                return -ENOMEM;
1404
1405        p = strchr(s, ',');
1406        if (p)
1407                *p = '\0';
1408
1409        if (*s) {
1410                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1411                if (ret)
1412                        goto out_free;
1413                opts->mmap_pages = mmap_pages;
1414        }
1415
1416        if (!p) {
1417                ret = 0;
1418                goto out_free;
1419        }
1420
1421        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1422        if (ret)
1423                goto out_free;
1424
1425        opts->auxtrace_mmap_pages = mmap_pages;
1426
1427out_free:
1428        free(s);
1429        return ret;
1430}
1431
1432static void switch_output_size_warn(struct record *rec)
1433{
1434        u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1435        struct switch_output *s = &rec->switch_output;
1436
1437        wakeup_size /= 2;
1438
1439        if (s->size < wakeup_size) {
1440                char buf[100];
1441
1442                unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1443                pr_warning("WARNING: switch-output data size lower than "
1444                           "wakeup kernel buffer size (%s) "
1445                           "expect bigger perf.data sizes\n", buf);
1446        }
1447}
1448
1449static int switch_output_setup(struct record *rec)
1450{
1451        struct switch_output *s = &rec->switch_output;
1452        static struct parse_tag tags_size[] = {
1453                { .tag  = 'B', .mult = 1       },
1454                { .tag  = 'K', .mult = 1 << 10 },
1455                { .tag  = 'M', .mult = 1 << 20 },
1456                { .tag  = 'G', .mult = 1 << 30 },
1457                { .tag  = 0 },
1458        };
1459        static struct parse_tag tags_time[] = {
1460                { .tag  = 's', .mult = 1        },
1461                { .tag  = 'm', .mult = 60       },
1462                { .tag  = 'h', .mult = 60*60    },
1463                { .tag  = 'd', .mult = 60*60*24 },
1464                { .tag  = 0 },
1465        };
1466        unsigned long val;
1467
1468        if (!s->set)
1469                return 0;
1470
1471        if (!strcmp(s->str, "signal")) {
1472                s->signal = true;
1473                pr_debug("switch-output with SIGUSR2 signal\n");
1474                goto enabled;
1475        }
1476
1477        val = parse_tag_value(s->str, tags_size);
1478        if (val != (unsigned long) -1) {
1479                s->size = val;
1480                pr_debug("switch-output with %s size threshold\n", s->str);
1481                goto enabled;
1482        }
1483
1484        val = parse_tag_value(s->str, tags_time);
1485        if (val != (unsigned long) -1) {
1486                s->time = val;
1487                pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1488                         s->str, s->time);
1489                goto enabled;
1490        }
1491
1492        return -1;
1493
1494enabled:
1495        rec->timestamp_filename = true;
1496        s->enabled              = true;
1497
1498        if (s->size && !rec->opts.no_buffering)
1499                switch_output_size_warn(rec);
1500
1501        return 0;
1502}
1503
/* Usage synopsis strings for 'perf record'; the list is NULL-terminated. */
static const char * const __record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL,
};

/* Non-static pointer to the first usage string, for use outside this file. */
const char * const *record_usage = &__record_usage[0];
1510
/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
1521static struct record record = {
1522        .opts = {
1523                .sample_time         = true,
1524                .mmap_pages          = UINT_MAX,
1525                .user_freq           = UINT_MAX,
1526                .user_interval       = ULLONG_MAX,
1527                .freq                = 4000,
1528                .target              = {
1529                        .uses_mmap   = true,
1530                        .default_per_cpu = true,
1531                },
1532                .proc_map_timeout     = 500,
1533        },
1534        .tool = {
1535                .sample         = process_sample_event,
1536                .fork           = perf_event__process_fork,
1537                .exit           = perf_event__process_exit,
1538                .comm           = perf_event__process_comm,
1539                .namespaces     = perf_event__process_namespaces,
1540                .mmap           = perf_event__process_mmap,
1541                .mmap2          = perf_event__process_mmap2,
1542                .ordered_events = true,
1543        },
1544};
1545
1546const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1547        "\n\t\t\t\tDefault: fp";
1548
1549static bool dry_run;
1550
1551/*
1552 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1553 * with it and switch to use the library functions in perf_evlist that came
1554 * from builtin-record.c, i.e. use record_opts,
1555 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1556 * using pipes, etc.
1557 */
1558static struct option __record_options[] = {
1559        OPT_CALLBACK('e', "event", &record.evlist, "event",
1560                     "event selector. use 'perf list' to list available events",
1561                     parse_events_option),
1562        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1563                     "event filter", parse_filter),
1564        OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1565                           NULL, "don't record events from perf itself",
1566                           exclude_perf),
1567        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1568                    "record events on existing process id"),
1569        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1570                    "record events on existing thread id"),
1571        OPT_INTEGER('r', "realtime", &record.realtime_prio,
1572                    "collect data with this RT SCHED_FIFO priority"),
1573        OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1574                    "collect data without buffering"),
1575        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1576                    "collect raw sample records from all opened counters"),
1577        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1578                            "system-wide collection from all CPUs"),
1579        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1580                    "list of cpus to monitor"),
1581        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1582        OPT_STRING('o', "output", &record.file.path, "file",
1583                    "output file name"),
1584        OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1585                        &record.opts.no_inherit_set,
1586                        "child tasks do not inherit counters"),
1587        OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1588                    "synthesize non-sample events at the end of output"),
1589        OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1590        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
1591        OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1592                     "number of mmap data pages and AUX area tracing mmap pages",
1593                     record__parse_mmap_pages),
1594        OPT_BOOLEAN(0, "group", &record.opts.group,
1595                    "put the counters into a counter group"),
1596        OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1597                           NULL, "enables call-graph recording" ,
1598                           &record_callchain_opt),
1599        OPT_CALLBACK(0, "call-graph", &record.opts,
1600                     "record_mode[,record_size]", record_callchain_help,
1601                     &record_parse_callchain_opt),
1602        OPT_INCR('v', "verbose", &verbose,
1603                    "be more verbose (show counter open errors, etc)"),
1604        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1605        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1606                    "per thread counts"),
1607        OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1608        OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1609                    "Record the sample physical addresses"),
1610        OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1611        OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1612                        &record.opts.sample_time_set,
1613                        "Record the sample timestamps"),
1614        OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
1615        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1616                    "don't sample"),
1617        OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1618                        &record.no_buildid_cache_set,
1619                        "do not update the buildid cache"),
1620        OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1621                        &record.no_buildid_set,
1622                        "do not collect buildids in perf.data"),
1623        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1624                     "monitor event in cgroup name only",
1625                     parse_cgroups),
1626        OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1627                  "ms to wait before starting measurement after program start"),
1628        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1629                   "user to profile"),
1630
1631        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1632                     "branch any", "sample any taken branches",
1633                     parse_branch_stack),
1634
1635        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1636                     "branch filter mask", "branch stack filter modes",
1637                     parse_branch_stack),
1638        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1639                    "sample by weight (on special events only)"),
1640        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1641                    "sample transaction flags (special events only)"),
1642        OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1643                    "use per-thread mmaps"),
1644        OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1645                    "sample selected machine registers on interrupt,"
1646                    " use -I ? to list register names", parse_regs),
1647        OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1648                    "Record running/enabled time of read (:S) events"),
1649        OPT_CALLBACK('k', "clockid", &record.opts,
1650        "clockid", "clockid to use for events, see clock_gettime()",
1651        parse_clockid),
1652        OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1653                          "opts", "AUX area tracing Snapshot Mode", ""),
1654        OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
1655                        "per thread proc mmap processing timeout in ms"),
1656        OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1657                    "Record namespaces events"),
1658        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1659                    "Record context switch events"),
1660        OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1661                         "Configure all used events to run in kernel space.",
1662                         PARSE_OPT_EXCLUSIVE),
1663        OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1664                         "Configure all used events to run in user space.",
1665                         PARSE_OPT_EXCLUSIVE),
1666        OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1667                   "clang binary to use for compiling BPF scriptlets"),
1668        OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1669                   "options passed to clang when compiling BPF scriptlets"),
1670        OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1671                   "file", "vmlinux pathname"),
1672        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1673                    "Record build-id of all DSOs regardless of hits"),
1674        OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1675                    "append timestamp to output filename"),
1676        OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
1677                          &record.switch_output.set, "signal,size,time",
1678                          "Switch output when receive SIGUSR2 or cross size,time threshold",
1679                          "signal"),
1680        OPT_BOOLEAN(0, "dry-run", &dry_run,
1681                    "Parse options then exit"),
1682        OPT_END()
1683};
1684
1685struct option *record_options = __record_options;
1686
1687int cmd_record(int argc, const char **argv)
1688{
1689        int err;
1690        struct record *rec = &record;
1691        char errbuf[BUFSIZ];
1692
1693#ifndef HAVE_LIBBPF_SUPPORT
1694# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
1695        set_nobuild('\0', "clang-path", true);
1696        set_nobuild('\0', "clang-opt", true);
1697# undef set_nobuild
1698#endif
1699
1700#ifndef HAVE_BPF_PROLOGUE
1701# if !defined (HAVE_DWARF_SUPPORT)
1702#  define REASON  "NO_DWARF=1"
1703# elif !defined (HAVE_LIBBPF_SUPPORT)
1704#  define REASON  "NO_LIBBPF=1"
1705# else
1706#  define REASON  "this architecture doesn't support BPF prologue"
1707# endif
1708# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
1709        set_nobuild('\0', "vmlinux", true);
1710# undef set_nobuild
1711# undef REASON
1712#endif
1713
1714        rec->evlist = perf_evlist__new();
1715        if (rec->evlist == NULL)
1716                return -ENOMEM;
1717
1718        err = perf_config(perf_record_config, rec);
1719        if (err)
1720                return err;
1721
1722        argc = parse_options(argc, argv, record_options, record_usage,
1723                            PARSE_OPT_STOP_AT_NON_OPTION);
1724        if (quiet)
1725                perf_quiet_option();
1726
1727        /* Make system wide (-a) the default target. */
1728        if (!argc && target__none(&rec->opts.target))
1729                rec->opts.target.system_wide = true;
1730
1731        if (nr_cgroups && !rec->opts.target.system_wide) {
1732                usage_with_options_msg(record_usage, record_options,
1733                        "cgroup monitoring only available in system-wide mode");
1734
1735        }
1736        if (rec->opts.record_switch_events &&
1737            !perf_can_record_switch_events()) {
1738                ui__error("kernel does not support recording context switch events\n");
1739                parse_options_usage(record_usage, record_options, "switch-events", 0);
1740                return -EINVAL;
1741        }
1742
1743        if (switch_output_setup(rec)) {
1744                parse_options_usage(record_usage, record_options, "switch-output", 0);
1745                return -EINVAL;
1746        }
1747
1748        if (rec->switch_output.time) {
1749                signal(SIGALRM, alarm_sig_handler);
1750                alarm(rec->switch_output.time);
1751        }
1752
1753        if (!rec->itr) {
1754                rec->itr = auxtrace_record__init(rec->evlist, &err);
1755                if (err)
1756                        goto out;
1757        }
1758
1759        err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
1760                                              rec->opts.auxtrace_snapshot_opts);
1761        if (err)
1762                goto out;
1763
1764        /*
1765         * Allow aliases to facilitate the lookup of symbols for address
1766         * filters. Refer to auxtrace_parse_filters().
1767         */
1768        symbol_conf.allow_aliases = true;
1769
1770        symbol__init(NULL);
1771
1772        err = auxtrace_parse_filters(rec->evlist);
1773        if (err)
1774                goto out;
1775
1776        if (dry_run)
1777                goto out;
1778
1779        err = bpf__setup_stdout(rec->evlist);
1780        if (err) {
1781                bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
1782                pr_err("ERROR: Setup BPF stdout failed: %s\n",
1783                         errbuf);
1784                goto out;
1785        }
1786
1787        err = -ENOMEM;
1788
1789        if (symbol_conf.kptr_restrict)
1790                pr_warning(
1791"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1792"check /proc/sys/kernel/kptr_restrict.\n\n"
1793"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1794"file is not found in the buildid cache or in the vmlinux path.\n\n"
1795"Samples in kernel modules won't be resolved at all.\n\n"
1796"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1797"even with a suitable vmlinux or kallsyms file.\n\n");
1798
1799        if (rec->no_buildid_cache || rec->no_buildid) {
1800                disable_buildid_cache();
1801        } else if (rec->switch_output.enabled) {
1802                /*
1803                 * In 'perf record --switch-output', disable buildid
1804                 * generation by default to reduce data file switching
1805                 * overhead. Still generate buildid if they are required
1806                 * explicitly using
1807                 *
1808                 *  perf record --switch-output --no-no-buildid \
1809                 *              --no-no-buildid-cache
1810                 *
1811                 * Following code equals to:
1812                 *
1813                 * if ((rec->no_buildid || !rec->no_buildid_set) &&
1814                 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
1815                 *         disable_buildid_cache();
1816                 */
1817                bool disable = true;
1818
1819                if (rec->no_buildid_set && !rec->no_buildid)
1820                        disable = false;
1821                if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
1822                        disable = false;
1823                if (disable) {
1824                        rec->no_buildid = true;
1825                        rec->no_buildid_cache = true;
1826                        disable_buildid_cache();
1827                }
1828        }
1829
1830        if (record.opts.overwrite)
1831                record.opts.tail_synthesize = true;
1832
1833        if (rec->evlist->nr_entries == 0 &&
1834            __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
1835                pr_err("Not enough memory for event selector list\n");
1836                goto out;
1837        }
1838
1839        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
1840                rec->opts.no_inherit = true;
1841
1842        err = target__validate(&rec->opts.target);
1843        if (err) {
1844                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1845                ui__warning("%s", errbuf);
1846        }
1847
1848        err = target__parse_uid(&rec->opts.target);
1849        if (err) {
1850                int saved_errno = errno;
1851
1852                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1853                ui__error("%s", errbuf);
1854
1855                err = -saved_errno;
1856                goto out;
1857        }
1858
1859        /* Enable ignoring missing threads when -u option is defined. */
1860        rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX;
1861
1862        err = -ENOMEM;
1863        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1864                usage_with_options(record_usage, record_options);
1865
1866        err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
1867        if (err)
1868                goto out;
1869
1870        /*
1871         * We take all buildids when the file contains
1872         * AUX area tracing data because we do not decode the
1873         * trace because it would take too long.
1874         */
1875        if (rec->opts.full_auxtrace)
1876                rec->buildid_all = true;
1877
1878        if (record_opts__config(&rec->opts)) {
1879                err = -EINVAL;
1880                goto out;
1881        }
1882
1883        err = __cmd_record(&record, argc, argv);
1884out:
1885        perf_evlist__delete(rec->evlist);
1886        symbol__exit();
1887        auxtrace_record__free(rec->itr);
1888        return err;
1889}
1890
1891static void snapshot_sig_handler(int sig __maybe_unused)
1892{
1893        struct record *rec = &record;
1894
1895        if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
1896                trigger_hit(&auxtrace_snapshot_trigger);
1897                auxtrace_record__snapshot_started = 1;
1898                if (auxtrace_record__snapshot_start(record.itr))
1899                        trigger_error(&auxtrace_snapshot_trigger);
1900        }
1901
1902        if (switch_output_signal(rec))
1903                trigger_hit(&switch_output_trigger);
1904}
1905
1906static void alarm_sig_handler(int sig __maybe_unused)
1907{
1908        struct record *rec = &record;
1909
1910        if (switch_output_time(rec))
1911                trigger_hit(&switch_output_trigger);
1912}
1913