linux/tools/perf/builtin-record.c
// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/cpu-set-sched.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <linux/time64.h>

struct switch_output {
        bool             enabled;
        bool             signal;
        unsigned long    size;
        unsigned long    time;
        const char      *str;
        bool             set;
        char             **filenames;
        int              num_files;
        int              cur_file;
};

struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        u64                     bytes_written;
        struct perf_data        data;
        struct auxtrace_record  *itr;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        int                     realtime_prio;
        bool                    no_buildid;
        bool                    no_buildid_set;
        bool                    no_buildid_cache;
        bool                    no_buildid_cache_set;
        bool                    buildid_all;
        bool                    timestamp_filename;
        bool                    timestamp_boundary;
        struct switch_output    switch_output;
        unsigned long long      samples;
        cpu_set_t               affinity_mask;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static const char *affinity_tags[PERF_AFFINITY_MAX] = {
        "SYS", "NODE", "CPU"
};

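/*
 * Helpers deciding when to rotate the output file: --switch-output can be
 * driven by SIGUSR2, by output size, or by a timer.
 */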
static bool switch_output_signal(struct record *rec)
{
        return rec->switch_output.signal &&
               trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
        return rec->switch_output.size &&
               trigger_is_ready(&switch_output_trigger) &&
               (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
        return rec->switch_output.time &&
               trigger_is_ready(&switch_output_trigger);
}

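/*
 * Synchronous write path: append the buffer to the perf.data file and account
 * the bytes so the size-based switch-output trigger can fire.
 */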
static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
                         void *bf, size_t size)
{
        struct perf_data_file *file = &rec->session->data->file;

        if (perf_data_file__write(file, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        rec->bytes_written += size;

        if (switch_output_size(rec))
                trigger_hit(&switch_output_trigger);

        return 0;
}

#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
                void *buf, size_t size, off_t off)
{
        int rc;

        cblock->aio_fildes = trace_fd;
        cblock->aio_buf    = buf;
        cblock->aio_nbytes = size;
        cblock->aio_offset = off;
        cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

        do {
                rc = aio_write(cblock);
                if (rc == 0) {
                        break;
                } else if (errno != EAGAIN) {
                        cblock->aio_fildes = -1;
                        pr_err("failed to queue perf data, error: %m\n");
                        break;
                }
        } while (1);

        return rc;
}

static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
{
        void *rem_buf;
        off_t rem_off;
        size_t rem_size;
        int rc, aio_errno;
        ssize_t aio_ret, written;

        aio_errno = aio_error(cblock);
        if (aio_errno == EINPROGRESS)
                return 0;

        written = aio_ret = aio_return(cblock);
        if (aio_ret < 0) {
                if (aio_errno != EINTR)
                        pr_err("failed to write perf data, error: %m\n");
                written = 0;
        }

        rem_size = cblock->aio_nbytes - written;

        if (rem_size == 0) {
                cblock->aio_fildes = -1;
                /*
                 * md->refcount is incremented in perf_mmap__push() for
                 * every enqueued aio write request so decrement it because
                 * the request is now complete.
                 */
                perf_mmap__put(md);
                rc = 1;
        } else {
                /*
                 * The aio write request may need to be restarted with the
                 * remainder if the kernel didn't write the whole chunk
                 * at once.
                 */
                rem_off = cblock->aio_offset + written;
                rem_buf = (void *)(cblock->aio_buf + written);
                record__aio_write(cblock, cblock->aio_fildes,
                                rem_buf, rem_size, rem_off);
                rc = 0;
        }

        return rc;
}

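/*
 * Reap completed aio requests. When sync_all is false, return the index of
 * the first free control block; when sync_all is true, wait until every
 * in-flight request has completed and return -1 once nothing is pending.
 */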
static int record__aio_sync(struct perf_mmap *md, bool sync_all)
{
        struct aiocb **aiocb = md->aio.aiocb;
        struct aiocb *cblocks = md->aio.cblocks;
        struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
        int i, do_suspend;

        do {
                do_suspend = 0;
                for (i = 0; i < md->aio.nr_cblocks; ++i) {
                        if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
                                if (sync_all)
                                        aiocb[i] = NULL;
                                else
                                        return i;
                        } else {
                                /*
                                 * The started aio write is not complete yet,
                                 * so it has to be waited for before the
                                 * next allocation.
                                 */
                                aiocb[i] = &cblocks[i];
                                do_suspend = 1;
                        }
                }
                if (!do_suspend)
                        return -1;

                while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
                        if (!(errno == EAGAIN || errno == EINTR))
                                pr_err("failed to sync perf data, error: %m\n");
                }
        } while (1);
}

static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
{
        struct record *rec = to;
        int ret, trace_fd = rec->session->data->file.fd;

        rec->samples++;

        ret = record__aio_write(cblock, trace_fd, bf, size, off);
        if (!ret) {
                rec->bytes_written += size;
                if (switch_output_size(rec))
                        trigger_hit(&switch_output_trigger);
        }

        return ret;
}

static off_t record__aio_get_pos(int trace_fd)
{
        return lseek(trace_fd, 0, SEEK_CUR);
}

static void record__aio_set_pos(int trace_fd, off_t pos)
{
        lseek(trace_fd, pos, SEEK_SET);
}

static void record__aio_mmap_read_sync(struct record *rec)
{
        int i;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_mmap *maps = evlist->mmap;

        if (!rec->opts.nr_cblocks)
                return;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                struct perf_mmap *map = &maps[i];

                if (map->base)
                        record__aio_sync(map, true);
        }
}

static int nr_cblocks_default = 1;
static int nr_cblocks_max = 4;

static int record__aio_parse(const struct option *opt,
                             const char *str,
                             int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;

        if (unset) {
                opts->nr_cblocks = 0;
        } else {
                if (str)
                        opts->nr_cblocks = strtol(str, NULL, 0);
                if (!opts->nr_cblocks)
                        opts->nr_cblocks = nr_cblocks_default;
        }

        return 0;
}
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;

static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
{
        return -1;
}

static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
                void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
{
        return -1;
}

static off_t record__aio_get_pos(int trace_fd __maybe_unused)
{
        return -1;
}

static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
{
}

static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
{
}
#endif

static int record__aio_enabled(struct record *rec)
{
        return rec->opts.nr_cblocks > 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, NULL, event, event->header.size);
}

static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
{
        struct record *rec = to;

        rec->samples++;
        return record__write(rec, map, bf, size);
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;
        else
                signr = sig;

        done = 1;
}

static void sigsegv_handler(int sig)
{
        perf_hooks__recover();
        sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
        if (signr == -1)
                return;

        signal(signr, SIG_DFL);
        raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
                                    struct perf_mmap *map,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
{
        struct record *rec = container_of(tool, struct record, tool);
        struct perf_data *data = &rec->data;
        size_t padding;
        u8 pad[8] = {0};

        if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
                off_t file_offset;
                int fd = perf_data__fd(data);
                int err;

                file_offset = lseek(fd, 0, SEEK_CUR);
                if (file_offset == -1)
                        return -1;
                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
                                                     event, file_offset);
                if (err)
                        return err;
        }

        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
        padding = (len1 + len2) & 7;
        if (padding)
                padding = 8 - padding;

        record__write(rec, map, event, event->header.size);
        record__write(rec, map, data1, len1);
        if (len2)
                record__write(rec, map, data2, len2);
        record__write(rec, map, &pad, padding);

        return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
                                      struct perf_mmap *map)
{
        int ret;

        ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
                                  record__process_auxtrace);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
                                               struct perf_mmap *map)
{
        int ret;

        ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
                                           record__process_auxtrace,
                                           rec->opts.auxtrace_snapshot_size);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                struct perf_mmap *map = &rec->evlist->mmap[i];

                if (!map->auxtrace_mmap.base)
                        continue;

                if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
                        rc = -1;
                        goto out;
                }
        }
out:
        return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
        pr_debug("Recording AUX area tracing snapshot\n");
        if (record__auxtrace_read_snapshot_all(rec) < 0) {
                trigger_error(&auxtrace_snapshot_trigger);
        } else {
                if (auxtrace_record__snapshot_finish(rec->itr))
                        trigger_error(&auxtrace_snapshot_trigger);
                else
                        trigger_ready(&auxtrace_snapshot_trigger);
        }
}

static int record__auxtrace_init(struct record *rec)
{
        int err;

        if (!rec->itr) {
                rec->itr = auxtrace_record__init(rec->evlist, &err);
                if (err)
                        return err;
        }

        err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
                                              rec->opts.auxtrace_snapshot_opts);
        if (err)
                return err;

        return auxtrace_parse_filters(rec->evlist);
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct perf_mmap *map __maybe_unused)
{
        return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
        return 0;
}

static int record__auxtrace_init(struct record *rec __maybe_unused)
{
        return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
                               struct perf_evlist *evlist)
{
        struct record_opts *opts = &rec->opts;
        char msg[512];

        if (opts->affinity != PERF_AFFINITY_SYS)
                cpu__setup_cpunode_map();

        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode,
                                 opts->nr_cblocks, opts->affinity) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %u,%u)\n",
                               opts->mmap_pages, opts->auxtrace_mmap_pages);
                        return -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                                str_error_r(errno, msg, sizeof(msg)));
                        if (errno)
                                return -errno;
                        else
                                return -EINVAL;
                }
        }
        return 0;
}

static int record__mmap(struct record *rec)
{
        return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
        char msg[BUFSIZ];
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct record_opts *opts = &rec->opts;
        int rc = 0;

        /*
         * For initial_delay we need to add a dummy event so that we can track
         * PERF_RECORD_MMAP while we wait for the initial delay to enable the
         * real events, the ones asked by the user.
         */
        if (opts->initial_delay) {
                if (perf_evlist__add_dummy(evlist))
                        return -ENOMEM;

                pos = perf_evlist__first(evlist);
                pos->tracking = 0;
                pos = perf_evlist__last(evlist);
                pos->tracking = 1;
                pos->attr.enable_on_exec = 1;
        }

        perf_evlist__config(evlist, opts, &callchain_param);

        evlist__for_each_entry(evlist, pos) {
try_again:
                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
                                if (verbose > 0)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }
                        if ((errno == EINVAL || errno == EBADF) &&
                            pos->leader != pos &&
                            pos->weak_group) {
                                pos = perf_evlist__reset_weak_group(evlist, pos);
                                goto try_again;
                        }
                        rc = -errno;
                        perf_evsel__open_strerror(pos, &opts->target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);
                        goto out;
                }

                pos->supported = true;
        }

        if (perf_evlist__apply_filters(evlist, &pos)) {
                pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, perf_evsel__name(pos), errno,
                        str_error_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        rc = record__mmap(rec);
        if (rc)
                goto out;

        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
out:
        return rc;
}

static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct perf_evsel *evsel,
                                struct machine *machine)
{
        struct record *rec = container_of(tool, struct record, tool);

        if (rec->evlist->first_sample_time == 0)
                rec->evlist->first_sample_time = sample->time;

        rec->evlist->last_sample_time = sample->time;

        if (rec->buildid_all)
                return 0;

        rec->samples++;
        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
        struct perf_session *session = rec->session;

        if (perf_data__size(&rec->data) == 0)
                return 0;

        /*
         * During this process, it'll load kernel map and replace the
         * dso->long_name to a real pathname it found.  In this case
         * we prefer the vmlinux path like
         *   /lib/modules/3.16.4/build/vmlinux
         *
         * rather than build-id path (in debug directory).
         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
         */
        symbol_conf.ignore_vmlinux_buildid = true;

        /*
         * If --buildid-all is given, it marks all DSO regardless of hits,
         * so no need to process samples. But if timestamp_boundary is enabled,
         * it still needs to walk on all samples to get the timestamps of
         * first/last samples.
         */
        if (rec->buildid_all && !rec->timestamp_boundary)
                rec->tool.sample = NULL;

        return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;
        /*
         * As for the guest kernel, when processing the record & report
         * subcommands we arrange the module mmaps prior to the guest kernel
         * mmap and trigger a dso preload, because by default guest module
         * symbols are loaded from guest kallsyms instead of
         * /lib/modules/XXX/XXX. This method is used to avoid missing symbols
         * when the first addr is in a module instead of in the guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest kernel's
         * /proc/kallsyms sometimes has no _text.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

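/*
 * With --affinity=node or --affinity=cpu, move the recording thread onto the
 * CPUs backing the ring buffer that is about to be read.
 */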
static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
{
        if (rec->opts.affinity != PERF_AFFINITY_SYS &&
            !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
                CPU_ZERO(&rec->affinity_mask);
                CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
                sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
        }
}

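/*
 * Drain every mmapped ring buffer in the evlist into perf.data, using either
 * the synchronous push path or the aio path, and emit a FINISHED_ROUND event
 * when anything was written.
 */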
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
                                    bool overwrite)
{
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;
        struct perf_mmap *maps;
        int trace_fd = rec->data.file.fd;
        off_t off;

        if (!evlist)
                return 0;

        maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
        if (!maps)
                return 0;

        if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
                return 0;

        if (record__aio_enabled(rec))
                off = record__aio_get_pos(trace_fd);

        for (i = 0; i < evlist->nr_mmaps; i++) {
                struct perf_mmap *map = &maps[i];

                if (map->base) {
                        record__adjust_affinity(rec, map);
                        if (!record__aio_enabled(rec)) {
                                if (perf_mmap__push(map, rec, record__pushfn) != 0) {
                                        rc = -1;
                                        goto out;
                                }
                        } else {
                                int idx;
                                /*
                                 * Call record__aio_sync() to wait till map->data buffer
                                 * becomes available after previous aio write request.
                                 */
                                idx = record__aio_sync(map, false);
                                if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
                                        record__aio_set_pos(trace_fd, off);
                                        rc = -1;
                                        goto out;
                                }
                        }
                }

                if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
                    record__auxtrace_mmap_read(rec, map) != 0) {
                        rc = -1;
                        goto out;
                }
        }

        if (record__aio_enabled(rec))
                record__aio_set_pos(trace_fd, off);

        /*
         * Mark the round finished in case we wrote
         * at least one event.
         */
        if (bytes_written != rec->bytes_written)
                rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));

        if (overwrite)
                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
        return rc;
}

static int record__mmap_read_all(struct record *rec)
{
        int err;

        err = record__mmap_read_evlist(rec, rec->evlist, false);
        if (err)
                return err;

        return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
        struct perf_session *session = rec->session;
        int feat;

        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&rec->evlist->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

        if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
                perf_header__clear_feat(&session->header, HEADER_CLOCKID);

        perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);

        perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
        struct perf_data *data = &rec->data;
        int fd = perf_data__fd(data);

        if (data->is_pipe)
                return;

        rec->session->header.data_size += rec->bytes_written;
        data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);

        if (!rec->no_buildid) {
                process_buildids(rec);

                if (rec->buildid_all)
                        dsos__hit_all(rec->session);
        }
        perf_session__write_header(rec->session, rec->evlist, fd, true);

        return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
        int err;
        struct thread_map *thread_map;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
        if (thread_map == NULL)
                return -1;

        err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
                                                 process_synthesized_event,
                                                 &rec->session->machines.host,
                                                 rec->opts.sample_address);
        thread_map__put(thread_map);
        return err;
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
        struct perf_data *data = &rec->data;
        int fd, err;
        char *new_filename;

        /* Same Size:      "2015122520103046"*/
        char timestamp[] = "InvalidTimestamp";

        record__aio_mmap_read_sync(rec);

        record__synthesize(rec, true);
        if (target__none(&rec->opts.target))
                record__synthesize_workload(rec, true);

        rec->samples = 0;
        record__finish_output(rec);
        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
        if (err) {
                pr_err("Failed to get current timestamp\n");
                return -EINVAL;
        }

        fd = perf_data__switch(data, timestamp,
                                    rec->session->header.data_offset,
                                    at_exit, &new_filename);
        if (fd >= 0 && !at_exit) {
                rec->bytes_written = 0;
                rec->session->header.data_size = 0;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                        data->path, timestamp);

        if (rec->switch_output.num_files) {
                int n = rec->switch_output.cur_file + 1;

                if (n >= rec->switch_output.num_files)
                        n = 0;
                rec->switch_output.cur_file = n;
                if (rec->switch_output.filenames[n]) {
                        remove(rec->switch_output.filenames[n]);
                        free(rec->switch_output.filenames[n]);
                }
                rec->switch_output.filenames[n] = new_filename;
        } else {
                free(new_filename);
        }

        /* Output tracking events */
        if (!at_exit) {
                record__synthesize(rec, false);

                /*
                 * In 'perf record --switch-output' without -a,
                 * record__synthesize() in record__switch_output() won't
                 * generate tracking events because there's no thread_map
                 * in evlist, which means the newly created perf.data
                 * doesn't contain map and comm information.
                 * Create a fake thread_map and directly call
                 * perf_event__synthesize_thread_map() for those events.
                 */
                if (target__none(&rec->opts.target))
                        record__synthesize_workload(rec, false);
        }
        return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
                                        siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
        workload_exec_errno = info->si_value.sival_int;
        done = 1;
        child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
                            struct perf_tool *tool __maybe_unused,
                            perf_event__handler_t process __maybe_unused,
                            struct machine *machine __maybe_unused)
{
        return 0;
}

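/*
 * Pick any mmapped ring buffer whose perf_event_mmap_page can be handed to
 * perf_event__synth_time_conv() in record__synthesize() below.
 */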
static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
        if (evlist) {
                if (evlist->mmap && evlist->mmap[0].base)
                        return evlist->mmap[0].base;
                if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
                        return evlist->overwrite_mmap[0].base;
        }
        return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
        const struct perf_event_mmap_page *pc;

        pc = perf_evlist__pick_pc(rec->evlist);
        if (pc)
                return pc;
        return NULL;
}

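/*
 * Synthesize the non-sample events that describe already-existing state
 * (attrs, tracing data, kernel and module mmaps, threads, cpu map, etc.) so
 * that the report side can resolve the samples that follow.
 */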
static int record__synthesize(struct record *rec, bool tail)
{
        struct perf_session *session = rec->session;
        struct machine *machine = &session->machines.host;
        struct perf_data *data = &rec->data;
        struct record_opts *opts = &rec->opts;
        struct perf_tool *tool = &rec->tool;
        int fd = perf_data__fd(data);
        int err = 0;

        if (rec->opts.tail_synthesize != tail)
                return 0;

        if (data->is_pipe) {
                /*
                 * We need to synthesize events first, because some
                 * features work on top of them (on the report side).
                 */
                err = perf_event__synthesize_attrs(tool, rec->evlist,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out;
                }

                err = perf_event__synthesize_features(tool, session, rec->evlist,
                                                      process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize features.\n");
                        return err;
                }

                if (have_tracepoints(&rec->evlist->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints so it's not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out;
                        }
                        rec->bytes_written += err;
                }
        }

        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
                                          process_synthesized_event, machine);
        if (err)
                goto out;

        if (rec->opts.full_auxtrace) {
                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                        session, process_synthesized_event);
                if (err)
                        goto out;
        }

        if (!perf_evlist__exclude_kernel(rec->evlist)) {
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine);
                WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
                                   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                                   "Check /proc/kallsyms permission or run as root.\n");

                err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                                     machine);
                WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
                                   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                                   "Check /proc/modules permission or run as root.\n");
        }

        if (perf_guest) {
                machines__process_guests(&session->machines,
                                         perf_event__synthesize_guest_os, tool);
        }

        err = perf_event__synthesize_extra_attr(&rec->tool,
                                                rec->evlist,
                                                process_synthesized_event,
                                                data->is_pipe);
        if (err)
                goto out;

        err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
                                                 process_synthesized_event,
                                                NULL);
        if (err < 0) {
                pr_err("Couldn't synthesize thread map.\n");
                return err;
        }

        err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
                                             process_synthesized_event, NULL);
        if (err < 0) {
                pr_err("Couldn't synthesize cpu map.\n");
                return err;
        }

        err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
                                                machine, opts);
        if (err < 0)
                pr_warning("Couldn't synthesize bpf events.\n");

        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
                                            process_synthesized_event, opts->sample_address,
                                            1);
out:
        return err;
}

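/*
 * The main record loop: create the session, fork the workload if one was
 * given, keep draining the ring buffers until the workload exits or the user
 * interrupts, then finish (or rotate) the output file.
 */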
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
        int err;
        int status = 0;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct perf_tool *tool = &rec->tool;
        struct record_opts *opts = &rec->opts;
        struct perf_data *data = &rec->data;
        struct perf_session *session;
        bool disabled = false, draining = false;
        struct perf_evlist *sb_evlist = NULL;
        int fd;

        atexit(record__sig_exit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);
        signal(SIGSEGV, sigsegv_handler);

        if (rec->opts.record_namespaces)
                tool->namespace_events = true;

        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
                signal(SIGUSR2, snapshot_sig_handler);
                if (rec->opts.auxtrace_snapshot_mode)
                        trigger_on(&auxtrace_snapshot_trigger);
                if (rec->switch_output.enabled)
                        trigger_on(&switch_output_trigger);
        } else {
                signal(SIGUSR2, SIG_IGN);
        }

        session = perf_session__new(data, false, tool);
        if (session == NULL) {
                pr_err("Perf session creation failed.\n");
                return -1;
        }

        fd = perf_data__fd(data);
        rec->session = session;

        record__init_features(rec);

        if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
                session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;

        if (forks) {
                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
                                                    argv, data->is_pipe,
                                                    workload_exec_failed_signal);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        status = err;
                        goto out_delete_session;
                }
        }

        /*
         * If we have just a single event and are sending data
         * through a pipe, we need to force the ids allocation,
         * because we synthesize the event name through the pipe
         * and need the id for that.
         */
        if (data->is_pipe && rec->evlist->nr_entries == 1)
                rec->opts.sample_id = true;

        if (record__open(rec) != 0) {
                err = -1;
                goto out_child;
        }

        err = bpf__apply_obj_config();
        if (err) {
                char errbuf[BUFSIZ];

                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Apply config to BPF failed: %s\n",
                         errbuf);
                goto out_child;
        }

        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
         */
        if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
                rec->tool.ordered_events = false;
        }

        if (!rec->evlist->nr_groups)
                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

        if (data->is_pipe) {
                err = perf_header__write_pipe(fd);
                if (err < 0)
                        goto out_child;
        } else {
                err = perf_session__write_header(session, rec->evlist, fd, false);
                if (err < 0)
                        goto out_child;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_child;
        }

        if (!opts->no_bpf_event)
                bpf_event__add_sb_event(&sb_evlist, &session->header.env);

        if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
                pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
                opts->no_bpf_event = true;
        }

        err = record__synthesize(rec, false);
        if (err < 0)
                goto out_child;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_child;
                }
        }

        /*
         * When perf is starting the traced process, all the events
         * (apart from group members) have enable_on_exec=1 set,
         * so don't spoil it by prematurely enabling them.
         */
        if (!target__none(&opts->target) && !opts->initial_delay)
                perf_evlist__enable(rec->evlist);

        /*
         * Let the child rip
         */
        if (forks) {
                struct machine *machine = &session->machines.host;
                union perf_event *event;
                pid_t tgid;

                event = malloc(sizeof(event->comm) + machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Some H/W events are generated before the COMM event,
                 * which is emitted during exec(), so perf script
                 * cannot see a correct process name for those events.
                 * Synthesize a COMM event to prevent it.
                 */
                tgid = perf_event__synthesize_comm(tool, event,
                                                   rec->evlist->workload.pid,
                                                   process_synthesized_event,
                                                   machine);
                free(event);

                if (tgid == -1)
                        goto out_child;

                event = malloc(sizeof(event->namespaces) +
                               (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
                               machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Synthesize NAMESPACES event for the command specified.
                 */
                perf_event__synthesize_namespaces(tool, event,
                                                  rec->evlist->workload.pid,
                                                  tgid, process_synthesized_event,
                                                  machine);
                free(event);

                perf_evlist__start_workload(rec->evlist);
        }

        if (opts->initial_delay) {
                usleep(opts->initial_delay * USEC_PER_MSEC);
                perf_evlist__enable(rec->evlist);
        }

        trigger_ready(&auxtrace_snapshot_trigger);
        trigger_ready(&switch_output_trigger);
        perf_hooks__invoke_record_start();
        for (;;) {
                unsigned long long hits = rec->samples;

                /*
                 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
                 * here: when done == true and hits != rec->samples
                 * in the previous round.
                 *
                 * perf_evlist__toggle_bkw_mmap ensures we never
                 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
                 */
                if (trigger_is_hit(&switch_output_trigger) || done || draining)
                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

                if (record__mmap_read_all(rec) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
                        trigger_error(&switch_output_trigger);
                        err = -1;
                        goto out_child;
                }

                if (auxtrace_record__snapshot_started) {
                        auxtrace_record__snapshot_started = 0;
                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
                                record__read_auxtrace_snapshot(rec);
                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                                pr_err("AUX area tracing snapshot failed\n");
                                err = -1;
                                goto out_child;
                        }
                }

                if (trigger_is_hit(&switch_output_trigger)) {
                        /*
                         * If switch_output_trigger is hit, the data in
                         * the overwritable ring buffer should have been
                         * collected, so bkw_mmap_state should be set to
                         * BKW_MMAP_EMPTY.
                         *
                         * If SIGUSR2 was raised after or during
                         * record__mmap_read_all(), it didn't collect
                         * data from the overwritable ring buffer. Read again.
                         */
1371                        if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1372                                continue;
1373                        trigger_ready(&switch_output_trigger);
1374
1375                        /*
1376                         * Reenable events in overwrite ring buffer after
1377                         * record__mmap_read_all(): we should have collected
1378                         * data from it.
1379                         */
1380                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1381
1382                        if (!quiet)
1383                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1384                                        waking);
1385                        waking = 0;
1386                        fd = record__switch_output(rec, false);
1387                        if (fd < 0) {
1388                                pr_err("Failed to switch to new file\n");
1389                                trigger_error(&switch_output_trigger);
1390                                err = fd;
1391                                goto out_child;
1392                        }
1393
1394                        /* re-arm the alarm */
1395                        if (rec->switch_output.time)
1396                                alarm(rec->switch_output.time);
1397                }
1398
1399                if (hits == rec->samples) {
1400                        if (done || draining)
1401                                break;
1402                        err = perf_evlist__poll(rec->evlist, -1);
1403                        /*
1404                         * Propagate error, only if there's any. Ignore positive
1405                         * number of returned events and interrupt error.
1406                         */
1407                        if (err > 0 || (err < 0 && errno == EINTR))
1408                                err = 0;
1409                        waking++;
1410
1411                        if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1412                                draining = true;
1413                }
1414
1415                /*
1416                 * When perf is starting the traced process, at the end events
1417                 * die with the process and we wait for that. Thus no need to
1418                 * disable events in this case.
1419                 */
1420                if (done && !disabled && !target__none(&opts->target)) {
1421                        trigger_off(&auxtrace_snapshot_trigger);
1422                        perf_evlist__disable(rec->evlist);
1423                        disabled = true;
1424                }
1425        }
1426        trigger_off(&auxtrace_snapshot_trigger);
1427        trigger_off(&switch_output_trigger);
1428
1429        if (forks && workload_exec_errno) {
1430                char msg[STRERR_BUFSIZE];
1431                const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1432                pr_err("Workload failed: %s\n", emsg);
1433                err = -1;
1434                goto out_child;
1435        }
1436
1437        if (!quiet)
1438                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1439
1440        if (target__none(&rec->opts.target))
1441                record__synthesize_workload(rec, true);
1442
1443out_child:
1444        record__aio_mmap_read_sync(rec);
1445
1446        if (forks) {
1447                int exit_status;
1448
1449                if (!child_finished)
1450                        kill(rec->evlist->workload.pid, SIGTERM);
1451
1452                wait(&exit_status);
1453
1454                if (err < 0)
1455                        status = err;
1456                else if (WIFEXITED(exit_status))
1457                        status = WEXITSTATUS(exit_status);
1458                else if (WIFSIGNALED(exit_status))
1459                        signr = WTERMSIG(exit_status);
1460        } else
1461                status = err;
1462
1463        record__synthesize(rec, true);
1464        /* this will be recalculated during process_buildids() */
1465        rec->samples = 0;
1466
1467        if (!err) {
1468                if (!rec->timestamp_filename) {
1469                        record__finish_output(rec);
1470                } else {
1471                        fd = record__switch_output(rec, true);
1472                        if (fd < 0) {
1473                                status = fd;
1474                                goto out_delete_session;
1475                        }
1476                }
1477        }
1478
1479        perf_hooks__invoke_record_end();
1480
1481        if (!err && !quiet) {
1482                char samples[128];
1483                const char *postfix = rec->timestamp_filename ?
1484                                        ".<timestamp>" : "";
1485
1486                if (rec->samples && !rec->opts.full_auxtrace)
1487                        scnprintf(samples, sizeof(samples),
1488                                  " (%" PRIu64 " samples)", rec->samples);
1489                else
1490                        samples[0] = '\0';
1491
1492                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1493                        perf_data__size(data) / 1024.0 / 1024.0,
1494                        data->path, postfix, samples);
1495        }
1496
1497out_delete_session:
1498        perf_session__delete(session);
1499
1500        if (!opts->no_bpf_event)
1501                perf_evlist__stop_sb_thread(sb_evlist);
1502        return status;
1503}
1504
1505static void callchain_debug(struct callchain_param *callchain)
1506{
1507        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
1508
1509        pr_debug("callchain: type %s\n", str[callchain->record_mode]);
1510
1511        if (callchain->record_mode == CALLCHAIN_DWARF)
1512                pr_debug("callchain: stack dump size %d\n",
1513                         callchain->dump_size);
1514}
1515
1516int record_opts__parse_callchain(struct record_opts *record,
1517                                 struct callchain_param *callchain,
1518                                 const char *arg, bool unset)
1519{
1520        int ret;
1521        callchain->enabled = !unset;
1522
1523        /* --no-call-graph */
1524        if (unset) {
1525                callchain->record_mode = CALLCHAIN_NONE;
1526                pr_debug("callchain: disabled\n");
1527                return 0;
1528        }
1529
1530        ret = parse_callchain_record_opt(arg, callchain);
1531        if (!ret) {
1532                /* Enable data address sampling for DWARF unwind. */
1533                if (callchain->record_mode == CALLCHAIN_DWARF)
1534                        record->sample_address = true;
1535                callchain_debug(callchain);
1536        }
1537
1538        return ret;
1539}
1540
1541int record_parse_callchain_opt(const struct option *opt,
1542                               const char *arg,
1543                               int unset)
1544{
1545        return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1546}
1547
1548int record_callchain_opt(const struct option *opt,
1549                         const char *arg __maybe_unused,
1550                         int unset __maybe_unused)
1551{
1552        struct callchain_param *callchain = opt->value;
1553
1554        callchain->enabled = true;
1555
1556        if (callchain->record_mode == CALLCHAIN_NONE)
1557                callchain->record_mode = CALLCHAIN_FP;
1558
1559        callchain_debug(callchain);
1560        return 0;
1561}
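
/*
 * Illustrative usage of the two callbacks above (hedged examples, not part of
 * the original file): '-g' enables call graphs with the default or previously
 * configured mode, while '--call-graph' selects record_mode[,record_size]
 * explicitly, e.g.:
 *
 *   perf record -g -- ./workload                 # frame pointer call graphs
 *   perf record --call-graph dwarf,8192 -- cmd   # DWARF unwind, 8K stack dumps
 *
 * Selecting DWARF mode also turns on sample_address in
 * record_opts__parse_callchain(), since the unwinder needs the stack contents.
 */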
1562
1563static int perf_record_config(const char *var, const char *value, void *cb)
1564{
1565        struct record *rec = cb;
1566
1567        if (!strcmp(var, "record.build-id")) {
1568                if (!strcmp(value, "cache"))
1569                        rec->no_buildid_cache = false;
1570                else if (!strcmp(value, "no-cache"))
1571                        rec->no_buildid_cache = true;
1572                else if (!strcmp(value, "skip"))
1573                        rec->no_buildid = true;
1574                else
1575                        return -1;
1576                return 0;
1577        }
1578        if (!strcmp(var, "record.call-graph")) {
1579                var = "call-graph.record-mode";
1580                return perf_default_config(var, value, cb);
1581        }
1582#ifdef HAVE_AIO_SUPPORT
1583        if (!strcmp(var, "record.aio")) {
1584                rec->opts.nr_cblocks = strtol(value, NULL, 0);
1585                if (!rec->opts.nr_cblocks)
1586                        rec->opts.nr_cblocks = nr_cblocks_default;
1587        }
1588#endif
1589
1590        return 0;
1591}
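
/*
 * Illustrative ~/.perfconfig fragment handled by perf_record_config() above
 * (hedged example values, not taken from this file):
 *
 *   [record]
 *           build-id = no-cache    # one of: cache, no-cache, skip
 *           call-graph = dwarf     # forwarded as call-graph.record-mode
 *           aio = 4                # nr_cblocks, only with HAVE_AIO_SUPPORT
 */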
1592
1593struct clockid_map {
1594        const char *name;
1595        int clockid;
1596};
1597
1598#define CLOCKID_MAP(n, c)       \
1599        { .name = n, .clockid = (c), }
1600
1601#define CLOCKID_END     { .name = NULL, }
1602
1603
1604/*
1605 * Define any missing clockids here; we need to build on many distros...
1606 */
1607#ifndef CLOCK_MONOTONIC_RAW
1608#define CLOCK_MONOTONIC_RAW 4
1609#endif
1610#ifndef CLOCK_BOOTTIME
1611#define CLOCK_BOOTTIME 7
1612#endif
1613#ifndef CLOCK_TAI
1614#define CLOCK_TAI 11
1615#endif
1616
1617static const struct clockid_map clockids[] = {
1618        /* available for all events, NMI safe */
1619        CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
1620        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
1621
1622        /* available for some events */
1623        CLOCKID_MAP("realtime", CLOCK_REALTIME),
1624        CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
1625        CLOCKID_MAP("tai", CLOCK_TAI),
1626
1627        /* available for the lazy */
1628        CLOCKID_MAP("mono", CLOCK_MONOTONIC),
1629        CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
1630        CLOCKID_MAP("real", CLOCK_REALTIME),
1631        CLOCKID_MAP("boot", CLOCK_BOOTTIME),
1632
1633        CLOCKID_END,
1634};
1635
1636static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1637{
1638        struct timespec res;
1639
1640        *res_ns = 0;
1641        if (!clock_getres(clk_id, &res))
1642                *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1643        else
1644                pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1645
1646        return 0;
1647}
1648
1649static int parse_clockid(const struct option *opt, const char *str, int unset)
1650{
1651        struct record_opts *opts = (struct record_opts *)opt->value;
1652        const struct clockid_map *cm;
1653        const char *ostr = str;
1654
1655        if (unset) {
1656                opts->use_clockid = 0;
1657                return 0;
1658        }
1659
1660        /* no arg passed */
1661        if (!str)
1662                return 0;
1663
1664        /* no setting it twice */
1665        if (opts->use_clockid)
1666                return -1;
1667
1668        opts->use_clockid = true;
1669
1670        /* if it's a number, we're done */
1671        if (sscanf(str, "%d", &opts->clockid) == 1)
1672                return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
1673
1674        /* allow a "CLOCK_" prefix to the name */
1675        if (!strncasecmp(str, "CLOCK_", 6))
1676                str += 6;
1677
1678        for (cm = clockids; cm->name; cm++) {
1679                if (!strcasecmp(str, cm->name)) {
1680                        opts->clockid = cm->clockid;
1681                        return get_clockid_res(opts->clockid,
1682                                               &opts->clockid_res_ns);
1683                }
1684        }
1685
1686        opts->use_clockid = false;
1687        ui__warning("unknown clockid %s, check man page\n", ostr);
1688        return -1;
1689}
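
/*
 * Illustrative '-k/--clockid' invocations parsed above (hedged examples):
 *
 *   perf record -k monotonic_raw -- cmd    # looked up in clockids[]
 *   perf record -k CLOCK_BOOTTIME -- cmd   # "CLOCK_" prefix is stripped first
 *   perf record -k 4 -- cmd                # a raw clockid number is taken as-is
 *
 * In every accepted case get_clockid_res() stores the clock resolution in
 * opts->clockid_res_ns.
 */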
1690
1691static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1692{
1693        struct record_opts *opts = (struct record_opts *)opt->value;
1694
1695        if (unset || !str)
1696                return 0;
1697
1698        if (!strcasecmp(str, "node"))
1699                opts->affinity = PERF_AFFINITY_NODE;
1700        else if (!strcasecmp(str, "cpu"))
1701                opts->affinity = PERF_AFFINITY_CPU;
1702
1703        return 0;
1704}
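
/*
 * Illustrative '--affinity' usage (hedged examples): the default stays
 * PERF_AFFINITY_SYS (set in cmd_record()); "node" moves the trace reading
 * thread to the NUMA node of the mmap buffer being drained, "cpu" to that
 * buffer's cpu:
 *
 *   perf record --affinity=node -a -- sleep 10
 *   perf record --affinity=cpu  -a -- sleep 10
 */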
1705
1706static int record__parse_mmap_pages(const struct option *opt,
1707                                    const char *str,
1708                                    int unset __maybe_unused)
1709{
1710        struct record_opts *opts = opt->value;
1711        char *s, *p;
1712        unsigned int mmap_pages;
1713        int ret;
1714
1715        if (!str)
1716                return -EINVAL;
1717
1718        s = strdup(str);
1719        if (!s)
1720                return -ENOMEM;
1721
1722        p = strchr(s, ',');
1723        if (p)
1724                *p = '\0';
1725
1726        if (*s) {
1727                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1728                if (ret)
1729                        goto out_free;
1730                opts->mmap_pages = mmap_pages;
1731        }
1732
1733        if (!p) {
1734                ret = 0;
1735                goto out_free;
1736        }
1737
1738        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1739        if (ret)
1740                goto out_free;
1741
1742        opts->auxtrace_mmap_pages = mmap_pages;
1743
1744out_free:
1745        free(s);
1746        return ret;
1747}
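
/*
 * Illustrative '-m/--mmap-pages' forms accepted above (hedged examples):
 *
 *   perf record -m 512 -- cmd        # 512 data pages per mmap
 *   perf record -m 512,2048 -- cmd   # 512 data pages, 2048 AUX area pages
 *   perf record -m ,1M -- cmd        # only the AUX area size, data at default
 *
 * The part before the comma fills opts->mmap_pages, the part after it fills
 * opts->auxtrace_mmap_pages; both go through __perf_evlist__parse_mmap_pages().
 */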
1748
1749static void switch_output_size_warn(struct record *rec)
1750{
1751        u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1752        struct switch_output *s = &rec->switch_output;
1753
1754        wakeup_size /= 2;
1755
1756        if (s->size < wakeup_size) {
1757                char buf[100];
1758
1759                unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1760                pr_warning("WARNING: switch-output data size is lower than the "
1761                           "wakeup kernel buffer size (%s), "
1762                           "expect bigger perf.data sizes\n", buf);
1763        }
1764}
1765
1766static int switch_output_setup(struct record *rec)
1767{
1768        struct switch_output *s = &rec->switch_output;
1769        static struct parse_tag tags_size[] = {
1770                { .tag  = 'B', .mult = 1       },
1771                { .tag  = 'K', .mult = 1 << 10 },
1772                { .tag  = 'M', .mult = 1 << 20 },
1773                { .tag  = 'G', .mult = 1 << 30 },
1774                { .tag  = 0 },
1775        };
1776        static struct parse_tag tags_time[] = {
1777                { .tag  = 's', .mult = 1        },
1778                { .tag  = 'm', .mult = 60       },
1779                { .tag  = 'h', .mult = 60*60    },
1780                { .tag  = 'd', .mult = 60*60*24 },
1781                { .tag  = 0 },
1782        };
1783        unsigned long val;
1784
1785        if (!s->set)
1786                return 0;
1787
1788        if (!strcmp(s->str, "signal")) {
1789                s->signal = true;
1790                pr_debug("switch-output with SIGUSR2 signal\n");
1791                goto enabled;
1792        }
1793
1794        val = parse_tag_value(s->str, tags_size);
1795        if (val != (unsigned long) -1) {
1796                s->size = val;
1797                pr_debug("switch-output with %s size threshold\n", s->str);
1798                goto enabled;
1799        }
1800
1801        val = parse_tag_value(s->str, tags_time);
1802        if (val != (unsigned long) -1) {
1803                s->time = val;
1804                pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1805                         s->str, s->time);
1806                goto enabled;
1807        }
1808
1809        return -1;
1810
1811enabled:
1812        rec->timestamp_filename = true;
1813        s->enabled              = true;
1814
1815        if (s->size && !rec->opts.no_buffering)
1816                switch_output_size_warn(rec);
1817
1818        return 0;
1819}
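
/*
 * Illustrative '--switch-output' forms accepted above (hedged examples):
 *
 *   perf record --switch-output -- cmd       # rotate on SIGUSR2 ("signal")
 *   perf record --switch-output=1G -- cmd    # rotate after ~1G written
 *   perf record --switch-output=30s -- cmd   # rotate every 30 seconds (SIGALRM)
 *
 * Every form implies timestamp_filename, so each rotated file gets a
 * <timestamp> suffix; '--switch-max-files' can cap how many files are kept.
 */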
1820
1821static const char * const __record_usage[] = {
1822        "perf record [<options>] [<command>]",
1823        "perf record [<options>] -- <command> [<options>]",
1824        NULL
1825};
1826const char * const *record_usage = __record_usage;
1827
1828/*
1829 * XXX Ideally this would be local to cmd_record() and passed to a record__new,
1830 * because we need access to it in record__exit, which is called after
1831 * cmd_record() exits, but since record_options needs to be accessible to
1832 * builtin-script, leave it here.
1833 *
1834 * At least we don't touch it directly in all the other functions here.
1835 *
1836 * Just say no to tons of global variables, sigh.
1837 */
1838static struct record record = {
1839        .opts = {
1840                .sample_time         = true,
1841                .mmap_pages          = UINT_MAX,
1842                .user_freq           = UINT_MAX,
1843                .user_interval       = ULLONG_MAX,
1844                .freq                = 4000,
1845                .target              = {
1846                        .uses_mmap   = true,
1847                        .default_per_cpu = true,
1848                },
1849        },
1850        .tool = {
1851                .sample         = process_sample_event,
1852                .fork           = perf_event__process_fork,
1853                .exit           = perf_event__process_exit,
1854                .comm           = perf_event__process_comm,
1855                .namespaces     = perf_event__process_namespaces,
1856                .mmap           = perf_event__process_mmap,
1857                .mmap2          = perf_event__process_mmap2,
1858                .ordered_events = true,
1859        },
1860};
1861
1862const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1863        "\n\t\t\t\tDefault: fp";
1864
1865static bool dry_run;
1866
1867/*
1868 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1869 * with it and switch to using the library functions in perf_evlist that came
1870 * from builtin-record.c, i.e. use record_opts,
1871 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
1872 * over pipes, etc.
1873 */
1874static struct option __record_options[] = {
1875        OPT_CALLBACK('e', "event", &record.evlist, "event",
1876                     "event selector. use 'perf list' to list available events",
1877                     parse_events_option),
1878        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
1879                     "event filter", parse_filter),
1880        OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1881                           NULL, "don't record events from perf itself",
1882                           exclude_perf),
1883        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
1884                    "record events on existing process id"),
1885        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
1886                    "record events on existing thread id"),
1887        OPT_INTEGER('r', "realtime", &record.realtime_prio,
1888                    "collect data with this RT SCHED_FIFO priority"),
1889        OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
1890                    "collect data without buffering"),
1891        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
1892                    "collect raw sample records from all opened counters"),
1893        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
1894                            "system-wide collection from all CPUs"),
1895        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1896                    "list of cpus to monitor"),
1897        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1898        OPT_STRING('o', "output", &record.data.path, "file",
1899                    "output file name"),
1900        OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1901                        &record.opts.no_inherit_set,
1902                        "child tasks do not inherit counters"),
1903        OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1904                    "synthesize non-sample events at the end of output"),
1905        OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1906        OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
1907        OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1908                    "Fail if the specified frequency can't be used"),
1909        OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1910                     "profile at this frequency",
1911                      record__parse_freq),
1912        OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1913                     "number of mmap data pages and AUX area tracing mmap pages",
1914                     record__parse_mmap_pages),
1915        OPT_BOOLEAN(0, "group", &record.opts.group,
1916                    "put the counters into a counter group"),
1917        OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
1918                           NULL, "enables call-graph recording",
1919                           &record_callchain_opt),
1920        OPT_CALLBACK(0, "call-graph", &record.opts,
1921                     "record_mode[,record_size]", record_callchain_help,
1922                     &record_parse_callchain_opt),
1923        OPT_INCR('v', "verbose", &verbose,
1924                    "be more verbose (show counter open errors, etc)"),
1925        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
1926        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
1927                    "per thread counts"),
1928        OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
1929        OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1930                    "Record the sample physical addresses"),
1931        OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
1932        OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1933                        &record.opts.sample_time_set,
1934                        "Record the sample timestamps"),
1935        OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1936                        "Record the sample period"),
1937        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
1938                    "don't sample"),
1939        OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1940                        &record.no_buildid_cache_set,
1941                        "do not update the buildid cache"),
1942        OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1943                        &record.no_buildid_set,
1944                        "do not collect buildids in perf.data"),
1945        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
1946                     "monitor event in cgroup name only",
1947                     parse_cgroups),
1948        OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
1949                  "ms to wait before starting measurement after program start"),
1950        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1951                   "user to profile"),
1952
1953        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1954                     "branch any", "sample any taken branches",
1955                     parse_branch_stack),
1956
1957        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1958                     "branch filter mask", "branch stack filter modes",
1959                     parse_branch_stack),
1960        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1961                    "sample by weight (on special events only)"),
1962        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1963                    "sample transaction flags (special events only)"),
1964        OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1965                    "use per-thread mmaps"),
1966        OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1967                    "sample selected machine registers on interrupt,"
1968                    " use -I ? to list register names", parse_regs),
1969        OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1970                    "sample selected machine registers in user space,"
1971                    " use --user-regs=? to list register names", parse_regs),
1972        OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1973                    "Record running/enabled time of read (:S) events"),
1974        OPT_CALLBACK('k', "clockid", &record.opts,
1975                     "clockid", "clockid to use for events, see clock_gettime()",
1976                     parse_clockid),
1977        OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1978                          "opts", "AUX area tracing Snapshot Mode", ""),
1979        OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
1980                        "per thread proc mmap processing timeout in ms"),
1981        OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1982                    "Record namespaces events"),
1983        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1984                    "Record context switch events"),
1985        OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1986                         "Configure all used events to run in kernel space.",
1987                         PARSE_OPT_EXCLUSIVE),
1988        OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1989                         "Configure all used events to run in user space.",
1990                         PARSE_OPT_EXCLUSIVE),
1991        OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1992                   "clang binary to use for compiling BPF scriptlets"),
1993        OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1994                   "options passed to clang when compiling BPF scriptlets"),
1995        OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1996                   "file", "vmlinux pathname"),
1997        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1998                    "Record build-id of all DSOs regardless of hits"),
1999        OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2000                    "append timestamp to output filename"),
2001        OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2002                    "Record timestamp boundary (time of first/last samples)"),
2003        OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2004                          &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2005                          "Switch output when receiving SIGUSR2 (signal) or when crossing a size or time threshold",
2006                          "signal"),
2007        OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2008                   "Limit number of switch output generated files"),
2009        OPT_BOOLEAN(0, "dry-run", &dry_run,
2010                    "Parse options then exit"),
2011#ifdef HAVE_AIO_SUPPORT
2012        OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2013                     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2014                     record__aio_parse),
2015#endif
2016        OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2017                     "Set the affinity mask of the trace reading thread to the NUMA node cpu mask or the cpu of the processed mmap buffer",
2018                     record__parse_affinity),
2019        OPT_END()
2020};
2021
2022struct option *record_options = __record_options;
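
/*
 * Illustrative invocation combining several of the options above (hedged
 * example, values are arbitrary):
 *
 *   perf record -F 999 -g --switch-output=1G -o perf.data -p 1234
 *
 * i.e. sample PID 1234 at ~999 Hz with call graphs, rotating the output file
 * roughly every gigabyte (each file getting a <timestamp> suffix).
 */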
2023
2024int cmd_record(int argc, const char **argv)
2025{
2026        int err;
2027        struct record *rec = &record;
2028        char errbuf[BUFSIZ];
2029
2030        setlocale(LC_ALL, "");
2031
2032#ifndef HAVE_LIBBPF_SUPPORT
2033# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2034        set_nobuild('\0', "clang-path", true);
2035        set_nobuild('\0', "clang-opt", true);
2036# undef set_nobuild
2037#endif
2038
2039#ifndef HAVE_BPF_PROLOGUE
2040# if !defined (HAVE_DWARF_SUPPORT)
2041#  define REASON  "NO_DWARF=1"
2042# elif !defined (HAVE_LIBBPF_SUPPORT)
2043#  define REASON  "NO_LIBBPF=1"
2044# else
2045#  define REASON  "this architecture doesn't support BPF prologue"
2046# endif
2047# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2048        set_nobuild('\0', "vmlinux", true);
2049# undef set_nobuild
2050# undef REASON
2051#endif
2052
2053        CPU_ZERO(&rec->affinity_mask);
2054        rec->opts.affinity = PERF_AFFINITY_SYS;
2055
2056        rec->evlist = perf_evlist__new();
2057        if (rec->evlist == NULL)
2058                return -ENOMEM;
2059
2060        err = perf_config(perf_record_config, rec);
2061        if (err)
2062                return err;
2063
2064        argc = parse_options(argc, argv, record_options, record_usage,
2065                            PARSE_OPT_STOP_AT_NON_OPTION);
2066        if (quiet)
2067                perf_quiet_option();
2068
2069        /* Make system wide (-a) the default target. */
2070        if (!argc && target__none(&rec->opts.target))
2071                rec->opts.target.system_wide = true;
2072
2073        if (nr_cgroups && !rec->opts.target.system_wide) {
2074                usage_with_options_msg(record_usage, record_options,
2075                        "cgroup monitoring only available in system-wide mode");
2076
2077        }
2078        if (rec->opts.record_switch_events &&
2079            !perf_can_record_switch_events()) {
2080                ui__error("kernel does not support recording context switch events\n");
2081                parse_options_usage(record_usage, record_options, "switch-events", 0);
2082                return -EINVAL;
2083        }
2084
2085        if (switch_output_setup(rec)) {
2086                parse_options_usage(record_usage, record_options, "switch-output", 0);
2087                return -EINVAL;
2088        }
2089
2090        if (rec->switch_output.time) {
2091                signal(SIGALRM, alarm_sig_handler);
2092                alarm(rec->switch_output.time);
2093        }
2094
2095        if (rec->switch_output.num_files) {
2096                rec->switch_output.filenames = calloc(rec->switch_output.num_files,
2097                                                      sizeof(char *));
2098                if (!rec->switch_output.filenames)
2099                        return -EINVAL;
2100        }
2101
2102        /*
2103         * Allow aliases to facilitate the lookup of symbols for address
2104         * filters. Refer to auxtrace_parse_filters().
2105         */
2106        symbol_conf.allow_aliases = true;
2107
2108        symbol__init(NULL);
2109
2110        err = record__auxtrace_init(rec);
2111        if (err)
2112                goto out;
2113
2114        if (dry_run)
2115                goto out;
2116
2117        err = bpf__setup_stdout(rec->evlist);
2118        if (err) {
2119                bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2120                pr_err("ERROR: Setup BPF stdout failed: %s\n",
2121                         errbuf);
2122                goto out;
2123        }
2124
2125        err = -ENOMEM;
2126
2127        if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
2128                pr_warning(
2129"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
2130"check /proc/sys/kernel/kptr_restrict.\n\n"
2131"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
2132"file is not found in the buildid cache or in the vmlinux path.\n\n"
2133"Samples in kernel modules won't be resolved at all.\n\n"
2134"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
2135"even with a suitable vmlinux or kallsyms file.\n\n");
2136
2137        if (rec->no_buildid_cache || rec->no_buildid) {
2138                disable_buildid_cache();
2139        } else if (rec->switch_output.enabled) {
2140                /*
2141                 * In 'perf record --switch-output', disable buildid
2142                 * generation by default to reduce data file switching
2143                 * overhead. Still generate buildids if they are requested
2144                 * explicitly with
2145                 *
2146                 *  perf record --switch-output --no-no-buildid \
2147                 *              --no-no-buildid-cache
2148                 *
2149                 * The following code is equivalent to:
2150                 *
2151                 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2152                 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2153                 *         disable_buildid_cache();
2154                 */
2155                bool disable = true;
2156
2157                if (rec->no_buildid_set && !rec->no_buildid)
2158                        disable = false;
2159                if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2160                        disable = false;
2161                if (disable) {
2162                        rec->no_buildid = true;
2163                        rec->no_buildid_cache = true;
2164                        disable_buildid_cache();
2165                }
2166        }
2167
2168        if (record.opts.overwrite)
2169                record.opts.tail_synthesize = true;
2170
2171        if (rec->evlist->nr_entries == 0 &&
2172            __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2173                pr_err("Not enough memory for event selector list\n");
2174                goto out;
2175        }
2176
2177        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2178                rec->opts.no_inherit = true;
2179
2180        err = target__validate(&rec->opts.target);
2181        if (err) {
2182                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2183                ui__warning("%s\n", errbuf);
2184        }
2185
2186        err = target__parse_uid(&rec->opts.target);
2187        if (err) {
2188                int saved_errno = errno;
2189
2190                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2191                ui__error("%s", errbuf);
2192
2193                err = -saved_errno;
2194                goto out;
2195        }
2196
2197        /* Enable ignoring missing threads when -u/-p option is defined. */
2198        rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2199
2200        err = -ENOMEM;
2201        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2202                usage_with_options(record_usage, record_options);
2203
2204        err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2205        if (err)
2206                goto out;
2207
2208        /*
2209         * We take all buildids when the file contains
2210         * AUX area tracing data, because we do not decode the
2211         * trace (decoding it would take too long).
2212         */
2213        if (rec->opts.full_auxtrace)
2214                rec->buildid_all = true;
2215
2216        if (record_opts__config(&rec->opts)) {
2217                err = -EINVAL;
2218                goto out;
2219        }
2220
2221        if (rec->opts.nr_cblocks > nr_cblocks_max)
2222                rec->opts.nr_cblocks = nr_cblocks_max;
2223        if (verbose > 0)
2224                pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2225
2226        pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2227
2228        err = __cmd_record(&record, argc, argv);
2229out:
2230        perf_evlist__delete(rec->evlist);
2231        symbol__exit();
2232        auxtrace_record__free(rec->itr);
2233        return err;
2234}
2235
2236static void snapshot_sig_handler(int sig __maybe_unused)
2237{
2238        struct record *rec = &record;
2239
2240        if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2241                trigger_hit(&auxtrace_snapshot_trigger);
2242                auxtrace_record__snapshot_started = 1;
2243                if (auxtrace_record__snapshot_start(record.itr))
2244                        trigger_error(&auxtrace_snapshot_trigger);
2245        }
2246
2247        if (switch_output_signal(rec))
2248                trigger_hit(&switch_output_trigger);
2249}
2250
2251static void alarm_sig_handler(int sig __maybe_unused)
2252{
2253        struct record *rec = &record;
2254
2255        if (switch_output_time(rec))
2256                trigger_hit(&switch_output_trigger);
2257}
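
/*
 * Illustrative signal-driven flow (hedged sketch): with '-S/--snapshot',
 * SIGUSR2 reaches snapshot_sig_handler() above and triggers an AUX area
 * snapshot; with '--switch-output' the same signal (or the SIGALRM armed for
 * a time threshold) trips switch_output_trigger and the main record loop
 * rotates the output file:
 *
 *   perf record --switch-output -o trace -a -- sleep 600 &
 *   kill -USR2 %1        # forces a new trace.<timestamp> file
 */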
2258