linux/tools/perf/builtin-record.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * builtin-record.c
   4 *
   5 * Builtin record command: Record the profile of a workload
   6 * (or a CPU, or a PID) into the perf.data output file - for
   7 * later analysis via perf report.
   8 */
   9#include "builtin.h"
  10
  11#include "util/build-id.h"
  12#include <subcmd/parse-options.h>
  13#include "util/parse-events.h"
  14#include "util/config.h"
  15
  16#include "util/callchain.h"
  17#include "util/cgroup.h"
  18#include "util/header.h"
  19#include "util/event.h"
  20#include "util/evlist.h"
  21#include "util/evsel.h"
  22#include "util/debug.h"
  23#include "util/mmap.h"
  24#include "util/target.h"
  25#include "util/session.h"
  26#include "util/tool.h"
  27#include "util/symbol.h"
  28#include "util/record.h"
  29#include "util/cpumap.h"
  30#include "util/thread_map.h"
  31#include "util/data.h"
  32#include "util/perf_regs.h"
  33#include "util/auxtrace.h"
  34#include "util/tsc.h"
  35#include "util/parse-branch-options.h"
  36#include "util/parse-regs-options.h"
  37#include "util/perf_api_probe.h"
  38#include "util/llvm-utils.h"
  39#include "util/bpf-loader.h"
  40#include "util/trigger.h"
  41#include "util/perf-hooks.h"
  42#include "util/cpu-set-sched.h"
  43#include "util/synthetic-events.h"
  44#include "util/time-utils.h"
  45#include "util/units.h"
  46#include "util/bpf-event.h"
  47#include "util/util.h"
  48#include "util/pfm.h"
  49#include "util/clockid.h"
  50#include "asm/bug.h"
  51#include "perf.h"
  52
  53#include <errno.h>
  54#include <inttypes.h>
  55#include <locale.h>
  56#include <poll.h>
  57#include <pthread.h>
  58#include <unistd.h>
  59#include <sched.h>
  60#include <signal.h>
  61#ifdef HAVE_EVENTFD_SUPPORT
  62#include <sys/eventfd.h>
  63#endif
  64#include <sys/mman.h>
  65#include <sys/wait.h>
  66#include <sys/types.h>
  67#include <sys/stat.h>
  68#include <fcntl.h>
  69#include <linux/err.h>
  70#include <linux/string.h>
  71#include <linux/time64.h>
  72#include <linux/zalloc.h>
  73#include <linux/bitmap.h>
  74#include <sys/time.h>
  75
  76struct switch_output {
  77        bool             enabled;
  78        bool             signal;
  79        unsigned long    size;
  80        unsigned long    time;
  81        const char      *str;
  82        bool             set;
  83        char             **filenames;
  84        int              num_files;
  85        int              cur_file;
  86};
  87
  88struct record {
  89        struct perf_tool        tool;
  90        struct record_opts      opts;
  91        u64                     bytes_written;
  92        struct perf_data        data;
  93        struct auxtrace_record  *itr;
  94        struct evlist   *evlist;
  95        struct perf_session     *session;
  96        struct evlist           *sb_evlist;
  97        pthread_t               thread_id;
  98        int                     realtime_prio;
  99        bool                    switch_output_event_set;
 100        bool                    no_buildid;
 101        bool                    no_buildid_set;
 102        bool                    no_buildid_cache;
 103        bool                    no_buildid_cache_set;
 104        bool                    buildid_all;
 105        bool                    timestamp_filename;
 106        bool                    timestamp_boundary;
 107        struct switch_output    switch_output;
 108        unsigned long long      samples;
 109        struct mmap_cpu_mask    affinity_mask;
 110        unsigned long           output_max_size;        /* = 0: unlimited */
 111};
 112
 113static volatile int done;
 114
 115static volatile int auxtrace_record__snapshot_started;
 116static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
 117static DEFINE_TRIGGER(switch_output_trigger);
 118
 119static const char *affinity_tags[PERF_AFFINITY_MAX] = {
 120        "SYS", "NODE", "CPU"
 121};
 122
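     /*
      * Descriptive note (added): predicates for the --switch-output rotation
      * modes. Each returns true only when the corresponding condition
      * (signal, output size threshold, or timer period) is configured and
      * the switch_output trigger is ready.
      */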
 123static bool switch_output_signal(struct record *rec)
 124{
 125        return rec->switch_output.signal &&
 126               trigger_is_ready(&switch_output_trigger);
 127}
 128
 129static bool switch_output_size(struct record *rec)
 130{
 131        return rec->switch_output.size &&
 132               trigger_is_ready(&switch_output_trigger) &&
 133               (rec->bytes_written >= rec->switch_output.size);
 134}
 135
 136static bool switch_output_time(struct record *rec)
 137{
 138        return rec->switch_output.time &&
 139               trigger_is_ready(&switch_output_trigger);
 140}
 141
 142static bool record__output_max_size_exceeded(struct record *rec)
 143{
 144        return rec->output_max_size &&
 145               (rec->bytes_written >= rec->output_max_size);
 146}
 147
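     /*
      * Descriptive note (added): write a chunk of data to the perf.data file
      * and account for it in rec->bytes_written. Stops the session once the
      * configured output size limit (output_max_size) is exceeded and fires
      * the switch-output trigger when the rotation size threshold is reached.
      */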
 148static int record__write(struct record *rec, struct mmap *map __maybe_unused,
 149                         void *bf, size_t size)
 150{
 151        struct perf_data_file *file = &rec->session->data->file;
 152
 153        if (perf_data_file__write(file, bf, size) < 0) {
 154                pr_err("failed to write perf data, error: %m\n");
 155                return -1;
 156        }
 157
 158        rec->bytes_written += size;
 159
 160        if (record__output_max_size_exceeded(rec) && !done) {
 161                fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
 162                                " stopping session ]\n",
 163                                rec->bytes_written >> 10);
 164                done = 1;
 165        }
 166
 167        if (switch_output_size(rec))
 168                trigger_hit(&switch_output_trigger);
 169
 170        return 0;
 171}
 172
 173static int record__aio_enabled(struct record *rec);
 174static int record__comp_enabled(struct record *rec);
 175static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
 176                            void *src, size_t src_size);
 177
 178#ifdef HAVE_AIO_SUPPORT
 179static int record__aio_write(struct aiocb *cblock, int trace_fd,
 180                void *buf, size_t size, off_t off)
 181{
 182        int rc;
 183
 184        cblock->aio_fildes = trace_fd;
 185        cblock->aio_buf    = buf;
 186        cblock->aio_nbytes = size;
 187        cblock->aio_offset = off;
 188        cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
 189
 190        do {
 191                rc = aio_write(cblock);
 192                if (rc == 0) {
 193                        break;
 194                } else if (errno != EAGAIN) {
 195                        cblock->aio_fildes = -1;
 196                        pr_err("failed to queue perf data, error: %m\n");
 197                        break;
 198                }
 199        } while (1);
 200
 201        return rc;
 202}
 203
 204static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
 205{
 206        void *rem_buf;
 207        off_t rem_off;
 208        size_t rem_size;
 209        int rc, aio_errno;
 210        ssize_t aio_ret, written;
 211
 212        aio_errno = aio_error(cblock);
 213        if (aio_errno == EINPROGRESS)
 214                return 0;
 215
 216        written = aio_ret = aio_return(cblock);
 217        if (aio_ret < 0) {
 218                if (aio_errno != EINTR)
 219                        pr_err("failed to write perf data, error: %m\n");
 220                written = 0;
 221        }
 222
 223        rem_size = cblock->aio_nbytes - written;
 224
 225        if (rem_size == 0) {
 226                cblock->aio_fildes = -1;
 227                /*
 228                 * md->refcount is incremented in record__aio_pushfn() for
 229                 * every aio write request started in record__aio_push() so
 230                 * decrement it because the request is now complete.
 231                 */
 232                perf_mmap__put(&md->core);
 233                rc = 1;
 234        } else {
 235                /*
  236                 * The aio write request may need to be restarted with the
  237                 * remainder if the kernel didn't write the whole chunk at
  238                 * once.
 239                 */
 240                rem_off = cblock->aio_offset + written;
 241                rem_buf = (void *)(cblock->aio_buf + written);
 242                record__aio_write(cblock, cblock->aio_fildes,
 243                                rem_buf, rem_size, rem_off);
 244                rc = 0;
 245        }
 246
 247        return rc;
 248}
 249
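     /*
      * Descriptive note (added): reap completed aio writes for this mmap.
      * With sync_all == false the index of the first free control block is
      * returned so a new write can be queued; with sync_all == true the
      * function returns (-1) only once no write is left in flight,
      * aio_suspend()ing in 1ms slices while any are still pending.
      */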
 250static int record__aio_sync(struct mmap *md, bool sync_all)
 251{
 252        struct aiocb **aiocb = md->aio.aiocb;
 253        struct aiocb *cblocks = md->aio.cblocks;
 254        struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
 255        int i, do_suspend;
 256
 257        do {
 258                do_suspend = 0;
 259                for (i = 0; i < md->aio.nr_cblocks; ++i) {
 260                        if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
 261                                if (sync_all)
 262                                        aiocb[i] = NULL;
 263                                else
 264                                        return i;
 265                        } else {
 266                                /*
  267                                 * The started aio write is not complete yet,
  268                                 * so it has to be waited on before the
  269                                 * next allocation.
 270                                 */
 271                                aiocb[i] = &cblocks[i];
 272                                do_suspend = 1;
 273                        }
 274                }
 275                if (!do_suspend)
 276                        return -1;
 277
 278                while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
 279                        if (!(errno == EAGAIN || errno == EINTR))
 280                                pr_err("failed to sync perf data, error: %m\n");
 281                }
 282        } while (1);
 283}
 284
 285struct record_aio {
 286        struct record   *rec;
 287        void            *data;
 288        size_t          size;
 289};
 290
 291static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
 292{
 293        struct record_aio *aio = to;
 294
 295        /*
  296         * map->core.base data pointed to by buf is copied into a free map->aio.data[]
  297         * buffer to release space in the kernel buffer as fast as possible, via the
  298         * perf_mmap__consume() call made from perf_mmap__push().
  299         *
  300         * That lets the kernel proceed with storing more profiling data into
  301         * the kernel buffer earlier than other per-cpu kernel buffers are handled.
  302         *
  303         * Copying can be done in two steps in case the chunk of profiling data
  304         * crosses the upper bound of the kernel buffer. In this case we first move
  305         * part of the data from map->start to the upper bound and then the remainder
  306         * from the beginning of the kernel buffer to the end of the data chunk.
 307         */
 308
 309        if (record__comp_enabled(aio->rec)) {
 310                size = zstd_compress(aio->rec->session, aio->data + aio->size,
 311                                     mmap__mmap_len(map) - aio->size,
 312                                     buf, size);
 313        } else {
 314                memcpy(aio->data + aio->size, buf, size);
 315        }
 316
 317        if (!aio->size) {
 318                /*
  319                 * Increment map->refcount to guard the map->aio.data[] buffer
  320                 * from premature deallocation, because the map object can be
  321                 * released before the aio write request started on the
  322                 * map->aio.data[] buffer completes.
  323                 *
  324                 * perf_mmap__put() is done in record__aio_complete() once the
  325                 * started aio request completes, or in record__aio_push()
  326                 * if the request failed to start.
 327                 */
 328                perf_mmap__get(&map->core);
 329        }
 330
 331        aio->size += size;
 332
 333        return size;
 334}
 335
 336static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
 337{
 338        int ret, idx;
 339        int trace_fd = rec->session->data->file.fd;
 340        struct record_aio aio = { .rec = rec, .size = 0 };
 341
 342        /*
  343         * Call record__aio_sync() to wait until a map->aio.data[] buffer
  344         * becomes available after the previous aio write operation.
 345         */
 346
 347        idx = record__aio_sync(map, false);
 348        aio.data = map->aio.data[idx];
 349        ret = perf_mmap__push(map, &aio, record__aio_pushfn);
 350        if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
 351                return ret;
 352
 353        rec->samples++;
 354        ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
 355        if (!ret) {
 356                *off += aio.size;
 357                rec->bytes_written += aio.size;
 358                if (switch_output_size(rec))
 359                        trigger_hit(&switch_output_trigger);
 360        } else {
 361                /*
  362                 * Decrement the map->refcount taken in record__aio_pushfn()
  363                 * if the record__aio_write() operation failed to start; otherwise
  364                 * map->refcount is decremented in record__aio_complete() after
  365                 * the aio write operation finishes successfully.
 366                 */
 367                perf_mmap__put(&map->core);
 368        }
 369
 370        return ret;
 371}
 372
 373static off_t record__aio_get_pos(int trace_fd)
 374{
 375        return lseek(trace_fd, 0, SEEK_CUR);
 376}
 377
 378static void record__aio_set_pos(int trace_fd, off_t pos)
 379{
 380        lseek(trace_fd, pos, SEEK_SET);
 381}
 382
 383static void record__aio_mmap_read_sync(struct record *rec)
 384{
 385        int i;
 386        struct evlist *evlist = rec->evlist;
 387        struct mmap *maps = evlist->mmap;
 388
 389        if (!record__aio_enabled(rec))
 390                return;
 391
 392        for (i = 0; i < evlist->core.nr_mmaps; i++) {
 393                struct mmap *map = &maps[i];
 394
 395                if (map->core.base)
 396                        record__aio_sync(map, true);
 397        }
 398}
 399
 400static int nr_cblocks_default = 1;
 401static int nr_cblocks_max = 4;
 402
 403static int record__aio_parse(const struct option *opt,
 404                             const char *str,
 405                             int unset)
 406{
 407        struct record_opts *opts = (struct record_opts *)opt->value;
 408
 409        if (unset) {
 410                opts->nr_cblocks = 0;
 411        } else {
 412                if (str)
 413                        opts->nr_cblocks = strtol(str, NULL, 0);
 414                if (!opts->nr_cblocks)
 415                        opts->nr_cblocks = nr_cblocks_default;
 416        }
 417
 418        return 0;
 419}
 420#else /* HAVE_AIO_SUPPORT */
 421static int nr_cblocks_max = 0;
 422
 423static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
 424                            off_t *off __maybe_unused)
 425{
 426        return -1;
 427}
 428
 429static off_t record__aio_get_pos(int trace_fd __maybe_unused)
 430{
 431        return -1;
 432}
 433
 434static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
 435{
 436}
 437
 438static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
 439{
 440}
 441#endif
 442
 443static int record__aio_enabled(struct record *rec)
 444{
 445        return rec->opts.nr_cblocks > 0;
 446}
 447
 448#define MMAP_FLUSH_DEFAULT 1
 449static int record__mmap_flush_parse(const struct option *opt,
 450                                    const char *str,
 451                                    int unset)
 452{
 453        int flush_max;
 454        struct record_opts *opts = (struct record_opts *)opt->value;
 455        static struct parse_tag tags[] = {
 456                        { .tag  = 'B', .mult = 1       },
 457                        { .tag  = 'K', .mult = 1 << 10 },
 458                        { .tag  = 'M', .mult = 1 << 20 },
 459                        { .tag  = 'G', .mult = 1 << 30 },
 460                        { .tag  = 0 },
 461        };
 462
 463        if (unset)
 464                return 0;
 465
 466        if (str) {
 467                opts->mmap_flush = parse_tag_value(str, tags);
 468                if (opts->mmap_flush == (int)-1)
 469                        opts->mmap_flush = strtol(str, NULL, 0);
 470        }
 471
 472        if (!opts->mmap_flush)
 473                opts->mmap_flush = MMAP_FLUSH_DEFAULT;
 474
 475        flush_max = evlist__mmap_size(opts->mmap_pages);
 476        flush_max /= 4;
 477        if (opts->mmap_flush > flush_max)
 478                opts->mmap_flush = flush_max;
 479
 480        return 0;
 481}
 482
 483#ifdef HAVE_ZSTD_SUPPORT
 484static unsigned int comp_level_default = 1;
 485
 486static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
 487{
 488        struct record_opts *opts = opt->value;
 489
 490        if (unset) {
 491                opts->comp_level = 0;
 492        } else {
 493                if (str)
 494                        opts->comp_level = strtol(str, NULL, 0);
 495                if (!opts->comp_level)
 496                        opts->comp_level = comp_level_default;
 497        }
 498
 499        return 0;
 500}
 501#endif
 502static unsigned int comp_level_max = 22;
 503
 504static int record__comp_enabled(struct record *rec)
 505{
 506        return rec->opts.comp_level > 0;
 507}
 508
 509static int process_synthesized_event(struct perf_tool *tool,
 510                                     union perf_event *event,
 511                                     struct perf_sample *sample __maybe_unused,
 512                                     struct machine *machine __maybe_unused)
 513{
 514        struct record *rec = container_of(tool, struct record, tool);
 515        return record__write(rec, NULL, event, event->header.size);
 516}
 517
 518static int process_locked_synthesized_event(struct perf_tool *tool,
 519                                     union perf_event *event,
 520                                     struct perf_sample *sample __maybe_unused,
 521                                     struct machine *machine __maybe_unused)
 522{
 523        static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
 524        int ret;
 525
 526        pthread_mutex_lock(&synth_lock);
 527        ret = process_synthesized_event(tool, event, sample, machine);
 528        pthread_mutex_unlock(&synth_lock);
 529        return ret;
 530}
 531
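     /*
      * Descriptive note (added): perf_mmap__push() callback for the serial
      * (non-AIO) path. Optionally zstd-compresses the chunk into map->data,
      * then writes it to the output file.
      */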
 532static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
 533{
 534        struct record *rec = to;
 535
 536        if (record__comp_enabled(rec)) {
 537                size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size);
 538                bf   = map->data;
 539        }
 540
 541        rec->samples++;
 542        return record__write(rec, map, bf, size);
 543}
 544
 545static volatile int signr = -1;
 546static volatile int child_finished;
 547#ifdef HAVE_EVENTFD_SUPPORT
 548static int done_fd = -1;
 549#endif
 550
 551static void sig_handler(int sig)
 552{
 553        if (sig == SIGCHLD)
 554                child_finished = 1;
 555        else
 556                signr = sig;
 557
 558        done = 1;
 559#ifdef HAVE_EVENTFD_SUPPORT
 560{
 561        u64 tmp = 1;
 562        /*
 563         * It is possible for this signal handler to run after done is checked
 564         * in the main loop, but before the perf counter fds are polled. If this
 565         * happens, the poll() will continue to wait even though done is set,
 566         * and will only break out if either another signal is received, or the
 567         * counters are ready for read. To ensure the poll() doesn't sleep when
 568         * done is set, use an eventfd (done_fd) to wake up the poll().
 569         */
 570        if (write(done_fd, &tmp, sizeof(tmp)) < 0)
 571                pr_err("failed to signal wakeup fd, error: %m\n");
 572}
 573#endif // HAVE_EVENTFD_SUPPORT
 574}
 575
 576static void sigsegv_handler(int sig)
 577{
 578        perf_hooks__recover();
 579        sighandler_dump_stack(sig);
 580}
 581
 582static void record__sig_exit(void)
 583{
 584        if (signr == -1)
 585                return;
 586
 587        signal(signr, SIG_DFL);
 588        raise(signr);
 589}
 590
 591#ifdef HAVE_AUXTRACE_SUPPORT
 592
 593static int record__process_auxtrace(struct perf_tool *tool,
 594                                    struct mmap *map,
 595                                    union perf_event *event, void *data1,
 596                                    size_t len1, void *data2, size_t len2)
 597{
 598        struct record *rec = container_of(tool, struct record, tool);
 599        struct perf_data *data = &rec->data;
 600        size_t padding;
 601        u8 pad[8] = {0};
 602
 603        if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
 604                off_t file_offset;
 605                int fd = perf_data__fd(data);
 606                int err;
 607
 608                file_offset = lseek(fd, 0, SEEK_CUR);
 609                if (file_offset == -1)
 610                        return -1;
 611                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
 612                                                     event, file_offset);
 613                if (err)
 614                        return err;
 615        }
 616
 617        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
 618        padding = (len1 + len2) & 7;
 619        if (padding)
 620                padding = 8 - padding;
 621
 622        record__write(rec, map, event, event->header.size);
 623        record__write(rec, map, data1, len1);
 624        if (len2)
 625                record__write(rec, map, data2, len2);
 626        record__write(rec, map, &pad, padding);
 627
 628        return 0;
 629}
 630
 631static int record__auxtrace_mmap_read(struct record *rec,
 632                                      struct mmap *map)
 633{
 634        int ret;
 635
 636        ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
 637                                  record__process_auxtrace);
 638        if (ret < 0)
 639                return ret;
 640
 641        if (ret)
 642                rec->samples++;
 643
 644        return 0;
 645}
 646
 647static int record__auxtrace_mmap_read_snapshot(struct record *rec,
 648                                               struct mmap *map)
 649{
 650        int ret;
 651
 652        ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
 653                                           record__process_auxtrace,
 654                                           rec->opts.auxtrace_snapshot_size);
 655        if (ret < 0)
 656                return ret;
 657
 658        if (ret)
 659                rec->samples++;
 660
 661        return 0;
 662}
 663
 664static int record__auxtrace_read_snapshot_all(struct record *rec)
 665{
 666        int i;
 667        int rc = 0;
 668
 669        for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
 670                struct mmap *map = &rec->evlist->mmap[i];
 671
 672                if (!map->auxtrace_mmap.base)
 673                        continue;
 674
 675                if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
 676                        rc = -1;
 677                        goto out;
 678                }
 679        }
 680out:
 681        return rc;
 682}
 683
 684static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
 685{
 686        pr_debug("Recording AUX area tracing snapshot\n");
 687        if (record__auxtrace_read_snapshot_all(rec) < 0) {
 688                trigger_error(&auxtrace_snapshot_trigger);
 689        } else {
 690                if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
 691                        trigger_error(&auxtrace_snapshot_trigger);
 692                else
 693                        trigger_ready(&auxtrace_snapshot_trigger);
 694        }
 695}
 696
 697static int record__auxtrace_snapshot_exit(struct record *rec)
 698{
 699        if (trigger_is_error(&auxtrace_snapshot_trigger))
 700                return 0;
 701
 702        if (!auxtrace_record__snapshot_started &&
 703            auxtrace_record__snapshot_start(rec->itr))
 704                return -1;
 705
 706        record__read_auxtrace_snapshot(rec, true);
 707        if (trigger_is_error(&auxtrace_snapshot_trigger))
 708                return -1;
 709
 710        return 0;
 711}
 712
 713static int record__auxtrace_init(struct record *rec)
 714{
 715        int err;
 716
 717        if (!rec->itr) {
 718                rec->itr = auxtrace_record__init(rec->evlist, &err);
 719                if (err)
 720                        return err;
 721        }
 722
 723        err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
 724                                              rec->opts.auxtrace_snapshot_opts);
 725        if (err)
 726                return err;
 727
 728        err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
 729                                            rec->opts.auxtrace_sample_opts);
 730        if (err)
 731                return err;
 732
 733        return auxtrace_parse_filters(rec->evlist);
 734}
 735
 736#else
 737
 738static inline
 739int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
 740                               struct mmap *map __maybe_unused)
 741{
 742        return 0;
 743}
 744
 745static inline
 746void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
 747                                    bool on_exit __maybe_unused)
 748{
 749}
 750
 751static inline
 752int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
 753{
 754        return 0;
 755}
 756
 757static inline
 758int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
 759{
 760        return 0;
 761}
 762
 763static int record__auxtrace_init(struct record *rec __maybe_unused)
 764{
 765        return 0;
 766}
 767
 768#endif
 769
 770static int record__config_text_poke(struct evlist *evlist)
 771{
 772        struct evsel *evsel;
 773        int err;
 774
 775        /* Nothing to do if text poke is already configured */
 776        evlist__for_each_entry(evlist, evsel) {
 777                if (evsel->core.attr.text_poke)
 778                        return 0;
 779        }
 780
 781        err = parse_events(evlist, "dummy:u", NULL);
 782        if (err)
 783                return err;
 784
 785        evsel = evlist__last(evlist);
 786
 787        evsel->core.attr.freq = 0;
 788        evsel->core.attr.sample_period = 1;
 789        evsel->core.attr.text_poke = 1;
 790        evsel->core.attr.ksymbol = 1;
 791
 792        evsel->core.system_wide = true;
 793        evsel->no_aux_samples = true;
 794        evsel->immediate = true;
 795
 796        /* Text poke must be collected on all CPUs */
 797        perf_cpu_map__put(evsel->core.own_cpus);
 798        evsel->core.own_cpus = perf_cpu_map__new(NULL);
 799        perf_cpu_map__put(evsel->core.cpus);
 800        evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
 801
 802        evsel__set_sample_bit(evsel, TIME);
 803
 804        return 0;
 805}
 806
 807static bool record__kcore_readable(struct machine *machine)
 808{
 809        char kcore[PATH_MAX];
 810        int fd;
 811
 812        scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
 813
 814        fd = open(kcore, O_RDONLY);
 815        if (fd < 0)
 816                return false;
 817
 818        close(fd);
 819
 820        return true;
 821}
 822
 823static int record__kcore_copy(struct machine *machine, struct perf_data *data)
 824{
 825        char from_dir[PATH_MAX];
 826        char kcore_dir[PATH_MAX];
 827        int ret;
 828
 829        snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
 830
 831        ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
 832        if (ret)
 833                return ret;
 834
 835        return kcore_copy(from_dir, kcore_dir);
 836}
 837
 838static int record__mmap_evlist(struct record *rec,
 839                               struct evlist *evlist)
 840{
 841        struct record_opts *opts = &rec->opts;
 842        bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
 843                                  opts->auxtrace_sample_mode;
 844        char msg[512];
 845
 846        if (opts->affinity != PERF_AFFINITY_SYS)
 847                cpu__setup_cpunode_map();
 848
 849        if (evlist__mmap_ex(evlist, opts->mmap_pages,
 850                                 opts->auxtrace_mmap_pages,
 851                                 auxtrace_overwrite,
 852                                 opts->nr_cblocks, opts->affinity,
 853                                 opts->mmap_flush, opts->comp_level) < 0) {
 854                if (errno == EPERM) {
 855                        pr_err("Permission error mapping pages.\n"
 856                               "Consider increasing "
 857                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
 858                               "or try again with a smaller value of -m/--mmap_pages.\n"
 859                               "(current value: %u,%u)\n",
 860                               opts->mmap_pages, opts->auxtrace_mmap_pages);
 861                        return -errno;
 862                } else {
 863                        pr_err("failed to mmap with %d (%s)\n", errno,
 864                                str_error_r(errno, msg, sizeof(msg)));
 865                        if (errno)
 866                                return -errno;
 867                        else
 868                                return -EINVAL;
 869                }
 870        }
 871        return 0;
 872}
 873
 874static int record__mmap(struct record *rec)
 875{
 876        return record__mmap_evlist(rec, rec->evlist);
 877}
 878
 879static int record__open(struct record *rec)
 880{
 881        char msg[BUFSIZ];
 882        struct evsel *pos;
 883        struct evlist *evlist = rec->evlist;
 884        struct perf_session *session = rec->session;
 885        struct record_opts *opts = &rec->opts;
 886        int rc = 0;
 887
 888        /*
  889         * For initial_delay or system wide, we need to add a dummy event so
  890         * that we can track PERF_RECORD_MMAP events during the initial delay
  891         * or while event synthesis is in progress.
 892         */
 893        if (opts->initial_delay || target__has_cpu(&opts->target)) {
 894                pos = perf_evlist__get_tracking_event(evlist);
 895                if (!evsel__is_dummy_event(pos)) {
 896                        /* Set up dummy event. */
 897                        if (evlist__add_dummy(evlist))
 898                                return -ENOMEM;
 899                        pos = evlist__last(evlist);
 900                        perf_evlist__set_tracking_event(evlist, pos);
 901                }
 902
 903                /*
 904                 * Enable the dummy event when the process is forked for
 905                 * initial_delay, immediately for system wide.
 906                 */
 907                if (opts->initial_delay && !pos->immediate)
 908                        pos->core.attr.enable_on_exec = 1;
 909                else
 910                        pos->immediate = 1;
 911        }
 912
 913        perf_evlist__config(evlist, opts, &callchain_param);
 914
 915        evlist__for_each_entry(evlist, pos) {
 916try_again:
 917                if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
 918                        if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
 919                                if (verbose > 0)
 920                                        ui__warning("%s\n", msg);
 921                                goto try_again;
 922                        }
 923                        if ((errno == EINVAL || errno == EBADF) &&
 924                            pos->leader != pos &&
 925                            pos->weak_group) {
 926                                pos = perf_evlist__reset_weak_group(evlist, pos, true);
 927                                goto try_again;
 928                        }
 929                        rc = -errno;
 930                        evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
 931                        ui__error("%s\n", msg);
 932                        goto out;
 933                }
 934
 935                pos->supported = true;
 936        }
 937
 938        if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
 939                pr_warning(
 940"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
 941"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
 942"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
 943"file is not found in the buildid cache or in the vmlinux path.\n\n"
 944"Samples in kernel modules won't be resolved at all.\n\n"
 945"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
 946"even with a suitable vmlinux or kallsyms file.\n\n");
 947        }
 948
 949        if (perf_evlist__apply_filters(evlist, &pos)) {
 950                pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
 951                        pos->filter, evsel__name(pos), errno,
 952                        str_error_r(errno, msg, sizeof(msg)));
 953                rc = -1;
 954                goto out;
 955        }
 956
 957        rc = record__mmap(rec);
 958        if (rc)
 959                goto out;
 960
 961        session->evlist = evlist;
 962        perf_session__set_id_hdr_size(session);
 963out:
 964        return rc;
 965}
 966
 967static int process_sample_event(struct perf_tool *tool,
 968                                union perf_event *event,
 969                                struct perf_sample *sample,
 970                                struct evsel *evsel,
 971                                struct machine *machine)
 972{
 973        struct record *rec = container_of(tool, struct record, tool);
 974
 975        if (rec->evlist->first_sample_time == 0)
 976                rec->evlist->first_sample_time = sample->time;
 977
 978        rec->evlist->last_sample_time = sample->time;
 979
 980        if (rec->buildid_all)
 981                return 0;
 982
 983        rec->samples++;
 984        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
 985}
 986
 987static int process_buildids(struct record *rec)
 988{
 989        struct perf_session *session = rec->session;
 990
 991        if (perf_data__size(&rec->data) == 0)
 992                return 0;
 993
 994        /*
  995         * During this process, it'll load the kernel map and replace the
  996         * dso->long_name with a real pathname it found.  In this case
  997         * we prefer a vmlinux path like
  998         *   /lib/modules/3.16.4/build/vmlinux
  999         *
 1000         * rather than the build-id path (in the debug directory).
1001         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1002         */
1003        symbol_conf.ignore_vmlinux_buildid = true;
1004
1005        /*
 1006         * If --buildid-all is given, it marks all DSOs regardless of hits,
 1007         * so there is no need to process samples. But if timestamp_boundary is
 1008         * enabled, it still needs to walk all samples to get the timestamps of
 1009         * the first/last samples.
1010         */
1011        if (rec->buildid_all && !rec->timestamp_boundary)
1012                rec->tool.sample = NULL;
1013
1014        return perf_session__process_events(session);
1015}
1016
1017static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1018{
1019        int err;
1020        struct perf_tool *tool = data;
1021        /*
 1022         * As for the guest kernel, when processing the record & report
 1023         * subcommands we arrange the module mmap prior to the guest kernel
 1024         * mmap and trigger a dso preload, because by default guest module
 1025         * symbols are loaded from guest kallsyms instead of /lib/modules/XXX/XXX.
 1026         * This method avoids missing symbols when the first addr is
 1027         * in a module instead of in the guest kernel.
1028         */
1029        err = perf_event__synthesize_modules(tool, process_synthesized_event,
1030                                             machine);
1031        if (err < 0)
1032                pr_err("Couldn't record guest kernel [%d]'s reference"
1033                       " relocation symbol.\n", machine->pid);
1034
1035        /*
 1036         * We use _stext for the guest kernel because the guest kernel's
 1037         * /proc/kallsyms sometimes has no _text.
1038         */
1039        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1040                                                 machine);
1041        if (err < 0)
1042                pr_err("Couldn't record guest kernel [%d]'s reference"
1043                       " relocation symbol.\n", machine->pid);
1044}
1045
1046static struct perf_event_header finished_round_event = {
1047        .size = sizeof(struct perf_event_header),
1048        .type = PERF_RECORD_FINISHED_ROUND,
1049};
1050
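     /*
      * Descriptive note (added): when --affinity is not the default "sys"
      * mode and the mmap's CPU mask differs from the current one, migrate
      * the recording thread onto the CPUs backing this mmap before flushing.
      */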
1051static void record__adjust_affinity(struct record *rec, struct mmap *map)
1052{
1053        if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1054            !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits,
1055                          rec->affinity_mask.nbits)) {
1056                bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits);
1057                bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits,
1058                          map->affinity_mask.bits, rec->affinity_mask.nbits);
1059                sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask),
1060                                  (cpu_set_t *)rec->affinity_mask.bits);
1061                if (verbose == 2)
1062                        mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread");
1063        }
1064}
1065
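     /*
      * Descriptive note (added): callback for zstd_compress_stream_to_records().
      * The first call (increment == 0) initializes a PERF_RECORD_COMPRESSED
      * header and returns its size; subsequent calls grow header.size by the
      * amount of compressed payload just produced.
      */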
1066static size_t process_comp_header(void *record, size_t increment)
1067{
1068        struct perf_record_compressed *event = record;
1069        size_t size = sizeof(*event);
1070
1071        if (increment) {
1072                event->header.size += increment;
1073                return increment;
1074        }
1075
1076        event->header.type = PERF_RECORD_COMPRESSED;
1077        event->header.size = size;
1078
1079        return size;
1080}
1081
1082static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
1083                            void *src, size_t src_size)
1084{
1085        size_t compressed;
1086        size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1087
1088        compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
1089                                                     max_record_size, process_comp_header);
1090
1091        session->bytes_transferred += src_size;
1092        session->bytes_compressed  += compressed;
1093
1094        return compressed;
1095}
1096
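     /*
      * Descriptive note (added): drain the normal or overwrite mmaps of an
      * evlist. Per map: adjust thread affinity, push the data out either
      * synchronously or via AIO, and read AUX area data when not in
      * snapshot/sample mode. Emits a PERF_RECORD_FINISHED_ROUND event if
      * anything was written during this pass.
      */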
1097static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1098                                    bool overwrite, bool synch)
1099{
1100        u64 bytes_written = rec->bytes_written;
1101        int i;
1102        int rc = 0;
1103        struct mmap *maps;
1104        int trace_fd = rec->data.file.fd;
1105        off_t off = 0;
1106
1107        if (!evlist)
1108                return 0;
1109
1110        maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
1111        if (!maps)
1112                return 0;
1113
1114        if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1115                return 0;
1116
1117        if (record__aio_enabled(rec))
1118                off = record__aio_get_pos(trace_fd);
1119
1120        for (i = 0; i < evlist->core.nr_mmaps; i++) {
1121                u64 flush = 0;
1122                struct mmap *map = &maps[i];
1123
1124                if (map->core.base) {
1125                        record__adjust_affinity(rec, map);
1126                        if (synch) {
1127                                flush = map->core.flush;
1128                                map->core.flush = 1;
1129                        }
1130                        if (!record__aio_enabled(rec)) {
1131                                if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1132                                        if (synch)
1133                                                map->core.flush = flush;
1134                                        rc = -1;
1135                                        goto out;
1136                                }
1137                        } else {
1138                                if (record__aio_push(rec, map, &off) < 0) {
1139                                        record__aio_set_pos(trace_fd, off);
1140                                        if (synch)
1141                                                map->core.flush = flush;
1142                                        rc = -1;
1143                                        goto out;
1144                                }
1145                        }
1146                        if (synch)
1147                                map->core.flush = flush;
1148                }
1149
1150                if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1151                    !rec->opts.auxtrace_sample_mode &&
1152                    record__auxtrace_mmap_read(rec, map) != 0) {
1153                        rc = -1;
1154                        goto out;
1155                }
1156        }
1157
1158        if (record__aio_enabled(rec))
1159                record__aio_set_pos(trace_fd, off);
1160
1161        /*
 1162         * Mark the round finished if we wrote
1163         * at least one event.
1164         */
1165        if (bytes_written != rec->bytes_written)
1166                rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1167
1168        if (overwrite)
1169                perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1170out:
1171        return rc;
1172}
1173
1174static int record__mmap_read_all(struct record *rec, bool synch)
1175{
1176        int err;
1177
1178        err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1179        if (err)
1180                return err;
1181
1182        return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1183}
1184
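     /*
      * Descriptive note (added): start with every header feature set, then
      * clear the ones that do not apply to this session (build ids, tracing
      * data, branch stacks, auxtrace, clock data, compression, ...).
      */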
1185static void record__init_features(struct record *rec)
1186{
1187        struct perf_session *session = rec->session;
1188        int feat;
1189
1190        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1191                perf_header__set_feat(&session->header, feat);
1192
1193        if (rec->no_buildid)
1194                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1195
1196        if (!have_tracepoints(&rec->evlist->core.entries))
1197                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1198
1199        if (!rec->opts.branch_stack)
1200                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1201
1202        if (!rec->opts.full_auxtrace)
1203                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1204
1205        if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1206                perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1207
1208        if (!rec->opts.use_clockid)
1209                perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1210
1211        perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1212        if (!record__comp_enabled(rec))
1213                perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1214
1215        perf_header__clear_feat(&session->header, HEADER_STAT);
1216}
1217
1218static void
1219record__finish_output(struct record *rec)
1220{
1221        struct perf_data *data = &rec->data;
1222        int fd = perf_data__fd(data);
1223
1224        if (data->is_pipe)
1225                return;
1226
1227        rec->session->header.data_size += rec->bytes_written;
1228        data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1229
1230        if (!rec->no_buildid) {
1231                process_buildids(rec);
1232
1233                if (rec->buildid_all)
1234                        dsos__hit_all(rec->session);
1235        }
1236        perf_session__write_header(rec->session, rec->evlist, fd, true);
1237
1238        return;
1239}
1240
1241static int record__synthesize_workload(struct record *rec, bool tail)
1242{
1243        int err;
1244        struct perf_thread_map *thread_map;
1245
1246        if (rec->opts.tail_synthesize != tail)
1247                return 0;
1248
1249        thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1250        if (thread_map == NULL)
1251                return -1;
1252
1253        err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1254                                                 process_synthesized_event,
1255                                                 &rec->session->machines.host,
1256                                                 rec->opts.sample_address);
1257        perf_thread_map__put(thread_map);
1258        return err;
1259}
1260
1261static int record__synthesize(struct record *rec, bool tail);
1262
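     /*
      * Descriptive note (added): rotate the output. Flushes outstanding AIO
      * writes, synthesizes tail events, finalizes the current file and
      * switches to a new timestamped perf.data via perf_data__switch().
      * Unless called at exit, the write accounting is reset and tracking
      * events are re-synthesized for the new file; a bounded list of rotated
      * filenames is maintained when a maximum file count is configured.
      */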
1263static int
1264record__switch_output(struct record *rec, bool at_exit)
1265{
1266        struct perf_data *data = &rec->data;
1267        int fd, err;
1268        char *new_filename;
1269
 1270        /* Same size as a real timestamp: "2015122520103046" */
1271        char timestamp[] = "InvalidTimestamp";
1272
1273        record__aio_mmap_read_sync(rec);
1274
1275        record__synthesize(rec, true);
1276        if (target__none(&rec->opts.target))
1277                record__synthesize_workload(rec, true);
1278
1279        rec->samples = 0;
1280        record__finish_output(rec);
1281        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1282        if (err) {
1283                pr_err("Failed to get current timestamp\n");
1284                return -EINVAL;
1285        }
1286
1287        fd = perf_data__switch(data, timestamp,
1288                                    rec->session->header.data_offset,
1289                                    at_exit, &new_filename);
1290        if (fd >= 0 && !at_exit) {
1291                rec->bytes_written = 0;
1292                rec->session->header.data_size = 0;
1293        }
1294
1295        if (!quiet)
1296                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1297                        data->path, timestamp);
1298
1299        if (rec->switch_output.num_files) {
1300                int n = rec->switch_output.cur_file + 1;
1301
1302                if (n >= rec->switch_output.num_files)
1303                        n = 0;
1304                rec->switch_output.cur_file = n;
1305                if (rec->switch_output.filenames[n]) {
1306                        remove(rec->switch_output.filenames[n]);
1307                        zfree(&rec->switch_output.filenames[n]);
1308                }
1309                rec->switch_output.filenames[n] = new_filename;
1310        } else {
1311                free(new_filename);
1312        }
1313
1314        /* Output tracking events */
1315        if (!at_exit) {
1316                record__synthesize(rec, false);
1317
1318                /*
1319                 * In 'perf record --switch-output' without -a,
1320                 * record__synthesize() in record__switch_output() won't
1321                 * generate tracking events because there's no thread_map
 1322                 * in the evlist, so the newly created perf.data won't
 1323                 * contain map and comm information.
1324                 * Create a fake thread_map and directly call
1325                 * perf_event__synthesize_thread_map() for those events.
1326                 */
1327                if (target__none(&rec->opts.target))
1328                        record__synthesize_workload(rec, false);
1329        }
1330        return fd;
1331}
1332
1333static volatile int workload_exec_errno;
1334
1335/*
1336 * perf_evlist__prepare_workload will send a SIGUSR1
 1337 * if the fork fails, since we asked for it by setting its
1338 * want_signal to true.
1339 */
1340static void workload_exec_failed_signal(int signo __maybe_unused,
1341                                        siginfo_t *info,
1342                                        void *ucontext __maybe_unused)
1343{
1344        workload_exec_errno = info->si_value.sival_int;
1345        done = 1;
1346        child_finished = 1;
1347}
1348
1349static void snapshot_sig_handler(int sig);
1350static void alarm_sig_handler(int sig);
1351
1352static const struct perf_event_mmap_page *
1353perf_evlist__pick_pc(struct evlist *evlist)
1354{
1355        if (evlist) {
1356                if (evlist->mmap && evlist->mmap[0].core.base)
1357                        return evlist->mmap[0].core.base;
1358                if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1359                        return evlist->overwrite_mmap[0].core.base;
1360        }
1361        return NULL;
1362}
1363
1364static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1365{
1366        const struct perf_event_mmap_page *pc;
1367
1368        pc = perf_evlist__pick_pc(rec->evlist);
1369        if (pc)
1370                return pc;
1371        return NULL;
1372}
1373
1374static int record__synthesize(struct record *rec, bool tail)
1375{
1376        struct perf_session *session = rec->session;
1377        struct machine *machine = &session->machines.host;
1378        struct perf_data *data = &rec->data;
1379        struct record_opts *opts = &rec->opts;
1380        struct perf_tool *tool = &rec->tool;
1381        int fd = perf_data__fd(data);
1382        int err = 0;
1383        event_op f = process_synthesized_event;
1384
1385        if (rec->opts.tail_synthesize != tail)
1386                return 0;
1387
1388        if (data->is_pipe) {
1389                /*
1390                 * We need to synthesize events first, because some
 1391                 * features work on top of them (on the report side).
1392                 */
1393                err = perf_event__synthesize_attrs(tool, rec->evlist,
1394                                                   process_synthesized_event);
1395                if (err < 0) {
1396                        pr_err("Couldn't synthesize attrs.\n");
1397                        goto out;
1398                }
1399
1400                err = perf_event__synthesize_features(tool, session, rec->evlist,
1401                                                      process_synthesized_event);
1402                if (err < 0) {
1403                        pr_err("Couldn't synthesize features.\n");
1404                        return err;
1405                }
1406
1407                if (have_tracepoints(&rec->evlist->core.entries)) {
1408                        /*
1409                         * FIXME err <= 0 here actually means that
 1410                         * there were no tracepoints, so it's not really
1411                         * an error, just that we don't need to
1412                         * synthesize anything.  We really have to
1413                         * return this more properly and also
 1414                         * propagate errors that currently call die()
1415                         */
1416                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
1417                                                                  process_synthesized_event);
1418                        if (err <= 0) {
1419                                pr_err("Couldn't record tracing data.\n");
1420                                goto out;
1421                        }
1422                        rec->bytes_written += err;
1423                }
1424        }
1425
1426        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1427                                          process_synthesized_event, machine);
1428        if (err)
1429                goto out;
1430
1431        /* Synthesize id_index before auxtrace_info */
1432        if (rec->opts.auxtrace_sample_mode) {
1433                err = perf_event__synthesize_id_index(tool,
1434                                                      process_synthesized_event,
1435                                                      session->evlist, machine);
1436                if (err)
1437                        goto out;
1438        }
1439
1440        if (rec->opts.full_auxtrace) {
1441                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1442                                        session, process_synthesized_event);
1443                if (err)
1444                        goto out;
1445        }
1446
1447        if (!perf_evlist__exclude_kernel(rec->evlist)) {
1448                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1449                                                         machine);
1450                WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1451                                   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1452                                   "Check /proc/kallsyms permission or run as root.\n");
1453
1454                err = perf_event__synthesize_modules(tool, process_synthesized_event,
1455                                                     machine);
1456                WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1457                                   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1458                                   "Check /proc/modules permission or run as root.\n");
1459        }
1460
1461        if (perf_guest) {
1462                machines__process_guests(&session->machines,
1463                                         perf_event__synthesize_guest_os, tool);
1464        }
1465
1466        err = perf_event__synthesize_extra_attr(&rec->tool,
1467                                                rec->evlist,
1468                                                process_synthesized_event,
1469                                                data->is_pipe);
1470        if (err)
1471                goto out;
1472
1473        err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1474                                                 process_synthesized_event,
1475                                                NULL);
1476        if (err < 0) {
1477                pr_err("Couldn't synthesize thread map.\n");
1478                return err;
1479        }
1480
1481        err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.cpus,
1482                                             process_synthesized_event, NULL);
1483        if (err < 0) {
1484                pr_err("Couldn't synthesize cpu map.\n");
1485                return err;
1486        }
1487
1488        err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1489                                                machine, opts);
1490        if (err < 0)
1491                pr_warning("Couldn't synthesize bpf events.\n");
1492
1493        err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1494                                             machine);
1495        if (err < 0)
1496                pr_warning("Couldn't synthesize cgroup events.\n");
1497
1498        if (rec->opts.nr_threads_synthesize > 1) {
1499                perf_set_multithreaded();
1500                f = process_locked_synthesized_event;
1501        }
1502
1503        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
1504                                            f, opts->sample_address,
1505                                            rec->opts.nr_threads_synthesize);
1506
1507        if (rec->opts.nr_threads_synthesize > 1)
1508                perf_set_singlethreaded();
1509
1510out:
1511        return err;
1512}
1513
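/*
 * Side-band evlist callback: deliver SIGUSR2 to the main record thread
 * (saved in rec->thread_id below) so that a --switch-output-event hit
 * triggers an output switch, just like a user-sent SIGUSR2 would.
 */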
1514static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1515{
1516        struct record *rec = data;
1517        pthread_kill(rec->thread_id, SIGUSR2);
1518        return 0;
1519}
1520
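/*
 * Set up the side-band evlist: hook up the SIGUSR2 callback above when
 * --switch-output-event populated it, add the PERF_RECORD_BPF_EVENT
 * side-band event when built with libbpf, and start the thread that
 * services those events.
 */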
1521static int record__setup_sb_evlist(struct record *rec)
1522{
1523        struct record_opts *opts = &rec->opts;
1524
1525        if (rec->sb_evlist != NULL) {
1526                /*
1527                 * We get here if --switch-output-event populated the
1528                 * sb_evlist, so associate a callback that will send a SIGUSR2
1529                 * to the main thread.
1530                 */
1531                evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1532                rec->thread_id = pthread_self();
1533        }
1534#ifdef HAVE_LIBBPF_SUPPORT
1535        if (!opts->no_bpf_event) {
1536                if (rec->sb_evlist == NULL) {
1537                        rec->sb_evlist = evlist__new();
1538
1539                        if (rec->sb_evlist == NULL) {
1540                                pr_err("Couldn't create side band evlist.\n");
1541                                return -1;
1542                        }
1543                }
1544
1545                if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1546                        pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
1547                        return -1;
1548                }
1549        }
1550#endif
1551        if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1552                pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1553                opts->no_bpf_event = true;
1554        }
1555
1556        return 0;
1557}
1558
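/*
 * Record the -k/--clockid selection plus a pair of reference timestamps
 * (gettimeofday() and the chosen clock, read back to back) in the
 * header, so that later tooling can convert a sample timestamp T taken
 * on the selected clock to wall clock time, roughly as:
 *
 *   tod_ns + (T - clockid_ns)
 */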
1559static int record__init_clock(struct record *rec)
1560{
1561        struct perf_session *session = rec->session;
1562        struct timespec ref_clockid;
1563        struct timeval ref_tod;
1564        u64 ref;
1565
1566        if (!rec->opts.use_clockid)
1567                return 0;
1568
1569        if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1570                session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
1571
1572        session->header.env.clock.clockid = rec->opts.clockid;
1573
1574        if (gettimeofday(&ref_tod, NULL) != 0) {
1575                pr_err("gettimeofday failed, cannot set reference time.\n");
1576                return -1;
1577        }
1578
1579        if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
1580                pr_err("clock_gettime failed, cannot set reference time.\n");
1581                return -1;
1582        }
1583
1584        ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
1585              (u64) ref_tod.tv_usec * NSEC_PER_USEC;
1586
1587        session->header.env.clock.tod_ns = ref;
1588
1589        ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
1590              (u64) ref_clockid.tv_nsec;
1591
1592        session->header.env.clock.clockid_ns = ref;
1593        return 0;
1594}
1595
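/*
 * The heart of 'perf record': install signal handlers, create the
 * session, optionally fork the workload, write the file header,
 * synthesize pre-existing state (threads, mmaps, ...), then loop
 * reading the mmap ring buffers until the workload exits or we're told
 * to stop, and finally finish or switch the output file.
 */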
1596static int __cmd_record(struct record *rec, int argc, const char **argv)
1597{
1598        int err;
1599        int status = 0;
1600        unsigned long waking = 0;
1601        const bool forks = argc > 0;
1602        struct perf_tool *tool = &rec->tool;
1603        struct record_opts *opts = &rec->opts;
1604        struct perf_data *data = &rec->data;
1605        struct perf_session *session;
1606        bool disabled = false, draining = false;
1607        int fd;
1608        float ratio = 0;
1609        enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
1610
1611        atexit(record__sig_exit);
1612        signal(SIGCHLD, sig_handler);
1613        signal(SIGINT, sig_handler);
1614        signal(SIGTERM, sig_handler);
1615        signal(SIGSEGV, sigsegv_handler);
1616
1617        if (rec->opts.record_namespaces)
1618                tool->namespace_events = true;
1619
1620        if (rec->opts.record_cgroup) {
1621#ifdef HAVE_FILE_HANDLE
1622                tool->cgroup_events = true;
1623#else
1624                pr_err("cgroup tracking is not supported\n");
1625                return -1;
1626#endif
1627        }
1628
1629        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
1630                signal(SIGUSR2, snapshot_sig_handler);
1631                if (rec->opts.auxtrace_snapshot_mode)
1632                        trigger_on(&auxtrace_snapshot_trigger);
1633                if (rec->switch_output.enabled)
1634                        trigger_on(&switch_output_trigger);
1635        } else {
1636                signal(SIGUSR2, SIG_IGN);
1637        }
1638
1639        session = perf_session__new(data, false, tool);
1640        if (IS_ERR(session)) {
1641                pr_err("Perf session creation failed.\n");
1642                return PTR_ERR(session);
1643        }
1644
1645        fd = perf_data__fd(data);
1646        rec->session = session;
1647
1648        if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
1649                pr_err("Compression initialization failed.\n");
1650                return -1;
1651        }
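        /*
         * done_fd is an eventfd added to the evlist pollfds: a write to it
         * (e.g. from the signal handlers that set 'done') wakes up
         * evlist__poll() in the main loop without having to wait for
         * ring-buffer data.
         */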
1652#ifdef HAVE_EVENTFD_SUPPORT
1653        done_fd = eventfd(0, EFD_NONBLOCK);
1654        if (done_fd < 0) {
1655                pr_err("Failed to create wakeup eventfd, error: %m\n");
1656                status = -1;
1657                goto out_delete_session;
1658        }
1659        err = evlist__add_pollfd(rec->evlist, done_fd);
1660        if (err < 0) {
1661                pr_err("Failed to add wakeup eventfd to poll list\n");
1662                status = err;
1663                goto out_delete_session;
1664        }
1665#endif // HAVE_EVENTFD_SUPPORT
1666
1667        session->header.env.comp_type  = PERF_COMP_ZSTD;
1668        session->header.env.comp_level = rec->opts.comp_level;
1669
1670        if (rec->opts.kcore &&
1671            !record__kcore_readable(&session->machines.host)) {
1672                pr_err("ERROR: kcore is not readable.\n");
1673                return -1;
1674        }
1675
1676        if (record__init_clock(rec))
1677                return -1;
1678
1679        record__init_features(rec);
1680
1681        if (forks) {
1682                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
1683                                                    argv, data->is_pipe,
1684                                                    workload_exec_failed_signal);
1685                if (err < 0) {
1686                        pr_err("Couldn't run the workload!\n");
1687                        status = err;
1688                        goto out_delete_session;
1689                }
1690        }
1691
1692        /*
1693         * If we have just a single event and are sending data
1694         * through a pipe, we need to force id allocation, because
1695         * we synthesize the event name through the pipe and need
1696         * the id for that.
1697         */
1698        if (data->is_pipe && rec->evlist->core.nr_entries == 1)
1699                rec->opts.sample_id = true;
1700
1701        if (record__open(rec) != 0) {
1702                err = -1;
1703                goto out_child;
1704        }
1705        session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
1706
1707        if (rec->opts.kcore) {
1708                err = record__kcore_copy(&session->machines.host, data);
1709                if (err) {
1710                        pr_err("ERROR: Failed to copy kcore\n");
1711                        goto out_child;
1712                }
1713        }
1714
1715        err = bpf__apply_obj_config();
1716        if (err) {
1717                char errbuf[BUFSIZ];
1718
1719                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1720                pr_err("ERROR: Apply config to BPF failed: %s\n",
1721                         errbuf);
1722                goto out_child;
1723        }
1724
1725        /*
1726         * Normally perf_session__new would do this, but it doesn't have the
1727         * evlist.
1728         */
1729        if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
1730                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1731                rec->tool.ordered_events = false;
1732        }
1733
1734        if (!rec->evlist->nr_groups)
1735                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1736
1737        if (data->is_pipe) {
1738                err = perf_header__write_pipe(fd);
1739                if (err < 0)
1740                        goto out_child;
1741        } else {
1742                err = perf_session__write_header(session, rec->evlist, fd, false);
1743                if (err < 0)
1744                        goto out_child;
1745        }
1746
1747        err = -1;
1748        if (!rec->no_buildid
1749            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
1750                pr_err("Couldn't generate buildids. "
1751                       "Use --no-buildid to profile anyway.\n");
1752                goto out_child;
1753        }
1754
1755        err = record__setup_sb_evlist(rec);
1756        if (err)
1757                goto out_child;
1758
1759        err = record__synthesize(rec, false);
1760        if (err < 0)
1761                goto out_child;
1762
1763        if (rec->realtime_prio) {
1764                struct sched_param param;
1765
1766                param.sched_priority = rec->realtime_prio;
1767                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1768                        pr_err("Could not set realtime priority.\n");
1769                        err = -1;
1770                        goto out_child;
1771                }
1772        }
1773
1774        /*
1775         * When perf is starting the traced process, all the events
1776         * (apart from group members) have enable_on_exec=1 set,
1777         * so don't spoil it by prematurely enabling them.
1778         */
1779        if (!target__none(&opts->target) && !opts->initial_delay)
1780                evlist__enable(rec->evlist);
1781
1782        /*
1783         * Let the child rip
1784         */
1785        if (forks) {
1786                struct machine *machine = &session->machines.host;
1787                union perf_event *event;
1788                pid_t tgid;
1789
1790                event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1791                if (event == NULL) {
1792                        err = -ENOMEM;
1793                        goto out_child;
1794                }
1795
1796                /*
1797                 * Some H/W events are generated before the COMM event,
1798                 * which is emitted during exec(), so perf script
1799                 * cannot see the correct process name for those events.
1800                 * Synthesize a COMM event to prevent that.
1801                 */
1802                tgid = perf_event__synthesize_comm(tool, event,
1803                                                   rec->evlist->workload.pid,
1804                                                   process_synthesized_event,
1805                                                   machine);
1806                free(event);
1807
1808                if (tgid == -1)
1809                        goto out_child;
1810
1811                event = malloc(sizeof(event->namespaces) +
1812                               (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1813                               machine->id_hdr_size);
1814                if (event == NULL) {
1815                        err = -ENOMEM;
1816                        goto out_child;
1817                }
1818
1819                /*
1820                 * Synthesize NAMESPACES event for the command specified.
1821                 */
1822                perf_event__synthesize_namespaces(tool, event,
1823                                                  rec->evlist->workload.pid,
1824                                                  tgid, process_synthesized_event,
1825                                                  machine);
1826                free(event);
1827
1828                perf_evlist__start_workload(rec->evlist);
1829        }
1830
1831        if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack))
1832                goto out_child;
1833
1834        if (opts->initial_delay) {
1835                pr_info(EVLIST_DISABLED_MSG);
1836                if (opts->initial_delay > 0) {
1837                        usleep(opts->initial_delay * USEC_PER_MSEC);
1838                        evlist__enable(rec->evlist);
1839                        pr_info(EVLIST_ENABLED_MSG);
1840                }
1841        }
1842
1843        trigger_ready(&auxtrace_snapshot_trigger);
1844        trigger_ready(&switch_output_trigger);
1845        perf_hooks__invoke_record_start();
1846        for (;;) {
1847                unsigned long long hits = rec->samples;
1848
1849                /*
1850                 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
1851                 * here: when done == true and hits != rec->samples
1852                 * in the previous round.
1853                 *
1854                 * perf_evlist__toggle_bkw_mmap() ensures we never
1855                 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1856                 */
1857                if (trigger_is_hit(&switch_output_trigger) || done || draining)
1858                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1859
1860                if (record__mmap_read_all(rec, false) < 0) {
1861                        trigger_error(&auxtrace_snapshot_trigger);
1862                        trigger_error(&switch_output_trigger);
1863                        err = -1;
1864                        goto out_child;
1865                }
1866
1867                if (auxtrace_record__snapshot_started) {
1868                        auxtrace_record__snapshot_started = 0;
1869                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
1870                                record__read_auxtrace_snapshot(rec, false);
1871                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
1872                                pr_err("AUX area tracing snapshot failed\n");
1873                                err = -1;
1874                                goto out_child;
1875                        }
1876                }
1877
1878                if (trigger_is_hit(&switch_output_trigger)) {
1879                        /*
1880                         * If switch_output_trigger is hit, the data in the
1881                         * overwritable ring buffer should have been collected,
1882                         * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1883                         *
1884                         * If SIGUSR2 is raised after or during record__mmap_read_all(),
1885                         * record__mmap_read_all() didn't collect data from the
1886                         * overwritable ring buffer. Read again.
1887                         */
1888                        if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1889                                continue;
1890                        trigger_ready(&switch_output_trigger);
1891
1892                        /*
1893                         * Re-enable events in the overwrite ring buffer after
1894                         * record__mmap_read_all(): we should have collected
1895                         * data from it.
1896                         */
1897                        perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1898
1899                        if (!quiet)
1900                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1901                                        waking);
1902                        waking = 0;
1903                        fd = record__switch_output(rec, false);
1904                        if (fd < 0) {
1905                                pr_err("Failed to switch to new file\n");
1906                                trigger_error(&switch_output_trigger);
1907                                err = fd;
1908                                goto out_child;
1909                        }
1910
1911                        /* re-arm the alarm */
1912                        if (rec->switch_output.time)
1913                                alarm(rec->switch_output.time);
1914                }
1915
1916                if (hits == rec->samples) {
1917                        if (done || draining)
1918                                break;
1919                        err = evlist__poll(rec->evlist, -1);
1920                        /*
1921                         * Propagate an error only if there is one. Ignore a positive
1922                         * number of returned events and interrupt errors.
1923                         */
1924                        if (err > 0 || (err < 0 && errno == EINTR))
1925                                err = 0;
1926                        waking++;
1927
1928                        if (evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1929                                draining = true;
1930                }
1931
1932                if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
1933                        switch (cmd) {
1934                        case EVLIST_CTL_CMD_ENABLE:
1935                                pr_info(EVLIST_ENABLED_MSG);
1936                                break;
1937                        case EVLIST_CTL_CMD_DISABLE:
1938                                pr_info(EVLIST_DISABLED_MSG);
1939                                break;
1940                        case EVLIST_CTL_CMD_ACK:
1941                        case EVLIST_CTL_CMD_UNSUPPORTED:
1942                        default:
1943                                break;
1944                        }
1945                }
1946
1947                /*
1948                 * When perf is starting the traced process, the events die
1949                 * with the process at the end and we wait for that. Thus there
1950                 * is no need to disable events in this case.
1951                 */
1952                if (done && !disabled && !target__none(&opts->target)) {
1953                        trigger_off(&auxtrace_snapshot_trigger);
1954                        evlist__disable(rec->evlist);
1955                        disabled = true;
1956                }
1957        }
1958
1959        trigger_off(&auxtrace_snapshot_trigger);
1960        trigger_off(&switch_output_trigger);
1961
1962        if (opts->auxtrace_snapshot_on_exit)
1963                record__auxtrace_snapshot_exit(rec);
1964
1965        if (forks && workload_exec_errno) {
1966                char msg[STRERR_BUFSIZE];
1967                const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
1968                pr_err("Workload failed: %s\n", emsg);
1969                err = -1;
1970                goto out_child;
1971        }
1972
1973        if (!quiet)
1974                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
1975
1976        if (target__none(&rec->opts.target))
1977                record__synthesize_workload(rec, true);
1978
1979out_child:
1980        evlist__finalize_ctlfd(rec->evlist);
1981        record__mmap_read_all(rec, true);
1982        record__aio_mmap_read_sync(rec);
1983
1984        if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
1985                ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
1986                session->header.env.comp_ratio = ratio + 0.5;
1987        }
1988
1989        if (forks) {
1990                int exit_status;
1991
1992                if (!child_finished)
1993                        kill(rec->evlist->workload.pid, SIGTERM);
1994
1995                wait(&exit_status);
1996
1997                if (err < 0)
1998                        status = err;
1999                else if (WIFEXITED(exit_status))
2000                        status = WEXITSTATUS(exit_status);
2001                else if (WIFSIGNALED(exit_status))
2002                        signr = WTERMSIG(exit_status);
2003        } else
2004                status = err;
2005
2006        record__synthesize(rec, true);
2007        /* this will be recalculated during process_buildids() */
2008        rec->samples = 0;
2009
2010        if (!err) {
2011                if (!rec->timestamp_filename) {
2012                        record__finish_output(rec);
2013                } else {
2014                        fd = record__switch_output(rec, true);
2015                        if (fd < 0) {
2016                                status = fd;
2017                                goto out_delete_session;
2018                        }
2019                }
2020        }
2021
2022        perf_hooks__invoke_record_end();
2023
2024        if (!err && !quiet) {
2025                char samples[128];
2026                const char *postfix = rec->timestamp_filename ?
2027                                        ".<timestamp>" : "";
2028
2029                if (rec->samples && !rec->opts.full_auxtrace)
2030                        scnprintf(samples, sizeof(samples),
2031                                  " (%" PRIu64 " samples)", rec->samples);
2032                else
2033                        samples[0] = '\0';
2034
2035                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2036                        perf_data__size(data) / 1024.0 / 1024.0,
2037                        data->path, postfix, samples);
2038                if (ratio) {
2039                        fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2040                                        rec->session->bytes_transferred / 1024.0 / 1024.0,
2041                                        ratio);
2042                }
2043                fprintf(stderr, " ]\n");
2044        }
2045
2046out_delete_session:
2047#ifdef HAVE_EVENTFD_SUPPORT
2048        if (done_fd >= 0)
2049                close(done_fd);
2050#endif
2051        zstd_fini(&session->zstd_data);
2052        perf_session__delete(session);
2053
2054        if (!opts->no_bpf_event)
2055                perf_evlist__stop_sb_thread(rec->sb_evlist);
2056        return status;
2057}
2058
2059static void callchain_debug(struct callchain_param *callchain)
2060{
2061        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2062
2063        pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2064
2065        if (callchain->record_mode == CALLCHAIN_DWARF)
2066                pr_debug("callchain: stack dump size %d\n",
2067                         callchain->dump_size);
2068}
2069
2070int record_opts__parse_callchain(struct record_opts *record,
2071                                 struct callchain_param *callchain,
2072                                 const char *arg, bool unset)
2073{
2074        int ret;
2075        callchain->enabled = !unset;
2076
2077        /* --no-call-graph */
2078        if (unset) {
2079                callchain->record_mode = CALLCHAIN_NONE;
2080                pr_debug("callchain: disabled\n");
2081                return 0;
2082        }
2083
2084        ret = parse_callchain_record_opt(arg, callchain);
2085        if (!ret) {
2086                /* Enable data address sampling for DWARF unwind. */
2087                if (callchain->record_mode == CALLCHAIN_DWARF)
2088                        record->sample_address = true;
2089                callchain_debug(callchain);
2090        }
2091
2092        return ret;
2093}
2094
2095int record_parse_callchain_opt(const struct option *opt,
2096                               const char *arg,
2097                               int unset)
2098{
2099        return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2100}
2101
2102int record_callchain_opt(const struct option *opt,
2103                         const char *arg __maybe_unused,
2104                         int unset __maybe_unused)
2105{
2106        struct callchain_param *callchain = opt->value;
2107
2108        callchain->enabled = true;
2109
2110        if (callchain->record_mode == CALLCHAIN_NONE)
2111                callchain->record_mode = CALLCHAIN_FP;
2112
2113        callchain_debug(callchain);
2114        return 0;
2115}
2116
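/*
 * Handle the 'record.*' perfconfig keys, e.g. a ~/.perfconfig snippet
 * along these lines (values as interpreted below):
 *
 *   [record]
 *       build-id = cache      # or no-cache, skip
 *       call-graph = dwarf    # forwarded as call-graph.record-mode
 *       aio = 1               # only with HAVE_AIO_SUPPORT
 */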
2117static int perf_record_config(const char *var, const char *value, void *cb)
2118{
2119        struct record *rec = cb;
2120
2121        if (!strcmp(var, "record.build-id")) {
2122                if (!strcmp(value, "cache"))
2123                        rec->no_buildid_cache = false;
2124                else if (!strcmp(value, "no-cache"))
2125                        rec->no_buildid_cache = true;
2126                else if (!strcmp(value, "skip"))
2127                        rec->no_buildid = true;
2128                else
2129                        return -1;
2130                return 0;
2131        }
2132        if (!strcmp(var, "record.call-graph")) {
2133                var = "call-graph.record-mode";
2134                return perf_default_config(var, value, cb);
2135        }
2136#ifdef HAVE_AIO_SUPPORT
2137        if (!strcmp(var, "record.aio")) {
2138                rec->opts.nr_cblocks = strtol(value, NULL, 0);
2139                if (!rec->opts.nr_cblocks)
2140                        rec->opts.nr_cblocks = nr_cblocks_default;
2141        }
2142#endif
2143
2144        return 0;
2145}
2146
2147
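/*
 * --affinity=node|cpu: bind the trace reading thread to the NUMA node or
 * to the CPU of the mmap buffer being processed; the PERF_AFFINITY_SYS
 * default leaves the thread's affinity untouched.
 */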
2148static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2149{
2150        struct record_opts *opts = (struct record_opts *)opt->value;
2151
2152        if (unset || !str)
2153                return 0;
2154
2155        if (!strcasecmp(str, "node"))
2156                opts->affinity = PERF_AFFINITY_NODE;
2157        else if (!strcasecmp(str, "cpu"))
2158                opts->affinity = PERF_AFFINITY_CPU;
2159
2160        return 0;
2161}
2162
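/*
 * --max-size, e.g. '--max-size=200M': stop recording once the output
 * file grows past the given size, using the B/K/M/G multipliers below.
 */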
2163static int parse_output_max_size(const struct option *opt,
2164                                 const char *str, int unset)
2165{
2166        unsigned long *s = (unsigned long *)opt->value;
2167        static struct parse_tag tags_size[] = {
2168                { .tag  = 'B', .mult = 1       },
2169                { .tag  = 'K', .mult = 1 << 10 },
2170                { .tag  = 'M', .mult = 1 << 20 },
2171                { .tag  = 'G', .mult = 1 << 30 },
2172                { .tag  = 0 },
2173        };
2174        unsigned long val;
2175
2176        if (unset) {
2177                *s = 0;
2178                return 0;
2179        }
2180
2181        val = parse_tag_value(str, tags_size);
2182        if (val != (unsigned long) -1) {
2183                *s = val;
2184                return 0;
2185        }
2186
2187        return -1;
2188}
2189
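/*
 * -m/--mmap-pages takes 'pages[,pages]': the first value sizes the data
 * mmaps, the optional second one the AUX area tracing mmaps, e.g.
 * '-m 512,128'; both may also be given as sizes such as '16M'.
 */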
2190static int record__parse_mmap_pages(const struct option *opt,
2191                                    const char *str,
2192                                    int unset __maybe_unused)
2193{
2194        struct record_opts *opts = opt->value;
2195        char *s, *p;
2196        unsigned int mmap_pages;
2197        int ret;
2198
2199        if (!str)
2200                return -EINVAL;
2201
2202        s = strdup(str);
2203        if (!s)
2204                return -ENOMEM;
2205
2206        p = strchr(s, ',');
2207        if (p)
2208                *p = '\0';
2209
2210        if (*s) {
2211                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
2212                if (ret)
2213                        goto out_free;
2214                opts->mmap_pages = mmap_pages;
2215        }
2216
2217        if (!p) {
2218                ret = 0;
2219                goto out_free;
2220        }
2221
2222        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
2223        if (ret)
2224                goto out_free;
2225
2226        opts->auxtrace_mmap_pages = mmap_pages;
2227
2228out_free:
2229        free(s);
2230        return ret;
2231}
2232
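/*
 * --control fd:ctl-fd[,ack-fd], e.g. '--control fd:10,11': commands such
 * as 'enable' and 'disable' are read from fd 10 and, when an ack-fd is
 * given, an 'ack\n' completion is written to fd 11 (see the option help
 * below).
 */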
2233static int parse_control_option(const struct option *opt,
2234                                const char *str,
2235                                int unset __maybe_unused)
2236{
2237        char *comma = NULL, *endptr = NULL;
2238        struct record_opts *config = (struct record_opts *)opt->value;
2239
2240        if (strncmp(str, "fd:", 3))
2241                return -EINVAL;
2242
2243        config->ctl_fd = strtoul(&str[3], &endptr, 0);
2244        if (endptr == &str[3])
2245                return -EINVAL;
2246
2247        comma = strchr(str, ',');
2248        if (comma) {
2249                if (endptr != comma)
2250                        return -EINVAL;
2251
2252                config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0);
2253                if (endptr == comma + 1 || *endptr != '\0')
2254                        return -EINVAL;
2255        }
2256
2257        return 0;
2258}
2259
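/*
 * Output switching is only evaluated when the kernel wakes us up to read
 * the ring buffers, so warn when the --switch-output size threshold is
 * below half of the mmap wakeup size: individual perf.data files may
 * then overshoot the requested size.
 */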
2260static void switch_output_size_warn(struct record *rec)
2261{
2262        u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2263        struct switch_output *s = &rec->switch_output;
2264
2265        wakeup_size /= 2;
2266
2267        if (s->size < wakeup_size) {
2268                char buf[100];
2269
2270                unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2271                pr_warning("WARNING: switch-output data size lower than "
2272                           "wakeup kernel buffer size (%s), "
2273                           "expect bigger perf.data sizes\n", buf);
2274        }
2275}
2276
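/*
 * --switch-output accepts "signal", a size ("100M") or a time ("30s"),
 * parsed with the tag tables below; any of these also implies
 * --timestamp-filename so that every generated output file gets a
 * unique name.
 */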
2277static int switch_output_setup(struct record *rec)
2278{
2279        struct switch_output *s = &rec->switch_output;
2280        static struct parse_tag tags_size[] = {
2281                { .tag  = 'B', .mult = 1       },
2282                { .tag  = 'K', .mult = 1 << 10 },
2283                { .tag  = 'M', .mult = 1 << 20 },
2284                { .tag  = 'G', .mult = 1 << 30 },
2285                { .tag  = 0 },
2286        };
2287        static struct parse_tag tags_time[] = {
2288                { .tag  = 's', .mult = 1        },
2289                { .tag  = 'm', .mult = 60       },
2290                { .tag  = 'h', .mult = 60*60    },
2291                { .tag  = 'd', .mult = 60*60*24 },
2292                { .tag  = 0 },
2293        };
2294        unsigned long val;
2295
2296        /*
2297         * If we're using --switch-output-event, then we imply
2298         * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2299         * thread to its parent.
2300         */
2301        if (rec->switch_output_event_set)
2302                goto do_signal;
2303
2304        if (!s->set)
2305                return 0;
2306
2307        if (!strcmp(s->str, "signal")) {
2308do_signal:
2309                s->signal = true;
2310                pr_debug("switch-output with SIGUSR2 signal\n");
2311                goto enabled;
2312        }
2313
2314        val = parse_tag_value(s->str, tags_size);
2315        if (val != (unsigned long) -1) {
2316                s->size = val;
2317                pr_debug("switch-output with %s size threshold\n", s->str);
2318                goto enabled;
2319        }
2320
2321        val = parse_tag_value(s->str, tags_time);
2322        if (val != (unsigned long) -1) {
2323                s->time = val;
2324                pr_debug("switch-output with %s time threshold (%lu seconds)\n",
2325                         s->str, s->time);
2326                goto enabled;
2327        }
2328
2329        return -1;
2330
2331enabled:
2332        rec->timestamp_filename = true;
2333        s->enabled              = true;
2334
2335        if (s->size && !rec->opts.no_buffering)
2336                switch_output_size_warn(rec);
2337
2338        return 0;
2339}
2340
2341static const char * const __record_usage[] = {
2342        "perf record [<options>] [<command>]",
2343        "perf record [<options>] -- <command> [<options>]",
2344        NULL
2345};
2346const char * const *record_usage = __record_usage;
2347
2348static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
2349                                  struct perf_sample *sample, struct machine *machine)
2350{
2351        /*
2352         * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
2353         * so there is no need to add them twice.
2354         */
2355        if (!(event->header.misc & PERF_RECORD_MISC_USER))
2356                return 0;
2357        return perf_event__process_mmap(tool, event, sample, machine);
2358}
2359
2360static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
2361                                   struct perf_sample *sample, struct machine *machine)
2362{
2363        /*
2364         * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
2365         * so there is no need to add them twice.
2366         */
2367        if (!(event->header.misc & PERF_RECORD_MISC_USER))
2368                return 0;
2369
2370        return perf_event__process_mmap2(tool, event, sample, machine);
2371}
2372
2373/*
2374 * XXX Ideally this would be local to cmd_record() and passed to a record__new(),
2375 * because we need access to it in record__exit(), which is called after
2376 * cmd_record() exits, but since record_options needs to be accessible to
2377 * builtin-script, leave it here.
2378 *
2379 * At least we don't touch it in all the other functions here directly.
2380 *
2381 * Just say no to tons of global variables, sigh.
2382 */
2383static struct record record = {
2384        .opts = {
2385                .sample_time         = true,
2386                .mmap_pages          = UINT_MAX,
2387                .user_freq           = UINT_MAX,
2388                .user_interval       = ULLONG_MAX,
2389                .freq                = 4000,
2390                .target              = {
2391                        .uses_mmap   = true,
2392                        .default_per_cpu = true,
2393                },
2394                .mmap_flush          = MMAP_FLUSH_DEFAULT,
2395                .nr_threads_synthesize = 1,
2396                .ctl_fd              = -1,
2397                .ctl_fd_ack          = -1,
2398        },
2399        .tool = {
2400                .sample         = process_sample_event,
2401                .fork           = perf_event__process_fork,
2402                .exit           = perf_event__process_exit,
2403                .comm           = perf_event__process_comm,
2404                .namespaces     = perf_event__process_namespaces,
2405                .mmap           = build_id__process_mmap,
2406                .mmap2          = build_id__process_mmap2,
2407                .ordered_events = true,
2408        },
2409};
2410
2411const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
2412        "\n\t\t\t\tDefault: fp";
2413
2414static bool dry_run;
2415
2416/*
2417 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
2418 * with it and switch to using the library functions in perf_evlist that came
2419 * from builtin-record.c, i.e. use record_opts,
2420 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
2421 * using pipes, etc.
2422 */
2423static struct option __record_options[] = {
2424        OPT_CALLBACK('e', "event", &record.evlist, "event",
2425                     "event selector. use 'perf list' to list available events",
2426                     parse_events_option),
2427        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
2428                     "event filter", parse_filter),
2429        OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
2430                           NULL, "don't record events from perf itself",
2431                           exclude_perf),
2432        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
2433                    "record events on existing process id"),
2434        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
2435                    "record events on existing thread id"),
2436        OPT_INTEGER('r', "realtime", &record.realtime_prio,
2437                    "collect data with this RT SCHED_FIFO priority"),
2438        OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
2439                    "collect data without buffering"),
2440        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
2441                    "collect raw sample records from all opened counters"),
2442        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
2443                            "system-wide collection from all CPUs"),
2444        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
2445                    "list of cpus to monitor"),
2446        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2447        OPT_STRING('o', "output", &record.data.path, "file",
2448                    "output file name"),
2449        OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
2450                        &record.opts.no_inherit_set,
2451                        "child tasks do not inherit counters"),
2452        OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
2453                    "synthesize non-sample events at the end of output"),
2454        OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
2455        OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
2456        OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
2457                    "Fail if the specified frequency can't be used"),
2458        OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
2459                     "profile at this frequency",
2460                      record__parse_freq),
2461        OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
2462                     "number of mmap data pages and AUX area tracing mmap pages",
2463                     record__parse_mmap_pages),
2464        OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
2465                     "Minimum number of bytes that are extracted from mmap data pages (default: 1)",
2466                     record__mmap_flush_parse),
2467        OPT_BOOLEAN(0, "group", &record.opts.group,
2468                    "put the counters into a counter group"),
2469        OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
2470                           NULL, "enables call-graph recording" ,
2471                           &record_callchain_opt),
2472        OPT_CALLBACK(0, "call-graph", &record.opts,
2473                     "record_mode[,record_size]", record_callchain_help,
2474                     &record_parse_callchain_opt),
2475        OPT_INCR('v', "verbose", &verbose,
2476                    "be more verbose (show counter open errors, etc)"),
2477        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
2478        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
2479                    "per thread counts"),
2480        OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
2481        OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
2482                    "Record the sample physical addresses"),
2483        OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
2484        OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
2485                        &record.opts.sample_time_set,
2486                        "Record the sample timestamps"),
2487        OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
2488                        "Record the sample period"),
2489        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
2490                    "don't sample"),
2491        OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
2492                        &record.no_buildid_cache_set,
2493                        "do not update the buildid cache"),
2494        OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
2495                        &record.no_buildid_set,
2496                        "do not collect buildids in perf.data"),
2497        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
2498                     "monitor event in cgroup name only",
2499                     parse_cgroups),
2500        OPT_INTEGER('D', "delay", &record.opts.initial_delay,
2501                  "ms to wait before starting measurement after program start (-1: start with events disabled)"),
2502        OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
2503        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
2504                   "user to profile"),
2505
2506        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
2507                     "branch any", "sample any taken branches",
2508                     parse_branch_stack),
2509
2510        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
2511                     "branch filter mask", "branch stack filter modes",
2512                     parse_branch_stack),
2513        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
2514                    "sample by weight (on special events only)"),
2515        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
2516                    "sample transaction flags (special events only)"),
2517        OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
2518                    "use per-thread mmaps"),
2519        OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
2520                    "sample selected machine registers on interrupt,"
2521                    " use '-I?' to list register names", parse_intr_regs),
2522        OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
2523                    "sample selected machine registers on interrupt,"
2524                    " use '--user-regs=?' to list register names", parse_user_regs),
2525        OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
2526                    "Record running/enabled time of read (:S) events"),
2527        OPT_CALLBACK('k', "clockid", &record.opts,
2528                     "clockid", "clockid to use for events, see clock_gettime()",
2529                     parse_clockid),
2530        OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
2531                          "opts", "AUX area tracing Snapshot Mode", ""),
2532        OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
2533                          "opts", "sample AUX area", ""),
2534        OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
2535                        "per thread proc mmap processing timeout in ms"),
2536        OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
2537                    "Record namespaces events"),
2538        OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
2539                    "Record cgroup events"),
2540        OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
2541                        &record.opts.record_switch_events_set,
2542                        "Record context switch events"),
2543        OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
2544                         "Configure all used events to run in kernel space.",
2545                         PARSE_OPT_EXCLUSIVE),
2546        OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
2547                         "Configure all used events to run in user space.",
2548                         PARSE_OPT_EXCLUSIVE),
2549        OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
2550                    "collect kernel callchains"),
2551        OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
2552                    "collect user callchains"),
2553        OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
2554                   "clang binary to use for compiling BPF scriptlets"),
2555        OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
2556                   "options passed to clang when compiling BPF scriptlets"),
2557        OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
2558                   "file", "vmlinux pathname"),
2559        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
2560                    "Record build-id of all DSOs regardless of hits"),
2561        OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
2562                    "append timestamp to output filename"),
2563        OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
2564                    "Record timestamp boundary (time of first/last samples)"),
2565        OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
2566                          &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
2567                          "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
2568                          "signal"),
2569        OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
2570                         "switch output event selector. use 'perf list' to list available events",
2571                         parse_events_option_new_evlist),
2572        OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
2573                   "Limit number of switch output generated files"),
2574        OPT_BOOLEAN(0, "dry-run", &dry_run,
2575                    "Parse options then exit"),
2576#ifdef HAVE_AIO_SUPPORT
2577        OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2578                     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
2579                     record__aio_parse),
2580#endif
2581        OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2582                     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2583                     record__parse_affinity),
2584#ifdef HAVE_ZSTD_SUPPORT
2585        OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
2586                            "n", "Compress records using the specified level (default: 1 - fastest compression, 22 - greatest compression)",
2587                            record__parse_comp_level),
2588#endif
2589        OPT_CALLBACK(0, "max-size", &record.output_max_size,
2590                     "size", "Limit the maximum size of the output file", parse_output_max_size),
2591        OPT_UINTEGER(0, "num-thread-synthesize",
2592                     &record.opts.nr_threads_synthesize,
2593                     "number of threads to run for event synthesis"),
2594#ifdef HAVE_LIBPFM
2595        OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
2596                "libpfm4 event selector. use 'perf list' to list available events",
2597                parse_libpfm_events_option),
2598#endif
2599        OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd]",
2600                     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
2601                     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.",
2602                      parse_control_option),
2603        OPT_END()
2604};
2605
2606struct option *record_options = __record_options;
2607
2608int cmd_record(int argc, const char **argv)
2609{
2610        int err;
2611        struct record *rec = &record;
2612        char errbuf[BUFSIZ];
2613
2614        setlocale(LC_ALL, "");
2615
2616#ifndef HAVE_LIBBPF_SUPPORT
2617# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2618        set_nobuild('\0', "clang-path", true);
2619        set_nobuild('\0', "clang-opt", true);
2620# undef set_nobuild
2621#endif
2622
2623#ifndef HAVE_BPF_PROLOGUE
2624# if !defined (HAVE_DWARF_SUPPORT)
2625#  define REASON  "NO_DWARF=1"
2626# elif !defined (HAVE_LIBBPF_SUPPORT)
2627#  define REASON  "NO_LIBBPF=1"
2628# else
2629#  define REASON  "this architecture doesn't support BPF prologue"
2630# endif
2631# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2632        set_nobuild('\0', "vmlinux", true);
2633# undef set_nobuild
2634# undef REASON
2635#endif
2636
2637        rec->opts.affinity = PERF_AFFINITY_SYS;
2638
2639        rec->evlist = evlist__new();
2640        if (rec->evlist == NULL)
2641                return -ENOMEM;
2642
2643        err = perf_config(perf_record_config, rec);
2644        if (err)
2645                return err;
2646
2647        argc = parse_options(argc, argv, record_options, record_usage,
2648                            PARSE_OPT_STOP_AT_NON_OPTION);
2649        if (quiet)
2650                perf_quiet_option();
2651
2652        /* Make system wide (-a) the default target. */
2653        if (!argc && target__none(&rec->opts.target))
2654                rec->opts.target.system_wide = true;
2655
2656        if (nr_cgroups && !rec->opts.target.system_wide) {
2657                usage_with_options_msg(record_usage, record_options,
2658                        "cgroup monitoring only available in system-wide mode");
2659
2660        }
2661
2662        if (rec->opts.kcore)
2663                rec->data.is_dir = true;
2664
2665        if (rec->opts.comp_level != 0) {
2666                pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2667                rec->no_buildid = true;
2668        }
2669
2670        if (rec->opts.record_switch_events &&
2671            !perf_can_record_switch_events()) {
2672                ui__error("kernel does not support recording context switch events\n");
2673                parse_options_usage(record_usage, record_options, "switch-events", 0);
2674                return -EINVAL;
2675        }
2676
2677        if (switch_output_setup(rec)) {
2678                parse_options_usage(record_usage, record_options, "switch-output", 0);
2679                return -EINVAL;
2680        }
2681
2682        if (rec->switch_output.time) {
2683                signal(SIGALRM, alarm_sig_handler);
2684                alarm(rec->switch_output.time);
2685        }
2686
2687        if (rec->switch_output.num_files) {
2688                rec->switch_output.filenames = calloc(rec->switch_output.num_files,
2689                                                      sizeof(char *));
2690                if (!rec->switch_output.filenames)
2691                        return -ENOMEM;
2692        }
2693
2694        /*
2695         * Allow aliases to facilitate the lookup of symbols for address
2696         * filters. Refer to auxtrace_parse_filters().
2697         */
2698        symbol_conf.allow_aliases = true;
2699
2700        symbol__init(NULL);
2701
2702        if (rec->opts.affinity != PERF_AFFINITY_SYS) {
2703                rec->affinity_mask.nbits = cpu__max_cpu();
2704                rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
2705                if (!rec->affinity_mask.bits) {
2706                        pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
2707                        return -ENOMEM;
2708                }
2709                pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits);
2710        }
2711
2712        err = record__auxtrace_init(rec);
2713        if (err)
2714                goto out;
2715
2716        if (dry_run)
2717                goto out;
2718
2719        err = bpf__setup_stdout(rec->evlist);
2720        if (err) {
2721                bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2722                pr_err("ERROR: Setup BPF stdout failed: %s\n",
2723                         errbuf);
2724                goto out;
2725        }
2726
2727        err = -ENOMEM;
2728
2729        if (rec->no_buildid_cache || rec->no_buildid) {
2730                disable_buildid_cache();
2731        } else if (rec->switch_output.enabled) {
2732                /*
2733                 * In 'perf record --switch-output', disable buildid
2734                 * generation by default to reduce data file switching
2735                 * overhead. Still generate buildids if they are requested
2736                 * explicitly using
2737                 *
2738                 *  perf record --switch-output --no-no-buildid \
2739                 *              --no-no-buildid-cache
2740                 *
2741                 * The following code is equivalent to:
2742                 *
2743                 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2744                 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2745                 *         disable_buildid_cache();
2746                 */
2747                bool disable = true;
2748
2749                if (rec->no_buildid_set && !rec->no_buildid)
2750                        disable = false;
2751                if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2752                        disable = false;
2753                if (disable) {
2754                        rec->no_buildid = true;
2755                        rec->no_buildid_cache = true;
2756                        disable_buildid_cache();
2757                }
2758        }
2759
2760        if (record.opts.overwrite)
2761                record.opts.tail_synthesize = true;
2762
2763        if (rec->evlist->core.nr_entries == 0 &&
2764            __evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
2765                pr_err("Not enough memory for event selector list\n");
2766                goto out;
2767        }
2768
2769        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2770                rec->opts.no_inherit = true;
2771
2772        err = target__validate(&rec->opts.target);
2773        if (err) {
2774                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2775                ui__warning("%s\n", errbuf);
2776        }
2777
2778        err = target__parse_uid(&rec->opts.target);
2779        if (err) {
2780                int saved_errno = errno;
2781
2782                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
2783                ui__error("%s", errbuf);
2784
2785                err = -saved_errno;
2786                goto out;
2787        }
2788
2789        /* Enable ignoring missing threads when -u/-p option is defined. */
2790        rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
2791
2792        err = -ENOMEM;
2793        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
2794                usage_with_options(record_usage, record_options);
2795
2796        err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2797        if (err)
2798                goto out;
2799
2800        /*
2801         * We take all buildids when the file contains
2802         * AUX area tracing data, because we do not decode the
2803         * trace (it would take too long).
2804         */
2805        if (rec->opts.full_auxtrace)
2806                rec->buildid_all = true;
2807
2808        if (rec->opts.text_poke) {
2809                err = record__config_text_poke(rec->evlist);
2810                if (err) {
2811                        pr_err("record__config_text_poke failed, error %d\n", err);
2812                        goto out;
2813                }
2814        }
2815
2816        if (record_opts__config(&rec->opts)) {
2817                err = -EINVAL;
2818                goto out;
2819        }
2820
2821        if (rec->opts.nr_cblocks > nr_cblocks_max)
2822                rec->opts.nr_cblocks = nr_cblocks_max;
2823        pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2824
2825        pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2826        pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2827
2828        if (rec->opts.comp_level > comp_level_max)
2829                rec->opts.comp_level = comp_level_max;
2830        pr_debug("comp level: %d\n", rec->opts.comp_level);
2831
2832        err = __cmd_record(&record, argc, argv);
2833out:
2834        bitmap_free(rec->affinity_mask.bits);
2835        evlist__delete(rec->evlist);
2836        symbol__exit();
2837        auxtrace_record__free(rec->itr);
2838        return err;
2839}
2840
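/*
 * SIGUSR2 handler: start an AUX area snapshot when running in snapshot
 * mode and/or hit the switch-output trigger when --switch-output=signal
 * is in effect.
 */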
2841static void snapshot_sig_handler(int sig __maybe_unused)
2842{
2843        struct record *rec = &record;
2844
2845        if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2846                trigger_hit(&auxtrace_snapshot_trigger);
2847                auxtrace_record__snapshot_started = 1;
2848                if (auxtrace_record__snapshot_start(record.itr))
2849                        trigger_error(&auxtrace_snapshot_trigger);
2850        }
2851
2852        if (switch_output_signal(rec))
2853                trigger_hit(&switch_output_trigger);
2854}
2855
2856static void alarm_sig_handler(int sig __maybe_unused)
2857{
2858        struct record *rec = &record;
2859
2860        if (switch_output_time(rec))
2861                trigger_hit(&switch_output_trigger);
2862}
2863