linux/tools/perf/builtin-record.c
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#include <asm/bug.h>

struct record {
        struct perf_tool        tool;
        struct record_opts      opts;
        u64                     bytes_written;
        struct perf_data_file   file;
        struct auxtrace_record  *itr;
        struct perf_evlist      *evlist;
        struct perf_session     *session;
        const char              *progname;
        int                     realtime_prio;
        bool                    no_buildid;
        bool                    no_buildid_set;
        bool                    no_buildid_cache;
        bool                    no_buildid_cache_set;
        bool                    buildid_all;
        bool                    timestamp_filename;
        bool                    switch_output;
        unsigned long long      samples;
};

static int record__write(struct record *rec, void *bf, size_t size)
{
        if (perf_data_file__write(rec->session->file, bf, size) < 0) {
                pr_err("failed to write perf data, error: %m\n");
                return -1;
        }

        rec->bytes_written += size;
        return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
                                     union perf_event *event,
                                     struct perf_sample *sample __maybe_unused,
                                     struct machine *machine __maybe_unused)
{
        struct record *rec = container_of(tool, struct record, tool);
        return record__write(rec, event, event->header.size);
}

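/*
 * For a backward (overwritable) ring buffer, 'head' points at the header of
 * the most recently written event.  Walk from there through progressively
 * older records until either the whole buffer has been covered (rewind) or
 * a zero-sized header marks the end of written data, and report the bytes
 * to copy as [*start, *end).
 */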
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
        struct perf_event_header *pheader;
        u64 evt_head = head;
        int size = mask + 1;

        pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
        pheader = (struct perf_event_header *)(buf + (head & mask));
        *start = head;
        while (true) {
                if (evt_head - head >= (unsigned int)size) {
                        pr_debug("Finished reading backward ring buffer: rewind\n");
                        if (evt_head - head > (unsigned int)size)
                                evt_head -= pheader->size;
                        *end = evt_head;
                        return 0;
                }

                pheader = (struct perf_event_header *)(buf + (evt_head & mask));

                if (pheader->size == 0) {
                        pr_debug("Finished reading backward ring buffer: get start\n");
                        *end = evt_head;
                        return 0;
                }

                evt_head += pheader->size;
                pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
        }
        WARN_ONCE(1, "Shouldn't get here\n");
        return -1;
}

static int
rb_find_range(struct perf_evlist *evlist,
              void *data, int mask, u64 head, u64 old,
              u64 *start, u64 *end)
{
        if (!evlist->backward) {
                *start = old;
                *end = head;
                return 0;
        }

        return backward_rb_find_range(data, mask, head, start, end);
}

static int record__mmap_read(struct record *rec, int idx)
{
        struct perf_mmap *md = &rec->evlist->mmap[idx];
        u64 head = perf_mmap__read_head(md);
        u64 old = md->prev;
        u64 end = head, start = old;
        unsigned char *data = md->base + page_size;
        unsigned long size;
        void *buf;
        int rc = 0;

        if (rb_find_range(rec->evlist, data, md->mask, head,
                          old, &start, &end))
                return -1;

        if (start == end)
                return 0;

        rec->samples++;

        size = end - start;
        if (size > (unsigned long)(md->mask) + 1) {
                WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

                md->prev = head;
                perf_evlist__mmap_consume(rec->evlist, idx);
                return 0;
        }

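        /*
         * [start, end) wraps past the end of the ring buffer: copy the
         * chunk up to the buffer boundary first, then fall through to
         * copy the remainder from the start of the buffer.
         */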
        if ((start & md->mask) + size != (end & md->mask)) {
                buf = &data[start & md->mask];
                size = md->mask + 1 - (start & md->mask);
                start += size;

                if (record__write(rec, buf, size) < 0) {
                        rc = -1;
                        goto out;
                }
        }

        buf = &data[start & md->mask];
        size = end - start;
        start += size;

        if (record__write(rec, buf, size) < 0) {
                rc = -1;
                goto out;
        }

        md->prev = head;
        perf_evlist__mmap_consume(rec->evlist, idx);
out:
        return rc;
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static void sig_handler(int sig)
{
        if (sig == SIGCHLD)
                child_finished = 1;
        else
                signr = sig;

        done = 1;
}

static void record__sig_exit(void)
{
        if (signr == -1)
                return;

        signal(signr, SIG_DFL);
        raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
                                    union perf_event *event, void *data1,
                                    size_t len1, void *data2, size_t len2)
{
        struct record *rec = container_of(tool, struct record, tool);
        struct perf_data_file *file = &rec->file;
        size_t padding;
        u8 pad[8] = {0};

        if (!perf_data_file__is_pipe(file)) {
                off_t file_offset;
                int fd = perf_data_file__fd(file);
                int err;

                file_offset = lseek(fd, 0, SEEK_CUR);
                if (file_offset == -1)
                        return -1;
                err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
                                                     event, file_offset);
                if (err)
                        return err;
        }

        /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
        padding = (len1 + len2) & 7;
        if (padding)
                padding = 8 - padding;
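        /* e.g. len1 + len2 == 13 -> padding == 3, keeping the record 8-byte aligned */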

        record__write(rec, event, event->header.size);
        record__write(rec, data1, len1);
        if (len2)
                record__write(rec, data2, len2);
        record__write(rec, &pad, padding);

        return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
                                      struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
                                  record__process_auxtrace);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
                                               struct auxtrace_mmap *mm)
{
        int ret;

        ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
                                           record__process_auxtrace,
                                           rec->opts.auxtrace_snapshot_size);
        if (ret < 0)
                return ret;

        if (ret)
                rec->samples++;

        return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm =
                                &rec->evlist->mmap[i].auxtrace_mmap;

                if (!mm->base)
                        continue;

                if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }
out:
        return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
        pr_debug("Recording AUX area tracing snapshot\n");
        if (record__auxtrace_read_snapshot_all(rec) < 0) {
                trigger_error(&auxtrace_snapshot_trigger);
        } else {
                if (auxtrace_record__snapshot_finish(rec->itr))
                        trigger_error(&auxtrace_snapshot_trigger);
                else
                        trigger_ready(&auxtrace_snapshot_trigger);
        }
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
                               struct auxtrace_mmap *mm __maybe_unused)
{
        return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
        return 0;
}

#endif

static int record__open(struct record *rec)
{
        char msg[512];
        struct perf_evsel *pos;
        struct perf_evlist *evlist = rec->evlist;
        struct perf_session *session = rec->session;
        struct record_opts *opts = &rec->opts;
        int rc = 0;

        perf_evlist__config(evlist, opts, &callchain_param);

        evlist__for_each(evlist, pos) {
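                /*
                 * If opening fails, perf_evsel__fallback() may be able to
                 * downgrade the event to a more widely available one (for
                 * instance a software equivalent of a hardware event), in
                 * which case the open is retried.
                 */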
try_again:
                if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
                                if (verbose)
                                        ui__warning("%s\n", msg);
                                goto try_again;
                        }

                        rc = -errno;
                        perf_evsel__open_strerror(pos, &opts->target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);
                        goto out;
                }
        }

        if (perf_evlist__apply_filters(evlist, &pos)) {
                error("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, perf_evsel__name(pos), errno,
                        strerror_r(errno, msg, sizeof(msg)));
                rc = -1;
                goto out;
        }

        if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
                                 opts->auxtrace_mmap_pages,
                                 opts->auxtrace_snapshot_mode) < 0) {
                if (errno == EPERM) {
                        pr_err("Permission error mapping pages.\n"
                               "Consider increasing "
                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
                               "or try again with a smaller value of -m/--mmap_pages.\n"
                               "(current value: %u,%u)\n",
                               opts->mmap_pages, opts->auxtrace_mmap_pages);
                        rc = -errno;
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                                strerror_r(errno, msg, sizeof(msg)));
                        if (errno)
                                rc = -errno;
                        else
                                rc = -EINVAL;
                }
                goto out;
        }

        session->evlist = evlist;
        perf_session__set_id_hdr_size(session);
out:
        return rc;
}

static int process_sample_event(struct perf_tool *tool,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct perf_evsel *evsel,
                                struct machine *machine)
{
        struct record *rec = container_of(tool, struct record, tool);

        rec->samples++;

        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
        struct perf_data_file *file  = &rec->file;
        struct perf_session *session = rec->session;

        if (file->size == 0)
                return 0;

        /*
         * During this process the kernel map is loaded and dso->long_name
         * is replaced with the real pathname that was found.  In this case
         * we prefer a vmlinux path like
         *   /lib/modules/3.16.4/build/vmlinux
         *
         * rather than the build-id path (in the debug directory):
         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
         */
        symbol_conf.ignore_vmlinux_buildid = true;

        /*
         * If --buildid-all is given, it marks all DSOs regardless of hits,
         * so no need to process samples.
         */
        if (rec->buildid_all)
                rec->tool.sample = NULL;

        return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
        int err;
        struct perf_tool *tool = data;
        /*
         * As for guest kernels, when processing the record & report
         * subcommands we arrange the module mmaps prior to the guest kernel
         * mmap and trigger a preload of the dso, because by default guest
         * module symbols are loaded from the guest kallsyms instead of
         * /lib/modules/XXX/XXX.  This avoids missing symbols when the first
         * address falls in a module instead of in the guest kernel.
         */
        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);

        /*
         * We use _stext for the guest kernel because the guest kernel's
         * /proc/kallsyms sometimes has no _text.
         */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        if (err < 0)
                pr_err("Couldn't record guest kernel [%d]'s reference"
                       " relocation symbol.\n", machine->pid);
}

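/*
 * PERF_RECORD_FINISHED_ROUND marks the end of one pass over all mmaps (see
 * record__mmap_read_all() below); report-side code uses it as the point up
 * to which buffered events can safely be sorted by time and flushed.
 */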
static struct perf_event_header finished_round_event = {
        .size = sizeof(struct perf_event_header),
        .type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_all(struct record *rec)
{
        u64 bytes_written = rec->bytes_written;
        int i;
        int rc = 0;

        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
                struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;

                if (rec->evlist->mmap[i].base) {
                        if (record__mmap_read(rec, i) != 0) {
                                rc = -1;
                                goto out;
                        }
                }

                if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
                    record__auxtrace_mmap_read(rec, mm) != 0) {
                        rc = -1;
                        goto out;
                }
        }

        /*
         * Mark the round finished in case we wrote
         * at least one event.
         */
        if (bytes_written != rec->bytes_written)
                rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

out:
        return rc;
}

static void record__init_features(struct record *rec)
{
        struct perf_session *session = rec->session;
        int feat;

        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
                perf_header__set_feat(&session->header, feat);

        if (rec->no_buildid)
                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

        if (!have_tracepoints(&rec->evlist->entries))
                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

        if (!rec->opts.branch_stack)
                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

        if (!rec->opts.full_auxtrace)
                perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

        perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
        struct perf_data_file *file = &rec->file;
        int fd = perf_data_file__fd(file);

        if (file->is_pipe)
                return;

        rec->session->header.data_size += rec->bytes_written;
        file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

        if (!rec->no_buildid) {
                process_buildids(rec);

                if (rec->buildid_all)
                        dsos__hit_all(rec->session);
        }
        perf_session__write_header(rec->session, rec->evlist, fd, true);
}

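/*
 * Synthesize a one-entry thread map for the forked workload so that its
 * MMAP and COMM events end up in the new output file (see the comment in
 * record__switch_output()).
 */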
static int record__synthesize_workload(struct record *rec)
{
        struct {
                struct thread_map map;
                struct thread_map_data map_data;
        } thread_map;

        thread_map.map.nr = 1;
        thread_map.map.map[0].pid = rec->evlist->workload.pid;
        thread_map.map.map[0].comm = NULL;
        return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
                                                 process_synthesized_event,
                                                 &rec->session->machines.host,
                                                 rec->opts.sample_address,
                                                 rec->opts.proc_map_timeout);
}

static int record__synthesize(struct record *rec);

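/*
 * Finish the current output file and switch to a new, timestamped one.
 * Used both for --switch-output (on SIGUSR2) and, with at_exit set, for
 * --timestamp-filename when recording ends.
 */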
static int
record__switch_output(struct record *rec, bool at_exit)
{
        struct perf_data_file *file = &rec->file;
        int fd, err;

        /* Same size as "2015122520103046" */
        char timestamp[] = "InvalidTimestamp";

        rec->samples = 0;
        record__finish_output(rec);
        err = fetch_current_timestamp(timestamp, sizeof(timestamp));
        if (err) {
                pr_err("Failed to get current timestamp\n");
                return -EINVAL;
        }

        fd = perf_data_file__switch(file, timestamp,
                                    rec->session->header.data_offset,
                                    at_exit);
        if (fd >= 0 && !at_exit) {
                rec->bytes_written = 0;
                rec->session->header.data_size = 0;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                        file->path, timestamp);

        /* Output tracking events */
        if (!at_exit) {
                record__synthesize(rec);

                /*
                 * In 'perf record --switch-output' without -a,
                 * record__synthesize() in record__switch_output() won't
                 * generate tracking events because there's no thread_map
                 * in the evlist, so the newly created perf.data wouldn't
                 * contain map and comm information.
                 * Create a fake thread_map and directly call
                 * perf_event__synthesize_thread_map() for those events.
                 */
                if (target__none(&rec->opts.target))
                        record__synthesize_workload(rec);
        }
        return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
                                        siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
        workload_exec_errno = info->si_value.sival_int;
        done = 1;
        child_finished = 1;
}

static void snapshot_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
                            struct perf_tool *tool __maybe_unused,
                            perf_event__handler_t process __maybe_unused,
                            struct machine *machine __maybe_unused)
{
        return 0;
}

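/*
 * Emit the side-band records that later analysis needs in order to make
 * sense of the samples: attributes and tracing data (pipe mode), time
 * conversion info, AUX trace info, kernel and module mmaps, and the
 * existing threads of the target.
 */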
static int record__synthesize(struct record *rec)
{
        struct perf_session *session = rec->session;
        struct machine *machine = &session->machines.host;
        struct perf_data_file *file = &rec->file;
        struct record_opts *opts = &rec->opts;
        struct perf_tool *tool = &rec->tool;
        int fd = perf_data_file__fd(file);
        int err = 0;

        if (file->is_pipe) {
                err = perf_event__synthesize_attrs(tool, session,
                                                   process_synthesized_event);
                if (err < 0) {
                        pr_err("Couldn't synthesize attrs.\n");
                        goto out;
                }

                if (have_tracepoints(&rec->evlist->entries)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints so it's not really
                         * an error, just that we don't need to
                         * synthesize anything.  We really have to
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
                                                                  process_synthesized_event);
                        if (err <= 0) {
                                pr_err("Couldn't record tracing data.\n");
                                goto out;
                        }
                        rec->bytes_written += err;
                }
        }

        err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
                                          process_synthesized_event, machine);
        if (err)
                goto out;

        if (rec->opts.full_auxtrace) {
                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                        session, process_synthesized_event);
                if (err)
                        goto out;
        }

        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine);
        WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/kallsyms permission or run as root.\n");

        err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                             machine);
        WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
                           "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
                           "Check /proc/modules permission or run as root.\n");

        if (perf_guest) {
                machines__process_guests(&session->machines,
                                         perf_event__synthesize_guest_os, tool);
        }

        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
                                            process_synthesized_event, opts->sample_address,
                                            opts->proc_map_timeout);
out:
        return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
        int err;
        int status = 0;
        unsigned long waking = 0;
        const bool forks = argc > 0;
        struct machine *machine;
        struct perf_tool *tool = &rec->tool;
        struct record_opts *opts = &rec->opts;
        struct perf_data_file *file = &rec->file;
        struct perf_session *session;
        bool disabled = false, draining = false;
        int fd;

        rec->progname = argv[0];

        atexit(record__sig_exit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);

        if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
                signal(SIGUSR2, snapshot_sig_handler);
                if (rec->opts.auxtrace_snapshot_mode)
                        trigger_on(&auxtrace_snapshot_trigger);
                if (rec->switch_output)
                        trigger_on(&switch_output_trigger);
        } else {
                signal(SIGUSR2, SIG_IGN);
        }

        session = perf_session__new(file, false, tool);
        if (session == NULL) {
                pr_err("Perf session creation failed.\n");
                return -1;
        }

        fd = perf_data_file__fd(file);
        rec->session = session;

        record__init_features(rec);

        if (forks) {
                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
                                                    argv, file->is_pipe,
                                                    workload_exec_failed_signal);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        status = err;
                        goto out_delete_session;
                }
        }

        if (record__open(rec) != 0) {
                err = -1;
                goto out_child;
        }

        err = bpf__apply_obj_config();
        if (err) {
                char errbuf[BUFSIZ];

                bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Apply config to BPF failed: %s\n",
                         errbuf);
                goto out_child;
        }

        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
         */
        if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
                pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
                rec->tool.ordered_events = false;
        }

        if (!rec->evlist->nr_groups)
                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

        if (file->is_pipe) {
                err = perf_header__write_pipe(fd);
                if (err < 0)
                        goto out_child;
        } else {
                err = perf_session__write_header(session, rec->evlist, fd, false);
                if (err < 0)
                        goto out_child;
        }

        if (!rec->no_buildid
            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
                pr_err("Couldn't generate buildids. "
                       "Use --no-buildid to profile anyway.\n");
                err = -1;
                goto out_child;
        }

        machine = &session->machines.host;

        err = record__synthesize(rec);
        if (err < 0)
                goto out_child;

        if (rec->realtime_prio) {
                struct sched_param param;

                param.sched_priority = rec->realtime_prio;
                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                        pr_err("Could not set realtime priority.\n");
                        err = -1;
                        goto out_child;
                }
        }

        /*
         * When perf is starting the traced process, all the events
         * (apart from group members) have enable_on_exec=1 set,
         * so don't spoil it by prematurely enabling them.
         */
        if (!target__none(&opts->target) && !opts->initial_delay)
                perf_evlist__enable(rec->evlist);

        /*
         * Let the child rip
         */
        if (forks) {
                union perf_event *event;

                event = malloc(sizeof(event->comm) + machine->id_hdr_size);
                if (event == NULL) {
                        err = -ENOMEM;
                        goto out_child;
                }

                /*
                 * Some H/W events are generated before COMM event
                 * which is emitted during exec(), so perf script
                 * cannot see a correct process name for those events.
                 * Synthesize COMM event to prevent it.
                 */
                perf_event__synthesize_comm(tool, event,
                                            rec->evlist->workload.pid,
                                            process_synthesized_event,
                                            machine);
                free(event);

                perf_evlist__start_workload(rec->evlist);
        }

        if (opts->initial_delay) {
                usleep(opts->initial_delay * 1000);
                perf_evlist__enable(rec->evlist);
        }

        trigger_ready(&auxtrace_snapshot_trigger);
        trigger_ready(&switch_output_trigger);
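        /*
         * Main capture loop: drain every mmap, service any pending AUX
         * snapshot or output-switch request, and poll for more data once
         * a pass produces no new samples.
         */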
        for (;;) {
                unsigned long long hits = rec->samples;

                if (record__mmap_read_all(rec) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
                        trigger_error(&switch_output_trigger);
                        err = -1;
                        goto out_child;
                }

                if (auxtrace_record__snapshot_started) {
                        auxtrace_record__snapshot_started = 0;
                        if (!trigger_is_error(&auxtrace_snapshot_trigger))
                                record__read_auxtrace_snapshot(rec);
                        if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                                pr_err("AUX area tracing snapshot failed\n");
                                err = -1;
                                goto out_child;
                        }
                }

                if (trigger_is_hit(&switch_output_trigger)) {
                        trigger_ready(&switch_output_trigger);

                        if (!quiet)
                                fprintf(stderr, "[ perf record: dump data: Woken up %lu times ]\n",
                                        waking);
                        waking = 0;
                        fd = record__switch_output(rec, false);
                        if (fd < 0) {
                                pr_err("Failed to switch to new file\n");
                                trigger_error(&switch_output_trigger);
                                err = fd;
                                goto out_child;
                        }
                }

                if (hits == rec->samples) {
                        if (done || draining)
                                break;
                        err = perf_evlist__poll(rec->evlist, -1);
                        /*
                         * Propagate error, only if there's any. Ignore positive
                         * number of returned events and interrupt error.
                         */
                        if (err > 0 || (err < 0 && errno == EINTR))
                                err = 0;
                        waking++;

                        if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
                                draining = true;
                }

                /*
                 * When perf is starting the traced process, at the end events
                 * die with the process and we wait for that. Thus no need to
                 * disable events in this case.
                 */
                if (done && !disabled && !target__none(&opts->target)) {
                        trigger_off(&auxtrace_snapshot_trigger);
                        perf_evlist__disable(rec->evlist);
                        disabled = true;
                }
        }
        trigger_off(&auxtrace_snapshot_trigger);
        trigger_off(&switch_output_trigger);

        if (forks && workload_exec_errno) {
                char msg[STRERR_BUFSIZE];
                const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
                pr_err("Workload failed: %s\n", emsg);
                err = -1;
                goto out_child;
        }

        if (!quiet)
                fprintf(stderr, "[ perf record: Woken up %lu times to write data ]\n", waking);

out_child:
        if (forks) {
                int exit_status;

                if (!child_finished)
                        kill(rec->evlist->workload.pid, SIGTERM);

                wait(&exit_status);

                if (err < 0)
                        status = err;
                else if (WIFEXITED(exit_status))
                        status = WEXITSTATUS(exit_status);
                else if (WIFSIGNALED(exit_status))
                        signr = WTERMSIG(exit_status);
        } else
                status = err;

        /* this will be recalculated during process_buildids() */
        rec->samples = 0;

        if (!err) {
                if (!rec->timestamp_filename) {
                        record__finish_output(rec);
                } else {
                        fd = record__switch_output(rec, true);
                        if (fd < 0) {
                                status = fd;
                                goto out_delete_session;
                        }
                }
        }

        if (!err && !quiet) {
                char samples[128];
                const char *postfix = rec->timestamp_filename ?
                                        ".<timestamp>" : "";

                if (rec->samples && !rec->opts.full_auxtrace)
                        scnprintf(samples, sizeof(samples),
                                  " (%" PRIu64 " samples)", rec->samples);
                else
                        samples[0] = '\0';

                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
                        perf_data_file__size(file) / 1024.0 / 1024.0,
                        file->path, postfix, samples);
        }

out_delete_session:
        perf_session__delete(session);
        return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

        pr_debug("callchain: type %s\n", str[callchain->record_mode]);

        if (callchain->record_mode == CALLCHAIN_DWARF)
                pr_debug("callchain: stack dump size %d\n",
                         callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
                                 struct callchain_param *callchain,
                                 const char *arg, bool unset)
{
        int ret;
        callchain->enabled = !unset;

        /* --no-call-graph */
        if (unset) {
                callchain->record_mode = CALLCHAIN_NONE;
                pr_debug("callchain: disabled\n");
                return 0;
        }

        ret = parse_callchain_record_opt(arg, callchain);
        if (!ret) {
                /* Enable data address sampling for DWARF unwind. */
                if (callchain->record_mode == CALLCHAIN_DWARF)
                        record->sample_address = true;
                callchain_debug(callchain);
        }

        return ret;
}

int record_parse_callchain_opt(const struct option *opt,
                               const char *arg,
                               int unset)
{
        return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
                         const char *arg __maybe_unused,
                         int unset __maybe_unused)
{
        struct callchain_param *callchain = opt->value;

        callchain->enabled = true;

        if (callchain->record_mode == CALLCHAIN_NONE)
                callchain->record_mode = CALLCHAIN_FP;

        callchain_debug(callchain);
        return 0;
}

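/*
 * Handle perfconfig keys: 'record.build-id' takes cache, no-cache or skip,
 * and 'record.call-graph' is remapped onto 'call-graph.record-mode' before
 * falling through to the default config handling.
 */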
static int perf_record_config(const char *var, const char *value, void *cb)
{
        struct record *rec = cb;

        if (!strcmp(var, "record.build-id")) {
                if (!strcmp(value, "cache"))
                        rec->no_buildid_cache = false;
                else if (!strcmp(value, "no-cache"))
                        rec->no_buildid_cache = true;
                else if (!strcmp(value, "skip"))
                        rec->no_buildid = true;
                else
                        return -1;
                return 0;
        }
        if (!strcmp(var, "record.call-graph"))
                var = "call-graph.record-mode"; /* fall-through */

        return perf_default_config(var, value, cb);
}

struct clockid_map {
        const char *name;
        int clockid;
};

#define CLOCKID_MAP(n, c)       \
        { .name = n, .clockid = (c), }

#define CLOCKID_END     { .name = NULL, }

/*
 * Add the clockids missing from older system headers; we need to build on
 * many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
        /* available for all events, NMI safe */
        CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

        /* available for some events */
        CLOCKID_MAP("realtime", CLOCK_REALTIME),
        CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
        CLOCKID_MAP("tai", CLOCK_TAI),

        /* available for the lazy */
        CLOCKID_MAP("mono", CLOCK_MONOTONIC),
        CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
        CLOCKID_MAP("real", CLOCK_REALTIME),
        CLOCKID_MAP("boot", CLOCK_BOOTTIME),

        CLOCKID_END,
};

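/*
 * Parse -k/--clockid: accepts a raw number ("-k 1"), a name from the table
 * above ("-k mono"), or the same name with a "CLOCK_" prefix
 * ("-k CLOCK_MONOTONIC").
 */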
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
        struct record_opts *opts = (struct record_opts *)opt->value;
        const struct clockid_map *cm;
        const char *ostr = str;

        if (unset) {
                opts->use_clockid = 0;
                return 0;
        }

        /* no arg passed */
        if (!str)
                return 0;

        /* no setting it twice */
        if (opts->use_clockid)
                return -1;

        opts->use_clockid = true;

        /* if it's a number, we're done */
        if (sscanf(str, "%d", &opts->clockid) == 1)
                return 0;

        /* allow a "CLOCK_" prefix to the name */
        if (!strncasecmp(str, "CLOCK_", 6))
                str += 6;

        for (cm = clockids; cm->name; cm++) {
                if (!strcasecmp(str, cm->name)) {
                        opts->clockid = cm->clockid;
                        return 0;
                }
        }

        opts->use_clockid = false;
        ui__warning("unknown clockid %s, check man page\n", ostr);
        return -1;
}

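/*
 * Parse -m/--mmap-pages as "<data pages>[,<AUX area pages>]", e.g.
 * "-m 512,128".  Each value is a page count or a size with a B/K/M/G
 * suffix, as understood by __perf_evlist__parse_mmap_pages().
 */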
static int record__parse_mmap_pages(const struct option *opt,
                                    const char *str,
                                    int unset __maybe_unused)
{
        struct record_opts *opts = opt->value;
        char *s, *p;
        unsigned int mmap_pages;
        int ret;

        if (!str)
                return -EINVAL;

        s = strdup(str);
        if (!s)
                return -ENOMEM;

        p = strchr(s, ',');
        if (p)
                *p = '\0';

        if (*s) {
                ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
                if (ret)
                        goto out_free;
                opts->mmap_pages = mmap_pages;
        }

        if (!p) {
                ret = 0;
                goto out_free;
        }

        ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
        if (ret)
                goto out_free;

        opts->auxtrace_mmap_pages = mmap_pages;

out_free:
        free(s);
        return ret;
}

static const char * const __record_usage[] = {
        "perf record [<options>] [<command>]",
        "perf record [<options>] -- <command> [<options>]",
        NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, which is called
 * after cmd_record() exits, but since record_options needs to be accessible
 * to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
        .opts = {
                .sample_time         = true,
                .mmap_pages          = UINT_MAX,
                .user_freq           = UINT_MAX,
                .user_interval       = ULLONG_MAX,
                .freq                = 4000,
                .target              = {
                        .uses_mmap   = true,
                        .default_per_cpu = true,
                },
                .proc_map_timeout     = 500,
        },
        .tool = {
                .sample         = process_sample_event,
                .fork           = perf_event__process_fork,
                .exit           = perf_event__process_exit,
                .comm           = perf_event__process_comm,
                .mmap           = perf_event__process_mmap,
                .mmap2          = perf_event__process_mmap2,
                .ordered_events = true,
        },
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
        "\n\t\t\t\tDefault: fp";

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
struct option __record_options[] = {
        OPT_CALLBACK('e', "event", &record.evlist, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events_option),
        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
                     "event filter", parse_filter),
        OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
                           NULL, "don't record events from perf itself",
                           exclude_perf),
        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
                    "record events on existing process id"),
        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
                    "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &record.realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
                    "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
                            "system-wide collection from all CPUs"),
        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
                    "list of cpus to monitor"),
        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
        OPT_STRING('o', "output", &record.file.path, "file",
                    "output file name"),
        OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
                        &record.opts.no_inherit_set,
                        "child tasks do not inherit counters"),
        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
        OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
                     "number of mmap data pages and AUX area tracing mmap pages",
                     record__parse_mmap_pages),
        OPT_BOOLEAN(0, "group", &record.opts.group,
                    "put the counters into a counter group"),
        OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
                           NULL, "enables call-graph recording",
                           &record_callchain_opt),
        OPT_CALLBACK(0, "call-graph", &record.opts,
                     "record_mode[,record_size]", record_callchain_help,
                     &record_parse_callchain_opt),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show counter open errors, etc)"),
        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
        OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
                        &record.opts.sample_time_set,
                        "Record the sample timestamps"),
        OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
                    "don't sample"),
        OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
                        &record.no_buildid_cache_set,
                        "do not update the buildid cache"),
        OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
                        &record.no_buildid_set,
                        "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
        OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
                  "ms to wait before starting measurement after program start"),
        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
                   "user to profile"),

        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
                     "branch any", "sample any taken branches",
                     parse_branch_stack),

        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
                     "branch filter mask", "branch stack filter modes",
                     parse_branch_stack),
        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
                    "sample by weight (on special events only)"),
        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
                    "sample transaction flags (special events only)"),
        OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
                    "use per-thread mmaps"),
        OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
                    "sample selected machine registers on interrupt,"
                    " use -I ? to list register names", parse_regs),
        OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
                    "Record running/enabled time of read (:S) events"),
        OPT_CALLBACK('k', "clockid", &record.opts,
        "clockid", "clockid to use for events, see clock_gettime()",
        parse_clockid),
        OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
                          "opts", "AUX area tracing Snapshot Mode", ""),
        OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
                        "per thread proc mmap processing timeout in ms"),
        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
                    "Record context switch events"),
        OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
                         "Configure all used events to run in kernel space.",
                         PARSE_OPT_EXCLUSIVE),
        OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
                         "Configure all used events to run in user space.",
                         PARSE_OPT_EXCLUSIVE),
        OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
                   "clang binary to use for compiling BPF scriptlets"),
        OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
                   "options passed to clang when compiling BPF scriptlets"),
        OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
                   "file", "vmlinux pathname"),
        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
                    "Record build-id of all DSOs regardless of hits"),
        OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
                    "append timestamp to output filename"),
        OPT_BOOLEAN(0, "switch-output", &record.switch_output,
                    "Switch output when receiving SIGUSR2"),
        OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
        int err;
        struct record *rec = &record;
        char errbuf[BUFSIZ];

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
        set_nobuild('\0', "clang-path", true);
        set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
        set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

        rec->evlist = perf_evlist__new();
        if (rec->evlist == NULL)
                return -ENOMEM;

        perf_config(perf_record_config, rec);

        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
        if (!argc && target__none(&rec->opts.target))
                usage_with_options(record_usage, record_options);

        if (nr_cgroups && !rec->opts.target.system_wide) {
                usage_with_options_msg(record_usage, record_options,
                        "cgroup monitoring only available in system-wide mode");
        }
        if (rec->opts.record_switch_events &&
            !perf_can_record_switch_events()) {
                ui__error("kernel does not support recording context switch events\n");
                parse_options_usage(record_usage, record_options, "switch-events", 0);
                return -EINVAL;
        }

        if (rec->switch_output)
                rec->timestamp_filename = true;

        if (!rec->itr) {
                rec->itr = auxtrace_record__init(rec->evlist, &err);
                if (err)
                        return err;
        }

        err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
                                              rec->opts.auxtrace_snapshot_opts);
        if (err)
                return err;

        err = bpf__setup_stdout(rec->evlist);
        if (err) {
                bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
                pr_err("ERROR: Setup BPF stdout failed: %s\n",
                         errbuf);
                return err;
        }

        err = -ENOMEM;

        symbol__init(NULL);

        if (symbol_conf.kptr_restrict)
                pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

        if (rec->no_buildid_cache || rec->no_buildid) {
                disable_buildid_cache();
        } else if (rec->switch_output) {
                /*
                 * In 'perf record --switch-output', disable buildid
                 * generation by default to reduce data file switching
                 * overhead. Still generate buildids if they are required
                 * explicitly using
                 *
                 *  perf record --switch-output --no-no-buildid \
                 *              --no-no-buildid-cache
                 *
                 * The following code is equivalent to:
                 *
                 * if ((rec->no_buildid || !rec->no_buildid_set) &&
                 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
                 *         disable_buildid_cache();
                 */
                bool disable = true;

                if (rec->no_buildid_set && !rec->no_buildid)
                        disable = false;
                if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
                        disable = false;
                if (disable) {
                        rec->no_buildid = true;
                        rec->no_buildid_cache = true;
                        disable_buildid_cache();
                }
        }

        if (rec->evlist->nr_entries == 0 &&
            perf_evlist__add_default(rec->evlist) < 0) {
                pr_err("Not enough memory for event selector list\n");
                goto out_symbol_exit;
        }

        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
                rec->opts.no_inherit = true;

        err = target__validate(&rec->opts.target);
        if (err) {
                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__warning("%s", errbuf);
        }

        err = target__parse_uid(&rec->opts.target);
        if (err) {
                int saved_errno = errno;

                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
                ui__error("%s", errbuf);

                err = -saved_errno;
                goto out_symbol_exit;
        }

        err = -ENOMEM;
        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
                usage_with_options(record_usage, record_options);

        err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
        if (err)
                goto out_symbol_exit;

        /*
         * We take all buildids when the file contains
         * AUX area tracing data because we do not decode the
         * trace because it would take too long.
         */
        if (rec->opts.full_auxtrace)
                rec->buildid_all = true;

        if (record_opts__config(&rec->opts)) {
                err = -EINVAL;
                goto out_symbol_exit;
        }

        err = __cmd_record(&record, argc, argv);
out_symbol_exit:
        perf_evlist__delete(rec->evlist);
        symbol__exit();
        auxtrace_record__free(rec->itr);
        return err;
}

static void snapshot_sig_handler(int sig __maybe_unused)
{
        if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
                trigger_hit(&auxtrace_snapshot_trigger);
                auxtrace_record__snapshot_started = 1;
                if (auxtrace_record__snapshot_start(record.itr))
                        trigger_error(&auxtrace_snapshot_trigger);
        }

        if (trigger_is_ready(&switch_output_trigger))
                trigger_hit(&switch_output_trigger);
}