linux/tools/perf/builtin-record.c
   1/*
   2 * builtin-record.c
   3 *
   4 * Builtin record command: Record the profile of a workload
   5 * (or a CPU, or a PID) into the perf.data output file - for
   6 * later analysis via perf report.
   7 */
   8#include "builtin.h"
   9
  10#include "perf.h"
  11
  12#include "util/build-id.h"
  13#include "util/util.h"
  14#include "util/parse-options.h"
  15#include "util/parse-events.h"
  16
  17#include "util/header.h"
  18#include "util/event.h"
  19#include "util/evlist.h"
  20#include "util/evsel.h"
  21#include "util/debug.h"
  22#include "util/session.h"
  23#include "util/tool.h"
  24#include "util/symbol.h"
  25#include "util/cpumap.h"
  26#include "util/thread_map.h"
  27
  28#include <unistd.h>
  29#include <sched.h>
  30#include <sys/mman.h>
  31
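/*
 * Fallback on_exit() for C libraries that do not provide it (HAVE_ON_EXIT
 * unset): handlers registered here are run from a single atexit() hook,
 * and exit() is wrapped so the exit status lands in __exitcode and gets
 * passed to every handler.
 */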
  32#ifndef HAVE_ON_EXIT
  33#ifndef ATEXIT_MAX
  34#define ATEXIT_MAX 32
  35#endif
  36static int __on_exit_count = 0;
  37typedef void (*on_exit_func_t) (int, void *);
  38static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
  39static void *__on_exit_args[ATEXIT_MAX];
  40static int __exitcode = 0;
  41static void __handle_on_exit_funcs(void);
  42static int on_exit(on_exit_func_t function, void *arg);
  43#define exit(x) (exit)(__exitcode = (x))
  44
  45static int on_exit(on_exit_func_t function, void *arg)
  46{
  47        if (__on_exit_count == ATEXIT_MAX)
  48                return -ENOMEM;
  49        else if (__on_exit_count == 0)
  50                atexit(__handle_on_exit_funcs);
  51        __on_exit_funcs[__on_exit_count] = function;
  52        __on_exit_args[__on_exit_count++] = arg;
  53        return 0;
  54}
  55
  56static void __handle_on_exit_funcs(void)
  57{
  58        int i;
  59        for (i = 0; i < __on_exit_count; i++)
  60                __on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
  61}
  62#endif
  63
  64enum write_mode_t {
  65        WRITE_FORCE,
  66        WRITE_APPEND
  67};
  68
  69struct perf_record {
  70        struct perf_tool        tool;
  71        struct perf_record_opts opts;
  72        u64                     bytes_written;
  73        const char              *output_name;
  74        struct perf_evlist      *evlist;
  75        struct perf_session     *session;
  76        const char              *progname;
  77        int                     output;
  78        unsigned int            page_size;
  79        int                     realtime_prio;
  80        enum write_mode_t       write_mode;
  81        bool                    no_buildid;
  82        bool                    no_buildid_cache;
  83        bool                    force;
  84        bool                    file_new;
  85        bool                    append_file;
  86        long                    samples;
  87        off_t                   post_processing_offset;
  88};
  89
  90static void advance_output(struct perf_record *rec, size_t size)
  91{
  92        rec->bytes_written += size;
  93}
  94
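/*
 * write(2) may perform a short write; loop until the whole buffer has been
 * flushed to the output file, accounting every byte in bytes_written.
 */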
  95static int write_output(struct perf_record *rec, void *buf, size_t size)
  96{
  97        while (size) {
  98                int ret = write(rec->output, buf, size);
  99
 100                if (ret < 0) {
 101                        pr_err("failed to write\n");
 102                        return -1;
 103                }
 104
 105                size -= ret;
 106                buf += ret;
 107
 108                rec->bytes_written += ret;
 109        }
 110
 111        return 0;
 112}
 113
 114static int process_synthesized_event(struct perf_tool *tool,
 115                                     union perf_event *event,
 116                                     struct perf_sample *sample __maybe_unused,
 117                                     struct machine *machine __maybe_unused)
 118{
 119        struct perf_record *rec = container_of(tool, struct perf_record, tool);
 120        if (write_output(rec, event, event->header.size) < 0)
 121                return -1;
 122
 123        return 0;
 124}
 125
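/*
 * Drain one mmap'ed ring buffer: copy everything between the old tail
 * (md->prev) and the current head into the output file.  Data that wraps
 * past the end of the buffer is written in two chunks, and the tail is
 * advanced afterwards so the kernel can reuse the space.
 */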
 126static int perf_record__mmap_read(struct perf_record *rec,
 127                                   struct perf_mmap *md)
 128{
 129        unsigned int head = perf_mmap__read_head(md);
 130        unsigned int old = md->prev;
 131        unsigned char *data = md->base + rec->page_size;
 132        unsigned long size;
 133        void *buf;
 134        int rc = 0;
 135
 136        if (old == head)
 137                return 0;
 138
 139        rec->samples++;
 140
 141        size = head - old;
 142
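        /*
         * If the data wraps past the end of the ring buffer, write the
         * chunk up to the end of the buffer first.
         */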
 143        if ((old & md->mask) + size != (head & md->mask)) {
 144                buf = &data[old & md->mask];
 145                size = md->mask + 1 - (old & md->mask);
 146                old += size;
 147
 148                if (write_output(rec, buf, size) < 0) {
 149                        rc = -1;
 150                        goto out;
 151                }
 152        }
 153
 154        buf = &data[old & md->mask];
 155        size = head - old;
 156        old += size;
 157
 158        if (write_output(rec, buf, size) < 0) {
 159                rc = -1;
 160                goto out;
 161        }
 162
 163        md->prev = old;
 164        perf_mmap__write_tail(md, old);
 165
 166out:
 167        return rc;
 168}
 169
 170static volatile int done = 0;
 171static volatile int signr = -1;
 172static volatile int child_finished = 0;
 173
 174static void sig_handler(int sig)
 175{
 176        if (sig == SIGCHLD)
 177                child_finished = 1;
 178
 179        done = 1;
 180        signr = sig;
 181}
 182
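/*
 * on_exit() handler: reap the forked workload (terminating it first if it
 * has not exited yet) and, for fatal signals other than SIGUSR1, re-raise
 * the signal with its default disposition so the caller sees the real
 * cause of death.
 */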
 183static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
 184{
 185        struct perf_record *rec = arg;
 186        int status;
 187
 188        if (rec->evlist->workload.pid > 0) {
 189                if (!child_finished)
 190                        kill(rec->evlist->workload.pid, SIGTERM);
 191
 192                wait(&status);
 193                if (WIFSIGNALED(status))
 194                        psignal(WTERMSIG(status), rec->progname);
 195        }
 196
 197        if (signr == -1 || signr == SIGUSR1)
 198                return;
 199
 200        signal(signr, SIG_DFL);
 201        kill(getpid(), signr);
 202}
 203
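/*
 * Used in append mode: the event list given on the command line must match
 * the one recorded in the header of the existing perf.data file.
 */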
 204static bool perf_evlist__equal(struct perf_evlist *evlist,
 205                               struct perf_evlist *other)
 206{
 207        struct perf_evsel *pos, *pair;
 208
 209        if (evlist->nr_entries != other->nr_entries)
 210                return false;
 211
 212        pair = perf_evlist__first(other);
 213
 214        list_for_each_entry(pos, &evlist->entries, node) {
  215                if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
 216                        return false;
 217                pair = perf_evsel__next(pair);
 218        }
 219
 220        return true;
 221}
 222
 223static int perf_record__open(struct perf_record *rec)
 224{
 225        char msg[512];
 226        struct perf_evsel *pos;
 227        struct perf_evlist *evlist = rec->evlist;
 228        struct perf_session *session = rec->session;
 229        struct perf_record_opts *opts = &rec->opts;
 230        int rc = 0;
 231
 232        perf_evlist__config(evlist, opts);
 233
 234        list_for_each_entry(pos, &evlist->entries, node) {
 235try_again:
 236                if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
 237                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
 238                                if (verbose)
 239                                        ui__warning("%s\n", msg);
 240                                goto try_again;
 241                        }
 242
 243                        rc = -errno;
 244                        perf_evsel__open_strerror(pos, &opts->target,
 245                                                  errno, msg, sizeof(msg));
 246                        ui__error("%s\n", msg);
 247                        goto out;
 248                }
 249        }
 250
 251        if (perf_evlist__apply_filters(evlist)) {
 252                error("failed to set filter with %d (%s)\n", errno,
 253                        strerror(errno));
 254                rc = -1;
 255                goto out;
 256        }
 257
 258        if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
 259                if (errno == EPERM) {
 260                        pr_err("Permission error mapping pages.\n"
 261                               "Consider increasing "
 262                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
 263                               "or try again with a smaller value of -m/--mmap_pages.\n"
 264                               "(current value: %d)\n", opts->mmap_pages);
 265                        rc = -errno;
 266                } else if (!is_power_of_2(opts->mmap_pages) &&
 267                           (opts->mmap_pages != UINT_MAX)) {
  268                        pr_err("--mmap_pages/-m value must be a power of two.\n");
 269                        rc = -EINVAL;
 270                } else {
 271                        pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
 272                        rc = -errno;
 273                }
 274                goto out;
 275        }
 276
 277        if (rec->file_new)
 278                session->evlist = evlist;
 279        else {
 280                if (!perf_evlist__equal(session->evlist, evlist)) {
 281                        fprintf(stderr, "incompatible append\n");
 282                        rc = -1;
 283                        goto out;
 284                }
 285        }
 286
 287        perf_session__set_id_hdr_size(session);
 288out:
 289        return rc;
 290}
 291
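/*
 * Re-process the events written since post_processing_offset so that
 * build_id__mark_dso_hit_ops can mark the DSOs that samples actually hit
 * before perf_session__write_header() emits the build-id feature.
 */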
 292static int process_buildids(struct perf_record *rec)
 293{
 294        u64 size = lseek(rec->output, 0, SEEK_CUR);
 295
 296        if (size == 0)
 297                return 0;
 298
 299        rec->session->fd = rec->output;
 300        return __perf_session__process_events(rec->session, rec->post_processing_offset,
 301                                              size - rec->post_processing_offset,
 302                                              size, &build_id__mark_dso_hit_ops);
 303}
 304
 305static void perf_record__exit(int status, void *arg)
 306{
 307        struct perf_record *rec = arg;
 308
 309        if (status != 0)
 310                return;
 311
 312        if (!rec->opts.pipe_output) {
 313                rec->session->header.data_size += rec->bytes_written;
 314
 315                if (!rec->no_buildid)
 316                        process_buildids(rec);
 317                perf_session__write_header(rec->session, rec->evlist,
 318                                           rec->output, true);
 319                perf_session__delete(rec->session);
 320                perf_evlist__delete(rec->evlist);
 321                symbol__exit();
 322        }
 323}
 324
 325static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
 326{
 327        int err;
 328        struct perf_tool *tool = data;
  329        /*
  330         * For the guest kernel, when processing the record & report
  331         * subcommands, we synthesize the module mmaps before the guest
  332         * kernel mmap and trigger a DSO preload, because guest module
  333         * symbols come from guest kallsyms by default rather than from
  334         * /lib/modules/XXX/XXX. This avoids missing symbols when the
  335         * first sampled address falls in a module, not the guest kernel.
  336         */
 337        err = perf_event__synthesize_modules(tool, process_synthesized_event,
 338                                             machine);
 339        if (err < 0)
 340                pr_err("Couldn't record guest kernel [%d]'s reference"
 341                       " relocation symbol.\n", machine->pid);
 342
 343        /*
 344         * We use _stext for guest kernel because guest kernel's /proc/kallsyms
 345         * have no _text sometimes.
 346         */
 347        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 348                                                 machine, "_text");
 349        if (err < 0)
 350                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 351                                                         machine, "_stext");
 352        if (err < 0)
 353                pr_err("Couldn't record guest kernel [%d]'s reference"
 354                       " relocation symbol.\n", machine->pid);
 355}
 356
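/*
 * PERF_RECORD_FINISHED_ROUND is written after each pass over all the mmap
 * buffers (when tracing data is being recorded); it tells the report side
 * that every event queued before this point can be flushed in order.
 */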
 357static struct perf_event_header finished_round_event = {
 358        .size = sizeof(struct perf_event_header),
 359        .type = PERF_RECORD_FINISHED_ROUND,
 360};
 361
 362static int perf_record__mmap_read_all(struct perf_record *rec)
 363{
 364        int i;
 365        int rc = 0;
 366
 367        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
 368                if (rec->evlist->mmap[i].base) {
 369                        if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
 370                                rc = -1;
 371                                goto out;
 372                        }
 373                }
 374        }
 375
 376        if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
 377                rc = write_output(rec, &finished_round_event,
 378                                  sizeof(finished_round_event));
 379
 380out:
 381        return rc;
 382}
 383
 384static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 385{
 386        struct stat st;
 387        int flags;
 388        int err, output, feat;
 389        unsigned long waking = 0;
 390        const bool forks = argc > 0;
 391        struct machine *machine;
 392        struct perf_tool *tool = &rec->tool;
 393        struct perf_record_opts *opts = &rec->opts;
 394        struct perf_evlist *evsel_list = rec->evlist;
 395        const char *output_name = rec->output_name;
 396        struct perf_session *session;
 397        bool disabled = false;
 398
 399        rec->progname = argv[0];
 400
 401        rec->page_size = sysconf(_SC_PAGE_SIZE);
 402
 403        on_exit(perf_record__sig_exit, rec);
 404        signal(SIGCHLD, sig_handler);
 405        signal(SIGINT, sig_handler);
 406        signal(SIGUSR1, sig_handler);
 407
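        /*
         * Pick the output: an explicit "-o -", or a stdout that is a pipe,
         * selects pipe output; otherwise default to perf.data.  In force
         * mode an existing non-empty data file is rotated to <name>.old,
         * and append mode falls back to force mode when there is nothing
         * to append to.
         */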
 408        if (!output_name) {
 409                if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
 410                        opts->pipe_output = true;
 411                else
 412                        rec->output_name = output_name = "perf.data";
 413        }
 414        if (output_name) {
 415                if (!strcmp(output_name, "-"))
 416                        opts->pipe_output = true;
 417                else if (!stat(output_name, &st) && st.st_size) {
 418                        if (rec->write_mode == WRITE_FORCE) {
 419                                char oldname[PATH_MAX];
 420                                snprintf(oldname, sizeof(oldname), "%s.old",
 421                                         output_name);
 422                                unlink(oldname);
 423                                rename(output_name, oldname);
 424                        }
 425                } else if (rec->write_mode == WRITE_APPEND) {
 426                        rec->write_mode = WRITE_FORCE;
 427                }
 428        }
 429
 430        flags = O_CREAT|O_RDWR;
 431        if (rec->write_mode == WRITE_APPEND)
 432                rec->file_new = 0;
 433        else
 434                flags |= O_TRUNC;
 435
 436        if (opts->pipe_output)
 437                output = STDOUT_FILENO;
 438        else
 439                output = open(output_name, flags, S_IRUSR | S_IWUSR);
 440        if (output < 0) {
 441                perror("failed to create output file");
 442                return -1;
 443        }
 444
 445        rec->output = output;
 446
 447        session = perf_session__new(output_name, O_WRONLY,
 448                                    rec->write_mode == WRITE_FORCE, false, NULL);
 449        if (session == NULL) {
 450                pr_err("Not enough memory for reading perf file header\n");
 451                return -1;
 452        }
 453
 454        rec->session = session;
 455
 456        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
 457                perf_header__set_feat(&session->header, feat);
 458
 459        if (rec->no_buildid)
 460                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
 461
 462        if (!have_tracepoints(&evsel_list->entries))
 463                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
 464
 465        if (!rec->opts.branch_stack)
 466                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
 467
 468        if (!rec->file_new) {
 469                err = perf_session__read_header(session, output);
 470                if (err < 0)
 471                        goto out_delete_session;
 472        }
 473
 474        if (forks) {
 475                err = perf_evlist__prepare_workload(evsel_list, &opts->target,
 476                                                    argv, opts->pipe_output,
 477                                                    true);
 478                if (err < 0) {
 479                        pr_err("Couldn't run the workload!\n");
 480                        goto out_delete_session;
 481                }
 482        }
 483
 484        if (perf_record__open(rec) != 0) {
 485                err = -1;
 486                goto out_delete_session;
 487        }
 488
 489        if (!evsel_list->nr_groups)
 490                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
 491
 492        /*
 493         * perf_session__delete(session) will be called at perf_record__exit()
 494         */
 495        on_exit(perf_record__exit, rec);
 496
 497        if (opts->pipe_output) {
 498                err = perf_header__write_pipe(output);
 499                if (err < 0)
 500                        goto out_delete_session;
 501        } else if (rec->file_new) {
 502                err = perf_session__write_header(session, evsel_list,
 503                                                 output, false);
 504                if (err < 0)
 505                        goto out_delete_session;
 506        }
 507
 508        if (!rec->no_buildid
 509            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
 510                pr_err("Couldn't generate buildids. "
 511                       "Use --no-buildid to profile anyway.\n");
 512                err = -1;
 513                goto out_delete_session;
 514        }
 515
 516        rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
 517
 518        machine = &session->machines.host;
 519
 520        if (opts->pipe_output) {
 521                err = perf_event__synthesize_attrs(tool, session,
 522                                                   process_synthesized_event);
 523                if (err < 0) {
 524                        pr_err("Couldn't synthesize attrs.\n");
 525                        goto out_delete_session;
 526                }
 527
 528                err = perf_event__synthesize_event_types(tool, process_synthesized_event,
 529                                                         machine);
 530                if (err < 0) {
 531                        pr_err("Couldn't synthesize event_types.\n");
 532                        goto out_delete_session;
 533                }
 534
 535                if (have_tracepoints(&evsel_list->entries)) {
 536                        /*
 537                         * FIXME err <= 0 here actually means that
  538                         * there were no tracepoints so it's not really
 539                         * an error, just that we don't need to
 540                         * synthesize anything.  We really have to
 541                         * return this more properly and also
 542                         * propagate errors that now are calling die()
 543                         */
 544                        err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
 545                                                                  process_synthesized_event);
 546                        if (err <= 0) {
 547                                pr_err("Couldn't record tracing data.\n");
 548                                goto out_delete_session;
 549                        }
 550                        advance_output(rec, err);
 551                }
 552        }
 553
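        /*
         * Synthesize mmap events for the host kernel and its modules so
         * that report can resolve kernel addresses, preferring "_text" and
         * falling back to "_stext" when it cannot be found.
         */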
 554        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 555                                                 machine, "_text");
 556        if (err < 0)
 557                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 558                                                         machine, "_stext");
 559        if (err < 0)
 560                pr_err("Couldn't record kernel reference relocation symbol\n"
 561                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
 562                       "Check /proc/kallsyms permission or run as root.\n");
 563
 564        err = perf_event__synthesize_modules(tool, process_synthesized_event,
 565                                             machine);
 566        if (err < 0)
 567                pr_err("Couldn't record kernel module information.\n"
 568                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
 569                       "Check /proc/modules permission or run as root.\n");
 570
 571        if (perf_guest) {
 572                machines__process_guests(&session->machines,
 573                                         perf_event__synthesize_guest_os, tool);
 574        }
 575
 576        if (perf_target__has_task(&opts->target))
 577                err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
 578                                                  process_synthesized_event,
 579                                                  machine);
 580        else if (perf_target__has_cpu(&opts->target))
 581                err = perf_event__synthesize_threads(tool, process_synthesized_event,
 582                                               machine);
 583        else /* command specified */
 584                err = 0;
 585
 586        if (err != 0)
 587                goto out_delete_session;
 588
 589        if (rec->realtime_prio) {
 590                struct sched_param param;
 591
 592                param.sched_priority = rec->realtime_prio;
 593                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
 594                        pr_err("Could not set realtime priority.\n");
 595                        err = -1;
 596                        goto out_delete_session;
 597                }
 598        }
 599
 600        /*
 601         * When perf is starting the traced process, all the events
 602         * (apart from group members) have enable_on_exec=1 set,
 603         * so don't spoil it by prematurely enabling them.
 604         */
 605        if (!perf_target__none(&opts->target))
 606                perf_evlist__enable(evsel_list);
 607
 608        /*
 609         * Let the child rip
 610         */
 611        if (forks)
 612                perf_evlist__start_workload(evsel_list);
 613
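        /*
         * Main capture loop: drain every mmap buffer, then poll when
         * nothing new arrived.  Once 'done' is set by a signal the counters
         * are disabled (unless they die with the forked workload anyway)
         * and the loop exits after a final drain pass.
         */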
 614        for (;;) {
 615                int hits = rec->samples;
 616
 617                if (perf_record__mmap_read_all(rec) < 0) {
 618                        err = -1;
 619                        goto out_delete_session;
 620                }
 621
 622                if (hits == rec->samples) {
 623                        if (done)
 624                                break;
 625                        err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
 626                        waking++;
 627                }
 628
 629                /*
 630                 * When perf is starting the traced process, at the end events
 631                 * die with the process and we wait for that. Thus no need to
 632                 * disable events in this case.
 633                 */
 634                if (done && !disabled && !perf_target__none(&opts->target)) {
 635                        perf_evlist__disable(evsel_list);
 636                        disabled = true;
 637                }
 638        }
 639
 640        if (quiet || signr == SIGUSR1)
 641                return 0;
 642
 643        fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
 644
 645        /*
 646         * Approximate RIP event size: 24 bytes.
 647         */
 648        fprintf(stderr,
 649                "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
 650                (double)rec->bytes_written / 1024.0 / 1024.0,
 651                output_name,
 652                rec->bytes_written / 24);
 653
 654        return 0;
 655
 656out_delete_session:
 657        perf_session__delete(session);
 658        return err;
 659}
 660
 661#define BRANCH_OPT(n, m) \
 662        { .name = n, .mode = (m) }
 663
 664#define BRANCH_END { .name = NULL }
 665
 666struct branch_mode {
 667        const char *name;
 668        int mode;
 669};
 670
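/*
 * Tokens accepted by -b/--branch-any and -j/--branch-filter, mapped to
 * PERF_SAMPLE_BRANCH_* bits, e.g. something like "-j any_call,u".
 */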
 671static const struct branch_mode branch_modes[] = {
 672        BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
 673        BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
 674        BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
 675        BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
 676        BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
 677        BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
 678        BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
 679        BRANCH_END
 680};
 681
 682static int
 683parse_branch_stack(const struct option *opt, const char *str, int unset)
 684{
 685#define ONLY_PLM \
 686        (PERF_SAMPLE_BRANCH_USER        |\
 687         PERF_SAMPLE_BRANCH_KERNEL      |\
 688         PERF_SAMPLE_BRANCH_HV)
 689
 690        uint64_t *mode = (uint64_t *)opt->value;
 691        const struct branch_mode *br;
 692        char *s, *os = NULL, *p;
 693        int ret = -1;
 694
 695        if (unset)
 696                return 0;
 697
 698        /*
 699         * cannot set it twice, -b + --branch-filter for instance
 700         */
 701        if (*mode)
 702                return -1;
 703
 704        /* str may be NULL in case no arg is passed to -b */
 705        if (str) {
 706                /* because str is read-only */
 707                s = os = strdup(str);
 708                if (!s)
 709                        return -1;
 710
 711                for (;;) {
 712                        p = strchr(s, ',');
 713                        if (p)
 714                                *p = '\0';
 715
 716                        for (br = branch_modes; br->name; br++) {
 717                                if (!strcasecmp(s, br->name))
 718                                        break;
 719                        }
 720                        if (!br->name) {
 721                                ui__warning("unknown branch filter %s,"
 722                                            " check man page\n", s);
 723                                goto error;
 724                        }
 725
 726                        *mode |= br->mode;
 727
 728                        if (!p)
 729                                break;
 730
 731                        s = p + 1;
 732                }
 733        }
 734        ret = 0;
 735
 736        /* default to any branch */
 737        if ((*mode & ~ONLY_PLM) == 0) {
 738                *mode = PERF_SAMPLE_BRANCH_ANY;
 739        }
 740error:
 741        free(os);
 742        return ret;
 743}
 744
 745#ifdef LIBUNWIND_SUPPORT
 746static int get_stack_size(char *str, unsigned long *_size)
 747{
 748        char *endptr;
 749        unsigned long size;
 750        unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
 751
 752        size = strtoul(str, &endptr, 0);
 753
 754        do {
 755                if (*endptr)
 756                        break;
 757
 758                size = round_up(size, sizeof(u64));
 759                if (!size || size > max_size)
 760                        break;
 761
 762                *_size = size;
 763                return 0;
 764
 765        } while (0);
 766
 767        pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
 768               max_size, str);
 769        return -1;
 770}
 771#endif /* LIBUNWIND_SUPPORT */
 772
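/*
 * Parse the -g/--call-graph argument: "fp" selects frame-pointer based
 * callchains; with libunwind support, "dwarf[,size]" additionally dumps
 * 'size' bytes of user stack (8192 by default) to be unwound with DWARF
 * debug info.
 */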
 773int record_parse_callchain_opt(const struct option *opt,
 774                               const char *arg, int unset)
 775{
 776        struct perf_record_opts *opts = opt->value;
 777        char *tok, *name, *saveptr = NULL;
 778        char *buf;
 779        int ret = -1;
 780
 781        /* --no-call-graph */
 782        if (unset)
 783                return 0;
 784
 785        /* We specified default option if none is provided. */
 786        BUG_ON(!arg);
 787
 788        /* We need buffer that we know we can write to. */
 789        buf = malloc(strlen(arg) + 1);
 790        if (!buf)
 791                return -ENOMEM;
 792
 793        strcpy(buf, arg);
 794
 795        tok = strtok_r((char *)buf, ",", &saveptr);
 796        name = tok ? : (char *)buf;
 797
 798        do {
 799                /* Framepointer style */
 800                if (!strncmp(name, "fp", sizeof("fp"))) {
 801                        if (!strtok_r(NULL, ",", &saveptr)) {
 802                                opts->call_graph = CALLCHAIN_FP;
 803                                ret = 0;
 804                        } else
 805                                pr_err("callchain: No more arguments "
 806                                       "needed for -g fp\n");
 807                        break;
 808
 809#ifdef LIBUNWIND_SUPPORT
 810                /* Dwarf style */
 811                } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
 812                        const unsigned long default_stack_dump_size = 8192;
 813
 814                        ret = 0;
 815                        opts->call_graph = CALLCHAIN_DWARF;
 816                        opts->stack_dump_size = default_stack_dump_size;
 817
 818                        tok = strtok_r(NULL, ",", &saveptr);
 819                        if (tok) {
 820                                unsigned long size = 0;
 821
 822                                ret = get_stack_size(tok, &size);
 823                                opts->stack_dump_size = size;
 824                        }
 825
 826                        if (!ret)
 827                                pr_debug("callchain: stack dump size %d\n",
 828                                         opts->stack_dump_size);
 829#endif /* LIBUNWIND_SUPPORT */
 830                } else {
 831                        pr_err("callchain: Unknown -g option "
 832                               "value: %s\n", arg);
 833                        break;
 834                }
 835
 836        } while (0);
 837
 838        free(buf);
 839
 840        if (!ret)
 841                pr_debug("callchain: type %d\n", opts->call_graph);
 842
 843        return ret;
 844}
 845
 846static const char * const record_usage[] = {
 847        "perf record [<options>] [<command>]",
 848        "perf record [<options>] -- <command> [<options>]",
 849        NULL
 850};
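
/*
 * Illustrative invocations, built from the options defined below:
 *   perf record ./workload                  # profile a new workload
 *   perf record -a sleep 5                  # system-wide for five seconds
 *   perf record -e cycles -p 1234 sleep 10  # existing pid, explicit event
 */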
 851
 852/*
  853 * XXX Ideally this would be local to cmd_record() and passed to a
  854 * perf_record__new, because we need access to it in perf_record__exit(),
  855 * which is called after cmd_record() exits; but since record_options needs
  856 * to be accessible to builtin-script, leave it here.
  857 *
  858 * At least we don't touch it in all the other functions here directly.
 859 *
 860 * Just say no to tons of global variables, sigh.
 861 */
 862static struct perf_record record = {
 863        .opts = {
 864                .mmap_pages          = UINT_MAX,
 865                .user_freq           = UINT_MAX,
 866                .user_interval       = ULLONG_MAX,
 867                .freq                = 4000,
 868                .target              = {
 869                        .uses_mmap   = true,
 870                },
 871        },
 872        .write_mode = WRITE_FORCE,
 873        .file_new   = true,
 874};
 875
 876#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
 877
 878#ifdef LIBUNWIND_SUPPORT
 879const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
 880#else
 881const char record_callchain_help[] = CALLCHAIN_HELP "[fp]";
 882#endif
 883
 884/*
 885 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
  886 * with it and switch to using the library functions in perf_evlist that came
  887 * from builtin-record.c, i.e. use perf_record_opts,
  888 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
 889 * using pipes, etc.
 890 */
 891const struct option record_options[] = {
 892        OPT_CALLBACK('e', "event", &record.evlist, "event",
 893                     "event selector. use 'perf list' to list available events",
 894                     parse_events_option),
 895        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
 896                     "event filter", parse_filter),
 897        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
 898                    "record events on existing process id"),
 899        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
 900                    "record events on existing thread id"),
 901        OPT_INTEGER('r', "realtime", &record.realtime_prio,
 902                    "collect data with this RT SCHED_FIFO priority"),
 903        OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
 904                    "collect data without buffering"),
 905        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
 906                    "collect raw sample records from all opened counters"),
 907        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
 908                            "system-wide collection from all CPUs"),
 909        OPT_BOOLEAN('A', "append", &record.append_file,
 910                            "append to the output file to do incremental profiling"),
 911        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
 912                    "list of cpus to monitor"),
 913        OPT_BOOLEAN('f', "force", &record.force,
 914                        "overwrite existing data file (deprecated)"),
 915        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
 916        OPT_STRING('o', "output", &record.output_name, "file",
 917                    "output file name"),
 918        OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
 919                    "child tasks do not inherit counters"),
 920        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
 921        OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
 922                     "number of mmap data pages"),
 923        OPT_BOOLEAN(0, "group", &record.opts.group,
 924                    "put the counters into a counter group"),
 925        OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts,
 926                             "mode[,dump_size]", record_callchain_help,
 927                             &record_parse_callchain_opt, "fp"),
 928        OPT_INCR('v', "verbose", &verbose,
 929                    "be more verbose (show counter open errors, etc)"),
 930        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
 931        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
 932                    "per thread counts"),
 933        OPT_BOOLEAN('d', "data", &record.opts.sample_address,
 934                    "Sample addresses"),
 935        OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
 936        OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
 937        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
 938                    "don't sample"),
 939        OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
 940                    "do not update the buildid cache"),
 941        OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
 942                    "do not collect buildids in perf.data"),
 943        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
 944                     "monitor event in cgroup name only",
 945                     parse_cgroups),
 946        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
 947                   "user to profile"),
 948
 949        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
 950                     "branch any", "sample any taken branches",
 951                     parse_branch_stack),
 952
 953        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
 954                     "branch filter mask", "branch stack filter modes",
 955                     parse_branch_stack),
 956        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
 957                    "sample by weight (on special events only)"),
 958        OPT_END()
 959};
 960
 961int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 962{
 963        int err = -ENOMEM;
 964        struct perf_evsel *pos;
 965        struct perf_evlist *evsel_list;
 966        struct perf_record *rec = &record;
 967        char errbuf[BUFSIZ];
 968
 969        evsel_list = perf_evlist__new();
 970        if (evsel_list == NULL)
 971                return -ENOMEM;
 972
 973        rec->evlist = evsel_list;
 974
 975        argc = parse_options(argc, argv, record_options, record_usage,
 976                            PARSE_OPT_STOP_AT_NON_OPTION);
 977        if (!argc && perf_target__none(&rec->opts.target))
 978                usage_with_options(record_usage, record_options);
 979
 980        if (rec->force && rec->append_file) {
  981                ui__error("Can't overwrite and append at the same time."
  982                          " You need to choose between -f and -A.\n");
 983                usage_with_options(record_usage, record_options);
 984        } else if (rec->append_file) {
 985                rec->write_mode = WRITE_APPEND;
 986        } else {
 987                rec->write_mode = WRITE_FORCE;
 988        }
 989
 990        if (nr_cgroups && !rec->opts.target.system_wide) {
 991                ui__error("cgroup monitoring only available in"
 992                          " system-wide mode\n");
 993                usage_with_options(record_usage, record_options);
 994        }
 995
 996        symbol__init();
 997
 998        if (symbol_conf.kptr_restrict)
 999                pr_warning(
1000"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1001"check /proc/sys/kernel/kptr_restrict.\n\n"
1002"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1003"file is not found in the buildid cache or in the vmlinux path.\n\n"
1004"Samples in kernel modules won't be resolved at all.\n\n"
1005"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1006"even with a suitable vmlinux or kallsyms file.\n\n");
1007
1008        if (rec->no_buildid_cache || rec->no_buildid)
1009                disable_buildid_cache();
1010
1011        if (evsel_list->nr_entries == 0 &&
1012            perf_evlist__add_default(evsel_list) < 0) {
1013                pr_err("Not enough memory for event selector list\n");
1014                goto out_symbol_exit;
1015        }
1016
1017        err = perf_target__validate(&rec->opts.target);
1018        if (err) {
1019                perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1020                ui__warning("%s", errbuf);
1021        }
1022
1023        err = perf_target__parse_uid(&rec->opts.target);
1024        if (err) {
1025                int saved_errno = errno;
1026
1027                perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1028                ui__error("%s", errbuf);
1029
1030                err = -saved_errno;
1031                goto out_symbol_exit;
1032        }
1033
1034        err = -ENOMEM;
1035        if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
1036                usage_with_options(record_usage, record_options);
1037
1038        list_for_each_entry(pos, &evsel_list->entries, node) {
1039                if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
1040                        goto out_free_fd;
1041        }
1042
1043        if (rec->opts.user_interval != ULLONG_MAX)
1044                rec->opts.default_interval = rec->opts.user_interval;
1045        if (rec->opts.user_freq != UINT_MAX)
1046                rec->opts.freq = rec->opts.user_freq;
1047
1048        /*
1049         * User specified count overrides default frequency.
1050         */
1051        if (rec->opts.default_interval)
1052                rec->opts.freq = 0;
1053        else if (rec->opts.freq) {
1054                rec->opts.default_interval = rec->opts.freq;
1055        } else {
1056                ui__error("frequency and count are zero, aborting\n");
1057                err = -EINVAL;
1058                goto out_free_fd;
1059        }
1060
1061        err = __cmd_record(&record, argc, argv);
1062
1063        perf_evlist__munmap(evsel_list);
1064        perf_evlist__close(evsel_list);
1065out_free_fd:
1066        perf_evlist__delete_maps(evsel_list);
1067out_symbol_exit:
1068        symbol__exit();
1069        return err;
1070}
1071