linux/tools/perf/builtin-record.c
<<
>>
Prefs
   1/*
   2 * builtin-record.c
   3 *
   4 * Builtin record command: Record the profile of a workload
   5 * (or a CPU, or a PID) into the perf.data output file - for
   6 * later analysis via perf report.
   7 */
   8#include "builtin.h"
   9
  10#include "perf.h"
  11
  12#include "util/build-id.h"
  13#include "util/util.h"
  14#include "util/parse-options.h"
  15#include "util/parse-events.h"
  16
  17#include "util/callchain.h"
  18#include "util/cgroup.h"
  19#include "util/header.h"
  20#include "util/event.h"
  21#include "util/evlist.h"
  22#include "util/evsel.h"
  23#include "util/debug.h"
  24#include "util/session.h"
  25#include "util/tool.h"
  26#include "util/symbol.h"
  27#include "util/cpumap.h"
  28#include "util/thread_map.h"
  29#include "util/data.h"
  30
#include <errno.h>
#include <limits.h>
#include <sched.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>
  34
  35
  36struct record {
  37        struct perf_tool        tool;
  38        struct record_opts      opts;
  39        u64                     bytes_written;
  40        struct perf_data_file   file;
  41        struct perf_evlist      *evlist;
  42        struct perf_session     *session;
  43        const char              *progname;
  44        int                     realtime_prio;
  45        bool                    no_buildid;
  46        bool                    no_buildid_cache;
  47        long                    samples;
  48};
  49
  50static int record__write(struct record *rec, void *bf, size_t size)
  51{
  52        if (perf_data_file__write(rec->session->file, bf, size) < 0) {
  53                pr_err("failed to write perf data, error: %m\n");
  54                return -1;
  55        }
  56
  57        rec->bytes_written += size;
  58        return 0;
  59}
  60
  61static int process_synthesized_event(struct perf_tool *tool,
  62                                     union perf_event *event,
  63                                     struct perf_sample *sample __maybe_unused,
  64                                     struct machine *machine __maybe_unused)
  65{
  66        struct record *rec = container_of(tool, struct record, tool);
  67        return record__write(rec, event, event->header.size);
  68}
  69
  70static int record__mmap_read(struct record *rec, int idx)
  71{
  72        struct perf_mmap *md = &rec->evlist->mmap[idx];
  73        u64 head = perf_mmap__read_head(md);
  74        u64 old = md->prev;
  75        unsigned char *data = md->base + page_size;
  76        unsigned long size;
  77        void *buf;
  78        int rc = 0;
  79
  80        if (old == head)
  81                return 0;
  82
  83        rec->samples++;
  84
  85        size = head - old;
  86
  87        if ((old & md->mask) + size != (head & md->mask)) {
  88                buf = &data[old & md->mask];
  89                size = md->mask + 1 - (old & md->mask);
  90                old += size;
  91
  92                if (record__write(rec, buf, size) < 0) {
  93                        rc = -1;
  94                        goto out;
  95                }
  96        }
  97
  98        buf = &data[old & md->mask];
  99        size = head - old;
 100        old += size;
 101
 102        if (record__write(rec, buf, size) < 0) {
 103                rc = -1;
 104                goto out;
 105        }
 106
 107        md->prev = old;
 108        perf_evlist__mmap_consume(rec->evlist, idx);
 109out:
 110        return rc;
 111}
 112
 113static volatile int done = 0;
 114static volatile int signr = -1;
 115static volatile int child_finished = 0;
 116
 117static void sig_handler(int sig)
 118{
 119        if (sig == SIGCHLD)
 120                child_finished = 1;
 121        else
 122                signr = sig;
 123
 124        done = 1;
 125}
 126
 127static void record__sig_exit(void)
 128{
 129        if (signr == -1)
 130                return;
 131
 132        signal(signr, SIG_DFL);
 133        raise(signr);
 134}
 135
 136static int record__open(struct record *rec)
 137{
 138        char msg[512];
 139        struct perf_evsel *pos;
 140        struct perf_evlist *evlist = rec->evlist;
 141        struct perf_session *session = rec->session;
 142        struct record_opts *opts = &rec->opts;
 143        int rc = 0;
 144
 145        perf_evlist__config(evlist, opts);
 146
 147        evlist__for_each(evlist, pos) {
 148try_again:
 149                if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
 150                        if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
 151                                if (verbose)
 152                                        ui__warning("%s\n", msg);
 153                                goto try_again;
 154                        }
 155
 156                        rc = -errno;
 157                        perf_evsel__open_strerror(pos, &opts->target,
 158                                                  errno, msg, sizeof(msg));
 159                        ui__error("%s\n", msg);
 160                        goto out;
 161                }
 162        }
 163
 164        if (perf_evlist__apply_filters(evlist, &pos)) {
 165                error("failed to set filter \"%s\" on event %s with %d (%s)\n",
 166                        pos->filter, perf_evsel__name(pos), errno,
 167                        strerror_r(errno, msg, sizeof(msg)));
 168                rc = -1;
 169                goto out;
 170        }
 171
 172        if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
 173                if (errno == EPERM) {
 174                        pr_err("Permission error mapping pages.\n"
 175                               "Consider increasing "
 176                               "/proc/sys/kernel/perf_event_mlock_kb,\n"
 177                               "or try again with a smaller value of -m/--mmap_pages.\n"
 178                               "(current value: %u)\n", opts->mmap_pages);
 179                        rc = -errno;
 180                } else {
 181                        pr_err("failed to mmap with %d (%s)\n", errno,
 182                                strerror_r(errno, msg, sizeof(msg)));
 183                        rc = -errno;
 184                }
 185                goto out;
 186        }
 187
 188        session->evlist = evlist;
 189        perf_session__set_id_hdr_size(session);
 190out:
 191        return rc;
 192}
 193
 194static int process_sample_event(struct perf_tool *tool,
 195                                union perf_event *event,
 196                                struct perf_sample *sample,
 197                                struct perf_evsel *evsel,
 198                                struct machine *machine)
 199{
 200        struct record *rec = container_of(tool, struct record, tool);
 201
 202        rec->samples++;
 203
 204        return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
 205}
 206
 207static int process_buildids(struct record *rec)
 208{
 209        struct perf_data_file *file  = &rec->file;
 210        struct perf_session *session = rec->session;
 211
 212        u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
 213        if (size == 0)
 214                return 0;
 215
 216        file->size = size;
 217
 218        /*
 219         * During this process, it'll load kernel map and replace the
 220         * dso->long_name to a real pathname it found.  In this case
 221         * we prefer the vmlinux path like
 222         *   /lib/modules/3.16.4/build/vmlinux
 223         *
 224         * rather than build-id path (in debug directory).
 225         *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
 226         */
 227        symbol_conf.ignore_vmlinux_buildid = true;
 228
 229        return perf_session__process_events(session);
 230}
 231
 232static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
 233{
 234        int err;
 235        struct perf_tool *tool = data;
 236        /*
 237         *As for guest kernel when processing subcommand record&report,
 238         *we arrange module mmap prior to guest kernel mmap and trigger
 239         *a preload dso because default guest module symbols are loaded
 240         *from guest kallsyms instead of /lib/modules/XXX/XXX. This
 241         *method is used to avoid symbol missing when the first addr is
 242         *in module instead of in guest kernel.
 243         */
 244        err = perf_event__synthesize_modules(tool, process_synthesized_event,
 245                                             machine);
 246        if (err < 0)
 247                pr_err("Couldn't record guest kernel [%d]'s reference"
 248                       " relocation symbol.\n", machine->pid);
 249
 250        /*
 251         * We use _stext for guest kernel because guest kernel's /proc/kallsyms
 252         * have no _text sometimes.
 253         */
 254        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 255                                                 machine);
 256        if (err < 0)
 257                pr_err("Couldn't record guest kernel [%d]'s reference"
 258                       " relocation symbol.\n", machine->pid);
 259}
 260
 261static struct perf_event_header finished_round_event = {
 262        .size = sizeof(struct perf_event_header),
 263        .type = PERF_RECORD_FINISHED_ROUND,
 264};
 265
 266static int record__mmap_read_all(struct record *rec)
 267{
 268        u64 bytes_written = rec->bytes_written;
 269        int i;
 270        int rc = 0;
 271
 272        for (i = 0; i < rec->evlist->nr_mmaps; i++) {
 273                if (rec->evlist->mmap[i].base) {
 274                        if (record__mmap_read(rec, i) != 0) {
 275                                rc = -1;
 276                                goto out;
 277                        }
 278                }
 279        }
 280
 281        /*
 282         * Mark the round finished in case we wrote
 283         * at least one event.
 284         */
 285        if (bytes_written != rec->bytes_written)
 286                rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
 287
 288out:
 289        return rc;
 290}
 291
 292static void record__init_features(struct record *rec)
 293{
 294        struct perf_session *session = rec->session;
 295        int feat;
 296
 297        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
 298                perf_header__set_feat(&session->header, feat);
 299
 300        if (rec->no_buildid)
 301                perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
 302
 303        if (!have_tracepoints(&rec->evlist->entries))
 304                perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
 305
 306        if (!rec->opts.branch_stack)
 307                perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
 308}
 309
 310static volatile int workload_exec_errno;
 311
 312/*
 313 * perf_evlist__prepare_workload will send a SIGUSR1
 314 * if the fork fails, since we asked by setting its
 315 * want_signal to true.
 316 */
 317static void workload_exec_failed_signal(int signo __maybe_unused,
 318                                        siginfo_t *info,
 319                                        void *ucontext __maybe_unused)
 320{
 321        workload_exec_errno = info->si_value.sival_int;
 322        done = 1;
 323        child_finished = 1;
 324}
 325
 326static int __cmd_record(struct record *rec, int argc, const char **argv)
 327{
 328        int err;
 329        int status = 0;
 330        unsigned long waking = 0;
 331        const bool forks = argc > 0;
 332        struct machine *machine;
 333        struct perf_tool *tool = &rec->tool;
 334        struct record_opts *opts = &rec->opts;
 335        struct perf_data_file *file = &rec->file;
 336        struct perf_session *session;
 337        bool disabled = false, draining = false;
 338        int fd;
 339
 340        rec->progname = argv[0];
 341
 342        atexit(record__sig_exit);
 343        signal(SIGCHLD, sig_handler);
 344        signal(SIGINT, sig_handler);
 345        signal(SIGTERM, sig_handler);
 346
 347        session = perf_session__new(file, false, tool);
 348        if (session == NULL) {
 349                pr_err("Perf session creation failed.\n");
 350                return -1;
 351        }
 352
 353        fd = perf_data_file__fd(file);
 354        rec->session = session;
 355
 356        record__init_features(rec);
 357
 358        if (forks) {
 359                err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
 360                                                    argv, file->is_pipe,
 361                                                    workload_exec_failed_signal);
 362                if (err < 0) {
 363                        pr_err("Couldn't run the workload!\n");
 364                        status = err;
 365                        goto out_delete_session;
 366                }
 367        }
 368
 369        if (record__open(rec) != 0) {
 370                err = -1;
 371                goto out_child;
 372        }
 373
 374        if (!rec->evlist->nr_groups)
 375                perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
 376
 377        if (file->is_pipe) {
 378                err = perf_header__write_pipe(fd);
 379                if (err < 0)
 380                        goto out_child;
 381        } else {
 382                err = perf_session__write_header(session, rec->evlist, fd, false);
 383                if (err < 0)
 384                        goto out_child;
 385        }
 386
 387        if (!rec->no_buildid
 388            && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
 389                pr_err("Couldn't generate buildids. "
 390                       "Use --no-buildid to profile anyway.\n");
 391                err = -1;
 392                goto out_child;
 393        }
 394
 395        machine = &session->machines.host;
 396
 397        if (file->is_pipe) {
 398                err = perf_event__synthesize_attrs(tool, session,
 399                                                   process_synthesized_event);
 400                if (err < 0) {
 401                        pr_err("Couldn't synthesize attrs.\n");
 402                        goto out_child;
 403                }
 404
 405                if (have_tracepoints(&rec->evlist->entries)) {
 406                        /*
 407                         * FIXME err <= 0 here actually means that
 408                         * there were no tracepoints so its not really
 409                         * an error, just that we don't need to
 410                         * synthesize anything.  We really have to
 411                         * return this more properly and also
 412                         * propagate errors that now are calling die()
 413                         */
 414                        err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
 415                                                                  process_synthesized_event);
 416                        if (err <= 0) {
 417                                pr_err("Couldn't record tracing data.\n");
 418                                goto out_child;
 419                        }
 420                        rec->bytes_written += err;
 421                }
 422        }
 423
 424        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 425                                                 machine);
 426        if (err < 0)
 427                pr_err("Couldn't record kernel reference relocation symbol\n"
 428                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
 429                       "Check /proc/kallsyms permission or run as root.\n");
 430
 431        err = perf_event__synthesize_modules(tool, process_synthesized_event,
 432                                             machine);
 433        if (err < 0)
 434                pr_err("Couldn't record kernel module information.\n"
 435                       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
 436                       "Check /proc/modules permission or run as root.\n");
 437
 438        if (perf_guest) {
 439                machines__process_guests(&session->machines,
 440                                         perf_event__synthesize_guest_os, tool);
 441        }
 442
 443        err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
 444                                            process_synthesized_event, opts->sample_address);
 445        if (err != 0)
 446                goto out_child;
 447
 448        if (rec->realtime_prio) {
 449                struct sched_param param;
 450
 451                param.sched_priority = rec->realtime_prio;
 452                if (sched_setscheduler(0, SCHED_FIFO, &param)) {
 453                        pr_err("Could not set realtime priority.\n");
 454                        err = -1;
 455                        goto out_child;
 456                }
 457        }
 458
 459        /*
 460         * When perf is starting the traced process, all the events
 461         * (apart from group members) have enable_on_exec=1 set,
 462         * so don't spoil it by prematurely enabling them.
 463         */
 464        if (!target__none(&opts->target) && !opts->initial_delay)
 465                perf_evlist__enable(rec->evlist);
 466
 467        /*
 468         * Let the child rip
 469         */
 470        if (forks)
 471                perf_evlist__start_workload(rec->evlist);
 472
 473        if (opts->initial_delay) {
 474                usleep(opts->initial_delay * 1000);
 475                perf_evlist__enable(rec->evlist);
 476        }
 477
 478        for (;;) {
 479                int hits = rec->samples;
 480
 481                if (record__mmap_read_all(rec) < 0) {
 482                        err = -1;
 483                        goto out_child;
 484                }
 485
 486                if (hits == rec->samples) {
 487                        if (done || draining)
 488                                break;
 489                        err = perf_evlist__poll(rec->evlist, -1);
 490                        /*
 491                         * Propagate error, only if there's any. Ignore positive
 492                         * number of returned events and interrupt error.
 493                         */
 494                        if (err > 0 || (err < 0 && errno == EINTR))
 495                                err = 0;
 496                        waking++;
 497
 498                        if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
 499                                draining = true;
 500                }
 501
 502                /*
 503                 * When perf is starting the traced process, at the end events
 504                 * die with the process and we wait for that. Thus no need to
 505                 * disable events in this case.
 506                 */
 507                if (done && !disabled && !target__none(&opts->target)) {
 508                        perf_evlist__disable(rec->evlist);
 509                        disabled = true;
 510                }
 511        }
 512
 513        if (forks && workload_exec_errno) {
 514                char msg[STRERR_BUFSIZE];
 515                const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
 516                pr_err("Workload failed: %s\n", emsg);
 517                err = -1;
 518                goto out_child;
 519        }
 520
 521        if (!quiet)
 522                fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
 523
 524out_child:
 525        if (forks) {
 526                int exit_status;
 527
 528                if (!child_finished)
 529                        kill(rec->evlist->workload.pid, SIGTERM);
 530
 531                wait(&exit_status);
 532
 533                if (err < 0)
 534                        status = err;
 535                else if (WIFEXITED(exit_status))
 536                        status = WEXITSTATUS(exit_status);
 537                else if (WIFSIGNALED(exit_status))
 538                        signr = WTERMSIG(exit_status);
 539        } else
 540                status = err;
 541
 542        /* this will be recalculated during process_buildids() */
 543        rec->samples = 0;
 544
 545        if (!err && !file->is_pipe) {
 546                rec->session->header.data_size += rec->bytes_written;
 547
 548                if (!rec->no_buildid)
 549                        process_buildids(rec);
 550                perf_session__write_header(rec->session, rec->evlist, fd, true);
 551        }
 552
 553        if (!err && !quiet) {
 554                char samples[128];
 555
 556                if (rec->samples)
 557                        scnprintf(samples, sizeof(samples),
 558                                  " (%" PRIu64 " samples)", rec->samples);
 559                else
 560                        samples[0] = '\0';
 561
 562                fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n",
 563                        perf_data_file__size(file) / 1024.0 / 1024.0,
 564                        file->path, samples);
 565        }
 566
 567out_delete_session:
 568        perf_session__delete(session);
 569        return status;
 570}
 571
 572#define BRANCH_OPT(n, m) \
 573        { .name = n, .mode = (m) }
 574
 575#define BRANCH_END { .name = NULL }
 576
 577struct branch_mode {
 578        const char *name;
 579        int mode;
 580};
 581
 582static const struct branch_mode branch_modes[] = {
 583        BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
 584        BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
 585        BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
 586        BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
 587        BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
 588        BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
 589        BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
 590        BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
 591        BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
 592        BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
 593        BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
 594        BRANCH_END
 595};
 596
 597static int
 598parse_branch_stack(const struct option *opt, const char *str, int unset)
 599{
 600#define ONLY_PLM \
 601        (PERF_SAMPLE_BRANCH_USER        |\
 602         PERF_SAMPLE_BRANCH_KERNEL      |\
 603         PERF_SAMPLE_BRANCH_HV)
 604
 605        uint64_t *mode = (uint64_t *)opt->value;
 606        const struct branch_mode *br;
 607        char *s, *os = NULL, *p;
 608        int ret = -1;
 609
 610        if (unset)
 611                return 0;
 612
 613        /*
 614         * cannot set it twice, -b + --branch-filter for instance
 615         */
 616        if (*mode)
 617                return -1;
 618
 619        /* str may be NULL in case no arg is passed to -b */
 620        if (str) {
 621                /* because str is read-only */
 622                s = os = strdup(str);
 623                if (!s)
 624                        return -1;
 625
 626                for (;;) {
 627                        p = strchr(s, ',');
 628                        if (p)
 629                                *p = '\0';
 630
 631                        for (br = branch_modes; br->name; br++) {
 632                                if (!strcasecmp(s, br->name))
 633                                        break;
 634                        }
 635                        if (!br->name) {
 636                                ui__warning("unknown branch filter %s,"
 637                                            " check man page\n", s);
 638                                goto error;
 639                        }
 640
 641                        *mode |= br->mode;
 642
 643                        if (!p)
 644                                break;
 645
 646                        s = p + 1;
 647                }
 648        }
 649        ret = 0;
 650
 651        /* default to any branch */
 652        if ((*mode & ~ONLY_PLM) == 0) {
 653                *mode = PERF_SAMPLE_BRANCH_ANY;
 654        }
 655error:
 656        free(os);
 657        return ret;
 658}
 659
 660static void callchain_debug(void)
 661{
 662        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
 663
 664        pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
 665
 666        if (callchain_param.record_mode == CALLCHAIN_DWARF)
 667                pr_debug("callchain: stack dump size %d\n",
 668                         callchain_param.dump_size);
 669}
 670
 671int record_parse_callchain_opt(const struct option *opt __maybe_unused,
 672                               const char *arg,
 673                               int unset)
 674{
 675        int ret;
 676
 677        callchain_param.enabled = !unset;
 678
 679        /* --no-call-graph */
 680        if (unset) {
 681                callchain_param.record_mode = CALLCHAIN_NONE;
 682                pr_debug("callchain: disabled\n");
 683                return 0;
 684        }
 685
 686        ret = parse_callchain_record_opt(arg);
 687        if (!ret)
 688                callchain_debug();
 689
 690        return ret;
 691}
 692
 693int record_callchain_opt(const struct option *opt __maybe_unused,
 694                         const char *arg __maybe_unused,
 695                         int unset __maybe_unused)
 696{
 697        callchain_param.enabled = true;
 698
 699        if (callchain_param.record_mode == CALLCHAIN_NONE)
 700                callchain_param.record_mode = CALLCHAIN_FP;
 701
 702        callchain_debug();
 703        return 0;
 704}
 705
 706static int perf_record_config(const char *var, const char *value, void *cb)
 707{
 708        if (!strcmp(var, "record.call-graph"))
 709                var = "call-graph.record-mode"; /* fall-through */
 710
 711        return perf_default_config(var, value, cb);
 712}
 713
 714struct clockid_map {
 715        const char *name;
 716        int clockid;
 717};
 718
 719#define CLOCKID_MAP(n, c)       \
 720        { .name = n, .clockid = (c), }
 721
 722#define CLOCKID_END     { .name = NULL, }
 723
 724
 725/*
 726 * Add the missing ones, we need to build on many distros...
 727 */
 728#ifndef CLOCK_MONOTONIC_RAW
 729#define CLOCK_MONOTONIC_RAW 4
 730#endif
 731#ifndef CLOCK_BOOTTIME
 732#define CLOCK_BOOTTIME 7
 733#endif
 734#ifndef CLOCK_TAI
 735#define CLOCK_TAI 11
 736#endif
 737
 738static const struct clockid_map clockids[] = {
 739        /* available for all events, NMI safe */
 740        CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
 741        CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
 742
 743        /* available for some events */
 744        CLOCKID_MAP("realtime", CLOCK_REALTIME),
 745        CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
 746        CLOCKID_MAP("tai", CLOCK_TAI),
 747
 748        /* available for the lazy */
 749        CLOCKID_MAP("mono", CLOCK_MONOTONIC),
 750        CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
 751        CLOCKID_MAP("real", CLOCK_REALTIME),
 752        CLOCKID_MAP("boot", CLOCK_BOOTTIME),
 753
 754        CLOCKID_END,
 755};
 756
 757static int parse_clockid(const struct option *opt, const char *str, int unset)
 758{
 759        struct record_opts *opts = (struct record_opts *)opt->value;
 760        const struct clockid_map *cm;
 761        const char *ostr = str;
 762
 763        if (unset) {
 764                opts->use_clockid = 0;
 765                return 0;
 766        }
 767
 768        /* no arg passed */
 769        if (!str)
 770                return 0;
 771
 772        /* no setting it twice */
 773        if (opts->use_clockid)
 774                return -1;
 775
 776        opts->use_clockid = true;
 777
 778        /* if its a number, we're done */
 779        if (sscanf(str, "%d", &opts->clockid) == 1)
 780                return 0;
 781
 782        /* allow a "CLOCK_" prefix to the name */
 783        if (!strncasecmp(str, "CLOCK_", 6))
 784                str += 6;
 785
 786        for (cm = clockids; cm->name; cm++) {
 787                if (!strcasecmp(str, cm->name)) {
 788                        opts->clockid = cm->clockid;
 789                        return 0;
 790                }
 791        }
 792
 793        opts->use_clockid = false;
 794        ui__warning("unknown clockid %s, check man page\n", ostr);
 795        return -1;
 796}
 797
 798static const char * const __record_usage[] = {
 799        "perf record [<options>] [<command>]",
 800        "perf record [<options>] -- <command> [<options>]",
 801        NULL
 802};
 803const char * const *record_usage = __record_usage;
 804
 805/*
 806 * XXX Ideally would be local to cmd_record() and passed to a record__new
 807 * because we need to have access to it in record__exit, that is called
 808 * after cmd_record() exits, but since record_options need to be accessible to
 809 * builtin-script, leave it here.
 810 *
 811 * At least we don't ouch it in all the other functions here directly.
 812 *
 813 * Just say no to tons of global variables, sigh.
 814 */
 815static struct record record = {
 816        .opts = {
 817                .sample_time         = true,
 818                .mmap_pages          = UINT_MAX,
 819                .user_freq           = UINT_MAX,
 820                .user_interval       = ULLONG_MAX,
 821                .freq                = 4000,
 822                .target              = {
 823                        .uses_mmap   = true,
 824                        .default_per_cpu = true,
 825                },
 826        },
 827        .tool = {
 828                .sample         = process_sample_event,
 829                .fork           = perf_event__process_fork,
 830                .comm           = perf_event__process_comm,
 831                .mmap           = perf_event__process_mmap,
 832                .mmap2          = perf_event__process_mmap2,
 833        },
 834};
 835
 836#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "
 837
 838#ifdef HAVE_DWARF_UNWIND_SUPPORT
 839const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr";
 840#else
 841const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr";
 842#endif
 843
 844/*
 845 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 846 * with it and switch to use the library functions in perf_evlist that came
 847 * from builtin-record.c, i.e. use record_opts,
 848 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 849 * using pipes, etc.
 850 */
 851struct option __record_options[] = {
            /*
             * Option table for 'perf record'. Entries either store straight into
             * the file-scope 'record' instance or hand a pointer into it to a
             * parse callback; OPT_END() terminates the table.
             */
 852        OPT_CALLBACK('e', "event", &record.evlist, "event",
 853                     "event selector. use 'perf list' to list available events",
 854                     parse_events_option),
 855        OPT_CALLBACK(0, "filter", &record.evlist, "filter",
 856                     "event filter", parse_filter),
 857        OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
 858                    "record events on existing process id"),
 859        OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
 860                    "record events on existing thread id"),
 861        OPT_INTEGER('r', "realtime", &record.realtime_prio,
 862                    "collect data with this RT SCHED_FIFO priority"),
 863        OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
 864                    "collect data without buffering"),
 865        OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
 866                    "collect raw sample records from all opened counters"),
 867        OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
 868                            "system-wide collection from all CPUs"),
 869        OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
 870                    "list of cpus to monitor"),
 871        OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
 872        OPT_STRING('o', "output", &record.file.path, "file",
 873                    "output file name"),
            /*
             * OPT_BOOLEAN_SET is also given no_inherit_set; cmd_record() tests
             * that flag before forcing no_inherit for tid targets.
             */
 874        OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
 875                        &record.opts.no_inherit_set,
 876                        "child tasks do not inherit counters"),
 877        OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
 878        OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
 879                     "number of mmap data pages",
 880                     perf_evlist__parse_mmap_pages),
 881        OPT_BOOLEAN(0, "group", &record.opts.group,
 882                    "put the counters into a counter group"),
            /*
             * Two call-graph spellings: -g is declared with the NOOPT variant,
             * --call-graph is declared with a "mode[,dump_size]" argument.
             */
 883        OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
 884                           NULL, "enables call-graph recording" ,
 885                           &record_callchain_opt),
 886        OPT_CALLBACK(0, "call-graph", &record.opts,
 887                     "mode[,dump_size]", record_callchain_help,
 888                     &record_parse_callchain_opt),
 889        OPT_INCR('v', "verbose", &verbose,
 890                    "be more verbose (show counter open errors, etc)"),
 891        OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
 892        OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
 893                    "per thread counts"),
 894        OPT_BOOLEAN('d', "data", &record.opts.sample_address,
 895                    "Sample addresses"),
 896        OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
 897        OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
 898        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
 899                    "don't sample"),
 900        OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
 901                    "do not update the buildid cache"),
 902        OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
 903                    "do not collect buildids in perf.data"),
 904        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
 905                     "monitor event in cgroup name only",
 906                     parse_cgroups),
 907        OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
 908                  "ms to wait before starting measurement after program start"),
 909        OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
 910                   "user to profile"),
 911
            /* -b and -j share the parse_branch_stack callback and opts.branch_stack. */
 912        OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
 913                     "branch any", "sample any taken branches",
 914                     parse_branch_stack),
 915
 916        OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
 917                     "branch filter mask", "branch stack filter modes",
 918                     parse_branch_stack),
 919        OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
 920                    "sample by weight (on special events only)"),
 921        OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
 922                    "sample transaction flags (special events only)"),
 923        OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
 924                    "use per-thread mmaps"),
 925        OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs,
 926                    "Sample machine registers on interrupt"),
 927        OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
 928                    "Record running/enabled time of read (:S) events"),
 929        OPT_CALLBACK('k', "clockid", &record.opts,
 930        "clockid", "clockid to use for events, see clock_gettime()",
 931        parse_clockid),
 932        OPT_END()
 933};
 934
    /* Non-static pointer to the table; cmd_record() passes it to parse_options(). */
 935struct option *record_options = __record_options;
 936
 937int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 938{
            /*
             * Entry point of 'perf record': allocates the event list, loads the
             * config and the command-line options into the file-scope 'record'
             * instance, checks the target, then hands over to __cmd_record().
             *
             * Returns the result of __cmd_record(), or a negative errno-style
             * value when setup fails. Invalid usage is reported through
             * usage_with_options(), which is expected not to return (confirm
             * in parse-options).
             */
 939        int err = -ENOMEM;
 940        struct record *rec = &record;
 941        char errbuf[BUFSIZ];
 942
 943        rec->evlist = perf_evlist__new();
 944        if (rec->evlist == NULL)
 945                return -ENOMEM;
 946
 947        perf_config(perf_record_config, rec);
 948
 949        argc = parse_options(argc, argv, record_options, record_usage,
 950                            PARSE_OPT_STOP_AT_NON_OPTION);
            /* Neither a workload command line nor a target was given. */
 951        if (!argc && target__none(&rec->opts.target))
 952                usage_with_options(record_usage, record_options);
 953
 954        if (nr_cgroups && !rec->opts.target.system_wide) {
 955                ui__error("cgroup monitoring only available in"
 956                          " system-wide mode\n");
 957                usage_with_options(record_usage, record_options);
 958        }
 959
 960        symbol__init(NULL);
 961
 962        if (symbol_conf.kptr_restrict)
 963                pr_warning(
 964"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
 965"check /proc/sys/kernel/kptr_restrict.\n\n"
 966"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
 967"file is not found in the buildid cache or in the vmlinux path.\n\n"
 968"Samples in kernel modules won't be resolved at all.\n\n"
 969"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
 970"even with a suitable vmlinux or kallsyms file.\n\n");
 971
 972        if (rec->no_buildid_cache || rec->no_buildid)
 973                disable_buildid_cache();
 974
            /* No event was selected on the command line: add the default one. */
 975        if (rec->evlist->nr_entries == 0 &&
 976            perf_evlist__add_default(rec->evlist) < 0) {
 977                pr_err("Not enough memory for event selector list\n");
 978                goto out_symbol_exit;
 979        }
 980
            /* With a tid target, default to no_inherit unless it was set explicitly. */
 981        if (rec->opts.target.tid && !rec->opts.no_inherit_set)
 982                rec->opts.no_inherit = true;
 983
            /* Validation problems are only warned about; err is overwritten below. */
 984        err = target__validate(&rec->opts.target);
 985        if (err) {
 986                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
 987                ui__warning("%s", errbuf);
 988        }
 989
 990        err = target__parse_uid(&rec->opts.target);
 991        if (err) {
 992                int saved_errno = errno;
 993
 994                target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
 995                ui__error("%s", errbuf);
 996
                    /*
                     * errno is only a best-effort hint here: if it was left
                     * at 0, -saved_errno would make this failure return 0
                     * (success), so fall back to -EINVAL.
                     */
 997                err = saved_errno ? -saved_errno : -EINVAL;
 998                goto out_symbol_exit;
 999        }
1000
1001        err = -ENOMEM;
1002        if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
1003                usage_with_options(record_usage, record_options);
1004
1005        if (record_opts__config(&rec->opts)) {
1006                err = -EINVAL;
1007                goto out_symbol_exit;
1008        }
1009
1010        err = __cmd_record(&record, argc, argv);
1011out_symbol_exit:
            /* Common exit path: release the event list and the symbol subsystem. */
1012        perf_evlist__delete(rec->evlist);
1013        symbol__exit();
1014        return err;
1015}
1016