linux/tools/perf/util/evsel.c
   1/*
   2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
   3 *
   4 * Parts came from builtin-{top,stat,record}.c, see those files for further
   5 * copyright notes.
   6 *
   7 * Released under the GPL v2. (and only v2, not any later version)
   8 */
   9
  10#include <byteswap.h>
  11#include <errno.h>
  12#include <inttypes.h>
  13#include <linux/bitops.h>
  14#include <api/fs/fs.h>
  15#include <api/fs/tracing_path.h>
  16#include <traceevent/event-parse.h>
  17#include <linux/hw_breakpoint.h>
  18#include <linux/perf_event.h>
  19#include <linux/compiler.h>
  20#include <linux/err.h>
  21#include <sys/ioctl.h>
  22#include <sys/resource.h>
  23#include <sys/types.h>
  24#include <dirent.h>
  25#include "asm/bug.h"
  26#include "callchain.h"
  27#include "cgroup.h"
  28#include "event.h"
  29#include "evsel.h"
  30#include "evlist.h"
  31#include "util.h"
  32#include "cpumap.h"
  33#include "thread_map.h"
  34#include "target.h"
  35#include "perf_regs.h"
  36#include "debug.h"
  37#include "trace-event.h"
  38#include "stat.h"
  39#include "memswap.h"
  40#include "util/parse-branch-options.h"
  41
  42#include "sane_ctype.h"
  43
  44struct perf_missing_features perf_missing_features;
  45
  46static clockid_t clockid;
  47
  48static int perf_evsel__no_extra_init(struct perf_evsel *evsel __maybe_unused)
  49{
  50        return 0;
  51}
  52
  53void __weak test_attr__ready(void) { }
  54
  55static void perf_evsel__no_extra_fini(struct perf_evsel *evsel __maybe_unused)
  56{
  57}
  58
  59static struct {
  60        size_t  size;
  61        int     (*init)(struct perf_evsel *evsel);
  62        void    (*fini)(struct perf_evsel *evsel);
  63} perf_evsel__object = {
  64        .size = sizeof(struct perf_evsel),
  65        .init = perf_evsel__no_extra_init,
  66        .fini = perf_evsel__no_extra_fini,
  67};
  68
  69int perf_evsel__object_config(size_t object_size,
  70                              int (*init)(struct perf_evsel *evsel),
  71                              void (*fini)(struct perf_evsel *evsel))
  72{
  73
  74        if (object_size == 0)
  75                goto set_methods;
  76
  77        if (perf_evsel__object.size > object_size)
  78                return -EINVAL;
  79
  80        perf_evsel__object.size = object_size;
  81
  82set_methods:
  83        if (init != NULL)
  84                perf_evsel__object.init = init;
  85
  86        if (fini != NULL)
  87                perf_evsel__object.fini = fini;
  88
  89        return 0;
  90}
  91
  92#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
  93
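/*
 * Compute the fixed part of the sample record size implied by @sample_type:
 * each bit set in the PERF_SAMPLE_MASK portion contributes one u64.
 */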
  94int __perf_evsel__sample_size(u64 sample_type)
  95{
  96        u64 mask = sample_type & PERF_SAMPLE_MASK;
  97        int size = 0;
  98        int i;
  99
 100        for (i = 0; i < 64; i++) {
 101                if (mask & (1ULL << i))
 102                        size++;
 103        }
 104
 105        size *= sizeof(u64);
 106
 107        return size;
 108}
 109
 110/**
 111 * __perf_evsel__calc_id_pos - calculate id_pos.
 112 * @sample_type: sample type
 113 *
 114 * This function returns the position of the event id (PERF_SAMPLE_ID or
 115 * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
 116 * sample_event.
 117 */
 118static int __perf_evsel__calc_id_pos(u64 sample_type)
 119{
 120        int idx = 0;
 121
 122        if (sample_type & PERF_SAMPLE_IDENTIFIER)
 123                return 0;
 124
 125        if (!(sample_type & PERF_SAMPLE_ID))
 126                return -1;
 127
 128        if (sample_type & PERF_SAMPLE_IP)
 129                idx += 1;
 130
 131        if (sample_type & PERF_SAMPLE_TID)
 132                idx += 1;
 133
 134        if (sample_type & PERF_SAMPLE_TIME)
 135                idx += 1;
 136
 137        if (sample_type & PERF_SAMPLE_ADDR)
 138                idx += 1;
 139
 140        return idx;
 141}
 142
 143/**
 144 * __perf_evsel__calc_is_pos - calculate is_pos.
 145 * @sample_type: sample type
 146 *
 147 * This function returns the position (counting backwards) of the event id
 148 * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if
 149 * sample_id_all is used there is an id sample appended to non-sample events.
 150 */
 151static int __perf_evsel__calc_is_pos(u64 sample_type)
 152{
 153        int idx = 1;
 154
 155        if (sample_type & PERF_SAMPLE_IDENTIFIER)
 156                return 1;
 157
 158        if (!(sample_type & PERF_SAMPLE_ID))
 159                return -1;
 160
 161        if (sample_type & PERF_SAMPLE_CPU)
 162                idx += 1;
 163
 164        if (sample_type & PERF_SAMPLE_STREAM_ID)
 165                idx += 1;
 166
 167        return idx;
 168}
 169
 170void perf_evsel__calc_id_pos(struct perf_evsel *evsel)
 171{
 172        evsel->id_pos = __perf_evsel__calc_id_pos(evsel->attr.sample_type);
 173        evsel->is_pos = __perf_evsel__calc_is_pos(evsel->attr.sample_type);
 174}
 175
 176void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
 177                                  enum perf_event_sample_format bit)
 178{
 179        if (!(evsel->attr.sample_type & bit)) {
 180                evsel->attr.sample_type |= bit;
 181                evsel->sample_size += sizeof(u64);
 182                perf_evsel__calc_id_pos(evsel);
 183        }
 184}
 185
 186void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
 187                                    enum perf_event_sample_format bit)
 188{
 189        if (evsel->attr.sample_type & bit) {
 190                evsel->attr.sample_type &= ~bit;
 191                evsel->sample_size -= sizeof(u64);
 192                perf_evsel__calc_id_pos(evsel);
 193        }
 194}
 195
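/*
 * Prefer PERF_SAMPLE_IDENTIFIER when the kernel supports it, otherwise fall
 * back to PERF_SAMPLE_ID, and request PERF_FORMAT_ID so reads carry the id too.
 */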
 196void perf_evsel__set_sample_id(struct perf_evsel *evsel,
 197                               bool can_sample_identifier)
 198{
 199        if (can_sample_identifier) {
 200                perf_evsel__reset_sample_bit(evsel, ID);
 201                perf_evsel__set_sample_bit(evsel, IDENTIFIER);
 202        } else {
 203                perf_evsel__set_sample_bit(evsel, ID);
 204        }
 205        evsel->attr.read_format |= PERF_FORMAT_ID;
 206}
 207
 208/**
 209 * perf_evsel__is_function_event - Return whether given evsel is a function
 210 * trace event
 211 *
  212 * @evsel: evsel selector to be tested
  213 *
  214 * Return %true if the event is a function trace event
 215 */
 216bool perf_evsel__is_function_event(struct perf_evsel *evsel)
 217{
 218#define FUNCTION_EVENT "ftrace:function"
 219
 220        return evsel->name &&
 221               !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT));
 222
 223#undef FUNCTION_EVENT
 224}
 225
 226void perf_evsel__init(struct perf_evsel *evsel,
 227                      struct perf_event_attr *attr, int idx)
 228{
 229        evsel->idx         = idx;
 230        evsel->tracking    = !idx;
 231        evsel->attr        = *attr;
 232        evsel->leader      = evsel;
 233        evsel->unit        = "";
 234        evsel->scale       = 1.0;
 235        evsel->evlist      = NULL;
 236        evsel->bpf_fd      = -1;
 237        INIT_LIST_HEAD(&evsel->node);
 238        INIT_LIST_HEAD(&evsel->config_terms);
 239        perf_evsel__object.init(evsel);
 240        evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
 241        perf_evsel__calc_id_pos(evsel);
 242        evsel->cmdline_group_boundary = false;
 243        evsel->metric_expr   = NULL;
 244        evsel->metric_name   = NULL;
 245        evsel->metric_events = NULL;
 246        evsel->collect_stat  = false;
 247        evsel->pmu_name      = NULL;
 248}
 249
 250struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
 251{
 252        struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
 253
 254        if (!evsel)
 255                return NULL;
 256        perf_evsel__init(evsel, attr, idx);
 257
 258        if (perf_evsel__is_bpf_output(evsel)) {
 259                evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
  260                                            PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD);
 261                evsel->attr.sample_period = 1;
 262        }
 263
 264        if (perf_evsel__is_clock(evsel)) {
 265                /*
  266                 * The evsel->unit points to the static alias->unit,
  267                 * so it is OK to use a static string here.
 268                 */
 269                static const char *unit = "msec";
 270
 271                evsel->unit = unit;
 272                evsel->scale = 1e-6;
 273        }
 274
 275        return evsel;
 276}
 277
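/*
 * Kernel profiling is allowed for root or when perf_event_paranoid is at
 * its most permissive setting (-1).
 */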
 278static bool perf_event_can_profile_kernel(void)
 279{
 280        return geteuid() == 0 || perf_event_paranoid() == -1;
 281}
 282
 283struct perf_evsel *perf_evsel__new_cycles(bool precise)
 284{
 285        struct perf_event_attr attr = {
 286                .type   = PERF_TYPE_HARDWARE,
 287                .config = PERF_COUNT_HW_CPU_CYCLES,
 288                .exclude_kernel = !perf_event_can_profile_kernel(),
 289        };
 290        struct perf_evsel *evsel;
 291
 292        event_attr_init(&attr);
 293
 294        if (!precise)
 295                goto new_event;
 296        /*
  297         * Unnamed union member, not supported as a struct member named
  298         * initializer in older compilers such as gcc 4.4.7.
 299         *
 300         * Just for probing the precise_ip:
 301         */
 302        attr.sample_period = 1;
 303
 304        perf_event_attr__set_max_precise_ip(&attr);
 305        /*
 306         * Now let the usual logic to set up the perf_event_attr defaults
 307         * to kick in when we return and before perf_evsel__open() is called.
 308         */
 309        attr.sample_period = 0;
 310new_event:
 311        evsel = perf_evsel__new(&attr);
 312        if (evsel == NULL)
 313                goto out;
 314
 315        /* use asprintf() because free(evsel) assumes name is allocated */
 316        if (asprintf(&evsel->name, "cycles%s%s%.*s",
 317                     (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
 318                     attr.exclude_kernel ? "u" : "",
 319                     attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0)
 320                goto error_free;
 321out:
 322        return evsel;
 323error_free:
 324        perf_evsel__delete(evsel);
 325        evsel = NULL;
 326        goto out;
 327}
 328
 329/*
 330 * Returns pointer with encoded error via <linux/err.h> interface.
 331 */
 332struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx)
 333{
 334        struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
 335        int err = -ENOMEM;
 336
 337        if (evsel == NULL) {
 338                goto out_err;
 339        } else {
 340                struct perf_event_attr attr = {
 341                        .type          = PERF_TYPE_TRACEPOINT,
 342                        .sample_type   = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
 343                                          PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
 344                };
 345
 346                if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
 347                        goto out_free;
 348
 349                evsel->tp_format = trace_event__tp_format(sys, name);
 350                if (IS_ERR(evsel->tp_format)) {
 351                        err = PTR_ERR(evsel->tp_format);
 352                        goto out_free;
 353                }
 354
 355                event_attr_init(&attr);
 356                attr.config = evsel->tp_format->id;
 357                attr.sample_period = 1;
 358                perf_evsel__init(evsel, &attr, idx);
 359        }
 360
 361        return evsel;
 362
 363out_free:
 364        zfree(&evsel->name);
 365        free(evsel);
 366out_err:
 367        return ERR_PTR(err);
 368}
 369
 370const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
 371        "cycles",
 372        "instructions",
 373        "cache-references",
 374        "cache-misses",
 375        "branches",
 376        "branch-misses",
 377        "bus-cycles",
 378        "stalled-cycles-frontend",
 379        "stalled-cycles-backend",
 380        "ref-cycles",
 381};
 382
 383static const char *__perf_evsel__hw_name(u64 config)
 384{
 385        if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
 386                return perf_evsel__hw_names[config];
 387
 388        return "unknown-hardware";
 389}
 390
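/*
 * Append the event name's modifier suffix (e.g. ":u", ":ppp", ":G") that
 * corresponds to the exclude_* and precise_ip settings in the event's attr.
 */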
 391static int perf_evsel__add_modifiers(struct perf_evsel *evsel, char *bf, size_t size)
 392{
 393        int colon = 0, r = 0;
 394        struct perf_event_attr *attr = &evsel->attr;
 395        bool exclude_guest_default = false;
 396
 397#define MOD_PRINT(context, mod) do {                                    \
 398                if (!attr->exclude_##context) {                         \
 399                        if (!colon) colon = ++r;                        \
 400                        r += scnprintf(bf + r, size - r, "%c", mod);    \
 401                } } while(0)
 402
 403        if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
 404                MOD_PRINT(kernel, 'k');
 405                MOD_PRINT(user, 'u');
 406                MOD_PRINT(hv, 'h');
 407                exclude_guest_default = true;
 408        }
 409
 410        if (attr->precise_ip) {
 411                if (!colon)
 412                        colon = ++r;
 413                r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
 414                exclude_guest_default = true;
 415        }
 416
 417        if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
 418                MOD_PRINT(host, 'H');
 419                MOD_PRINT(guest, 'G');
 420        }
 421#undef MOD_PRINT
 422        if (colon)
 423                bf[colon - 1] = ':';
 424        return r;
 425}
 426
 427static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
 428{
 429        int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->attr.config));
 430        return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 431}
 432
 433const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
 434        "cpu-clock",
 435        "task-clock",
 436        "page-faults",
 437        "context-switches",
 438        "cpu-migrations",
 439        "minor-faults",
 440        "major-faults",
 441        "alignment-faults",
 442        "emulation-faults",
 443        "dummy",
 444};
 445
 446static const char *__perf_evsel__sw_name(u64 config)
 447{
 448        if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
 449                return perf_evsel__sw_names[config];
 450        return "unknown-software";
 451}
 452
 453static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size)
 454{
 455        int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->attr.config));
 456        return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 457}
 458
 459static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
 460{
 461        int r;
 462
 463        r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);
 464
 465        if (type & HW_BREAKPOINT_R)
 466                r += scnprintf(bf + r, size - r, "r");
 467
 468        if (type & HW_BREAKPOINT_W)
 469                r += scnprintf(bf + r, size - r, "w");
 470
 471        if (type & HW_BREAKPOINT_X)
 472                r += scnprintf(bf + r, size - r, "x");
 473
 474        return r;
 475}
 476
 477static int perf_evsel__bp_name(struct perf_evsel *evsel, char *bf, size_t size)
 478{
 479        struct perf_event_attr *attr = &evsel->attr;
 480        int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
 481        return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 482}
 483
 484const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
 485                                [PERF_EVSEL__MAX_ALIASES] = {
 486 { "L1-dcache", "l1-d",         "l1d",          "L1-data",              },
 487 { "L1-icache", "l1-i",         "l1i",          "L1-instruction",       },
 488 { "LLC",       "L2",                                                   },
 489 { "dTLB",      "d-tlb",        "Data-TLB",                             },
 490 { "iTLB",      "i-tlb",        "Instruction-TLB",                      },
 491 { "branch",    "branches",     "bpu",          "btb",          "bpc",  },
 492 { "node",                                                              },
 493};
 494
 495const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
 496                                   [PERF_EVSEL__MAX_ALIASES] = {
 497 { "load",      "loads",        "read",                                 },
 498 { "store",     "stores",       "write",                                },
 499 { "prefetch",  "prefetches",   "speculative-read", "speculative-load", },
 500};
 501
 502const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
 503                                       [PERF_EVSEL__MAX_ALIASES] = {
 504 { "refs",      "Reference",    "ops",          "access",               },
 505 { "misses",    "miss",                                                 },
 506};
 507
 508#define C(x)            PERF_COUNT_HW_CACHE_##x
 509#define CACHE_READ      (1 << C(OP_READ))
 510#define CACHE_WRITE     (1 << C(OP_WRITE))
 511#define CACHE_PREFETCH  (1 << C(OP_PREFETCH))
 512#define COP(x)          (1 << x)
 513
 514/*
  515 * cache operation stat
 516 * L1I : Read and prefetch only
 517 * ITLB and BPU : Read-only
 518 */
 519static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
 520 [C(L1D)]       = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 521 [C(L1I)]       = (CACHE_READ | CACHE_PREFETCH),
 522 [C(LL)]        = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 523 [C(DTLB)]      = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 524 [C(ITLB)]      = (CACHE_READ),
 525 [C(BPU)]       = (CACHE_READ),
 526 [C(NODE)]      = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 527};
 528
 529bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
 530{
 531        if (perf_evsel__hw_cache_stat[type] & COP(op))
 532                return true;    /* valid */
 533        else
 534                return false;   /* invalid */
 535}
 536
 537int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
 538                                            char *bf, size_t size)
 539{
 540        if (result) {
 541                return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
 542                                 perf_evsel__hw_cache_op[op][0],
 543                                 perf_evsel__hw_cache_result[result][0]);
 544        }
 545
 546        return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
 547                         perf_evsel__hw_cache_op[op][1]);
 548}
 549
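/*
 * Decode a PERF_TYPE_HW_CACHE config value: bits 0-7 select the cache,
 * bits 8-15 the operation and bits 16-23 the result, then render the name.
 */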
 550static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
 551{
 552        u8 op, result, type = (config >>  0) & 0xff;
 553        const char *err = "unknown-ext-hardware-cache-type";
 554
 555        if (type >= PERF_COUNT_HW_CACHE_MAX)
 556                goto out_err;
 557
 558        op = (config >>  8) & 0xff;
 559        err = "unknown-ext-hardware-cache-op";
 560        if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
 561                goto out_err;
 562
 563        result = (config >> 16) & 0xff;
 564        err = "unknown-ext-hardware-cache-result";
 565        if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 566                goto out_err;
 567
 568        err = "invalid-cache";
 569        if (!perf_evsel__is_cache_op_valid(type, op))
 570                goto out_err;
 571
 572        return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
 573out_err:
 574        return scnprintf(bf, size, "%s", err);
 575}
 576
 577static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t size)
 578{
 579        int ret = __perf_evsel__hw_cache_name(evsel->attr.config, bf, size);
 580        return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
 581}
 582
 583static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size)
 584{
 585        int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config);
 586        return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
 587}
 588
 589const char *perf_evsel__name(struct perf_evsel *evsel)
 590{
 591        char bf[128];
 592
 593        if (evsel->name)
 594                return evsel->name;
 595
 596        switch (evsel->attr.type) {
 597        case PERF_TYPE_RAW:
 598                perf_evsel__raw_name(evsel, bf, sizeof(bf));
 599                break;
 600
 601        case PERF_TYPE_HARDWARE:
 602                perf_evsel__hw_name(evsel, bf, sizeof(bf));
 603                break;
 604
 605        case PERF_TYPE_HW_CACHE:
 606                perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
 607                break;
 608
 609        case PERF_TYPE_SOFTWARE:
 610                perf_evsel__sw_name(evsel, bf, sizeof(bf));
 611                break;
 612
 613        case PERF_TYPE_TRACEPOINT:
 614                scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
 615                break;
 616
 617        case PERF_TYPE_BREAKPOINT:
 618                perf_evsel__bp_name(evsel, bf, sizeof(bf));
 619                break;
 620
 621        default:
 622                scnprintf(bf, sizeof(bf), "unknown attr type: %d",
 623                          evsel->attr.type);
 624                break;
 625        }
 626
 627        evsel->name = strdup(bf);
 628
 629        return evsel->name ?: "unknown";
 630}
 631
 632const char *perf_evsel__group_name(struct perf_evsel *evsel)
 633{
 634        return evsel->group_name ?: "anon group";
 635}
 636
 637/*
 638 * Returns the group details for the specified leader,
  639 * with the following rules:
 640 *
 641 *  For record -e '{cycles,instructions}'
 642 *    'anon group { cycles:u, instructions:u }'
 643 *
 644 *  For record -e 'cycles,instructions' and report --group
 645 *    'cycles:u, instructions:u'
 646 */
 647int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
 648{
 649        int ret = 0;
 650        struct perf_evsel *pos;
 651        const char *group_name = perf_evsel__group_name(evsel);
 652
 653        if (!evsel->forced_leader)
 654                ret = scnprintf(buf, size, "%s { ", group_name);
 655
 656        ret += scnprintf(buf + ret, size - ret, "%s",
 657                         perf_evsel__name(evsel));
 658
 659        for_each_group_member(pos, evsel)
 660                ret += scnprintf(buf + ret, size - ret, ", %s",
 661                                 perf_evsel__name(pos));
 662
 663        if (!evsel->forced_leader)
 664                ret += scnprintf(buf + ret, size - ret, " }");
 665
 666        return ret;
 667}
 668
 669static void __perf_evsel__config_callchain(struct perf_evsel *evsel,
 670                                           struct record_opts *opts,
 671                                           struct callchain_param *param)
 672{
 673        bool function = perf_evsel__is_function_event(evsel);
 674        struct perf_event_attr *attr = &evsel->attr;
 675
 676        perf_evsel__set_sample_bit(evsel, CALLCHAIN);
 677
 678        attr->sample_max_stack = param->max_stack;
 679
 680        if (param->record_mode == CALLCHAIN_LBR) {
 681                if (!opts->branch_stack) {
 682                        if (attr->exclude_user) {
 683                                pr_warning("LBR callstack option is only available "
 684                                           "to get user callchain information. "
 685                                           "Falling back to framepointers.\n");
 686                        } else {
 687                                perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
 688                                attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
 689                                                        PERF_SAMPLE_BRANCH_CALL_STACK |
 690                                                        PERF_SAMPLE_BRANCH_NO_CYCLES |
 691                                                        PERF_SAMPLE_BRANCH_NO_FLAGS;
 692                        }
 693                } else
 694                         pr_warning("Cannot use LBR callstack with branch stack. "
 695                                    "Falling back to framepointers.\n");
 696        }
 697
 698        if (param->record_mode == CALLCHAIN_DWARF) {
 699                if (!function) {
 700                        perf_evsel__set_sample_bit(evsel, REGS_USER);
 701                        perf_evsel__set_sample_bit(evsel, STACK_USER);
 702                        attr->sample_regs_user |= PERF_REGS_MASK;
 703                        attr->sample_stack_user = param->dump_size;
 704                        attr->exclude_callchain_user = 1;
 705                } else {
 706                        pr_info("Cannot use DWARF unwind for function trace event,"
 707                                " falling back to framepointers.\n");
 708                }
 709        }
 710
 711        if (function) {
 712                pr_info("Disabling user space callchains for function trace event.\n");
 713                attr->exclude_callchain_user = 1;
 714        }
 715}
 716
 717void perf_evsel__config_callchain(struct perf_evsel *evsel,
 718                                  struct record_opts *opts,
 719                                  struct callchain_param *param)
 720{
 721        if (param->enabled)
 722                return __perf_evsel__config_callchain(evsel, opts, param);
 723}
 724
 725static void
 726perf_evsel__reset_callgraph(struct perf_evsel *evsel,
 727                            struct callchain_param *param)
 728{
 729        struct perf_event_attr *attr = &evsel->attr;
 730
 731        perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
 732        if (param->record_mode == CALLCHAIN_LBR) {
 733                perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
 734                attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
 735                                              PERF_SAMPLE_BRANCH_CALL_STACK);
 736        }
 737        if (param->record_mode == CALLCHAIN_DWARF) {
 738                perf_evsel__reset_sample_bit(evsel, REGS_USER);
 739                perf_evsel__reset_sample_bit(evsel, STACK_USER);
 740        }
 741}
 742
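/*
 * Walk the per-event config terms attached to @evsel and let them override
 * the global record options: period/freq, time, callgraph, branch stack,
 * stack dump size, max stack depth, inherit and overwrite settings.
 */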
 743static void apply_config_terms(struct perf_evsel *evsel,
 744                               struct record_opts *opts, bool track)
 745{
 746        struct perf_evsel_config_term *term;
 747        struct list_head *config_terms = &evsel->config_terms;
 748        struct perf_event_attr *attr = &evsel->attr;
 749        /* callgraph default */
 750        struct callchain_param param = {
 751                .record_mode = callchain_param.record_mode,
 752        };
 753        u32 dump_size = 0;
 754        int max_stack = 0;
 755        const char *callgraph_buf = NULL;
 756
 757        list_for_each_entry(term, config_terms, list) {
 758                switch (term->type) {
 759                case PERF_EVSEL__CONFIG_TERM_PERIOD:
 760                        if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
 761                                attr->sample_period = term->val.period;
 762                                attr->freq = 0;
 763                                perf_evsel__reset_sample_bit(evsel, PERIOD);
 764                        }
 765                        break;
 766                case PERF_EVSEL__CONFIG_TERM_FREQ:
 767                        if (!(term->weak && opts->user_freq != UINT_MAX)) {
 768                                attr->sample_freq = term->val.freq;
 769                                attr->freq = 1;
 770                                perf_evsel__set_sample_bit(evsel, PERIOD);
 771                        }
 772                        break;
 773                case PERF_EVSEL__CONFIG_TERM_TIME:
 774                        if (term->val.time)
 775                                perf_evsel__set_sample_bit(evsel, TIME);
 776                        else
 777                                perf_evsel__reset_sample_bit(evsel, TIME);
 778                        break;
 779                case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
 780                        callgraph_buf = term->val.callgraph;
 781                        break;
 782                case PERF_EVSEL__CONFIG_TERM_BRANCH:
 783                        if (term->val.branch && strcmp(term->val.branch, "no")) {
 784                                perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
 785                                parse_branch_str(term->val.branch,
 786                                                 &attr->branch_sample_type);
 787                        } else
 788                                perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
 789                        break;
 790                case PERF_EVSEL__CONFIG_TERM_STACK_USER:
 791                        dump_size = term->val.stack_user;
 792                        break;
 793                case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
 794                        max_stack = term->val.max_stack;
 795                        break;
 796                case PERF_EVSEL__CONFIG_TERM_INHERIT:
 797                        /*
  798                         * attr->inherit should have already been set by
  799                         * perf_evsel__config(). If the user explicitly set
  800                         * inherit using config terms, override the global
  801                         * opts->no_inherit setting.
 802                         */
 803                        attr->inherit = term->val.inherit ? 1 : 0;
 804                        break;
 805                case PERF_EVSEL__CONFIG_TERM_OVERWRITE:
 806                        attr->write_backward = term->val.overwrite ? 1 : 0;
 807                        break;
 808                case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
 809                        break;
 810                default:
 811                        break;
 812                }
 813        }
 814
 815        /* User explicitly set per-event callgraph, clear the old setting and reset. */
 816        if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
 817                bool sample_address = false;
 818
 819                if (max_stack) {
 820                        param.max_stack = max_stack;
 821                        if (callgraph_buf == NULL)
 822                                callgraph_buf = "fp";
 823                }
 824
 825                /* parse callgraph parameters */
 826                if (callgraph_buf != NULL) {
 827                        if (!strcmp(callgraph_buf, "no")) {
 828                                param.enabled = false;
 829                                param.record_mode = CALLCHAIN_NONE;
 830                        } else {
 831                                param.enabled = true;
 832                                if (parse_callchain_record(callgraph_buf, &param)) {
 833                                        pr_err("per-event callgraph setting for %s failed. "
 834                                               "Apply callgraph global setting for it\n",
 835                                               evsel->name);
 836                                        return;
 837                                }
 838                                if (param.record_mode == CALLCHAIN_DWARF)
 839                                        sample_address = true;
 840                        }
 841                }
 842                if (dump_size > 0) {
 843                        dump_size = round_up(dump_size, sizeof(u64));
 844                        param.dump_size = dump_size;
 845                }
 846
 847                /* If global callgraph set, clear it */
 848                if (callchain_param.enabled)
 849                        perf_evsel__reset_callgraph(evsel, &callchain_param);
 850
  851                /* set the per-event callgraph */
 852                if (param.enabled) {
 853                        if (sample_address) {
 854                                perf_evsel__set_sample_bit(evsel, ADDR);
 855                                perf_evsel__set_sample_bit(evsel, DATA_SRC);
 856                                evsel->attr.mmap_data = track;
 857                        }
 858                        perf_evsel__config_callchain(evsel, opts, &param);
 859                }
 860        }
 861}
 862
 863static bool is_dummy_event(struct perf_evsel *evsel)
 864{
 865        return (evsel->attr.type == PERF_TYPE_SOFTWARE) &&
 866               (evsel->attr.config == PERF_COUNT_SW_DUMMY);
 867}
 868
 869/*
 870 * The enable_on_exec/disabled value strategy:
 871 *
 872 *  1) For any type of traced program:
 873 *    - all independent events and group leaders are disabled
 874 *    - all group members are enabled
 875 *
 876 *     Group members are ruled by group leaders. They need to
 877 *     be enabled, because the group scheduling relies on that.
 878 *
 879 *  2) For traced programs executed by perf:
 880 *     - all independent events and group leaders have
 881 *       enable_on_exec set
 882 *     - we don't specifically enable or disable any event during
 883 *       the record command
 884 *
 885 *     Independent events and group leaders are initially disabled
 886 *     and get enabled by exec. Group members are ruled by group
 887 *     leaders as stated in 1).
 888 *
 889 *  3) For traced programs attached by perf (pid/tid):
 890 *     - we specifically enable or disable all events during
 891 *       the record command
 892 *
  893 *     When attaching events to an already running traced process we
 894 *     enable/disable events specifically, as there's no
 895 *     initial traced exec call.
 896 */
 897void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 898                        struct callchain_param *callchain)
 899{
 900        struct perf_evsel *leader = evsel->leader;
 901        struct perf_event_attr *attr = &evsel->attr;
 902        int track = evsel->tracking;
 903        bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;
 904
 905        attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
 906        attr->inherit       = !opts->no_inherit;
 907        attr->write_backward = opts->overwrite ? 1 : 0;
 908
 909        perf_evsel__set_sample_bit(evsel, IP);
 910        perf_evsel__set_sample_bit(evsel, TID);
 911
 912        if (evsel->sample_read) {
 913                perf_evsel__set_sample_bit(evsel, READ);
 914
 915                /*
  916                 * We need ID even in the case of a single event, because
  917                 * PERF_SAMPLE_READ processes ID-specific data.
 918                 */
 919                perf_evsel__set_sample_id(evsel, false);
 920
 921                /*
  922                 * Apply the group format only if we belong to a group
  923                 * with more than one member.
 924                 */
 925                if (leader->nr_members > 1) {
 926                        attr->read_format |= PERF_FORMAT_GROUP;
 927                        attr->inherit = 0;
 928                }
 929        }
 930
 931        /*
  932         * Some events get a default sample interval. But keep it
  933         * a weak assumption, overridable by the user.
 934         */
 935        if (!attr->sample_period || (opts->user_freq != UINT_MAX ||
 936                                     opts->user_interval != ULLONG_MAX)) {
 937                if (opts->freq) {
 938                        perf_evsel__set_sample_bit(evsel, PERIOD);
 939                        attr->freq              = 1;
 940                        attr->sample_freq       = opts->freq;
 941                } else {
 942                        attr->sample_period = opts->default_interval;
 943                }
 944        }
 945
 946        /*
  947         * Disable sampling for all group members other than the
  948         * leader in case the leader 'leads' the sampling.
 949         */
 950        if ((leader != evsel) && leader->sample_read) {
 951                attr->freq           = 0;
 952                attr->sample_freq    = 0;
 953                attr->sample_period  = 0;
 954                attr->write_backward = 0;
 955                attr->sample_id_all  = 0;
 956        }
 957
 958        if (opts->no_samples)
 959                attr->sample_freq = 0;
 960
 961        if (opts->inherit_stat) {
 962                evsel->attr.read_format |=
 963                        PERF_FORMAT_TOTAL_TIME_ENABLED |
 964                        PERF_FORMAT_TOTAL_TIME_RUNNING |
 965                        PERF_FORMAT_ID;
 966                attr->inherit_stat = 1;
 967        }
 968
 969        if (opts->sample_address) {
 970                perf_evsel__set_sample_bit(evsel, ADDR);
 971                attr->mmap_data = track;
 972        }
 973
 974        /*
  975         * We don't allow user space callchains for the function trace
  976         * event, due to issues with page faults while tracing the page
  977         * fault handler and its overall tricky nature.
 978         */
 979        if (perf_evsel__is_function_event(evsel))
 980                evsel->attr.exclude_callchain_user = 1;
 981
 982        if (callchain && callchain->enabled && !evsel->no_aux_samples)
 983                perf_evsel__config_callchain(evsel, opts, callchain);
 984
 985        if (opts->sample_intr_regs) {
 986                attr->sample_regs_intr = opts->sample_intr_regs;
 987                perf_evsel__set_sample_bit(evsel, REGS_INTR);
 988        }
 989
 990        if (opts->sample_user_regs) {
 991                attr->sample_regs_user |= opts->sample_user_regs;
 992                perf_evsel__set_sample_bit(evsel, REGS_USER);
 993        }
 994
 995        if (target__has_cpu(&opts->target) || opts->sample_cpu)
 996                perf_evsel__set_sample_bit(evsel, CPU);
 997
 998        /*
  999         * When the user explicitly disabled time, don't force it here.
1000         */
1001        if (opts->sample_time &&
1002            (!perf_missing_features.sample_id_all &&
1003            (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
1004             opts->sample_time_set)))
1005                perf_evsel__set_sample_bit(evsel, TIME);
1006
1007        if (opts->raw_samples && !evsel->no_aux_samples) {
1008                perf_evsel__set_sample_bit(evsel, TIME);
1009                perf_evsel__set_sample_bit(evsel, RAW);
1010                perf_evsel__set_sample_bit(evsel, CPU);
1011        }
1012
1013        if (opts->sample_address)
1014                perf_evsel__set_sample_bit(evsel, DATA_SRC);
1015
1016        if (opts->sample_phys_addr)
1017                perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
1018
1019        if (opts->no_buffering) {
1020                attr->watermark = 0;
1021                attr->wakeup_events = 1;
1022        }
1023        if (opts->branch_stack && !evsel->no_aux_samples) {
1024                perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
1025                attr->branch_sample_type = opts->branch_stack;
1026        }
1027
1028        if (opts->sample_weight)
1029                perf_evsel__set_sample_bit(evsel, WEIGHT);
1030
1031        attr->task  = track;
1032        attr->mmap  = track;
1033        attr->mmap2 = track && !perf_missing_features.mmap2;
1034        attr->comm  = track;
1035
1036        if (opts->record_namespaces)
1037                attr->namespaces  = track;
1038
1039        if (opts->record_switch_events)
1040                attr->context_switch = track;
1041
1042        if (opts->sample_transaction)
1043                perf_evsel__set_sample_bit(evsel, TRANSACTION);
1044
1045        if (opts->running_time) {
1046                evsel->attr.read_format |=
1047                        PERF_FORMAT_TOTAL_TIME_ENABLED |
1048                        PERF_FORMAT_TOTAL_TIME_RUNNING;
1049        }
1050
1051        /*
1052         * XXX see the function comment above
1053         *
1054         * Disabling only independent events or group leaders,
1055         * keeping group members enabled.
1056         */
1057        if (perf_evsel__is_group_leader(evsel))
1058                attr->disabled = 1;
1059
1060        /*
 1061         * Set enable_on_exec for independent events and group
 1062         * leaders for traced programs executed by perf.
1063         */
1064        if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) &&
1065                !opts->initial_delay)
1066                attr->enable_on_exec = 1;
1067
1068        if (evsel->immediate) {
1069                attr->disabled = 0;
1070                attr->enable_on_exec = 0;
1071        }
1072
1073        clockid = opts->clockid;
1074        if (opts->use_clockid) {
1075                attr->use_clockid = 1;
1076                attr->clockid = opts->clockid;
1077        }
1078
1079        if (evsel->precise_max)
1080                perf_event_attr__set_max_precise_ip(attr);
1081
1082        if (opts->all_user) {
1083                attr->exclude_kernel = 1;
1084                attr->exclude_user   = 0;
1085        }
1086
1087        if (opts->all_kernel) {
1088                attr->exclude_kernel = 0;
1089                attr->exclude_user   = 1;
1090        }
1091
1092        if (evsel->own_cpus)
1093                evsel->attr.read_format |= PERF_FORMAT_ID;
1094
1095        /*
 1096         * Apply event-specific term settings,
 1097         * which override any global configuration.
1098         */
1099        apply_config_terms(evsel, opts, track);
1100
1101        evsel->ignore_missing_thread = opts->ignore_missing_thread;
1102
 1103        /* The --period option takes precedence. */
1104        if (opts->period_set) {
1105                if (opts->period)
1106                        perf_evsel__set_sample_bit(evsel, PERIOD);
1107                else
1108                        perf_evsel__reset_sample_bit(evsel, PERIOD);
1109        }
1110
1111        /*
1112         * For initial_delay, a dummy event is added implicitly.
 1113         * The software event will error out with -EOPNOTSUPP
 1114         * if the BRANCH_STACK bit is set.
1115         */
1116        if (opts->initial_delay && is_dummy_event(evsel))
1117                perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
1118}
1119
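/*
 * Allocate the per-cpu/per-thread file descriptor table and mark every
 * slot as not yet opened (-1). System-wide events only need one "thread".
 */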
1120static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
1121{
1122        if (evsel->system_wide)
1123                nthreads = 1;
1124
1125        evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
1126
1127        if (evsel->fd) {
1128                int cpu, thread;
1129                for (cpu = 0; cpu < ncpus; cpu++) {
1130                        for (thread = 0; thread < nthreads; thread++) {
1131                                FD(evsel, cpu, thread) = -1;
1132                        }
1133                }
1134        }
1135
1136        return evsel->fd != NULL ? 0 : -ENOMEM;
1137}
1138
1139static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
1140                          int ioc,  void *arg)
1141{
1142        int cpu, thread;
1143
1144        for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
1145                for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
1146                        int fd = FD(evsel, cpu, thread),
1147                            err = ioctl(fd, ioc, arg);
1148
1149                        if (err)
1150                                return err;
1151                }
1152        }
1153
1154        return 0;
1155}
1156
1157int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
1158{
1159        return perf_evsel__run_ioctl(evsel,
1160                                     PERF_EVENT_IOC_SET_FILTER,
1161                                     (void *)filter);
1162}
1163
1164int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter)
1165{
1166        char *new_filter = strdup(filter);
1167
1168        if (new_filter != NULL) {
1169                free(evsel->filter);
1170                evsel->filter = new_filter;
1171                return 0;
1172        }
1173
1174        return -1;
1175}
1176
1177static int perf_evsel__append_filter(struct perf_evsel *evsel,
1178                                     const char *fmt, const char *filter)
1179{
1180        char *new_filter;
1181
1182        if (evsel->filter == NULL)
1183                return perf_evsel__set_filter(evsel, filter);
1184
1185        if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
1186                free(evsel->filter);
1187                evsel->filter = new_filter;
1188                return 0;
1189        }
1190
1191        return -1;
1192}
1193
1194int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter)
1195{
1196        return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
1197}
1198
1199int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
1200{
1201        return perf_evsel__append_filter(evsel, "%s,%s", filter);
1202}
1203
1204int perf_evsel__enable(struct perf_evsel *evsel)
1205{
1206        return perf_evsel__run_ioctl(evsel,
1207                                     PERF_EVENT_IOC_ENABLE,
1208                                     0);
1209}
1210
1211int perf_evsel__disable(struct perf_evsel *evsel)
1212{
1213        return perf_evsel__run_ioctl(evsel,
1214                                     PERF_EVENT_IOC_DISABLE,
1215                                     0);
1216}
1217
1218int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
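/*
 * Allocate the sample_id xyarray and the flat array of event ids, one entry
 * per cpu/thread pair (a single thread slot for system-wide events).
 */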
1219{
1220        if (ncpus == 0 || nthreads == 0)
1221                return 0;
1222
1223        if (evsel->system_wide)
1224                nthreads = 1;
1225
1226        evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
1227        if (evsel->sample_id == NULL)
1228                return -ENOMEM;
1229
1230        evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
1231        if (evsel->id == NULL) {
1232                xyarray__delete(evsel->sample_id);
1233                evsel->sample_id = NULL;
1234                return -ENOMEM;
1235        }
1236
1237        return 0;
1238}
1239
1240static void perf_evsel__free_fd(struct perf_evsel *evsel)
1241{
1242        xyarray__delete(evsel->fd);
1243        evsel->fd = NULL;
1244}
1245
1246static void perf_evsel__free_id(struct perf_evsel *evsel)
1247{
1248        xyarray__delete(evsel->sample_id);
1249        evsel->sample_id = NULL;
1250        zfree(&evsel->id);
1251}
1252
1253static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
1254{
1255        struct perf_evsel_config_term *term, *h;
1256
1257        list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
1258                list_del(&term->list);
1259                free(term);
1260        }
1261}
1262
1263void perf_evsel__close_fd(struct perf_evsel *evsel)
1264{
1265        int cpu, thread;
1266
1267        for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
1268                for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
1269                        close(FD(evsel, cpu, thread));
1270                        FD(evsel, cpu, thread) = -1;
1271                }
1272}
1273
1274void perf_evsel__exit(struct perf_evsel *evsel)
1275{
1276        assert(list_empty(&evsel->node));
1277        assert(evsel->evlist == NULL);
1278        perf_evsel__free_fd(evsel);
1279        perf_evsel__free_id(evsel);
1280        perf_evsel__free_config_terms(evsel);
1281        cgroup__put(evsel->cgrp);
1282        cpu_map__put(evsel->cpus);
1283        cpu_map__put(evsel->own_cpus);
1284        thread_map__put(evsel->threads);
1285        zfree(&evsel->group_name);
1286        zfree(&evsel->name);
1287        perf_evsel__object.fini(evsel);
1288}
1289
1290void perf_evsel__delete(struct perf_evsel *evsel)
1291{
1292        perf_evsel__exit(evsel);
1293        free(evsel);
1294}
1295
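/*
 * Turn freshly read counts into deltas against the previously saved raw
 * counts (per cpu/thread, or the aggregate when cpu == -1), updating the
 * saved values along the way.
 */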
1296void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
1297                                struct perf_counts_values *count)
1298{
1299        struct perf_counts_values tmp;
1300
1301        if (!evsel->prev_raw_counts)
1302                return;
1303
1304        if (cpu == -1) {
1305                tmp = evsel->prev_raw_counts->aggr;
1306                evsel->prev_raw_counts->aggr = *count;
1307        } else {
1308                tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread);
1309                *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
1310        }
1311
1312        count->val = count->val - tmp.val;
1313        count->ena = count->ena - tmp.ena;
1314        count->run = count->run - tmp.run;
1315}
1316
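/*
 * Scale a counter for time multiplexing: when it never ran the value is
 * zeroed (*pscaled = -1), when it ran for less than the enabled time the
 * value is extrapolated (*pscaled = 1).
 */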
1317void perf_counts_values__scale(struct perf_counts_values *count,
1318                               bool scale, s8 *pscaled)
1319{
1320        s8 scaled = 0;
1321
1322        if (scale) {
1323                if (count->run == 0) {
1324                        scaled = -1;
1325                        count->val = 0;
1326                } else if (count->run < count->ena) {
1327                        scaled = 1;
1328                        count->val = (u64)((double) count->val * count->ena / count->run + 0.5);
1329                }
1330        } else
1331                count->ena = count->run = 0;
1332
1333        if (pscaled)
1334                *pscaled = scaled;
1335}
1336
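/*
 * Size of the buffer needed to read this event's counter(s), derived from
 * attr.read_format: optional enabled/running times, the member count for
 * groups, and one value (plus optional id) per counter.
 */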
1337static int perf_evsel__read_size(struct perf_evsel *evsel)
1338{
1339        u64 read_format = evsel->attr.read_format;
1340        int entry = sizeof(u64); /* value */
1341        int size = 0;
1342        int nr = 1;
1343
1344        if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1345                size += sizeof(u64);
1346
1347        if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1348                size += sizeof(u64);
1349
1350        if (read_format & PERF_FORMAT_ID)
1351                entry += sizeof(u64);
1352
1353        if (read_format & PERF_FORMAT_GROUP) {
1354                nr = evsel->nr_members;
1355                size += sizeof(u64);
1356        }
1357
1358        size += entry * nr;
1359        return size;
1360}
1361
1362int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
1363                     struct perf_counts_values *count)
1364{
1365        size_t size = perf_evsel__read_size(evsel);
1366
1367        memset(count, 0, sizeof(*count));
1368
1369        if (FD(evsel, cpu, thread) < 0)
1370                return -EINVAL;
1371
1372        if (readn(FD(evsel, cpu, thread), count->values, size) <= 0)
1373                return -errno;
1374
1375        return 0;
1376}
1377
1378static int
1379perf_evsel__read_one(struct perf_evsel *evsel, int cpu, int thread)
1380{
1381        struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread);
1382
1383        return perf_evsel__read(evsel, cpu, thread, count);
1384}
1385
1386static void
1387perf_evsel__set_count(struct perf_evsel *counter, int cpu, int thread,
1388                      u64 val, u64 ena, u64 run)
1389{
1390        struct perf_counts_values *count;
1391
1392        count = perf_counts(counter->counts, cpu, thread);
1393
1394        count->val    = val;
1395        count->ena    = ena;
1396        count->run    = run;
1397        count->loaded = true;
1398}
1399
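/*
 * Scatter a PERF_FORMAT_GROUP read buffer (nr, optional times, then
 * { value, id } pairs) into the counts of the leader and its members.
 */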
1400static int
1401perf_evsel__process_group_data(struct perf_evsel *leader,
1402                               int cpu, int thread, u64 *data)
1403{
1404        u64 read_format = leader->attr.read_format;
1405        struct sample_read_value *v;
1406        u64 nr, ena = 0, run = 0, i;
1407
1408        nr = *data++;
1409
1410        if (nr != (u64) leader->nr_members)
1411                return -EINVAL;
1412
1413        if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1414                ena = *data++;
1415
1416        if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1417                run = *data++;
1418
1419        v = (struct sample_read_value *) data;
1420
1421        perf_evsel__set_count(leader, cpu, thread,
1422                              v[0].value, ena, run);
1423
1424        for (i = 1; i < nr; i++) {
1425                struct perf_evsel *counter;
1426
1427                counter = perf_evlist__id2evsel(leader->evlist, v[i].id);
1428                if (!counter)
1429                        return -EINVAL;
1430
1431                perf_evsel__set_count(counter, cpu, thread,
1432                                      v[i].value, ena, run);
1433        }
1434
1435        return 0;
1436}
1437
1438static int
1439perf_evsel__read_group(struct perf_evsel *leader, int cpu, int thread)
1440{
1441        struct perf_stat_evsel *ps = leader->stats;
1442        u64 read_format = leader->attr.read_format;
1443        int size = perf_evsel__read_size(leader);
1444        u64 *data = ps->group_data;
1445
1446        if (!(read_format & PERF_FORMAT_ID))
1447                return -EINVAL;
1448
1449        if (!perf_evsel__is_group_leader(leader))
1450                return -EINVAL;
1451
1452        if (!data) {
1453                data = zalloc(size);
1454                if (!data)
1455                        return -ENOMEM;
1456
1457                ps->group_data = data;
1458        }
1459
1460        if (FD(leader, cpu, thread) < 0)
1461                return -EINVAL;
1462
1463        if (readn(FD(leader, cpu, thread), data, size) <= 0)
1464                return -errno;
1465
1466        return perf_evsel__process_group_data(leader, cpu, thread, data);
1467}
1468
1469int perf_evsel__read_counter(struct perf_evsel *evsel, int cpu, int thread)
1470{
1471        u64 read_format = evsel->attr.read_format;
1472
1473        if (read_format & PERF_FORMAT_GROUP)
1474                return perf_evsel__read_group(evsel, cpu, thread);
1475        else
1476                return perf_evsel__read_one(evsel, cpu, thread);
1477}
1478
1479int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
1480                              int cpu, int thread, bool scale)
1481{
1482        struct perf_counts_values count;
1483        size_t nv = scale ? 3 : 1;
1484
1485        if (FD(evsel, cpu, thread) < 0)
1486                return -EINVAL;
1487
1488        if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
1489                return -ENOMEM;
1490
1491        if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
1492                return -errno;
1493
1494        perf_evsel__compute_deltas(evsel, cpu, thread, &count);
1495        perf_counts_values__scale(&count, scale, NULL);
1496        *perf_counts(evsel->counts, cpu, thread) = count;
1497        return 0;
1498}
1499
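/*
 * Return the fd to pass as group_fd to perf_event_open(): -1 for a group
 * leader, otherwise the already opened fd of the leader for this cpu/thread.
 */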
1500static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
1501{
1502        struct perf_evsel *leader = evsel->leader;
1503        int fd;
1504
1505        if (perf_evsel__is_group_leader(evsel))
1506                return -1;
1507
1508        /*
 1509         * The leader must already be processed/open;
 1510         * if not, it's a bug.
1511         */
1512        BUG_ON(!leader->fd);
1513
1514        fd = FD(leader, cpu, thread);
1515        BUG_ON(fd == -1);
1516
1517        return fd;
1518}
1519
1520struct bit_names {
1521        int bit;
1522        const char *name;
1523};
1524
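/* Render the names of the bits set in @value into @buf, separated by '|'. */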
1525static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits)
1526{
1527        bool first_bit = true;
1528        int i = 0;
1529
1530        do {
1531                if (value & bits[i].bit) {
1532                        buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name);
1533                        first_bit = false;
1534                }
1535        } while (bits[++i].name != NULL);
1536}
1537
1538static void __p_sample_type(char *buf, size_t size, u64 value)
1539{
1540#define bit_name(n) { PERF_SAMPLE_##n, #n }
1541        struct bit_names bits[] = {
1542                bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
1543                bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
1544                bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
1545                bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
1546                bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
1547                bit_name(WEIGHT), bit_name(PHYS_ADDR),
1548                { .name = NULL, }
1549        };
1550#undef bit_name
1551        __p_bits(buf, size, value, bits);
1552}
1553
1554static void __p_branch_sample_type(char *buf, size_t size, u64 value)
1555{
1556#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
1557        struct bit_names bits[] = {
1558                bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY),
1559                bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL),
1560                bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
1561                bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
1562                bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
1563                { .name = NULL, }
1564        };
1565#undef bit_name
1566        __p_bits(buf, size, value, bits);
1567}
1568
1569static void __p_read_format(char *buf, size_t size, u64 value)
1570{
1571#define bit_name(n) { PERF_FORMAT_##n, #n }
1572        struct bit_names bits[] = {
1573                bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
1574                bit_name(ID), bit_name(GROUP),
1575                { .name = NULL, }
1576        };
1577#undef bit_name
1578        __p_bits(buf, size, value, bits);
1579}
1580
1581#define BUF_SIZE                1024
1582
1583#define p_hex(val)              snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
1584#define p_unsigned(val)         snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
1585#define p_signed(val)           snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
1586#define p_sample_type(val)      __p_sample_type(buf, BUF_SIZE, val)
1587#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
1588#define p_read_format(val)      __p_read_format(buf, BUF_SIZE, val)
1589
1590#define PRINT_ATTRn(_n, _f, _p)                         \
1591do {                                                    \
1592        if (attr->_f) {                                 \
1593                _p(attr->_f);                           \
1594                ret += attr__fprintf(fp, _n, buf, priv);\
1595        }                                               \
1596} while (0)
1597
1598#define PRINT_ATTRf(_f, _p)     PRINT_ATTRn(#_f, _f, _p)
1599
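    /*
     * Print every non-zero perf_event_attr field through the caller supplied
     * attr__fprintf callback, one name/value pair per call.  The verbose open
     * path below uses this to dump the attr before sys_perf_event_open().
     */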
1600int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
1601                             attr__fprintf_f attr__fprintf, void *priv)
1602{
1603        char buf[BUF_SIZE];
1604        int ret = 0;
1605
1606        PRINT_ATTRf(type, p_unsigned);
1607        PRINT_ATTRf(size, p_unsigned);
1608        PRINT_ATTRf(config, p_hex);
1609        PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned);
1610        PRINT_ATTRf(sample_type, p_sample_type);
1611        PRINT_ATTRf(read_format, p_read_format);
1612
1613        PRINT_ATTRf(disabled, p_unsigned);
1614        PRINT_ATTRf(inherit, p_unsigned);
1615        PRINT_ATTRf(pinned, p_unsigned);
1616        PRINT_ATTRf(exclusive, p_unsigned);
1617        PRINT_ATTRf(exclude_user, p_unsigned);
1618        PRINT_ATTRf(exclude_kernel, p_unsigned);
1619        PRINT_ATTRf(exclude_hv, p_unsigned);
1620        PRINT_ATTRf(exclude_idle, p_unsigned);
1621        PRINT_ATTRf(mmap, p_unsigned);
1622        PRINT_ATTRf(comm, p_unsigned);
1623        PRINT_ATTRf(freq, p_unsigned);
1624        PRINT_ATTRf(inherit_stat, p_unsigned);
1625        PRINT_ATTRf(enable_on_exec, p_unsigned);
1626        PRINT_ATTRf(task, p_unsigned);
1627        PRINT_ATTRf(watermark, p_unsigned);
1628        PRINT_ATTRf(precise_ip, p_unsigned);
1629        PRINT_ATTRf(mmap_data, p_unsigned);
1630        PRINT_ATTRf(sample_id_all, p_unsigned);
1631        PRINT_ATTRf(exclude_host, p_unsigned);
1632        PRINT_ATTRf(exclude_guest, p_unsigned);
1633        PRINT_ATTRf(exclude_callchain_kernel, p_unsigned);
1634        PRINT_ATTRf(exclude_callchain_user, p_unsigned);
1635        PRINT_ATTRf(mmap2, p_unsigned);
1636        PRINT_ATTRf(comm_exec, p_unsigned);
1637        PRINT_ATTRf(use_clockid, p_unsigned);
1638        PRINT_ATTRf(context_switch, p_unsigned);
1639        PRINT_ATTRf(write_backward, p_unsigned);
1640        PRINT_ATTRf(namespaces, p_unsigned);
1641
1642        PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
1643        PRINT_ATTRf(bp_type, p_unsigned);
1644        PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
1645        PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
1646        PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
1647        PRINT_ATTRf(sample_regs_user, p_hex);
1648        PRINT_ATTRf(sample_stack_user, p_unsigned);
1649        PRINT_ATTRf(clockid, p_signed);
1650        PRINT_ATTRf(sample_regs_intr, p_hex);
1651        PRINT_ATTRf(aux_watermark, p_unsigned);
1652        PRINT_ATTRf(sample_max_stack, p_unsigned);
1653
1654        return ret;
1655}
1656
1657static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
1658                                void *priv __maybe_unused)
1659{
1660        return fprintf(fp, "  %-32s %s\n", name, val);
1661}
1662
1663static void perf_evsel__remove_fd(struct perf_evsel *pos,
1664                                  int nr_cpus, int nr_threads,
1665                                  int thread_idx)
1666{
1667        for (int cpu = 0; cpu < nr_cpus; cpu++)
1668                for (int thread = thread_idx; thread < nr_threads - 1; thread++)
1669                        FD(pos, cpu, thread) = FD(pos, cpu, thread + 1);
1670}
1671
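    /*
     * Drop the fd slot of a thread that could not be opened from every evsel
     * opened so far; for the current evsel only the cpus already processed
     * (up to @cpu_idx) have valid fds, so only those are shifted.
     */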
1672static int update_fds(struct perf_evsel *evsel,
1673                      int nr_cpus, int cpu_idx,
1674                      int nr_threads, int thread_idx)
1675{
1676        struct perf_evsel *pos;
1677
1678        if (cpu_idx >= nr_cpus || thread_idx >= nr_threads)
1679                return -EINVAL;
1680
1681        evlist__for_each_entry(evsel->evlist, pos) {
1682                nr_cpus = pos != evsel ? nr_cpus : cpu_idx;
1683
1684                perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
1685
1686                /*
1687                 * Since the fds for the next evsel have not been created yet,
1688                 * there is no need to iterate over the whole event list.
1689                 */
1690                if (pos == evsel)
1691                        break;
1692        }
1693        return 0;
1694}
1695
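    /*
     * A thread can disappear between building the thread map and opening the
     * event.  When evsel->ignore_missing_thread is set, drop such a thread
     * from the thread map and the fd arrays instead of failing the whole
     * open, and let the caller carry on with the remaining threads.
     */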
1696static bool ignore_missing_thread(struct perf_evsel *evsel,
1697                                  int nr_cpus, int cpu,
1698                                  struct thread_map *threads,
1699                                  int thread, int err)
1700{
1701        pid_t ignore_pid = thread_map__pid(threads, thread);
1702
1703        if (!evsel->ignore_missing_thread)
1704                return false;
1705
1706        /* The system wide setup does not work with threads. */
1707        if (evsel->system_wide)
1708                return false;
1709
1710        /* -ESRCH is the perf_event_open() errno for PIDs that are not found. */
1711        if (err != -ESRCH)
1712                return false;
1713
1714        /* If there's only one thread, let it fail. */
1715        if (threads->nr == 1)
1716                return false;
1717
1718        /*
1719         * We should remove the fd for the missing thread first,
1720         * because thread_map__remove() will decrease threads->nr.
1721         */
1722        if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread))
1723                return false;
1724
1725        if (thread_map__remove(threads, thread))
1726                return false;
1727
1728        pr_warning("WARNING: Ignored open failure for pid %d\n",
1729                   ignore_pid);
1730        return true;
1731}
1732
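    /*
     * Open the event on every requested cpu/thread combination.  Features the
     * running kernel does not support are probed and switched off one by one
     * (see the try_fallback/fallback_missing_features labels below), and an
     * EMFILE failure is retried after raising RLIMIT_NOFILE.
     */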
1733int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
1734                     struct thread_map *threads)
1735{
1736        int cpu, thread, nthreads;
1737        unsigned long flags = PERF_FLAG_FD_CLOEXEC;
1738        int pid = -1, err;
1739        enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;
1740
1741        if (perf_missing_features.write_backward && evsel->attr.write_backward)
1742                return -EINVAL;
1743
1744        if (cpus == NULL) {
1745                static struct cpu_map *empty_cpu_map;
1746
1747                if (empty_cpu_map == NULL) {
1748                        empty_cpu_map = cpu_map__dummy_new();
1749                        if (empty_cpu_map == NULL)
1750                                return -ENOMEM;
1751                }
1752
1753                cpus = empty_cpu_map;
1754        }
1755
1756        if (threads == NULL) {
1757                static struct thread_map *empty_thread_map;
1758
1759                if (empty_thread_map == NULL) {
1760                        empty_thread_map = thread_map__new_by_tid(-1);
1761                        if (empty_thread_map == NULL)
1762                                return -ENOMEM;
1763                }
1764
1765                threads = empty_thread_map;
1766        }
1767
1768        if (evsel->system_wide)
1769                nthreads = 1;
1770        else
1771                nthreads = threads->nr;
1772
1773        if (evsel->fd == NULL &&
1774            perf_evsel__alloc_fd(evsel, cpus->nr, nthreads) < 0)
1775                return -ENOMEM;
1776
1777        if (evsel->cgrp) {
1778                flags |= PERF_FLAG_PID_CGROUP;
1779                pid = evsel->cgrp->fd;
1780        }
1781
1782fallback_missing_features:
1783        if (perf_missing_features.clockid_wrong)
1784                evsel->attr.clockid = CLOCK_MONOTONIC; /* should always work */
1785        if (perf_missing_features.clockid) {
1786                evsel->attr.use_clockid = 0;
1787                evsel->attr.clockid = 0;
1788        }
1789        if (perf_missing_features.cloexec)
1790                flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
1791        if (perf_missing_features.mmap2)
1792                evsel->attr.mmap2 = 0;
1793        if (perf_missing_features.exclude_guest)
1794                evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
1795        if (perf_missing_features.lbr_flags)
1796                evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
1797                                     PERF_SAMPLE_BRANCH_NO_CYCLES);
1798        if (perf_missing_features.group_read && evsel->attr.inherit)
1799                evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
1800retry_sample_id:
1801        if (perf_missing_features.sample_id_all)
1802                evsel->attr.sample_id_all = 0;
1803
1804        if (verbose >= 2) {
1805                fprintf(stderr, "%.60s\n", graph_dotted_line);
1806                fprintf(stderr, "perf_event_attr:\n");
1807                perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL);
1808                fprintf(stderr, "%.60s\n", graph_dotted_line);
1809        }
1810
1811        for (cpu = 0; cpu < cpus->nr; cpu++) {
1812
1813                for (thread = 0; thread < nthreads; thread++) {
1814                        int fd, group_fd;
1815
1816                        if (!evsel->cgrp && !evsel->system_wide)
1817                                pid = thread_map__pid(threads, thread);
1818
1819                        group_fd = get_group_fd(evsel, cpu, thread);
1820retry_open:
1821                        pr_debug2("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx",
1822                                  pid, cpus->map[cpu], group_fd, flags);
1823
1824                        test_attr__ready();
1825
1826                        fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu],
1827                                                 group_fd, flags);
1828
1829                        FD(evsel, cpu, thread) = fd;
1830
1831                        if (fd < 0) {
1832                                err = -errno;
1833
1834                                if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {
1835                                        /*
1836                                         * We just removed 1 thread, so take a step
1837                                         * back on thread index and lower the upper
1838                                         * nthreads limit.
1839                                         */
1840                                        nthreads--;
1841                                        thread--;
1842
1843                        /* ... and pretend like nothing has happened. */
1844                                        err = 0;
1845                                        continue;
1846                                }
1847
1848                                pr_debug2("\nsys_perf_event_open failed, error %d\n",
1849                                          err);
1850                                goto try_fallback;
1851                        }
1852
1853                        pr_debug2(" = %d\n", fd);
1854
1855                        if (evsel->bpf_fd >= 0) {
1856                                int evt_fd = fd;
1857                                int bpf_fd = evsel->bpf_fd;
1858
1859                                err = ioctl(evt_fd,
1860                                            PERF_EVENT_IOC_SET_BPF,
1861                                            bpf_fd);
1862                                if (err && errno != EEXIST) {
1863                                        pr_err("failed to attach bpf fd %d: %s\n",
1864                                               bpf_fd, strerror(errno));
1865                                        err = -EINVAL;
1866                                        goto out_close;
1867                                }
1868                        }
1869
1870                        set_rlimit = NO_CHANGE;
1871
1872                        /*
1873                         * If we succeeded but had to kill clockid, fail and
1874                         * have perf_evsel__open_strerror() print us a nice
1875                         * error.
1876                         */
1877                        if (perf_missing_features.clockid ||
1878                            perf_missing_features.clockid_wrong) {
1879                                err = -EINVAL;
1880                                goto out_close;
1881                        }
1882                }
1883        }
1884
1885        return 0;
1886
1887try_fallback:
1888        /*
1889         * perf stat needs between 5 and 22 fds per CPU. When we run out
1890         * of them, try to increase the limits.
1891         */
1892        if (err == -EMFILE && set_rlimit < INCREASED_MAX) {
1893                struct rlimit l;
1894                int old_errno = errno;
1895
1896                if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
1897                        if (set_rlimit == NO_CHANGE)
1898                                l.rlim_cur = l.rlim_max;
1899                        else {
1900                                l.rlim_cur = l.rlim_max + 1000;
1901                                l.rlim_max = l.rlim_cur;
1902                        }
1903                        if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
1904                                set_rlimit++;
1905                                errno = old_errno;
1906                                goto retry_open;
1907                        }
1908                }
1909                errno = old_errno;
1910        }
1911
1912        if (err != -EINVAL || cpu > 0 || thread > 0)
1913                goto out_close;
1914
1915        /*
1916         * Must probe features in the order they were added to the
1917         * perf_event_attr interface.
1918         */
1919        if (!perf_missing_features.write_backward && evsel->attr.write_backward) {
1920                perf_missing_features.write_backward = true;
1921                pr_debug2("switching off write_backward\n");
1922                goto out_close;
1923        } else if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) {
1924                perf_missing_features.clockid_wrong = true;
1925                pr_debug2("switching off clockid\n");
1926                goto fallback_missing_features;
1927        } else if (!perf_missing_features.clockid && evsel->attr.use_clockid) {
1928                perf_missing_features.clockid = true;
1929                pr_debug2("switching off use_clockid\n");
1930                goto fallback_missing_features;
1931        } else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) {
1932                perf_missing_features.cloexec = true;
1933                pr_debug2("switching off cloexec flag\n");
1934                goto fallback_missing_features;
1935        } else if (!perf_missing_features.mmap2 && evsel->attr.mmap2) {
1936                perf_missing_features.mmap2 = true;
1937                pr_debug2("switching off mmap2\n");
1938                goto fallback_missing_features;
1939        } else if (!perf_missing_features.exclude_guest &&
1940                   (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
1941                perf_missing_features.exclude_guest = true;
1942                pr_debug2("switching off exclude_guest, exclude_host\n");
1943                goto fallback_missing_features;
1944        } else if (!perf_missing_features.sample_id_all) {
1945                perf_missing_features.sample_id_all = true;
1946                pr_debug2("switching off sample_id_all\n");
1947                goto retry_sample_id;
1948        } else if (!perf_missing_features.lbr_flags &&
1949                        (evsel->attr.branch_sample_type &
1950                         (PERF_SAMPLE_BRANCH_NO_CYCLES |
1951                          PERF_SAMPLE_BRANCH_NO_FLAGS))) {
1952                perf_missing_features.lbr_flags = true;
1953                pr_debug2("switching off branch sample type no (cycles/flags)\n");
1954                goto fallback_missing_features;
1955        } else if (!perf_missing_features.group_read &&
1956                    evsel->attr.inherit &&
1957                   (evsel->attr.read_format & PERF_FORMAT_GROUP) &&
1958                   perf_evsel__is_group_leader(evsel)) {
1959                perf_missing_features.group_read = true;
1960                pr_debug2("switching off group read\n");
1961                goto fallback_missing_features;
1962        }
1963out_close:
1964        if (err)
1965                threads->err_thread = thread;
1966
1967        do {
1968                while (--thread >= 0) {
1969                        close(FD(evsel, cpu, thread));
1970                        FD(evsel, cpu, thread) = -1;
1971                }
1972                thread = nthreads;
1973        } while (--cpu >= 0);
1974        return err;
1975}
1976
1977void perf_evsel__close(struct perf_evsel *evsel)
1978{
1979        if (evsel->fd == NULL)
1980                return;
1981
1982        perf_evsel__close_fd(evsel);
1983        perf_evsel__free_fd(evsel);
1984}
1985
1986int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
1987                             struct cpu_map *cpus)
1988{
1989        return perf_evsel__open(evsel, cpus, NULL);
1990}
1991
1992int perf_evsel__open_per_thread(struct perf_evsel *evsel,
1993                                struct thread_map *threads)
1994{
1995        return perf_evsel__open(evsel, NULL, threads);
1996}
1997
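    /*
     * For non-sample records with sample_id_all set, the id sample fields
     * (TID, TIME, ID, STREAM_ID, CPU, IDENTIFIER) are appended at the end of
     * the event, so parse them by walking the u64 array backwards from the
     * tail of the record.
     */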
1998static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
1999                                       const union perf_event *event,
2000                                       struct perf_sample *sample)
2001{
2002        u64 type = evsel->attr.sample_type;
2003        const u64 *array = event->sample.array;
2004        bool swapped = evsel->needs_swap;
2005        union u64_swap u;
2006
2007        array += ((event->header.size -
2008                   sizeof(event->header)) / sizeof(u64)) - 1;
2009
2010        if (type & PERF_SAMPLE_IDENTIFIER) {
2011                sample->id = *array;
2012                array--;
2013        }
2014
2015        if (type & PERF_SAMPLE_CPU) {
2016                u.val64 = *array;
2017                if (swapped) {
2018                        /* undo swap of u64, then swap on individual u32s */
2019                        u.val64 = bswap_64(u.val64);
2020                        u.val32[0] = bswap_32(u.val32[0]);
2021                }
2022
2023                sample->cpu = u.val32[0];
2024                array--;
2025        }
2026
2027        if (type & PERF_SAMPLE_STREAM_ID) {
2028                sample->stream_id = *array;
2029                array--;
2030        }
2031
2032        if (type & PERF_SAMPLE_ID) {
2033                sample->id = *array;
2034                array--;
2035        }
2036
2037        if (type & PERF_SAMPLE_TIME) {
2038                sample->time = *array;
2039                array--;
2040        }
2041
2042        if (type & PERF_SAMPLE_TID) {
2043                u.val64 = *array;
2044                if (swapped) {
2045                        /* undo swap of u64, then swap on individual u32s */
2046                        u.val64 = bswap_64(u.val64);
2047                        u.val32[0] = bswap_32(u.val32[0]);
2048                        u.val32[1] = bswap_32(u.val32[1]);
2049                }
2050
2051                sample->pid = u.val32[0];
2052                sample->tid = u.val32[1];
2053                array--;
2054        }
2055
2056        return 0;
2057}
2058
2059static inline bool overflow(const void *endp, u16 max_size, const void *offset,
2060                            u64 size)
2061{
2062        return size > max_size || offset + size > endp;
2063}
2064
2065#define OVERFLOW_CHECK(offset, size, max_size)                          \
2066        do {                                                            \
2067                if (overflow(endp, (max_size), (offset), (size)))       \
2068                        return -EFAULT;                                 \
2069        } while (0)
2070
2071#define OVERFLOW_CHECK_u64(offset) \
2072        OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
2073
2074static int
2075perf_event__check_size(union perf_event *event, unsigned int sample_size)
2076{
2077        /*
2078         * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
2079         * up to PERF_SAMPLE_PERIOD.  After that overflow() must be used to
2080         * check that the format does not go past the end of the event.
2081         */
2082        if (sample_size + sizeof(event->header) > event->header.size)
2083                return -EFAULT;
2084
2085        return 0;
2086}
2087
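    /*
     * Parse a PERF_RECORD_SAMPLE into struct perf_sample, consuming the u64
     * array in the same field order the kernel wrote it.  Every variable
     * sized chunk is bounds checked against the record size with
     * OVERFLOW_CHECK() before being dereferenced.
     */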
2088int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
2089                             struct perf_sample *data)
2090{
2091        u64 type = evsel->attr.sample_type;
2092        bool swapped = evsel->needs_swap;
2093        const u64 *array;
2094        u16 max_size = event->header.size;
2095        const void *endp = (void *)event + max_size;
2096        u64 sz;
2097
2098        /*
2099         * used for cross-endian analysis. See git commit 65014ab3
2100         * for why this goofiness is needed.
2101         */
2102        union u64_swap u;
2103
2104        memset(data, 0, sizeof(*data));
2105        data->cpu = data->pid = data->tid = -1;
2106        data->stream_id = data->id = data->time = -1ULL;
2107        data->period = evsel->attr.sample_period;
2108        data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2109        data->misc    = event->header.misc;
2110        data->id = -1ULL;
2111        data->data_src = PERF_MEM_DATA_SRC_NONE;
2112
2113        if (event->header.type != PERF_RECORD_SAMPLE) {
2114                if (!evsel->attr.sample_id_all)
2115                        return 0;
2116                return perf_evsel__parse_id_sample(evsel, event, data);
2117        }
2118
2119        array = event->sample.array;
2120
2121        if (perf_event__check_size(event, evsel->sample_size))
2122                return -EFAULT;
2123
2124        if (type & PERF_SAMPLE_IDENTIFIER) {
2125                data->id = *array;
2126                array++;
2127        }
2128
2129        if (type & PERF_SAMPLE_IP) {
2130                data->ip = *array;
2131                array++;
2132        }
2133
2134        if (type & PERF_SAMPLE_TID) {
2135                u.val64 = *array;
2136                if (swapped) {
2137                        /* undo swap of u64, then swap on individual u32s */
2138                        u.val64 = bswap_64(u.val64);
2139                        u.val32[0] = bswap_32(u.val32[0]);
2140                        u.val32[1] = bswap_32(u.val32[1]);
2141                }
2142
2143                data->pid = u.val32[0];
2144                data->tid = u.val32[1];
2145                array++;
2146        }
2147
2148        if (type & PERF_SAMPLE_TIME) {
2149                data->time = *array;
2150                array++;
2151        }
2152
2153        if (type & PERF_SAMPLE_ADDR) {
2154                data->addr = *array;
2155                array++;
2156        }
2157
2158        if (type & PERF_SAMPLE_ID) {
2159                data->id = *array;
2160                array++;
2161        }
2162
2163        if (type & PERF_SAMPLE_STREAM_ID) {
2164                data->stream_id = *array;
2165                array++;
2166        }
2167
2168        if (type & PERF_SAMPLE_CPU) {
2169
2170                u.val64 = *array;
2171                if (swapped) {
2172                        /* undo swap of u64, then swap on individual u32s */
2173                        u.val64 = bswap_64(u.val64);
2174                        u.val32[0] = bswap_32(u.val32[0]);
2175                }
2176
2177                data->cpu = u.val32[0];
2178                array++;
2179        }
2180
2181        if (type & PERF_SAMPLE_PERIOD) {
2182                data->period = *array;
2183                array++;
2184        }
2185
2186        if (type & PERF_SAMPLE_READ) {
2187                u64 read_format = evsel->attr.read_format;
2188
2189                OVERFLOW_CHECK_u64(array);
2190                if (read_format & PERF_FORMAT_GROUP)
2191                        data->read.group.nr = *array;
2192                else
2193                        data->read.one.value = *array;
2194
2195                array++;
2196
2197                if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2198                        OVERFLOW_CHECK_u64(array);
2199                        data->read.time_enabled = *array;
2200                        array++;
2201                }
2202
2203                if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2204                        OVERFLOW_CHECK_u64(array);
2205                        data->read.time_running = *array;
2206                        array++;
2207                }
2208
2209                /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
2210                if (read_format & PERF_FORMAT_GROUP) {
2211                        const u64 max_group_nr = UINT64_MAX /
2212                                        sizeof(struct sample_read_value);
2213
2214                        if (data->read.group.nr > max_group_nr)
2215                                return -EFAULT;
2216                        sz = data->read.group.nr *
2217                             sizeof(struct sample_read_value);
2218                        OVERFLOW_CHECK(array, sz, max_size);
2219                        data->read.group.values =
2220                                        (struct sample_read_value *)array;
2221                        array = (void *)array + sz;
2222                } else {
2223                        OVERFLOW_CHECK_u64(array);
2224                        data->read.one.id = *array;
2225                        array++;
2226                }
2227        }
2228
2229        if (evsel__has_callchain(evsel)) {
2230                const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
2231
2232                OVERFLOW_CHECK_u64(array);
2233                data->callchain = (struct ip_callchain *)array++;
2234                if (data->callchain->nr > max_callchain_nr)
2235                        return -EFAULT;
2236                sz = data->callchain->nr * sizeof(u64);
2237                OVERFLOW_CHECK(array, sz, max_size);
2238                array = (void *)array + sz;
2239        }
2240
2241        if (type & PERF_SAMPLE_RAW) {
2242                OVERFLOW_CHECK_u64(array);
2243                u.val64 = *array;
2244
2245                /*
2246                 * Undo swap of u64, then swap on individual u32s,
2247                 * get the size of the raw area and undo all of the
2248                 * swap. The pevent interface handles endianness by
2249                 * itself.
2250                 */
2251                if (swapped) {
2252                        u.val64 = bswap_64(u.val64);
2253                        u.val32[0] = bswap_32(u.val32[0]);
2254                        u.val32[1] = bswap_32(u.val32[1]);
2255                }
2256                data->raw_size = u.val32[0];
2257
2258                /*
2259                 * The raw data is aligned on 64 bits including the
2260                 * u32 size, so it's safe to use mem_bswap_64.
2261                 */
2262                if (swapped)
2263                        mem_bswap_64((void *) array, data->raw_size);
2264
2265                array = (void *)array + sizeof(u32);
2266
2267                OVERFLOW_CHECK(array, data->raw_size, max_size);
2268                data->raw_data = (void *)array;
2269                array = (void *)array + data->raw_size;
2270        }
2271
2272        if (type & PERF_SAMPLE_BRANCH_STACK) {
2273                const u64 max_branch_nr = UINT64_MAX /
2274                                          sizeof(struct branch_entry);
2275
2276                OVERFLOW_CHECK_u64(array);
2277                data->branch_stack = (struct branch_stack *)array++;
2278
2279                if (data->branch_stack->nr > max_branch_nr)
2280                        return -EFAULT;
2281                sz = data->branch_stack->nr * sizeof(struct branch_entry);
2282                OVERFLOW_CHECK(array, sz, max_size);
2283                array = (void *)array + sz;
2284        }
2285
2286        if (type & PERF_SAMPLE_REGS_USER) {
2287                OVERFLOW_CHECK_u64(array);
2288                data->user_regs.abi = *array;
2289                array++;
2290
2291                if (data->user_regs.abi) {
2292                        u64 mask = evsel->attr.sample_regs_user;
2293
2294                        sz = hweight_long(mask) * sizeof(u64);
2295                        OVERFLOW_CHECK(array, sz, max_size);
2296                        data->user_regs.mask = mask;
2297                        data->user_regs.regs = (u64 *)array;
2298                        array = (void *)array + sz;
2299                }
2300        }
2301
2302        if (type & PERF_SAMPLE_STACK_USER) {
2303                OVERFLOW_CHECK_u64(array);
2304                sz = *array++;
2305
2306                data->user_stack.offset = ((char *)(array - 1)
2307                                          - (char *) event);
2308
2309                if (!sz) {
2310                        data->user_stack.size = 0;
2311                } else {
2312                        OVERFLOW_CHECK(array, sz, max_size);
2313                        data->user_stack.data = (char *)array;
2314                        array = (void *)array + sz;
2315                        OVERFLOW_CHECK_u64(array);
2316                        data->user_stack.size = *array++;
2317                        if (WARN_ONCE(data->user_stack.size > sz,
2318                                      "user stack dump failure\n"))
2319                                return -EFAULT;
2320                }
2321        }
2322
2323        if (type & PERF_SAMPLE_WEIGHT) {
2324                OVERFLOW_CHECK_u64(array);
2325                data->weight = *array;
2326                array++;
2327        }
2328
2329        if (type & PERF_SAMPLE_DATA_SRC) {
2330                OVERFLOW_CHECK_u64(array);
2331                data->data_src = *array;
2332                array++;
2333        }
2334
2335        if (type & PERF_SAMPLE_TRANSACTION) {
2336                OVERFLOW_CHECK_u64(array);
2337                data->transaction = *array;
2338                array++;
2339        }
2340
2341        data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
2342        if (type & PERF_SAMPLE_REGS_INTR) {
2343                OVERFLOW_CHECK_u64(array);
2344                data->intr_regs.abi = *array;
2345                array++;
2346
2347                if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
2348                        u64 mask = evsel->attr.sample_regs_intr;
2349
2350                        sz = hweight_long(mask) * sizeof(u64);
2351                        OVERFLOW_CHECK(array, sz, max_size);
2352                        data->intr_regs.mask = mask;
2353                        data->intr_regs.regs = (u64 *)array;
2354                        array = (void *)array + sz;
2355                }
2356        }
2357
2358        data->phys_addr = 0;
2359        if (type & PERF_SAMPLE_PHYS_ADDR) {
2360                data->phys_addr = *array;
2361                array++;
2362        }
2363
2364        return 0;
2365}
2366
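    /*
     * Cheaper variant of perf_evsel__parse_sample() for when only the
     * timestamp is needed: skip the fixed size fields that may precede
     * PERF_SAMPLE_TIME and return just that value.
     */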
2367int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel,
2368                                       union perf_event *event,
2369                                       u64 *timestamp)
2370{
2371        u64 type = evsel->attr.sample_type;
2372        const u64 *array;
2373
2374        if (!(type & PERF_SAMPLE_TIME))
2375                return -1;
2376
2377        if (event->header.type != PERF_RECORD_SAMPLE) {
2378                struct perf_sample data = {
2379                        .time = -1ULL,
2380                };
2381
2382                if (!evsel->attr.sample_id_all)
2383                        return -1;
2384                if (perf_evsel__parse_id_sample(evsel, event, &data))
2385                        return -1;
2386
2387                *timestamp = data.time;
2388                return 0;
2389        }
2390
2391        array = event->sample.array;
2392
2393        if (perf_event__check_size(event, evsel->sample_size))
2394                return -EFAULT;
2395
2396        if (type & PERF_SAMPLE_IDENTIFIER)
2397                array++;
2398
2399        if (type & PERF_SAMPLE_IP)
2400                array++;
2401
2402        if (type & PERF_SAMPLE_TID)
2403                array++;
2404
2405        if (type & PERF_SAMPLE_TIME)
2406                *timestamp = *array;
2407
2408        return 0;
2409}
2410
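    /*
     * Compute how many bytes perf_event__synthesize_sample() below will need
     * for a sample with this sample_type/read_format, mirroring its layout
     * field by field.
     */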
2411size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
2412                                     u64 read_format)
2413{
2414        size_t sz, result = sizeof(struct sample_event);
2415
2416        if (type & PERF_SAMPLE_IDENTIFIER)
2417                result += sizeof(u64);
2418
2419        if (type & PERF_SAMPLE_IP)
2420                result += sizeof(u64);
2421
2422        if (type & PERF_SAMPLE_TID)
2423                result += sizeof(u64);
2424
2425        if (type & PERF_SAMPLE_TIME)
2426                result += sizeof(u64);
2427
2428        if (type & PERF_SAMPLE_ADDR)
2429                result += sizeof(u64);
2430
2431        if (type & PERF_SAMPLE_ID)
2432                result += sizeof(u64);
2433
2434        if (type & PERF_SAMPLE_STREAM_ID)
2435                result += sizeof(u64);
2436
2437        if (type & PERF_SAMPLE_CPU)
2438                result += sizeof(u64);
2439
2440        if (type & PERF_SAMPLE_PERIOD)
2441                result += sizeof(u64);
2442
2443        if (type & PERF_SAMPLE_READ) {
2444                result += sizeof(u64);
2445                if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
2446                        result += sizeof(u64);
2447                if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
2448                        result += sizeof(u64);
2449                /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
2450                if (read_format & PERF_FORMAT_GROUP) {
2451                        sz = sample->read.group.nr *
2452                             sizeof(struct sample_read_value);
2453                        result += sz;
2454                } else {
2455                        result += sizeof(u64);
2456                }
2457        }
2458
2459        if (type & PERF_SAMPLE_CALLCHAIN) {
2460                sz = (sample->callchain->nr + 1) * sizeof(u64);
2461                result += sz;
2462        }
2463
2464        if (type & PERF_SAMPLE_RAW) {
2465                result += sizeof(u32);
2466                result += sample->raw_size;
2467        }
2468
2469        if (type & PERF_SAMPLE_BRANCH_STACK) {
2470                sz = sample->branch_stack->nr * sizeof(struct branch_entry);
2471                sz += sizeof(u64);
2472                result += sz;
2473        }
2474
2475        if (type & PERF_SAMPLE_REGS_USER) {
2476                if (sample->user_regs.abi) {
2477                        result += sizeof(u64);
2478                        sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
2479                        result += sz;
2480                } else {
2481                        result += sizeof(u64);
2482                }
2483        }
2484
2485        if (type & PERF_SAMPLE_STACK_USER) {
2486                sz = sample->user_stack.size;
2487                result += sizeof(u64);
2488                if (sz) {
2489                        result += sz;
2490                        result += sizeof(u64);
2491                }
2492        }
2493
2494        if (type & PERF_SAMPLE_WEIGHT)
2495                result += sizeof(u64);
2496
2497        if (type & PERF_SAMPLE_DATA_SRC)
2498                result += sizeof(u64);
2499
2500        if (type & PERF_SAMPLE_TRANSACTION)
2501                result += sizeof(u64);
2502
2503        if (type & PERF_SAMPLE_REGS_INTR) {
2504                if (sample->intr_regs.abi) {
2505                        result += sizeof(u64);
2506                        sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
2507                        result += sz;
2508                } else {
2509                        result += sizeof(u64);
2510                }
2511        }
2512
2513        if (type & PERF_SAMPLE_PHYS_ADDR)
2514                result += sizeof(u64);
2515
2516        return result;
2517}
2518
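    /*
     * Inverse of perf_evsel__parse_sample(): serialize a struct perf_sample
     * back into the on-file u64 array layout, in the same field order.
     */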
2519int perf_event__synthesize_sample(union perf_event *event, u64 type,
2520                                  u64 read_format,
2521                                  const struct perf_sample *sample)
2522{
2523        u64 *array;
2524        size_t sz;
2525        /*
2526         * used for cross-endian analysis. See git commit 65014ab3
2527         * for why this goofiness is needed.
2528         */
2529        union u64_swap u;
2530
2531        array = event->sample.array;
2532
2533        if (type & PERF_SAMPLE_IDENTIFIER) {
2534                *array = sample->id;
2535                array++;
2536        }
2537
2538        if (type & PERF_SAMPLE_IP) {
2539                *array = sample->ip;
2540                array++;
2541        }
2542
2543        if (type & PERF_SAMPLE_TID) {
2544                u.val32[0] = sample->pid;
2545                u.val32[1] = sample->tid;
2546                *array = u.val64;
2547                array++;
2548        }
2549
2550        if (type & PERF_SAMPLE_TIME) {
2551                *array = sample->time;
2552                array++;
2553        }
2554
2555        if (type & PERF_SAMPLE_ADDR) {
2556                *array = sample->addr;
2557                array++;
2558        }
2559
2560        if (type & PERF_SAMPLE_ID) {
2561                *array = sample->id;
2562                array++;
2563        }
2564
2565        if (type & PERF_SAMPLE_STREAM_ID) {
2566                *array = sample->stream_id;
2567                array++;
2568        }
2569
2570        if (type & PERF_SAMPLE_CPU) {
2571                u.val32[0] = sample->cpu;
2572                u.val32[1] = 0;
2573                *array = u.val64;
2574                array++;
2575        }
2576
2577        if (type & PERF_SAMPLE_PERIOD) {
2578                *array = sample->period;
2579                array++;
2580        }
2581
2582        if (type & PERF_SAMPLE_READ) {
2583                if (read_format & PERF_FORMAT_GROUP)
2584                        *array = sample->read.group.nr;
2585                else
2586                        *array = sample->read.one.value;
2587                array++;
2588
2589                if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2590                        *array = sample->read.time_enabled;
2591                        array++;
2592                }
2593
2594                if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2595                        *array = sample->read.time_running;
2596                        array++;
2597                }
2598
2599                /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
2600                if (read_format & PERF_FORMAT_GROUP) {
2601                        sz = sample->read.group.nr *
2602                             sizeof(struct sample_read_value);
2603                        memcpy(array, sample->read.group.values, sz);
2604                        array = (void *)array + sz;
2605                } else {
2606                        *array = sample->read.one.id;
2607                        array++;
2608                }
2609        }
2610
2611        if (type & PERF_SAMPLE_CALLCHAIN) {
2612                sz = (sample->callchain->nr + 1) * sizeof(u64);
2613                memcpy(array, sample->callchain, sz);
2614                array = (void *)array + sz;
2615        }
2616
2617        if (type & PERF_SAMPLE_RAW) {
2618                u.val32[0] = sample->raw_size;
2619                *array = u.val64;
2620                array = (void *)array + sizeof(u32);
2621
2622                memcpy(array, sample->raw_data, sample->raw_size);
2623                array = (void *)array + sample->raw_size;
2624        }
2625
2626        if (type & PERF_SAMPLE_BRANCH_STACK) {
2627                sz = sample->branch_stack->nr * sizeof(struct branch_entry);
2628                sz += sizeof(u64);
2629                memcpy(array, sample->branch_stack, sz);
2630                array = (void *)array + sz;
2631        }
2632
2633        if (type & PERF_SAMPLE_REGS_USER) {
2634                if (sample->user_regs.abi) {
2635                        *array++ = sample->user_regs.abi;
2636                        sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
2637                        memcpy(array, sample->user_regs.regs, sz);
2638                        array = (void *)array + sz;
2639                } else {
2640                        *array++ = 0;
2641                }
2642        }
2643
2644        if (type & PERF_SAMPLE_STACK_USER) {
2645                sz = sample->user_stack.size;
2646                *array++ = sz;
2647                if (sz) {
2648                        memcpy(array, sample->user_stack.data, sz);
2649                        array = (void *)array + sz;
2650                        *array++ = sz;
2651                }
2652        }
2653
2654        if (type & PERF_SAMPLE_WEIGHT) {
2655                *array = sample->weight;
2656                array++;
2657        }
2658
2659        if (type & PERF_SAMPLE_DATA_SRC) {
2660                *array = sample->data_src;
2661                array++;
2662        }
2663
2664        if (type & PERF_SAMPLE_TRANSACTION) {
2665                *array = sample->transaction;
2666                array++;
2667        }
2668
2669        if (type & PERF_SAMPLE_REGS_INTR) {
2670                if (sample->intr_regs.abi) {
2671                        *array++ = sample->intr_regs.abi;
2672                        sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
2673                        memcpy(array, sample->intr_regs.regs, sz);
2674                        array = (void *)array + sz;
2675                } else {
2676                        *array++ = 0;
2677                }
2678        }
2679
2680        if (type & PERF_SAMPLE_PHYS_ADDR) {
2681                *array = sample->phys_addr;
2682                array++;
2683        }
2684
2685        return 0;
2686}
2687
2688struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name)
2689{
2690        return tep_find_field(evsel->tp_format, name);
2691}
2692
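    /*
     * Return a pointer to a tracepoint field inside the raw data of a sample.
     * For dynamic (variable length) fields the value stored at the field
     * offset is a descriptor whose low 16 bits hold the real offset into the
     * raw data.
     */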
2693void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
2694                         const char *name)
2695{
2696        struct format_field *field = perf_evsel__field(evsel, name);
2697        int offset;
2698
2699        if (!field)
2700                return NULL;
2701
2702        offset = field->offset;
2703
2704        if (field->flags & FIELD_IS_DYNAMIC) {
2705                offset = *(int *)(sample->raw_data + field->offset);
2706                offset &= 0xffff;
2707        }
2708
2709        return sample->raw_data + offset;
2710}
2711
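    /*
     * Read a 1/2/4/8 byte tracepoint field as an unsigned integer, byte
     * swapping it when the sample comes from a host with a different
     * endianness.
     */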
2712u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
2713                         bool needs_swap)
2714{
2715        u64 value;
2716        void *ptr = sample->raw_data + field->offset;
2717
2718        switch (field->size) {
2719        case 1:
2720                return *(u8 *)ptr;
2721        case 2:
2722                value = *(u16 *)ptr;
2723                break;
2724        case 4:
2725                value = *(u32 *)ptr;
2726                break;
2727        case 8:
2728                memcpy(&value, ptr, sizeof(u64));
2729                break;
2730        default:
2731                return 0;
2732        }
2733
2734        if (!needs_swap)
2735                return value;
2736
2737        switch (field->size) {
2738        case 2:
2739                return bswap_16(value);
2740        case 4:
2741                return bswap_32(value);
2742        case 8:
2743                return bswap_64(value);
2744        default:
2745                return 0;
2746        }
2747
2748        return 0;
2749}
2750
2751u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
2752                       const char *name)
2753{
2754        struct format_field *field = perf_evsel__field(evsel, name);
2755
2756        if (!field)
2757                return 0;
2758
2759        return format_field__intval(field, sample, evsel->needs_swap);
2760}
2761
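    /*
     * Try to recover from an open failure by tweaking the event: cycles is
     * demoted to the cpu-clock software event when there is no PMU support,
     * and an EACCES caused by perf_event_paranoid > 1 is retried with kernel
     * samples excluded (a "u" modifier is appended to the event name).
     * Returns true when the caller should retry the open.
     */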
2762bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
2763                          char *msg, size_t msgsize)
2764{
2765        int paranoid;
2766
2767        if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
2768            evsel->attr.type   == PERF_TYPE_HARDWARE &&
2769            evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
2770                /*
2771                 * If it's cycles then fall back to the hrtimer based
2772                 * cpu-clock-tick sw counter, which is always available
2773                 * even without PMU support.
2774                 *
2775                 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
2776                 * b0a873e).
2777                 */
2778                scnprintf(msg, msgsize, "%s",
2779"The cycles event is not supported, trying to fall back to cpu-clock-ticks");
2780
2781                evsel->attr.type   = PERF_TYPE_SOFTWARE;
2782                evsel->attr.config = PERF_COUNT_SW_CPU_CLOCK;
2783
2784                zfree(&evsel->name);
2785                return true;
2786        } else if (err == EACCES && !evsel->attr.exclude_kernel &&
2787                   (paranoid = perf_event_paranoid()) > 1) {
2788                const char *name = perf_evsel__name(evsel);
2789                char *new_name;
2790                const char *sep = ":";
2791
2792                /* Is the separator already in the name? */
2793                if (strchr(name, '/') ||
2794                    strchr(name, ':'))
2795                        sep = "";
2796
2797                if (asprintf(&new_name, "%s%su", name, sep) < 0)
2798                        return false;
2799
2800                if (evsel->name)
2801                        free(evsel->name);
2802                evsel->name = new_name;
2803                scnprintf(msg, msgsize,
2804"kernel.perf_event_paranoid=%d, trying to fall back to excluding kernel samples", paranoid);
2805                evsel->attr.exclude_kernel = 1;
2806
2807                return true;
2808        }
2809
2810        return false;
2811}
2812
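    /*
     * Scan /proc for a running process whose comm starts with @name; used
     * below to detect an oprofile daemon keeping the PMU busy.
     */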
2813static bool find_process(const char *name)
2814{
2815        size_t len = strlen(name);
2816        DIR *dir;
2817        struct dirent *d;
2818        int ret = -1;
2819
2820        dir = opendir(procfs__mountpoint());
2821        if (!dir)
2822                return false;
2823
2824        /* Walk through the directory. */
2825        while (ret && (d = readdir(dir)) != NULL) {
2826                char path[PATH_MAX];
2827                char *data;
2828                size_t size;
2829
2830                if ((d->d_type != DT_DIR) ||
2831                     !strcmp(".", d->d_name) ||
2832                     !strcmp("..", d->d_name))
2833                        continue;
2834
2835                scnprintf(path, sizeof(path), "%s/%s/comm",
2836                          procfs__mountpoint(), d->d_name);
2837
2838                if (filename__read_str(path, &data, &size))
2839                        continue;
2840
2841                ret = strncmp(name, data, len);
2842                free(data);
2843        }
2844
2845        closedir(dir);
2846        return ret ? false : true;
2847}
2848
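    /*
     * Turn a sys_perf_event_open() errno into a user oriented explanation,
     * adding hints (paranoid setting, ulimit, max stack depth, ...) for the
     * most common failure modes.
     */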
2849int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
2850                              int err, char *msg, size_t size)
2851{
2852        char sbuf[STRERR_BUFSIZE];
2853        int printed = 0;
2854
2855        switch (err) {
2856        case EPERM:
2857        case EACCES:
2858                if (err == EPERM)
2859                        printed = scnprintf(msg, size,
2860                                "No permission to enable %s event.\n\n",
2861                                perf_evsel__name(evsel));
2862
2863                return scnprintf(msg + printed, size - printed,
2864                 "You may not have permission to collect %sstats.\n\n"
2865                 "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
2866                 "which controls use of the performance events system by\n"
2867                 "unprivileged users (without CAP_SYS_ADMIN).\n\n"
2868                 "The current value is %d:\n\n"
2869                 "  -1: Allow use of (almost) all events by all users\n"
2870                 "      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
2871                 ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n"
2872                 "      Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n"
2873                 ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
2874                 ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n"
2875                 "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n"
2876                 "      kernel.perf_event_paranoid = -1\n" ,
2877                                 target->system_wide ? "system-wide " : "",
2878                                 perf_event_paranoid());
2879        case ENOENT:
2880                return scnprintf(msg, size, "The %s event is not supported.",
2881                                 perf_evsel__name(evsel));
2882        case EMFILE:
2883                return scnprintf(msg, size, "%s",
2884                         "Too many events are opened.\n"
2885                         "Probably the maximum number of open file descriptors has been reached.\n"
2886                         "Hint: Try again after reducing the number of events.\n"
2887                         "Hint: Try increasing the limit with 'ulimit -n <limit>'");
2888        case ENOMEM:
2889                if (evsel__has_callchain(evsel) &&
2890                    access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
2891                        return scnprintf(msg, size,
2892                                         "Not enough memory to setup event with callchain.\n"
2893                                         "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
2894                                         "Hint: Current value: %d", sysctl__max_stack());
2895                break;
2896        case ENODEV:
2897                if (target->cpu_list)
2898                        return scnprintf(msg, size, "%s",
2899         "No such device - did you specify an out-of-range profile CPU?");
2900                break;
2901        case EOPNOTSUPP:
2902                if (evsel->attr.sample_period != 0)
2903                        return scnprintf(msg, size,
2904        "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
2905                                         perf_evsel__name(evsel));
2906                if (evsel->attr.precise_ip)
2907                        return scnprintf(msg, size, "%s",
2908        "\'precise\' request may not be supported. Try removing 'p' modifier.");
2909#if defined(__i386__) || defined(__x86_64__)
2910                if (evsel->attr.type == PERF_TYPE_HARDWARE)
2911                        return scnprintf(msg, size, "%s",
2912        "No hardware sampling interrupt available.\n");
2913#endif
2914                break;
2915        case EBUSY:
2916                if (find_process("oprofiled"))
2917                        return scnprintf(msg, size,
2918        "The PMU counters are busy/taken by another profiler.\n"
2919        "We found oprofile daemon running, please stop it and try again.");
2920                break;
2921        case EINVAL:
2922                if (evsel->attr.write_backward && perf_missing_features.write_backward)
2923                        return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
2924                if (perf_missing_features.clockid)
2925                        return scnprintf(msg, size, "clockid feature not supported.");
2926                if (perf_missing_features.clockid_wrong)
2927                        return scnprintf(msg, size, "wrong clockid (%d).", clockid);
2928                break;
2929        default:
2930                break;
2931        }
2932
2933        return scnprintf(msg, size,
2934        "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
2935        "/bin/dmesg | grep -i perf may provide additional information.\n",
2936                         err, str_error_r(err, sbuf, sizeof(sbuf)),
2937                         perf_evsel__name(evsel));
2938}
2939
2940struct perf_env *perf_evsel__env(struct perf_evsel *evsel)
2941{
2942        if (evsel && evsel->evlist)
2943                return evsel->evlist->env;
2944        return NULL;
2945}
2946