linux/tools/perf/util/evsel.c
   1/*
   2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
   3 *
   4 * Parts came from builtin-{top,stat,record}.c, see those files for further
   5 * copyright notes.
   6 *
   7 * Released under the GPL v2. (and only v2, not any later version)
   8 */
   9
  10#include <byteswap.h>
  11#include <errno.h>
  12#include <inttypes.h>
  13#include <linux/bitops.h>
  14#include <api/fs/fs.h>
  15#include <api/fs/tracing_path.h>
  16#include <traceevent/event-parse.h>
  17#include <linux/hw_breakpoint.h>
  18#include <linux/perf_event.h>
  19#include <linux/compiler.h>
  20#include <linux/err.h>
  21#include <linux/zalloc.h>
  22#include <sys/ioctl.h>
  23#include <sys/resource.h>
  24#include <sys/types.h>
  25#include <dirent.h>
  26#include <stdlib.h>
  27#include <perf/evsel.h>
  28#include "asm/bug.h"
  29#include "callchain.h"
  30#include "cgroup.h"
  31#include "counts.h"
  32#include "event.h"
  33#include "evsel.h"
  34#include "util/env.h"
  35#include "util/evsel_config.h"
  36#include "util/evsel_fprintf.h"
  37#include "evlist.h"
  38#include <perf/cpumap.h>
  39#include "thread_map.h"
  40#include "target.h"
  41#include "perf_regs.h"
  42#include "record.h"
  43#include "debug.h"
  44#include "trace-event.h"
  45#include "stat.h"
  46#include "string2.h"
  47#include "memswap.h"
  48#include "util.h"
  49#include "../perf-sys.h"
  50#include "util/parse-branch-options.h"
  51#include <internal/xyarray.h>
  52#include <internal/lib.h>
  53
  54#include <linux/ctype.h>
  55
  56struct perf_missing_features perf_missing_features;
  57
  58static clockid_t clockid;
  59
  60static int perf_evsel__no_extra_init(struct evsel *evsel __maybe_unused)
  61{
  62        return 0;
  63}
  64
  65void __weak test_attr__ready(void) { }
  66
  67static void perf_evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
  68{
  69}
  70
  71static struct {
  72        size_t  size;
  73        int     (*init)(struct evsel *evsel);
  74        void    (*fini)(struct evsel *evsel);
  75} perf_evsel__object = {
  76        .size = sizeof(struct evsel),
  77        .init = perf_evsel__no_extra_init,
  78        .fini = perf_evsel__no_extra_fini,
  79};
  80
  81int perf_evsel__object_config(size_t object_size,
  82                              int (*init)(struct evsel *evsel),
  83                              void (*fini)(struct evsel *evsel))
  84{
  85
  86        if (object_size == 0)
  87                goto set_methods;
  88
  89        if (perf_evsel__object.size > object_size)
  90                return -EINVAL;
  91
  92        perf_evsel__object.size = object_size;
  93
  94set_methods:
  95        if (init != NULL)
  96                perf_evsel__object.init = init;
  97
  98        if (fini != NULL)
  99                perf_evsel__object.fini = fini;
 100
 101        return 0;
 102}
 103
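/*
 * FD() yields an lvalue for the file descriptor of event 'e' on CPU index 'x'
 * and thread index 'y' in the evsel's xyarray, so it can be read or assigned.
 */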
 104#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
 105
 106int __perf_evsel__sample_size(u64 sample_type)
 107{
 108        u64 mask = sample_type & PERF_SAMPLE_MASK;
 109        int size = 0;
 110        int i;
 111
 112        for (i = 0; i < 64; i++) {
 113                if (mask & (1ULL << i))
 114                        size++;
 115        }
 116
 117        size *= sizeof(u64);
 118
 119        return size;
 120}
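
/*
 * Worked example: a sample_type of PERF_SAMPLE_IP | PERF_SAMPLE_TID |
 * PERF_SAMPLE_TIME has three bits set within PERF_SAMPLE_MASK, so
 * __perf_evsel__sample_size() returns 3 * sizeof(u64) = 24 bytes.
 */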
 121
 122/**
 123 * __perf_evsel__calc_id_pos - calculate id_pos.
 124 * @sample_type: sample type
 125 *
 126 * This function returns the position of the event id (PERF_SAMPLE_ID or
 127 * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
 128 * perf_record_sample.
 129 */
 130static int __perf_evsel__calc_id_pos(u64 sample_type)
 131{
 132        int idx = 0;
 133
 134        if (sample_type & PERF_SAMPLE_IDENTIFIER)
 135                return 0;
 136
 137        if (!(sample_type & PERF_SAMPLE_ID))
 138                return -1;
 139
 140        if (sample_type & PERF_SAMPLE_IP)
 141                idx += 1;
 142
 143        if (sample_type & PERF_SAMPLE_TID)
 144                idx += 1;
 145
 146        if (sample_type & PERF_SAMPLE_TIME)
 147                idx += 1;
 148
 149        if (sample_type & PERF_SAMPLE_ADDR)
 150                idx += 1;
 151
 152        return idx;
 153}
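
/*
 * Worked example: for sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID |
 * PERF_SAMPLE_TIME | PERF_SAMPLE_ID (and no PERF_SAMPLE_IDENTIFIER), the id
 * comes after the ip, tid and time words, so id_pos is 3.
 */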
 154
 155/**
 156 * __perf_evsel__calc_is_pos - calculate is_pos.
 157 * @sample_type: sample type
 158 *
 159 * This function returns the position (counting backwards) of the event id
  160 * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event, i.e. if
  161 * sample_id_all is used, an id sample is appended to non-sample events.
 162 */
 163static int __perf_evsel__calc_is_pos(u64 sample_type)
 164{
 165        int idx = 1;
 166
 167        if (sample_type & PERF_SAMPLE_IDENTIFIER)
 168                return 1;
 169
 170        if (!(sample_type & PERF_SAMPLE_ID))
 171                return -1;
 172
 173        if (sample_type & PERF_SAMPLE_CPU)
 174                idx += 1;
 175
 176        if (sample_type & PERF_SAMPLE_STREAM_ID)
 177                idx += 1;
 178
 179        return idx;
 180}
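
/*
 * Worked example: for sample_type = PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID |
 * PERF_SAMPLE_CPU, the trailing sample_id block of a non-sample event ends
 * with id, stream_id, cpu, so the id is the third u64 from the end and
 * is_pos is 3.
 */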
 181
 182void perf_evsel__calc_id_pos(struct evsel *evsel)
 183{
 184        evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type);
 185        evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type);
 186}
 187
 188void __perf_evsel__set_sample_bit(struct evsel *evsel,
 189                                  enum perf_event_sample_format bit)
 190{
 191        if (!(evsel->core.attr.sample_type & bit)) {
 192                evsel->core.attr.sample_type |= bit;
 193                evsel->sample_size += sizeof(u64);
 194                perf_evsel__calc_id_pos(evsel);
 195        }
 196}
 197
 198void __perf_evsel__reset_sample_bit(struct evsel *evsel,
 199                                    enum perf_event_sample_format bit)
 200{
 201        if (evsel->core.attr.sample_type & bit) {
 202                evsel->core.attr.sample_type &= ~bit;
 203                evsel->sample_size -= sizeof(u64);
 204                perf_evsel__calc_id_pos(evsel);
 205        }
 206}
 207
 208void perf_evsel__set_sample_id(struct evsel *evsel,
 209                               bool can_sample_identifier)
 210{
 211        if (can_sample_identifier) {
 212                perf_evsel__reset_sample_bit(evsel, ID);
 213                perf_evsel__set_sample_bit(evsel, IDENTIFIER);
 214        } else {
 215                perf_evsel__set_sample_bit(evsel, ID);
 216        }
 217        evsel->core.attr.read_format |= PERF_FORMAT_ID;
 218}
 219
 220/**
 221 * perf_evsel__is_function_event - Return whether given evsel is a function
 222 * trace event
 223 *
 224 * @evsel - evsel selector to be tested
 225 *
  226 * Return %true if the event is a function trace event
 227 */
 228bool perf_evsel__is_function_event(struct evsel *evsel)
 229{
 230#define FUNCTION_EVENT "ftrace:function"
 231
 232        return evsel->name &&
 233               !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT));
 234
 235#undef FUNCTION_EVENT
 236}
 237
 238void evsel__init(struct evsel *evsel,
 239                 struct perf_event_attr *attr, int idx)
 240{
 241        perf_evsel__init(&evsel->core, attr);
 242        evsel->idx         = idx;
 243        evsel->tracking    = !idx;
 244        evsel->leader      = evsel;
 245        evsel->unit        = "";
 246        evsel->scale       = 1.0;
 247        evsel->max_events  = ULONG_MAX;
 248        evsel->evlist      = NULL;
 249        evsel->bpf_obj     = NULL;
 250        evsel->bpf_fd      = -1;
 251        INIT_LIST_HEAD(&evsel->config_terms);
 252        perf_evsel__object.init(evsel);
 253        evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
 254        perf_evsel__calc_id_pos(evsel);
 255        evsel->cmdline_group_boundary = false;
 256        evsel->metric_expr   = NULL;
 257        evsel->metric_name   = NULL;
 258        evsel->metric_events = NULL;
 259        evsel->collect_stat  = false;
 260        evsel->pmu_name      = NULL;
 261}
 262
 263struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
 264{
 265        struct evsel *evsel = zalloc(perf_evsel__object.size);
 266
 267        if (!evsel)
 268                return NULL;
 269        evsel__init(evsel, attr, idx);
 270
 271        if (perf_evsel__is_bpf_output(evsel)) {
 272                evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
  273                                            PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD);
 274                evsel->core.attr.sample_period = 1;
 275        }
 276
 277        if (perf_evsel__is_clock(evsel)) {
 278                /*
  279                 * The evsel->unit points to the static alias->unit,
  280                 * so it's ok to use a static string here.
 281                 */
 282                static const char *unit = "msec";
 283
 284                evsel->unit = unit;
 285                evsel->scale = 1e-6;
 286        }
 287
 288        return evsel;
 289}
 290
 291static bool perf_event_can_profile_kernel(void)
 292{
 293        return perf_event_paranoid_check(1);
 294}
 295
 296struct evsel *perf_evsel__new_cycles(bool precise)
 297{
 298        struct perf_event_attr attr = {
 299                .type   = PERF_TYPE_HARDWARE,
 300                .config = PERF_COUNT_HW_CPU_CYCLES,
 301                .exclude_kernel = !perf_event_can_profile_kernel(),
 302        };
 303        struct evsel *evsel;
 304
 305        event_attr_init(&attr);
 306
 307        if (!precise)
 308                goto new_event;
 309
 310        /*
  311         * Now let the usual logic that sets up the perf_event_attr defaults
  312         * kick in when we return, before perf_evsel__open() is called.
 313         */
 314new_event:
 315        evsel = evsel__new(&attr);
 316        if (evsel == NULL)
 317                goto out;
 318
 319        evsel->precise_max = true;
 320
 321        /* use asprintf() because free(evsel) assumes name is allocated */
 322        if (asprintf(&evsel->name, "cycles%s%s%.*s",
 323                     (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
 324                     attr.exclude_kernel ? "u" : "",
 325                     attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0)
 326                goto error_free;
 327out:
 328        return evsel;
 329error_free:
 330        evsel__delete(evsel);
 331        evsel = NULL;
 332        goto out;
 333}
 334
 335/*
 336 * Returns pointer with encoded error via <linux/err.h> interface.
 337 */
 338struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx)
 339{
 340        struct evsel *evsel = zalloc(perf_evsel__object.size);
 341        int err = -ENOMEM;
 342
 343        if (evsel == NULL) {
 344                goto out_err;
 345        } else {
 346                struct perf_event_attr attr = {
 347                        .type          = PERF_TYPE_TRACEPOINT,
 348                        .sample_type   = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
 349                                          PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
 350                };
 351
 352                if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
 353                        goto out_free;
 354
 355                evsel->tp_format = trace_event__tp_format(sys, name);
 356                if (IS_ERR(evsel->tp_format)) {
 357                        err = PTR_ERR(evsel->tp_format);
 358                        goto out_free;
 359                }
 360
 361                event_attr_init(&attr);
 362                attr.config = evsel->tp_format->id;
 363                attr.sample_period = 1;
 364                evsel__init(evsel, &attr, idx);
 365        }
 366
 367        return evsel;
 368
 369out_free:
 370        zfree(&evsel->name);
 371        free(evsel);
 372out_err:
 373        return ERR_PTR(err);
 374}
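
/*
 * Minimal usage sketch (the tracepoint name is only an example): since errors
 * are encoded in the returned pointer, callers check with IS_ERR() rather
 * than against NULL:
 *
 *	struct evsel *evsel = perf_evsel__newtp_idx("sched", "sched_switch", 0);
 *	if (IS_ERR(evsel))
 *		return PTR_ERR(evsel);
 */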
 375
 376const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
 377        "cycles",
 378        "instructions",
 379        "cache-references",
 380        "cache-misses",
 381        "branches",
 382        "branch-misses",
 383        "bus-cycles",
 384        "stalled-cycles-frontend",
 385        "stalled-cycles-backend",
 386        "ref-cycles",
 387};
 388
 389static const char *__perf_evsel__hw_name(u64 config)
 390{
 391        if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
 392                return perf_evsel__hw_names[config];
 393
 394        return "unknown-hardware";
 395}
 396
 397static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
 398{
 399        int colon = 0, r = 0;
 400        struct perf_event_attr *attr = &evsel->core.attr;
 401        bool exclude_guest_default = false;
 402
 403#define MOD_PRINT(context, mod) do {                                    \
 404                if (!attr->exclude_##context) {                         \
 405                        if (!colon) colon = ++r;                        \
 406                        r += scnprintf(bf + r, size - r, "%c", mod);    \
 407                } } while(0)
 408
 409        if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
 410                MOD_PRINT(kernel, 'k');
 411                MOD_PRINT(user, 'u');
 412                MOD_PRINT(hv, 'h');
 413                exclude_guest_default = true;
 414        }
 415
 416        if (attr->precise_ip) {
 417                if (!colon)
 418                        colon = ++r;
 419                r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
 420                exclude_guest_default = true;
 421        }
 422
 423        if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
 424                MOD_PRINT(host, 'H');
 425                MOD_PRINT(guest, 'G');
 426        }
 427#undef MOD_PRINT
 428        if (colon)
 429                bf[colon - 1] = ':';
 430        return r;
 431}
 432
 433static int perf_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
 434{
 435        int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->core.attr.config));
 436        return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 437}
 438
 439const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
 440        "cpu-clock",
 441        "task-clock",
 442        "page-faults",
 443        "context-switches",
 444        "cpu-migrations",
 445        "minor-faults",
 446        "major-faults",
 447        "alignment-faults",
 448        "emulation-faults",
 449        "dummy",
 450};
 451
 452static const char *__perf_evsel__sw_name(u64 config)
 453{
 454        if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
 455                return perf_evsel__sw_names[config];
 456        return "unknown-software";
 457}
 458
 459static int perf_evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
 460{
 461        int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->core.attr.config));
 462        return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 463}
 464
 465static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
 466{
 467        int r;
 468
 469        r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);
 470
 471        if (type & HW_BREAKPOINT_R)
 472                r += scnprintf(bf + r, size - r, "r");
 473
 474        if (type & HW_BREAKPOINT_W)
 475                r += scnprintf(bf + r, size - r, "w");
 476
 477        if (type & HW_BREAKPOINT_X)
 478                r += scnprintf(bf + r, size - r, "x");
 479
 480        return r;
 481}
 482
 483static int perf_evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
 484{
 485        struct perf_event_attr *attr = &evsel->core.attr;
 486        int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
 487        return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 488}
 489
 490const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
 491                                [PERF_EVSEL__MAX_ALIASES] = {
 492 { "L1-dcache", "l1-d",         "l1d",          "L1-data",              },
 493 { "L1-icache", "l1-i",         "l1i",          "L1-instruction",       },
 494 { "LLC",       "L2",                                                   },
 495 { "dTLB",      "d-tlb",        "Data-TLB",                             },
 496 { "iTLB",      "i-tlb",        "Instruction-TLB",                      },
 497 { "branch",    "branches",     "bpu",          "btb",          "bpc",  },
 498 { "node",                                                              },
 499};
 500
 501const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
 502                                   [PERF_EVSEL__MAX_ALIASES] = {
 503 { "load",      "loads",        "read",                                 },
 504 { "store",     "stores",       "write",                                },
 505 { "prefetch",  "prefetches",   "speculative-read", "speculative-load", },
 506};
 507
 508const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
 509                                       [PERF_EVSEL__MAX_ALIASES] = {
 510 { "refs",      "Reference",    "ops",          "access",               },
 511 { "misses",    "miss",                                                 },
 512};
 513
 514#define C(x)            PERF_COUNT_HW_CACHE_##x
 515#define CACHE_READ      (1 << C(OP_READ))
 516#define CACHE_WRITE     (1 << C(OP_WRITE))
 517#define CACHE_PREFETCH  (1 << C(OP_PREFETCH))
 518#define COP(x)          (1 << x)
 519
 520/*
  521 * cache operation stat
 522 * L1I : Read and prefetch only
 523 * ITLB and BPU : Read-only
 524 */
 525static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
 526 [C(L1D)]       = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 527 [C(L1I)]       = (CACHE_READ | CACHE_PREFETCH),
 528 [C(LL)]        = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 529 [C(DTLB)]      = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 530 [C(ITLB)]      = (CACHE_READ),
 531 [C(BPU)]       = (CACHE_READ),
 532 [C(NODE)]      = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
 533};
 534
 535bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
 536{
 537        if (perf_evsel__hw_cache_stat[type] & COP(op))
 538                return true;    /* valid */
 539        else
 540                return false;   /* invalid */
 541}
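
/*
 * Example: per the table above OP_WRITE is not valid for ITLB or BPU, so
 * perf_evsel__is_cache_op_valid(C(ITLB), C(OP_WRITE)) returns false and a
 * request such as "iTLB-stores" is rejected, while the same op on L1D
 * (e.g. "L1-dcache-stores") is accepted.
 */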
 542
 543int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
 544                                            char *bf, size_t size)
 545{
 546        if (result) {
 547                return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
 548                                 perf_evsel__hw_cache_op[op][0],
 549                                 perf_evsel__hw_cache_result[result][0]);
 550        }
 551
 552        return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
 553                         perf_evsel__hw_cache_op[op][1]);
 554}
 555
 556static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
 557{
 558        u8 op, result, type = (config >>  0) & 0xff;
 559        const char *err = "unknown-ext-hardware-cache-type";
 560
 561        if (type >= PERF_COUNT_HW_CACHE_MAX)
 562                goto out_err;
 563
 564        op = (config >>  8) & 0xff;
 565        err = "unknown-ext-hardware-cache-op";
 566        if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
 567                goto out_err;
 568
 569        result = (config >> 16) & 0xff;
 570        err = "unknown-ext-hardware-cache-result";
 571        if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 572                goto out_err;
 573
 574        err = "invalid-cache";
 575        if (!perf_evsel__is_cache_op_valid(type, op))
 576                goto out_err;
 577
 578        return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
 579out_err:
 580        return scnprintf(bf, size, "%s", err);
 581}
 582
 583static int perf_evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
 584{
 585        int ret = __perf_evsel__hw_cache_name(evsel->core.attr.config, bf, size);
 586        return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
 587}
 588
 589static int perf_evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
 590{
 591        int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);
 592        return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
 593}
 594
 595static int perf_evsel__tool_name(char *bf, size_t size)
 596{
 597        int ret = scnprintf(bf, size, "duration_time");
 598        return ret;
 599}
 600
 601const char *perf_evsel__name(struct evsel *evsel)
 602{
 603        char bf[128];
 604
 605        if (!evsel)
 606                goto out_unknown;
 607
 608        if (evsel->name)
 609                return evsel->name;
 610
 611        switch (evsel->core.attr.type) {
 612        case PERF_TYPE_RAW:
 613                perf_evsel__raw_name(evsel, bf, sizeof(bf));
 614                break;
 615
 616        case PERF_TYPE_HARDWARE:
 617                perf_evsel__hw_name(evsel, bf, sizeof(bf));
 618                break;
 619
 620        case PERF_TYPE_HW_CACHE:
 621                perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
 622                break;
 623
 624        case PERF_TYPE_SOFTWARE:
 625                if (evsel->tool_event)
 626                        perf_evsel__tool_name(bf, sizeof(bf));
 627                else
 628                        perf_evsel__sw_name(evsel, bf, sizeof(bf));
 629                break;
 630
 631        case PERF_TYPE_TRACEPOINT:
 632                scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
 633                break;
 634
 635        case PERF_TYPE_BREAKPOINT:
 636                perf_evsel__bp_name(evsel, bf, sizeof(bf));
 637                break;
 638
 639        default:
 640                scnprintf(bf, sizeof(bf), "unknown attr type: %d",
 641                          evsel->core.attr.type);
 642                break;
 643        }
 644
 645        evsel->name = strdup(bf);
 646
 647        if (evsel->name)
 648                return evsel->name;
 649out_unknown:
 650        return "unknown";
 651}
 652
 653const char *perf_evsel__group_name(struct evsel *evsel)
 654{
 655        return evsel->group_name ?: "anon group";
 656}
 657
 658/*
 659 * Returns the group details for the specified leader,
  660 * with the following rules.
 661 *
 662 *  For record -e '{cycles,instructions}'
 663 *    'anon group { cycles:u, instructions:u }'
 664 *
 665 *  For record -e 'cycles,instructions' and report --group
 666 *    'cycles:u, instructions:u'
 667 */
 668int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
 669{
 670        int ret = 0;
 671        struct evsel *pos;
 672        const char *group_name = perf_evsel__group_name(evsel);
 673
 674        if (!evsel->forced_leader)
 675                ret = scnprintf(buf, size, "%s { ", group_name);
 676
 677        ret += scnprintf(buf + ret, size - ret, "%s",
 678                         perf_evsel__name(evsel));
 679
 680        for_each_group_member(pos, evsel)
 681                ret += scnprintf(buf + ret, size - ret, ", %s",
 682                                 perf_evsel__name(pos));
 683
 684        if (!evsel->forced_leader)
 685                ret += scnprintf(buf + ret, size - ret, " }");
 686
 687        return ret;
 688}
 689
 690static void __perf_evsel__config_callchain(struct evsel *evsel,
 691                                           struct record_opts *opts,
 692                                           struct callchain_param *param)
 693{
 694        bool function = perf_evsel__is_function_event(evsel);
 695        struct perf_event_attr *attr = &evsel->core.attr;
 696
 697        perf_evsel__set_sample_bit(evsel, CALLCHAIN);
 698
 699        attr->sample_max_stack = param->max_stack;
 700
 701        if (opts->kernel_callchains)
 702                attr->exclude_callchain_user = 1;
 703        if (opts->user_callchains)
 704                attr->exclude_callchain_kernel = 1;
 705        if (param->record_mode == CALLCHAIN_LBR) {
 706                if (!opts->branch_stack) {
 707                        if (attr->exclude_user) {
 708                                pr_warning("LBR callstack option is only available "
 709                                           "to get user callchain information. "
 710                                           "Falling back to framepointers.\n");
 711                        } else {
 712                                perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
 713                                attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
 714                                                        PERF_SAMPLE_BRANCH_CALL_STACK |
 715                                                        PERF_SAMPLE_BRANCH_NO_CYCLES |
 716                                                        PERF_SAMPLE_BRANCH_NO_FLAGS |
 717                                                        PERF_SAMPLE_BRANCH_HW_INDEX;
 718                        }
 719                } else
 720                         pr_warning("Cannot use LBR callstack with branch stack. "
 721                                    "Falling back to framepointers.\n");
 722        }
 723
 724        if (param->record_mode == CALLCHAIN_DWARF) {
 725                if (!function) {
 726                        perf_evsel__set_sample_bit(evsel, REGS_USER);
 727                        perf_evsel__set_sample_bit(evsel, STACK_USER);
 728                        if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) {
 729                                attr->sample_regs_user |= DWARF_MINIMAL_REGS;
 730                                pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
 731                                           "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
 732                                           "so the minimal registers set (IP, SP) is explicitly forced.\n");
 733                        } else {
 734                                attr->sample_regs_user |= PERF_REGS_MASK;
 735                        }
 736                        attr->sample_stack_user = param->dump_size;
 737                        attr->exclude_callchain_user = 1;
 738                } else {
 739                        pr_info("Cannot use DWARF unwind for function trace event,"
 740                                " falling back to framepointers.\n");
 741                }
 742        }
 743
 744        if (function) {
 745                pr_info("Disabling user space callchains for function trace event.\n");
 746                attr->exclude_callchain_user = 1;
 747        }
 748}
 749
 750void perf_evsel__config_callchain(struct evsel *evsel,
 751                                  struct record_opts *opts,
 752                                  struct callchain_param *param)
 753{
 754        if (param->enabled)
 755                return __perf_evsel__config_callchain(evsel, opts, param);
 756}
 757
 758static void
 759perf_evsel__reset_callgraph(struct evsel *evsel,
 760                            struct callchain_param *param)
 761{
 762        struct perf_event_attr *attr = &evsel->core.attr;
 763
 764        perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
 765        if (param->record_mode == CALLCHAIN_LBR) {
 766                perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
 767                attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
 768                                              PERF_SAMPLE_BRANCH_CALL_STACK |
 769                                              PERF_SAMPLE_BRANCH_HW_INDEX);
 770        }
 771        if (param->record_mode == CALLCHAIN_DWARF) {
 772                perf_evsel__reset_sample_bit(evsel, REGS_USER);
 773                perf_evsel__reset_sample_bit(evsel, STACK_USER);
 774        }
 775}
 776
 777static void apply_config_terms(struct evsel *evsel,
 778                               struct record_opts *opts, bool track)
 779{
 780        struct perf_evsel_config_term *term;
 781        struct list_head *config_terms = &evsel->config_terms;
 782        struct perf_event_attr *attr = &evsel->core.attr;
 783        /* callgraph default */
 784        struct callchain_param param = {
 785                .record_mode = callchain_param.record_mode,
 786        };
 787        u32 dump_size = 0;
 788        int max_stack = 0;
 789        const char *callgraph_buf = NULL;
 790
 791        list_for_each_entry(term, config_terms, list) {
 792                switch (term->type) {
 793                case PERF_EVSEL__CONFIG_TERM_PERIOD:
 794                        if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
 795                                attr->sample_period = term->val.period;
 796                                attr->freq = 0;
 797                                perf_evsel__reset_sample_bit(evsel, PERIOD);
 798                        }
 799                        break;
 800                case PERF_EVSEL__CONFIG_TERM_FREQ:
 801                        if (!(term->weak && opts->user_freq != UINT_MAX)) {
 802                                attr->sample_freq = term->val.freq;
 803                                attr->freq = 1;
 804                                perf_evsel__set_sample_bit(evsel, PERIOD);
 805                        }
 806                        break;
 807                case PERF_EVSEL__CONFIG_TERM_TIME:
 808                        if (term->val.time)
 809                                perf_evsel__set_sample_bit(evsel, TIME);
 810                        else
 811                                perf_evsel__reset_sample_bit(evsel, TIME);
 812                        break;
 813                case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
 814                        callgraph_buf = term->val.str;
 815                        break;
 816                case PERF_EVSEL__CONFIG_TERM_BRANCH:
 817                        if (term->val.str && strcmp(term->val.str, "no")) {
 818                                perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
 819                                parse_branch_str(term->val.str,
 820                                                 &attr->branch_sample_type);
 821                        } else
 822                                perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
 823                        break;
 824                case PERF_EVSEL__CONFIG_TERM_STACK_USER:
 825                        dump_size = term->val.stack_user;
 826                        break;
 827                case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
 828                        max_stack = term->val.max_stack;
 829                        break;
 830                case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS:
 831                        evsel->max_events = term->val.max_events;
 832                        break;
 833                case PERF_EVSEL__CONFIG_TERM_INHERIT:
 834                        /*
  835                         * attr->inherit should have already been set by
  836                         * perf_evsel__config(). If the user explicitly set
  837                         * inherit using config terms, it overrides the global
  838                         * opts->no_inherit setting.
 839                         */
 840                        attr->inherit = term->val.inherit ? 1 : 0;
 841                        break;
 842                case PERF_EVSEL__CONFIG_TERM_OVERWRITE:
 843                        attr->write_backward = term->val.overwrite ? 1 : 0;
 844                        break;
 845                case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
 846                        break;
 847                case PERF_EVSEL__CONFIG_TERM_PERCORE:
 848                        break;
 849                case PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT:
 850                        attr->aux_output = term->val.aux_output ? 1 : 0;
 851                        break;
 852                case PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
 853                        /* Already applied by auxtrace */
 854                        break;
 855                case PERF_EVSEL__CONFIG_TERM_CFG_CHG:
 856                        break;
 857                default:
 858                        break;
 859                }
 860        }
 861
 862        /* User explicitly set per-event callgraph, clear the old setting and reset. */
 863        if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
 864                bool sample_address = false;
 865
 866                if (max_stack) {
 867                        param.max_stack = max_stack;
 868                        if (callgraph_buf == NULL)
 869                                callgraph_buf = "fp";
 870                }
 871
 872                /* parse callgraph parameters */
 873                if (callgraph_buf != NULL) {
 874                        if (!strcmp(callgraph_buf, "no")) {
 875                                param.enabled = false;
 876                                param.record_mode = CALLCHAIN_NONE;
 877                        } else {
 878                                param.enabled = true;
 879                                if (parse_callchain_record(callgraph_buf, &param)) {
 880                                        pr_err("per-event callgraph setting for %s failed. "
 881                                               "Apply callgraph global setting for it\n",
 882                                               evsel->name);
 883                                        return;
 884                                }
 885                                if (param.record_mode == CALLCHAIN_DWARF)
 886                                        sample_address = true;
 887                        }
 888                }
 889                if (dump_size > 0) {
 890                        dump_size = round_up(dump_size, sizeof(u64));
 891                        param.dump_size = dump_size;
 892                }
 893
 894                /* If global callgraph set, clear it */
 895                if (callchain_param.enabled)
 896                        perf_evsel__reset_callgraph(evsel, &callchain_param);
 897
  898                /* set per-event callgraph */
 899                if (param.enabled) {
 900                        if (sample_address) {
 901                                perf_evsel__set_sample_bit(evsel, ADDR);
 902                                perf_evsel__set_sample_bit(evsel, DATA_SRC);
 903                                evsel->core.attr.mmap_data = track;
 904                        }
 905                        perf_evsel__config_callchain(evsel, opts, &param);
 906                }
 907        }
 908}
 909
 910static bool is_dummy_event(struct evsel *evsel)
 911{
 912        return (evsel->core.attr.type == PERF_TYPE_SOFTWARE) &&
 913               (evsel->core.attr.config == PERF_COUNT_SW_DUMMY);
 914}
 915
 916struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel,
 917                                                             enum evsel_term_type type)
 918{
 919        struct perf_evsel_config_term *term, *found_term = NULL;
 920
 921        list_for_each_entry(term, &evsel->config_terms, list) {
 922                if (term->type == type)
 923                        found_term = term;
 924        }
 925
 926        return found_term;
 927}
 928
 929/*
 930 * The enable_on_exec/disabled value strategy:
 931 *
 932 *  1) For any type of traced program:
 933 *    - all independent events and group leaders are disabled
 934 *    - all group members are enabled
 935 *
 936 *     Group members are ruled by group leaders. They need to
 937 *     be enabled, because the group scheduling relies on that.
 938 *
 939 *  2) For traced programs executed by perf:
 940 *     - all independent events and group leaders have
 941 *       enable_on_exec set
 942 *     - we don't specifically enable or disable any event during
 943 *       the record command
 944 *
 945 *     Independent events and group leaders are initially disabled
 946 *     and get enabled by exec. Group members are ruled by group
 947 *     leaders as stated in 1).
 948 *
 949 *  3) For traced programs attached by perf (pid/tid):
 950 *     - we specifically enable or disable all events during
 951 *       the record command
 952 *
  953 *     When attaching events to an already running traced program
  954 *     we enable/disable events specifically, as there's no
 955 *     initial traced exec call.
 956 */
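
/*
 * Illustration of the rules above (command line is an example only): for
 * 'perf record -e "{cycles,instructions}" -- ./workload', the group leader
 * "cycles" gets disabled = 1 and enable_on_exec = 1 below, while the member
 * "instructions" stays enabled and simply follows its leader.
 */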
 957void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
 958                        struct callchain_param *callchain)
 959{
 960        struct evsel *leader = evsel->leader;
 961        struct perf_event_attr *attr = &evsel->core.attr;
 962        int track = evsel->tracking;
 963        bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;
 964
 965        attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
 966        attr->inherit       = !opts->no_inherit;
 967        attr->write_backward = opts->overwrite ? 1 : 0;
 968
 969        perf_evsel__set_sample_bit(evsel, IP);
 970        perf_evsel__set_sample_bit(evsel, TID);
 971
 972        if (evsel->sample_read) {
 973                perf_evsel__set_sample_bit(evsel, READ);
 974
 975                /*
  976                 * We need ID even in the case of a single event, because
  977                 * PERF_SAMPLE_READ processes ID-specific data.
 978                 */
 979                perf_evsel__set_sample_id(evsel, false);
 980
 981                /*
  982                 * Apply the group format only if we belong to a group
  983                 * with more than one member.
 984                 */
 985                if (leader->core.nr_members > 1) {
 986                        attr->read_format |= PERF_FORMAT_GROUP;
 987                        attr->inherit = 0;
 988                }
 989        }
 990
 991        /*
  992         * We give some events a default interval, but keep it
  993         * a weak assumption overridable by the user.
 994         */
 995        if (!attr->sample_period || (opts->user_freq != UINT_MAX ||
 996                                     opts->user_interval != ULLONG_MAX)) {
 997                if (opts->freq) {
 998                        perf_evsel__set_sample_bit(evsel, PERIOD);
 999                        attr->freq              = 1;
1000                        attr->sample_freq       = opts->freq;
1001                } else {
1002                        attr->sample_period = opts->default_interval;
1003                }
1004        }
1005
1006        /*
1007         * Disable sampling for all group members other
 1008         * than the leader in case the leader 'leads' the sampling.
1009         */
1010        if ((leader != evsel) && leader->sample_read) {
1011                attr->freq           = 0;
1012                attr->sample_freq    = 0;
1013                attr->sample_period  = 0;
1014                attr->write_backward = 0;
1015
1016                /*
 1017                 * We don't get a sample for slave events, we make them
 1018                 * when delivering the group leader sample. Set the slave
 1019                 * event to follow the master's sample_type to ease up
 1020                 * reporting.
1021                 */
1022                attr->sample_type = leader->core.attr.sample_type;
1023        }
1024
1025        if (opts->no_samples)
1026                attr->sample_freq = 0;
1027
1028        if (opts->inherit_stat) {
1029                evsel->core.attr.read_format |=
1030                        PERF_FORMAT_TOTAL_TIME_ENABLED |
1031                        PERF_FORMAT_TOTAL_TIME_RUNNING |
1032                        PERF_FORMAT_ID;
1033                attr->inherit_stat = 1;
1034        }
1035
1036        if (opts->sample_address) {
1037                perf_evsel__set_sample_bit(evsel, ADDR);
1038                attr->mmap_data = track;
1039        }
1040
1041        /*
 1042         * We don't allow user space callchains for the function trace
 1043         * event, due to issues with page faults while tracing the page
 1044         * fault handler and its overall trickiness.
1045         */
1046        if (perf_evsel__is_function_event(evsel))
1047                evsel->core.attr.exclude_callchain_user = 1;
1048
1049        if (callchain && callchain->enabled && !evsel->no_aux_samples)
1050                perf_evsel__config_callchain(evsel, opts, callchain);
1051
1052        if (opts->sample_intr_regs) {
1053                attr->sample_regs_intr = opts->sample_intr_regs;
1054                perf_evsel__set_sample_bit(evsel, REGS_INTR);
1055        }
1056
1057        if (opts->sample_user_regs) {
1058                attr->sample_regs_user |= opts->sample_user_regs;
1059                perf_evsel__set_sample_bit(evsel, REGS_USER);
1060        }
1061
1062        if (target__has_cpu(&opts->target) || opts->sample_cpu)
1063                perf_evsel__set_sample_bit(evsel, CPU);
1064
1065        /*
 1066         * When the user explicitly disabled time, don't force it here.
1067         */
1068        if (opts->sample_time &&
1069            (!perf_missing_features.sample_id_all &&
1070            (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
1071             opts->sample_time_set)))
1072                perf_evsel__set_sample_bit(evsel, TIME);
1073
1074        if (opts->raw_samples && !evsel->no_aux_samples) {
1075                perf_evsel__set_sample_bit(evsel, TIME);
1076                perf_evsel__set_sample_bit(evsel, RAW);
1077                perf_evsel__set_sample_bit(evsel, CPU);
1078        }
1079
1080        if (opts->sample_address)
1081                perf_evsel__set_sample_bit(evsel, DATA_SRC);
1082
1083        if (opts->sample_phys_addr)
1084                perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
1085
1086        if (opts->no_buffering) {
1087                attr->watermark = 0;
1088                attr->wakeup_events = 1;
1089        }
1090        if (opts->branch_stack && !evsel->no_aux_samples) {
1091                perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
1092                attr->branch_sample_type = opts->branch_stack;
1093        }
1094
1095        if (opts->sample_weight)
1096                perf_evsel__set_sample_bit(evsel, WEIGHT);
1097
1098        attr->task  = track;
1099        attr->mmap  = track;
1100        attr->mmap2 = track && !perf_missing_features.mmap2;
1101        attr->comm  = track;
1102        attr->ksymbol = track && !perf_missing_features.ksymbol;
1103        attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf;
1104
1105        if (opts->record_namespaces)
1106                attr->namespaces  = track;
1107
1108        if (opts->record_cgroup) {
1109                attr->cgroup = track && !perf_missing_features.cgroup;
1110                perf_evsel__set_sample_bit(evsel, CGROUP);
1111        }
1112
1113        if (opts->record_switch_events)
1114                attr->context_switch = track;
1115
1116        if (opts->sample_transaction)
1117                perf_evsel__set_sample_bit(evsel, TRANSACTION);
1118
1119        if (opts->running_time) {
1120                evsel->core.attr.read_format |=
1121                        PERF_FORMAT_TOTAL_TIME_ENABLED |
1122                        PERF_FORMAT_TOTAL_TIME_RUNNING;
1123        }
1124
1125        /*
1126         * XXX see the function comment above
1127         *
1128         * Disabling only independent events or group leaders,
1129         * keeping group members enabled.
1130         */
1131        if (perf_evsel__is_group_leader(evsel))
1132                attr->disabled = 1;
1133
1134        /*
1135         * Setting enable_on_exec for independent events and
 1136         * group leaders for traced programs executed by perf.
1137         */
1138        if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) &&
1139                !opts->initial_delay)
1140                attr->enable_on_exec = 1;
1141
1142        if (evsel->immediate) {
1143                attr->disabled = 0;
1144                attr->enable_on_exec = 0;
1145        }
1146
1147        clockid = opts->clockid;
1148        if (opts->use_clockid) {
1149                attr->use_clockid = 1;
1150                attr->clockid = opts->clockid;
1151        }
1152
1153        if (evsel->precise_max)
1154                attr->precise_ip = 3;
1155
1156        if (opts->all_user) {
1157                attr->exclude_kernel = 1;
1158                attr->exclude_user   = 0;
1159        }
1160
1161        if (opts->all_kernel) {
1162                attr->exclude_kernel = 0;
1163                attr->exclude_user   = 1;
1164        }
1165
1166        if (evsel->core.own_cpus || evsel->unit)
1167                evsel->core.attr.read_format |= PERF_FORMAT_ID;
1168
1169        /*
 1170         * Apply event-specific term settings;
 1171         * they override any global configuration.
1172         */
1173        apply_config_terms(evsel, opts, track);
1174
1175        evsel->ignore_missing_thread = opts->ignore_missing_thread;
1176
 1177        /* The --period option takes precedence. */
1178        if (opts->period_set) {
1179                if (opts->period)
1180                        perf_evsel__set_sample_bit(evsel, PERIOD);
1181                else
1182                        perf_evsel__reset_sample_bit(evsel, PERIOD);
1183        }
1184
1185        /*
1186         * For initial_delay, a dummy event is added implicitly.
 1187         * The software event will error out with -EOPNOTSUPP
 1188         * if the BRANCH_STACK bit is set.
1189         */
1190        if (opts->initial_delay && is_dummy_event(evsel))
1191                perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
1192}
1193
1194int perf_evsel__set_filter(struct evsel *evsel, const char *filter)
1195{
1196        char *new_filter = strdup(filter);
1197
1198        if (new_filter != NULL) {
1199                free(evsel->filter);
1200                evsel->filter = new_filter;
1201                return 0;
1202        }
1203
1204        return -1;
1205}
1206
1207static int perf_evsel__append_filter(struct evsel *evsel,
1208                                     const char *fmt, const char *filter)
1209{
1210        char *new_filter;
1211
1212        if (evsel->filter == NULL)
1213                return perf_evsel__set_filter(evsel, filter);
1214
1215        if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
1216                free(evsel->filter);
1217                evsel->filter = new_filter;
1218                return 0;
1219        }
1220
1221        return -1;
1222}
1223
1224int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter)
1225{
1226        return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
1227}
1228
1229int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter)
1230{
1231        return perf_evsel__append_filter(evsel, "%s,%s", filter);
1232}
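
/*
 * Example (filter strings are illustrative): if evsel->filter is currently
 * "next_pid == 1", perf_evsel__append_tp_filter(evsel, "prev_pid == 0")
 * yields "(next_pid == 1) && (prev_pid == 0)", whereas address filters are
 * joined with a plain comma.
 */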
1233
1234/* Caller has to clear disabled after going through all CPUs. */
1235int evsel__enable_cpu(struct evsel *evsel, int cpu)
1236{
1237        return perf_evsel__enable_cpu(&evsel->core, cpu);
1238}
1239
1240int evsel__enable(struct evsel *evsel)
1241{
1242        int err = perf_evsel__enable(&evsel->core);
1243
1244        if (!err)
1245                evsel->disabled = false;
1246        return err;
1247}
1248
1249/* Caller has to set disabled after going through all CPUs. */
1250int evsel__disable_cpu(struct evsel *evsel, int cpu)
1251{
1252        return perf_evsel__disable_cpu(&evsel->core, cpu);
1253}
1254
1255int evsel__disable(struct evsel *evsel)
1256{
1257        int err = perf_evsel__disable(&evsel->core);
1258        /*
 1259         * We mark it disabled here so that tools that disable an event can
 1260         * ignore events after they disable it. I.e. the ring buffer may
 1261         * already have a few more events queued up before the kernel got the stop
1262         * request.
1263         */
1264        if (!err)
1265                evsel->disabled = true;
1266
1267        return err;
1268}
1269
1270static void perf_evsel__free_config_terms(struct evsel *evsel)
1271{
1272        struct perf_evsel_config_term *term, *h;
1273
1274        list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
1275                list_del_init(&term->list);
1276                if (term->free_str)
1277                        zfree(&term->val.str);
1278                free(term);
1279        }
1280}
1281
1282void perf_evsel__exit(struct evsel *evsel)
1283{
1284        assert(list_empty(&evsel->core.node));
1285        assert(evsel->evlist == NULL);
1286        perf_evsel__free_counts(evsel);
1287        perf_evsel__free_fd(&evsel->core);
1288        perf_evsel__free_id(&evsel->core);
1289        perf_evsel__free_config_terms(evsel);
1290        cgroup__put(evsel->cgrp);
1291        perf_cpu_map__put(evsel->core.cpus);
1292        perf_cpu_map__put(evsel->core.own_cpus);
1293        perf_thread_map__put(evsel->core.threads);
1294        zfree(&evsel->group_name);
1295        zfree(&evsel->name);
1296        zfree(&evsel->pmu_name);
1297        perf_evsel__object.fini(evsel);
1298}
1299
1300void evsel__delete(struct evsel *evsel)
1301{
1302        perf_evsel__exit(evsel);
1303        free(evsel);
1304}
1305
1306void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
1307                                struct perf_counts_values *count)
1308{
1309        struct perf_counts_values tmp;
1310
1311        if (!evsel->prev_raw_counts)
1312                return;
1313
1314        if (cpu == -1) {
1315                tmp = evsel->prev_raw_counts->aggr;
1316                evsel->prev_raw_counts->aggr = *count;
1317        } else {
1318                tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread);
1319                *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
1320        }
1321
1322        count->val = count->val - tmp.val;
1323        count->ena = count->ena - tmp.ena;
1324        count->run = count->run - tmp.run;
1325}
1326
1327void perf_counts_values__scale(struct perf_counts_values *count,
1328                               bool scale, s8 *pscaled)
1329{
1330        s8 scaled = 0;
1331
1332        if (scale) {
1333                if (count->run == 0) {
1334                        scaled = -1;
1335                        count->val = 0;
1336                } else if (count->run < count->ena) {
1337                        scaled = 1;
1338                        count->val = (u64)((double) count->val * count->ena / count->run);
1339                }
1340        }
1341
1342        if (pscaled)
1343                *pscaled = scaled;
1344}
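
/*
 * Worked example: with val = 1000, ena = 200 and run = 100 the event was
 * scheduled in for half of its enabled time, so the value is scaled to
 * 1000 * 200 / 100 = 2000 and *pscaled is set to 1; run == 0 zeroes the
 * value and reports -1 instead.
 */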
1345
1346static int
1347perf_evsel__read_one(struct evsel *evsel, int cpu, int thread)
1348{
1349        struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread);
1350
1351        return perf_evsel__read(&evsel->core, cpu, thread, count);
1352}
1353
1354static void
1355perf_evsel__set_count(struct evsel *counter, int cpu, int thread,
1356                      u64 val, u64 ena, u64 run)
1357{
1358        struct perf_counts_values *count;
1359
1360        count = perf_counts(counter->counts, cpu, thread);
1361
1362        count->val    = val;
1363        count->ena    = ena;
1364        count->run    = run;
1365
1366        perf_counts__set_loaded(counter->counts, cpu, thread, true);
1367}
1368
1369static int
1370perf_evsel__process_group_data(struct evsel *leader,
1371                               int cpu, int thread, u64 *data)
1372{
1373        u64 read_format = leader->core.attr.read_format;
1374        struct sample_read_value *v;
1375        u64 nr, ena = 0, run = 0, i;
1376
1377        nr = *data++;
1378
1379        if (nr != (u64) leader->core.nr_members)
1380                return -EINVAL;
1381
1382        if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1383                ena = *data++;
1384
1385        if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1386                run = *data++;
1387
1388        v = (struct sample_read_value *) data;
1389
1390        perf_evsel__set_count(leader, cpu, thread,
1391                              v[0].value, ena, run);
1392
1393        for (i = 1; i < nr; i++) {
1394                struct evsel *counter;
1395
1396                counter = perf_evlist__id2evsel(leader->evlist, v[i].id);
1397                if (!counter)
1398                        return -EINVAL;
1399
1400                perf_evsel__set_count(counter, cpu, thread,
1401                                      v[i].value, ena, run);
1402        }
1403
1404        return 0;
1405}
1406
1407static int
1408perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
1409{
1410        struct perf_stat_evsel *ps = leader->stats;
1411        u64 read_format = leader->core.attr.read_format;
1412        int size = perf_evsel__read_size(&leader->core);
1413        u64 *data = ps->group_data;
1414
1415        if (!(read_format & PERF_FORMAT_ID))
1416                return -EINVAL;
1417
1418        if (!perf_evsel__is_group_leader(leader))
1419                return -EINVAL;
1420
1421        if (!data) {
1422                data = zalloc(size);
1423                if (!data)
1424                        return -ENOMEM;
1425
1426                ps->group_data = data;
1427        }
1428
1429        if (FD(leader, cpu, thread) < 0)
1430                return -EINVAL;
1431
1432        if (readn(FD(leader, cpu, thread), data, size) <= 0)
1433                return -errno;
1434
1435        return perf_evsel__process_group_data(leader, cpu, thread, data);
1436}
1437
1438int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread)
1439{
1440        u64 read_format = evsel->core.attr.read_format;
1441
1442        if (read_format & PERF_FORMAT_GROUP)
1443                return perf_evsel__read_group(evsel, cpu, thread);
1444        else
1445                return perf_evsel__read_one(evsel, cpu, thread);
1446}
1447
1448int __perf_evsel__read_on_cpu(struct evsel *evsel,
1449                              int cpu, int thread, bool scale)
1450{
1451        struct perf_counts_values count;
1452        size_t nv = scale ? 3 : 1;
1453
1454        if (FD(evsel, cpu, thread) < 0)
1455                return -EINVAL;
1456
1457        if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
1458                return -ENOMEM;
1459
1460        if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
1461                return -errno;
1462
1463        perf_evsel__compute_deltas(evsel, cpu, thread, &count);
1464        perf_counts_values__scale(&count, scale, NULL);
1465        *perf_counts(evsel->counts, cpu, thread) = count;
1466        return 0;
1467}
1468
1469static int get_group_fd(struct evsel *evsel, int cpu, int thread)
1470{
1471        struct evsel *leader = evsel->leader;
1472        int fd;
1473
1474        if (perf_evsel__is_group_leader(evsel))
1475                return -1;
1476
1477        /*
 1478         * The leader must already be processed/open;
 1479         * if not, it's a bug.
1480         */
1481        BUG_ON(!leader->core.fd);
1482
1483        fd = FD(leader, cpu, thread);
1484        BUG_ON(fd == -1);
1485
1486        return fd;
1487}
1488
1489static void perf_evsel__remove_fd(struct evsel *pos,
1490                                  int nr_cpus, int nr_threads,
1491                                  int thread_idx)
1492{
1493        for (int cpu = 0; cpu < nr_cpus; cpu++)
1494                for (int thread = thread_idx; thread < nr_threads - 1; thread++)
1495                        FD(pos, cpu, thread) = FD(pos, cpu, thread + 1);
1496}
1497
1498static int update_fds(struct evsel *evsel,
1499                      int nr_cpus, int cpu_idx,
1500                      int nr_threads, int thread_idx)
1501{
1502        struct evsel *pos;
1503
1504        if (cpu_idx >= nr_cpus || thread_idx >= nr_threads)
1505                return -EINVAL;
1506
1507        evlist__for_each_entry(evsel->evlist, pos) {
1508                nr_cpus = pos != evsel ? nr_cpus : cpu_idx;
1509
1510                perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
1511
1512                /*
 1513                 * Since the fds for the next evsel have not been created,
 1514                 * there is no need to iterate the whole event list.
1515                 */
1516                if (pos == evsel)
1517                        break;
1518        }
1519        return 0;
1520}
1521
1522static bool ignore_missing_thread(struct evsel *evsel,
1523                                  int nr_cpus, int cpu,
1524                                  struct perf_thread_map *threads,
1525                                  int thread, int err)
1526{
1527        pid_t ignore_pid = perf_thread_map__pid(threads, thread);
1528
1529        if (!evsel->ignore_missing_thread)
1530                return false;
1531
1532        /* The system-wide setup does not work with threads. */
1533        if (evsel->core.system_wide)
1534                return false;
1535
1536        /* -ESRCH is the perf_event_open() syscall errno for PIDs that are not found. */
1537        if (err != -ESRCH)
1538                return false;
1539
1540        /* If there's only one thread, let it fail. */
1541        if (threads->nr == 1)
1542                return false;
1543
1544        /*
1545         * Remove the fd for the missing thread first,
1546         * because thread_map__remove() will decrease threads->nr.
1547         */
1548        if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread))
1549                return false;
1550
1551        if (thread_map__remove(threads, thread))
1552                return false;
1553
1554        pr_warning("WARNING: Ignored open failure for pid %d\n",
1555                   ignore_pid);
1556        return true;
1557}
1558
1559static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
1560                                void *priv __maybe_unused)
1561{
1562        return fprintf(fp, "  %-32s %s\n", name, val);
1563}
1564
1565static void display_attr(struct perf_event_attr *attr)
1566{
1567        if (verbose >= 2 || debug_peo_args) {
1568                fprintf(stderr, "%.60s\n", graph_dotted_line);
1569                fprintf(stderr, "perf_event_attr:\n");
1570                perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL);
1571                fprintf(stderr, "%.60s\n", graph_dotted_line);
1572        }
1573}
1574
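/*
 * Wrapper around sys_perf_event_open() that, when evsel->precise_max is set,
 * keeps retrying with a lower precise_ip until the open succeeds; once even
 * precise_ip == 0 has failed, the original value is restored so the regular
 * fallback paths see the attr that was requested.
 */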
1575static int perf_event_open(struct evsel *evsel,
1576                           pid_t pid, int cpu, int group_fd,
1577                           unsigned long flags)
1578{
1579        int precise_ip = evsel->core.attr.precise_ip;
1580        int fd;
1581
1582        while (1) {
1583                pr_debug2_peo("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx",
1584                          pid, cpu, group_fd, flags);
1585
1586                fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, group_fd, flags);
1587                if (fd >= 0)
1588                        break;
1589
1590                /* Do not try less precise values if not requested. */
1591                if (!evsel->precise_max)
1592                        break;
1593
1594                /*
1595                 * We tried all the precise_ip values and it is
1596                 * still failing, so leave it to the standard fallback.
1597                 */
1598                if (!evsel->core.attr.precise_ip) {
1599                        evsel->core.attr.precise_ip = precise_ip;
1600                        break;
1601                }
1602
1603                pr_debug2_peo("\nsys_perf_event_open failed, error %d\n", -ENOTSUP);
1604                evsel->core.attr.precise_ip--;
1605                pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip);
1606                display_attr(&evsel->core.attr);
1607        }
1608
1609        return fd;
1610}
1611
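/*
 * Open the event on every cpu in [start_cpu, end_cpu) and on every monitored
 * thread.  Missing kernel features are detected from the open errors and
 * switched off (see the fallback_missing_features label), EMFILE is handled
 * by raising RLIMIT_NOFILE, and threads that exited in the meantime can be
 * ignored via ignore_missing_thread().
 */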
1612static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
1613                struct perf_thread_map *threads,
1614                int start_cpu, int end_cpu)
1615{
1616        int cpu, thread, nthreads;
1617        unsigned long flags = PERF_FLAG_FD_CLOEXEC;
1618        int pid = -1, err, old_errno;
1619        enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;
1620
1621        if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) ||
1622            (perf_missing_features.aux_output     && evsel->core.attr.aux_output))
1623                return -EINVAL;
1624
1625        if (cpus == NULL) {
1626                static struct perf_cpu_map *empty_cpu_map;
1627
1628                if (empty_cpu_map == NULL) {
1629                        empty_cpu_map = perf_cpu_map__dummy_new();
1630                        if (empty_cpu_map == NULL)
1631                                return -ENOMEM;
1632                }
1633
1634                cpus = empty_cpu_map;
1635        }
1636
1637        if (threads == NULL) {
1638                static struct perf_thread_map *empty_thread_map;
1639
1640                if (empty_thread_map == NULL) {
1641                        empty_thread_map = thread_map__new_by_tid(-1);
1642                        if (empty_thread_map == NULL)
1643                                return -ENOMEM;
1644                }
1645
1646                threads = empty_thread_map;
1647        }
1648
1649        if (evsel->core.system_wide)
1650                nthreads = 1;
1651        else
1652                nthreads = threads->nr;
1653
1654        if (evsel->core.fd == NULL &&
1655            perf_evsel__alloc_fd(&evsel->core, cpus->nr, nthreads) < 0)
1656                return -ENOMEM;
1657
1658        if (evsel->cgrp) {
1659                flags |= PERF_FLAG_PID_CGROUP;
1660                pid = evsel->cgrp->fd;
1661        }
1662
1663fallback_missing_features:
1664        if (perf_missing_features.clockid_wrong)
1665                evsel->core.attr.clockid = CLOCK_MONOTONIC; /* should always work */
1666        if (perf_missing_features.clockid) {
1667                evsel->core.attr.use_clockid = 0;
1668                evsel->core.attr.clockid = 0;
1669        }
1670        if (perf_missing_features.cloexec)
1671                flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
1672        if (perf_missing_features.mmap2)
1673                evsel->core.attr.mmap2 = 0;
1674        if (perf_missing_features.exclude_guest)
1675                evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
1676        if (perf_missing_features.lbr_flags)
1677                evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
1678                                     PERF_SAMPLE_BRANCH_NO_CYCLES);
1679        if (perf_missing_features.group_read && evsel->core.attr.inherit)
1680                evsel->core.attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
1681        if (perf_missing_features.ksymbol)
1682                evsel->core.attr.ksymbol = 0;
1683        if (perf_missing_features.bpf)
1684                evsel->core.attr.bpf_event = 0;
1685        if (perf_missing_features.branch_hw_idx)
1686                evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
1687retry_sample_id:
1688        if (perf_missing_features.sample_id_all)
1689                evsel->core.attr.sample_id_all = 0;
1690
1691        display_attr(&evsel->core.attr);
1692
1693        for (cpu = start_cpu; cpu < end_cpu; cpu++) {
1694
1695                for (thread = 0; thread < nthreads; thread++) {
1696                        int fd, group_fd;
1697
1698                        if (!evsel->cgrp && !evsel->core.system_wide)
1699                                pid = perf_thread_map__pid(threads, thread);
1700
1701                        group_fd = get_group_fd(evsel, cpu, thread);
1702retry_open:
1703                        test_attr__ready();
1704
1705                        fd = perf_event_open(evsel, pid, cpus->map[cpu],
1706                                             group_fd, flags);
1707
1708                        FD(evsel, cpu, thread) = fd;
1709
1710                        if (fd < 0) {
1711                                err = -errno;
1712
1713                                if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {
1714                                        /*
1715                                         * We just removed 1 thread, so take a step
1716                                         * back on thread index and lower the upper
1717                                         * nthreads limit.
1718                                         */
1719                                        nthreads--;
1720                                        thread--;
1721
1722                                        /* ... and pretend that nothing has happened. */
1723                                        err = 0;
1724                                        continue;
1725                                }
1726
1727                                pr_debug2_peo("\nsys_perf_event_open failed, error %d\n",
1728                                          err);
1729                                goto try_fallback;
1730                        }
1731
1732                        pr_debug2_peo(" = %d\n", fd);
1733
1734                        if (evsel->bpf_fd >= 0) {
1735                                int evt_fd = fd;
1736                                int bpf_fd = evsel->bpf_fd;
1737
1738                                err = ioctl(evt_fd,
1739                                            PERF_EVENT_IOC_SET_BPF,
1740                                            bpf_fd);
1741                                if (err && errno != EEXIST) {
1742                                        pr_err("failed to attach bpf fd %d: %s\n",
1743                                               bpf_fd, strerror(errno));
1744                                        err = -EINVAL;
1745                                        goto out_close;
1746                                }
1747                        }
1748
1749                        set_rlimit = NO_CHANGE;
1750
1751                        /*
1752                         * If we succeeded but had to kill clockid, fail and
1753                         * have perf_evsel__open_strerror() print us a nice
1754                         * error.
1755                         */
1756                        if (perf_missing_features.clockid ||
1757                            perf_missing_features.clockid_wrong) {
1758                                err = -EINVAL;
1759                                goto out_close;
1760                        }
1761                }
1762        }
1763
1764        return 0;
1765
1766try_fallback:
1767        /*
1768         * perf stat needs between 5 and 22 fds per CPU. When we run out
1769         * of them, try to increase the limits.
1770         */
1771        if (err == -EMFILE && set_rlimit < INCREASED_MAX) {
1772                struct rlimit l;
1773
1774                old_errno = errno;
1775                if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
1776                        if (set_rlimit == NO_CHANGE)
1777                                l.rlim_cur = l.rlim_max;
1778                        else {
1779                                l.rlim_cur = l.rlim_max + 1000;
1780                                l.rlim_max = l.rlim_cur;
1781                        }
1782                        if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
1783                                set_rlimit++;
1784                                errno = old_errno;
1785                                goto retry_open;
1786                        }
1787                }
1788                errno = old_errno;
1789        }
1790
1791        if (err != -EINVAL || cpu > 0 || thread > 0)
1792                goto out_close;
1793
1794        /*
1795         * Must probe features in the order they were added to the
1796         * perf_event_attr interface.
1797         */
1798        if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
1799                perf_missing_features.cgroup = true;
1800                pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n");
1801                goto out_close;
1802        } else if (!perf_missing_features.branch_hw_idx &&
1803            (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
1804                perf_missing_features.branch_hw_idx = true;
1805                pr_debug2("switching off branch HW index support\n");
1806                goto fallback_missing_features;
1807        } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
1808                perf_missing_features.aux_output = true;
1809                pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n");
1810                goto out_close;
1811        } else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) {
1812                perf_missing_features.bpf = true;
1813                pr_debug2_peo("switching off bpf_event\n");
1814                goto fallback_missing_features;
1815        } else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) {
1816                perf_missing_features.ksymbol = true;
1817                pr_debug2_peo("switching off ksymbol\n");
1818                goto fallback_missing_features;
1819        } else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) {
1820                perf_missing_features.write_backward = true;
1821                pr_debug2_peo("switching off write_backward\n");
1822                goto out_close;
1823        } else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) {
1824                perf_missing_features.clockid_wrong = true;
1825                pr_debug2_peo("switching off clockid\n");
1826                goto fallback_missing_features;
1827        } else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) {
1828                perf_missing_features.clockid = true;
1829                pr_debug2_peo("switching off use_clockid\n");
1830                goto fallback_missing_features;
1831        } else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) {
1832                perf_missing_features.cloexec = true;
1833                pr_debug2_peo("switching off cloexec flag\n");
1834                goto fallback_missing_features;
1835        } else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) {
1836                perf_missing_features.mmap2 = true;
1837                pr_debug2_peo("switching off mmap2\n");
1838                goto fallback_missing_features;
1839        } else if (!perf_missing_features.exclude_guest &&
1840                   (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
1841                perf_missing_features.exclude_guest = true;
1842                pr_debug2_peo("switching off exclude_guest, exclude_host\n");
1843                goto fallback_missing_features;
1844        } else if (!perf_missing_features.sample_id_all) {
1845                perf_missing_features.sample_id_all = true;
1846                pr_debug2_peo("switching off sample_id_all\n");
1847                goto retry_sample_id;
1848        } else if (!perf_missing_features.lbr_flags &&
1849                        (evsel->core.attr.branch_sample_type &
1850                         (PERF_SAMPLE_BRANCH_NO_CYCLES |
1851                          PERF_SAMPLE_BRANCH_NO_FLAGS))) {
1852                perf_missing_features.lbr_flags = true;
1853                pr_debug2_peo("switching off branch sample type no (cycles/flags)\n");
1854                goto fallback_missing_features;
1855        } else if (!perf_missing_features.group_read &&
1856                    evsel->core.attr.inherit &&
1857                   (evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
1858                   perf_evsel__is_group_leader(evsel)) {
1859                perf_missing_features.group_read = true;
1860                pr_debug2_peo("switching off group read\n");
1861                goto fallback_missing_features;
1862        }
1863out_close:
1864        if (err)
1865                threads->err_thread = thread;
1866
1867        old_errno = errno;
1868        do {
1869                while (--thread >= 0) {
1870                        if (FD(evsel, cpu, thread) >= 0)
1871                                close(FD(evsel, cpu, thread));
1872                        FD(evsel, cpu, thread) = -1;
1873                }
1874                thread = nthreads;
1875        } while (--cpu >= 0);
1876        errno = old_errno;
1877        return err;
1878}
1879
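/*
 * Convenience wrappers around evsel__open_cpu().  evsel__open() covers the
 * whole cpu map (or a dummy map when cpus is NULL); evsel__close() releases
 * the fds and the sample ids.  Roughly, a caller opens the event, reads it
 * with perf_evsel__read_counter() or __perf_evsel__read_on_cpu(), and then
 * closes it.
 */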
1880int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
1881                struct perf_thread_map *threads)
1882{
1883        return evsel__open_cpu(evsel, cpus, threads, 0, cpus ? cpus->nr : 1);
1884}
1885
1886void evsel__close(struct evsel *evsel)
1887{
1888        perf_evsel__close(&evsel->core);
1889        perf_evsel__free_id(&evsel->core);
1890}
1891
1892int perf_evsel__open_per_cpu(struct evsel *evsel,
1893                             struct perf_cpu_map *cpus,
1894                             int cpu)
1895{
1896        if (cpu == -1)
1897                return evsel__open_cpu(evsel, cpus, NULL, 0,
1898                                        cpus ? cpus->nr : 1);
1899
1900        return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1);
1901}
1902
1903int perf_evsel__open_per_thread(struct evsel *evsel,
1904                                struct perf_thread_map *threads)
1905{
1906        return evsel__open(evsel, NULL, threads);
1907}
1908
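/*
 * Parse the sample_id_all trailer of a non-sample record: the id fields are
 * laid out at the end of the event, so walk the array backwards from
 * event->header.size.
 */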
1909static int perf_evsel__parse_id_sample(const struct evsel *evsel,
1910                                       const union perf_event *event,
1911                                       struct perf_sample *sample)
1912{
1913        u64 type = evsel->core.attr.sample_type;
1914        const __u64 *array = event->sample.array;
1915        bool swapped = evsel->needs_swap;
1916        union u64_swap u;
1917
1918        array += ((event->header.size -
1919                   sizeof(event->header)) / sizeof(u64)) - 1;
1920
1921        if (type & PERF_SAMPLE_IDENTIFIER) {
1922                sample->id = *array;
1923                array--;
1924        }
1925
1926        if (type & PERF_SAMPLE_CPU) {
1927                u.val64 = *array;
1928                if (swapped) {
1929                        /* undo swap of u64, then swap on individual u32s */
1930                        u.val64 = bswap_64(u.val64);
1931                        u.val32[0] = bswap_32(u.val32[0]);
1932                }
1933
1934                sample->cpu = u.val32[0];
1935                array--;
1936        }
1937
1938        if (type & PERF_SAMPLE_STREAM_ID) {
1939                sample->stream_id = *array;
1940                array--;
1941        }
1942
1943        if (type & PERF_SAMPLE_ID) {
1944                sample->id = *array;
1945                array--;
1946        }
1947
1948        if (type & PERF_SAMPLE_TIME) {
1949                sample->time = *array;
1950                array--;
1951        }
1952
1953        if (type & PERF_SAMPLE_TID) {
1954                u.val64 = *array;
1955                if (swapped) {
1956                        /* undo swap of u64, then swap on individual u32s */
1957                        u.val64 = bswap_64(u.val64);
1958                        u.val32[0] = bswap_32(u.val32[0]);
1959                        u.val32[1] = bswap_32(u.val32[1]);
1960                }
1961
1962                sample->pid = u.val32[0];
1963                sample->tid = u.val32[1];
1964                array--;
1965        }
1966
1967        return 0;
1968}
1969
1970static inline bool overflow(const void *endp, u16 max_size, const void *offset,
1971                            u64 size)
1972{
1973        return size > max_size || offset + size > endp;
1974}
1975
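/*
 * Bounds checks used while walking the sample array: a field of 'size' bytes
 * at 'offset' must fit both in the u16 header size and before 'endp', the
 * end of the event record.
 */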
1976#define OVERFLOW_CHECK(offset, size, max_size)                          \
1977        do {                                                            \
1978                if (overflow(endp, (max_size), (offset), (size)))       \
1979                        return -EFAULT;                                 \
1980        } while (0)
1981
1982#define OVERFLOW_CHECK_u64(offset) \
1983        OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
1984
1985static int
1986perf_event__check_size(union perf_event *event, unsigned int sample_size)
1987{
1988        /*
1989         * The evsel's sample_size is based on PERF_SAMPLE_MASK, which includes
1990         * up to PERF_SAMPLE_PERIOD.  After that, overflow() must be used to
1991         * check that the format does not go past the end of the event.
1992         */
1993        if (sample_size + sizeof(event->header) > event->header.size)
1994                return -EFAULT;
1995
1996        return 0;
1997}
1998
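/*
 * Decode a PERF_RECORD_SAMPLE into struct perf_sample, walking the array in
 * the order the kernel emits the fields for this evsel's sample_type.  For
 * other record types only the sample_id_all trailer is parsed.
 */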
1999int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
2000                             struct perf_sample *data)
2001{
2002        u64 type = evsel->core.attr.sample_type;
2003        bool swapped = evsel->needs_swap;
2004        const __u64 *array;
2005        u16 max_size = event->header.size;
2006        const void *endp = (void *)event + max_size;
2007        u64 sz;
2008
2009        /*
2010         * used for cross-endian analysis. See git commit 65014ab3
2011         * for why this goofiness is needed.
2012         */
2013        union u64_swap u;
2014
2015        memset(data, 0, sizeof(*data));
2016        data->cpu = data->pid = data->tid = -1;
2017        data->stream_id = data->id = data->time = -1ULL;
2018        data->period = evsel->core.attr.sample_period;
2019        data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2020        data->misc    = event->header.misc;
2021        data->id = -1ULL;
2022        data->data_src = PERF_MEM_DATA_SRC_NONE;
2023
2024        if (event->header.type != PERF_RECORD_SAMPLE) {
2025                if (!evsel->core.attr.sample_id_all)
2026                        return 0;
2027                return perf_evsel__parse_id_sample(evsel, event, data);
2028        }
2029
2030        array = event->sample.array;
2031
2032        if (perf_event__check_size(event, evsel->sample_size))
2033                return -EFAULT;
2034
2035        if (type & PERF_SAMPLE_IDENTIFIER) {
2036                data->id = *array;
2037                array++;
2038        }
2039
2040        if (type & PERF_SAMPLE_IP) {
2041                data->ip = *array;
2042                array++;
2043        }
2044
2045        if (type & PERF_SAMPLE_TID) {
2046                u.val64 = *array;
2047                if (swapped) {
2048                        /* undo swap of u64, then swap on individual u32s */
2049                        u.val64 = bswap_64(u.val64);
2050                        u.val32[0] = bswap_32(u.val32[0]);
2051                        u.val32[1] = bswap_32(u.val32[1]);
2052                }
2053
2054                data->pid = u.val32[0];
2055                data->tid = u.val32[1];
2056                array++;
2057        }
2058
2059        if (type & PERF_SAMPLE_TIME) {
2060                data->time = *array;
2061                array++;
2062        }
2063
2064        if (type & PERF_SAMPLE_ADDR) {
2065                data->addr = *array;
2066                array++;
2067        }
2068
2069        if (type & PERF_SAMPLE_ID) {
2070                data->id = *array;
2071                array++;
2072        }
2073
2074        if (type & PERF_SAMPLE_STREAM_ID) {
2075                data->stream_id = *array;
2076                array++;
2077        }
2078
2079        if (type & PERF_SAMPLE_CPU) {
2080
2081                u.val64 = *array;
2082                if (swapped) {
2083                        /* undo swap of u64, then swap on individual u32s */
2084                        u.val64 = bswap_64(u.val64);
2085                        u.val32[0] = bswap_32(u.val32[0]);
2086                }
2087
2088                data->cpu = u.val32[0];
2089                array++;
2090        }
2091
2092        if (type & PERF_SAMPLE_PERIOD) {
2093                data->period = *array;
2094                array++;
2095        }
2096
2097        if (type & PERF_SAMPLE_READ) {
2098                u64 read_format = evsel->core.attr.read_format;
2099
2100                OVERFLOW_CHECK_u64(array);
2101                if (read_format & PERF_FORMAT_GROUP)
2102                        data->read.group.nr = *array;
2103                else
2104                        data->read.one.value = *array;
2105
2106                array++;
2107
2108                if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2109                        OVERFLOW_CHECK_u64(array);
2110                        data->read.time_enabled = *array;
2111                        array++;
2112                }
2113
2114                if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2115                        OVERFLOW_CHECK_u64(array);
2116                        data->read.time_running = *array;
2117                        array++;
2118                }
2119
2120                /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
2121                if (read_format & PERF_FORMAT_GROUP) {
2122                        const u64 max_group_nr = UINT64_MAX /
2123                                        sizeof(struct sample_read_value);
2124
2125                        if (data->read.group.nr > max_group_nr)
2126                                return -EFAULT;
2127                        sz = data->read.group.nr *
2128                             sizeof(struct sample_read_value);
2129                        OVERFLOW_CHECK(array, sz, max_size);
2130                        data->read.group.values =
2131                                        (struct sample_read_value *)array;
2132                        array = (void *)array + sz;
2133                } else {
2134                        OVERFLOW_CHECK_u64(array);
2135                        data->read.one.id = *array;
2136                        array++;
2137                }
2138        }
2139
2140        if (evsel__has_callchain(evsel)) {
2141                const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
2142
2143                OVERFLOW_CHECK_u64(array);
2144                data->callchain = (struct ip_callchain *)array++;
2145                if (data->callchain->nr > max_callchain_nr)
2146                        return -EFAULT;
2147                sz = data->callchain->nr * sizeof(u64);
2148                OVERFLOW_CHECK(array, sz, max_size);
2149                array = (void *)array + sz;
2150        }
2151
2152        if (type & PERF_SAMPLE_RAW) {
2153                OVERFLOW_CHECK_u64(array);
2154                u.val64 = *array;
2155
2156                /*
2157                 * Undo swap of u64, then swap on individual u32s,
2158                 * get the size of the raw area and undo all of the
2159                 * swap. The pevent interface handles endianness by
2160                 * itself.
2161                 */
2162                if (swapped) {
2163                        u.val64 = bswap_64(u.val64);
2164                        u.val32[0] = bswap_32(u.val32[0]);
2165                        u.val32[1] = bswap_32(u.val32[1]);
2166                }
2167                data->raw_size = u.val32[0];
2168
2169                /*
2170                 * The raw data is aligned on 64 bits including the
2171                 * u32 size, so it's safe to use mem_bswap_64.
2172                 */
2173                if (swapped)
2174                        mem_bswap_64((void *) array, data->raw_size);
2175
2176                array = (void *)array + sizeof(u32);
2177
2178                OVERFLOW_CHECK(array, data->raw_size, max_size);
2179                data->raw_data = (void *)array;
2180                array = (void *)array + data->raw_size;
2181        }
2182
2183        if (type & PERF_SAMPLE_BRANCH_STACK) {
2184                const u64 max_branch_nr = UINT64_MAX /
2185                                          sizeof(struct branch_entry);
2186
2187                OVERFLOW_CHECK_u64(array);
2188                data->branch_stack = (struct branch_stack *)array++;
2189
2190                if (data->branch_stack->nr > max_branch_nr)
2191                        return -EFAULT;
2192
2193                sz = data->branch_stack->nr * sizeof(struct branch_entry);
2194                if (perf_evsel__has_branch_hw_idx(evsel))
2195                        sz += sizeof(u64);
2196                else
2197                        data->no_hw_idx = true;
2198                OVERFLOW_CHECK(array, sz, max_size);
2199                array = (void *)array + sz;
2200        }
2201
2202        if (type & PERF_SAMPLE_REGS_USER) {
2203                OVERFLOW_CHECK_u64(array);
2204                data->user_regs.abi = *array;
2205                array++;
2206
2207                if (data->user_regs.abi) {
2208                        u64 mask = evsel->core.attr.sample_regs_user;
2209
2210                        sz = hweight64(mask) * sizeof(u64);
2211                        OVERFLOW_CHECK(array, sz, max_size);
2212                        data->user_regs.mask = mask;
2213                        data->user_regs.regs = (u64 *)array;
2214                        array = (void *)array + sz;
2215                }
2216        }
2217
2218        if (type & PERF_SAMPLE_STACK_USER) {
2219                OVERFLOW_CHECK_u64(array);
2220                sz = *array++;
2221
2222                data->user_stack.offset = ((char *)(array - 1)
2223                                          - (char *) event);
2224
2225                if (!sz) {
2226                        data->user_stack.size = 0;
2227                } else {
2228                        OVERFLOW_CHECK(array, sz, max_size);
2229                        data->user_stack.data = (char *)array;
2230                        array = (void *)array + sz;
2231                        OVERFLOW_CHECK_u64(array);
2232                        data->user_stack.size = *array++;
2233                        if (WARN_ONCE(data->user_stack.size > sz,
2234                                      "user stack dump failure\n"))
2235                                return -EFAULT;
2236                }
2237        }
2238
2239        if (type & PERF_SAMPLE_WEIGHT) {
2240                OVERFLOW_CHECK_u64(array);
2241                data->weight = *array;
2242                array++;
2243        }
2244
2245        if (type & PERF_SAMPLE_DATA_SRC) {
2246                OVERFLOW_CHECK_u64(array);
2247                data->data_src = *array;
2248                array++;
2249        }
2250
2251        if (type & PERF_SAMPLE_TRANSACTION) {
2252                OVERFLOW_CHECK_u64(array);
2253                data->transaction = *array;
2254                array++;
2255        }
2256
2257        data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
2258        if (type & PERF_SAMPLE_REGS_INTR) {
2259                OVERFLOW_CHECK_u64(array);
2260                data->intr_regs.abi = *array;
2261                array++;
2262
2263                if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
2264                        u64 mask = evsel->core.attr.sample_regs_intr;
2265
2266                        sz = hweight64(mask) * sizeof(u64);
2267                        OVERFLOW_CHECK(array, sz, max_size);
2268                        data->intr_regs.mask = mask;
2269                        data->intr_regs.regs = (u64 *)array;
2270                        array = (void *)array + sz;
2271                }
2272        }
2273
2274        data->phys_addr = 0;
2275        if (type & PERF_SAMPLE_PHYS_ADDR) {
2276                data->phys_addr = *array;
2277                array++;
2278        }
2279
2280        data->cgroup = 0;
2281        if (type & PERF_SAMPLE_CGROUP) {
2282                data->cgroup = *array;
2283                array++;
2284        }
2285
2286        if (type & PERF_SAMPLE_AUX) {
2287                OVERFLOW_CHECK_u64(array);
2288                sz = *array++;
2289
2290                OVERFLOW_CHECK(array, sz, max_size);
2291                /* Undo swap of data */
2292                if (swapped)
2293                        mem_bswap_64((char *)array, sz);
2294                data->aux_sample.size = sz;
2295                data->aux_sample.data = (char *)array;
2296                array = (void *)array + sz;
2297        }
2298
2299        return 0;
2300}
2301
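/*
 * Fast path that extracts only the timestamp: skip the fixed-size fields
 * that precede PERF_SAMPLE_TIME instead of decoding the whole sample.
 */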
2302int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
2303                                       union perf_event *event,
2304                                       u64 *timestamp)
2305{
2306        u64 type = evsel->core.attr.sample_type;
2307        const __u64 *array;
2308
2309        if (!(type & PERF_SAMPLE_TIME))
2310                return -1;
2311
2312        if (event->header.type != PERF_RECORD_SAMPLE) {
2313                struct perf_sample data = {
2314                        .time = -1ULL,
2315                };
2316
2317                if (!evsel->core.attr.sample_id_all)
2318                        return -1;
2319                if (perf_evsel__parse_id_sample(evsel, event, &data))
2320                        return -1;
2321
2322                *timestamp = data.time;
2323                return 0;
2324        }
2325
2326        array = event->sample.array;
2327
2328        if (perf_event__check_size(event, evsel->sample_size))
2329                return -EFAULT;
2330
2331        if (type & PERF_SAMPLE_IDENTIFIER)
2332                array++;
2333
2334        if (type & PERF_SAMPLE_IP)
2335                array++;
2336
2337        if (type & PERF_SAMPLE_TID)
2338                array++;
2339
2340        if (type & PERF_SAMPLE_TIME)
2341                *timestamp = *array;
2342
2343        return 0;
2344}
2345
2346struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name)
2347{
2348        return tep_find_field(evsel->tp_format, name);
2349}
2350
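/*
 * Return a pointer into the raw tracepoint payload for the named field,
 * resolving TEP_FIELD_IS_DYNAMIC fields through their offset/length word.
 */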
2351void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample,
2352                         const char *name)
2353{
2354        struct tep_format_field *field = perf_evsel__field(evsel, name);
2355        int offset;
2356
2357        if (!field)
2358                return NULL;
2359
2360        offset = field->offset;
2361
2362        if (field->flags & TEP_FIELD_IS_DYNAMIC) {
2363                offset = *(int *)(sample->raw_data + field->offset);
2364                offset &= 0xffff;
2365        }
2366
2367        return sample->raw_data + offset;
2368}
2369
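/*
 * Read an integer tracepoint field of 1, 2, 4 or 8 bytes from the raw data,
 * byte-swapping it when the sample comes from the other endianness.
 */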
2370u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample,
2371                         bool needs_swap)
2372{
2373        u64 value;
2374        void *ptr = sample->raw_data + field->offset;
2375
2376        switch (field->size) {
2377        case 1:
2378                return *(u8 *)ptr;
2379        case 2:
2380                value = *(u16 *)ptr;
2381                break;
2382        case 4:
2383                value = *(u32 *)ptr;
2384                break;
2385        case 8:
2386                memcpy(&value, ptr, sizeof(u64));
2387                break;
2388        default:
2389                return 0;
2390        }
2391
2392        if (!needs_swap)
2393                return value;
2394
2395        switch (field->size) {
2396        case 2:
2397                return bswap_16(value);
2398        case 4:
2399                return bswap_32(value);
2400        case 8:
2401                return bswap_64(value);
2402        default:
2403                return 0;
2404        }
2405
2406        return 0;
2407}
2408
2409u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample,
2410                       const char *name)
2411{
2412        struct tep_format_field *field = perf_evsel__field(evsel, name);
2413
2414        if (!field)
2415                return 0;
2416
2417        return format_field__intval(field, sample, evsel->needs_swap);
2418}
2419
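/*
 * Try a softer configuration after a failed open: hardware cycles become the
 * cpu-clock software event when no PMU is usable, and an EACCES with
 * perf_event_paranoid > 1 is retried with kernel/hypervisor samples excluded
 * (the event name gets a "u" modifier appended).
 */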
2420bool perf_evsel__fallback(struct evsel *evsel, int err,
2421                          char *msg, size_t msgsize)
2422{
2423        int paranoid;
2424
2425        if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
2426            evsel->core.attr.type   == PERF_TYPE_HARDWARE &&
2427            evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) {
2428                /*
2429                 * If it's cycles then fall back to the hrtimer-based
2430                 * cpu-clock-tick sw counter, which is always available even
2431                 * when there is no PMU support.
2432                 *
2433                 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
2434                 * b0a873e).
2435                 */
2436                scnprintf(msg, msgsize, "%s",
2437"The cycles event is not supported, trying to fall back to cpu-clock-ticks");
2438
2439                evsel->core.attr.type   = PERF_TYPE_SOFTWARE;
2440                evsel->core.attr.config = PERF_COUNT_SW_CPU_CLOCK;
2441
2442                zfree(&evsel->name);
2443                return true;
2444        } else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
2445                   (paranoid = perf_event_paranoid()) > 1) {
2446                const char *name = perf_evsel__name(evsel);
2447                char *new_name;
2448                const char *sep = ":";
2449
2450                /* Is the separator already in the name? */
2451                if (strchr(name, '/') ||
2452                    strchr(name, ':'))
2453                        sep = "";
2454
2455                if (asprintf(&new_name, "%s%su", name, sep) < 0)
2456                        return false;
2457
2458                if (evsel->name)
2459                        free(evsel->name);
2460                evsel->name = new_name;
2461                scnprintf(msg, msgsize, "kernel.perf_event_paranoid=%d, trying "
2462                          "to fall back to excluding kernel and hypervisor "
2463                          "samples", paranoid);
2464                evsel->core.attr.exclude_kernel = 1;
2465                evsel->core.attr.exclude_hv     = 1;
2466
2467                return true;
2468        }
2469
2470        return false;
2471}
2472
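/*
 * Scan /proc/<pid>/comm entries for a running process whose name starts with
 * 'name' (used below to detect an oprofile daemon holding the PMU).
 */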
2473static bool find_process(const char *name)
2474{
2475        size_t len = strlen(name);
2476        DIR *dir;
2477        struct dirent *d;
2478        int ret = -1;
2479
2480        dir = opendir(procfs__mountpoint());
2481        if (!dir)
2482                return false;
2483
2484        /* Walk through the directory. */
2485        while (ret && (d = readdir(dir)) != NULL) {
2486                char path[PATH_MAX];
2487                char *data;
2488                size_t size;
2489
2490                if ((d->d_type != DT_DIR) ||
2491                     !strcmp(".", d->d_name) ||
2492                     !strcmp("..", d->d_name))
2493                        continue;
2494
2495                scnprintf(path, sizeof(path), "%s/%s/comm",
2496                          procfs__mountpoint(), d->d_name);
2497
2498                if (filename__read_str(path, &data, &size))
2499                        continue;
2500
2501                ret = strncmp(name, data, len);
2502                free(data);
2503        }
2504
2505        closedir(dir);
2506        return ret ? false : true;
2507}
2508
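/*
 * Translate a sys_perf_event_open() errno into a user-facing hint, adding
 * context such as the current perf_event_paranoid value or missing-feature
 * state where that is known.
 */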
2509int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
2510                              int err, char *msg, size_t size)
2511{
2512        char sbuf[STRERR_BUFSIZE];
2513        int printed = 0;
2514
2515        switch (err) {
2516        case EPERM:
2517        case EACCES:
2518                if (err == EPERM)
2519                        printed = scnprintf(msg, size,
2520                                "No permission to enable %s event.\n\n",
2521                                perf_evsel__name(evsel));
2522
2523                return scnprintf(msg + printed, size - printed,
2524                 "You may not have permission to collect %sstats.\n\n"
2525                 "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
2526                 "which controls use of the performance events system by\n"
2527                 "unprivileged users (without CAP_SYS_ADMIN).\n\n"
2528                 "The current value is %d:\n\n"
2529                 "  -1: Allow use of (almost) all events by all users\n"
2530                 "      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
2531                 ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n"
2532                 "      Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n"
2533                 ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
2534                 ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n"
2535                 "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n"
2536                 "      kernel.perf_event_paranoid = -1\n" ,
2537                                 target->system_wide ? "system-wide " : "",
2538                                 perf_event_paranoid());
2539        case ENOENT:
2540                return scnprintf(msg, size, "The %s event is not supported.",
2541                                 perf_evsel__name(evsel));
2542        case EMFILE:
2543                return scnprintf(msg, size, "%s",
2544                         "Too many events are opened.\n"
2545                         "Probably the maximum number of open file descriptors has been reached.\n"
2546                         "Hint: Try again after reducing the number of events.\n"
2547                         "Hint: Try increasing the limit with 'ulimit -n <limit>'");
2548        case ENOMEM:
2549                if (evsel__has_callchain(evsel) &&
2550                    access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
2551                        return scnprintf(msg, size,
2552                                         "Not enough memory to setup event with callchain.\n"
2553                                         "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
2554                                         "Hint: Current value: %d", sysctl__max_stack());
2555                break;
2556        case ENODEV:
2557                if (target->cpu_list)
2558                        return scnprintf(msg, size, "%s",
2559         "No such device - did you specify an out-of-range profile CPU?");
2560                break;
2561        case EOPNOTSUPP:
2562                if (evsel->core.attr.sample_period != 0)
2563                        return scnprintf(msg, size,
2564        "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
2565                                         perf_evsel__name(evsel));
2566                if (evsel->core.attr.precise_ip)
2567                        return scnprintf(msg, size, "%s",
2568        "\'precise\' request may not be supported. Try removing 'p' modifier.");
2569#if defined(__i386__) || defined(__x86_64__)
2570                if (evsel->core.attr.type == PERF_TYPE_HARDWARE)
2571                        return scnprintf(msg, size, "%s",
2572        "No hardware sampling interrupt available.\n");
2573#endif
2574                break;
2575        case EBUSY:
2576                if (find_process("oprofiled"))
2577                        return scnprintf(msg, size,
2578        "The PMU counters are busy/taken by another profiler.\n"
2579        "We found oprofile daemon running, please stop it and try again.");
2580                break;
2581        case EINVAL:
2582                if (evsel->core.attr.write_backward && perf_missing_features.write_backward)
2583                        return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
2584                if (perf_missing_features.clockid)
2585                        return scnprintf(msg, size, "clockid feature not supported.");
2586                if (perf_missing_features.clockid_wrong)
2587                        return scnprintf(msg, size, "wrong clockid (%d).", clockid);
2588                if (perf_missing_features.aux_output)
2589                        return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
2590                break;
2591        default:
2592                break;
2593        }
2594
2595        return scnprintf(msg, size,
2596        "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
2597        "/bin/dmesg | grep -i perf may provide additional information.\n",
2598                         err, str_error_r(err, sbuf, sizeof(sbuf)),
2599                         perf_evsel__name(evsel));
2600}
2601
2602struct perf_env *perf_evsel__env(struct evsel *evsel)
2603{
2604        if (evsel && evsel->evlist)
2605                return evsel->evlist->env;
2606        return &perf_env;
2607}
2608
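/*
 * Record the sample id of every open fd with the evlist, so that samples can
 * later be matched back to this evsel.
 */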
2609static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
2610{
2611        int cpu, thread;
2612
2613        for (cpu = 0; cpu < xyarray__max_x(evsel->core.fd); cpu++) {
2614                for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
2615                     thread++) {
2616                        int fd = FD(evsel, cpu, thread);
2617
2618                        if (perf_evlist__id_add_fd(&evlist->core, &evsel->core,
2619                                                   cpu, thread, fd) < 0)
2620                                return -1;
2621                }
2622        }
2623
2624        return 0;
2625}
2626
2627int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
2628{
2629        struct perf_cpu_map *cpus = evsel->core.cpus;
2630        struct perf_thread_map *threads = evsel->core.threads;
2631
2632        if (perf_evsel__alloc_id(&evsel->core, cpus->nr, threads->nr))
2633                return -ENOMEM;
2634
2635        return store_evsel_ids(evsel, evlist);
2636}
2637