linux/tools/lib/perf/mmap.c
// SPDX-License-Identifier: GPL-2.0
#include <sys/mman.h>
#include <inttypes.h>
#include <asm/bug.h>
#include <errno.h>
#include <string.h>
#include <linux/ring_buffer.h>
#include <linux/perf_event.h>
#include <perf/mmap.h>
#include <perf/event.h>
#include <perf/evsel.h>
#include <internal/mmap.h>
#include <internal/lib.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/stringify.h>
#include "internal.h"

void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
                     bool overwrite, libperf_unmap_cb_t unmap_cb)
{
        map->fd = -1;
        map->overwrite = overwrite;
        map->unmap_cb  = unmap_cb;
        refcount_set(&map->refcnt, 0);
        if (prev)
                prev->next = map;
}

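/*
 * The mapping is one control page (struct perf_event_mmap_page) followed by
 * a power-of-two data area; map->mask is the size of the data area minus 1.
 */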
size_t perf_mmap__mmap_len(struct perf_mmap *map)
{
        return map->mask + 1 + page_size;
}

int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
                    int fd, struct perf_cpu cpu)
{
        map->prev = 0;
        map->mask = mp->mask;
        map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
                         MAP_SHARED, fd, 0);
        if (map->base == MAP_FAILED) {
                map->base = NULL;
                return -1;
        }

        map->fd  = fd;
        map->cpu = cpu;
        return 0;
}

void perf_mmap__munmap(struct perf_mmap *map)
{
        if (map && map->base != NULL) {
                munmap(map->base, perf_mmap__mmap_len(map));
                map->base = NULL;
                map->fd = -1;
                refcount_set(&map->refcnt, 0);
        }
        if (map && map->unmap_cb)
                map->unmap_cb(map);
}

void perf_mmap__get(struct perf_mmap *map)
{
        refcount_inc(&map->refcnt);
}

void perf_mmap__put(struct perf_mmap *map)
{
        BUG_ON(map->base && refcount_read(&map->refcnt) == 0);

        if (refcount_dec_and_test(&map->refcnt))
                perf_mmap__munmap(map);
}

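/*
 * Head/tail accessors come from <linux/ring_buffer.h>; they pair the loads
 * and stores with the memory barriers needed to order them against the
 * kernel's updates of the ring buffer.
 */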
static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
{
        ring_buffer_write_tail(md->base, tail);
}

u64 perf_mmap__read_head(struct perf_mmap *map)
{
        return ring_buffer_read_head(map->base);
}

static bool perf_mmap__empty(struct perf_mmap *map)
{
        struct perf_event_mmap_page *pc = map->base;

        return perf_mmap__read_head(map) == map->prev && !pc->aux_size;
}

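/*
 * Advance the tail past the data consumed so far so the kernel can reuse
 * that space, and drop the reference once the buffer is empty (e.g. after
 * a POLLHUP on the event fd).
 */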
void perf_mmap__consume(struct perf_mmap *map)
{
        if (!map->overwrite) {
                u64 old = map->prev;

                perf_mmap__write_tail(map, old);
        }

        if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
                perf_mmap__put(map);
}

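/*
 * For the overwrite (backward) ring buffer: walk forward from @start through
 * the record headers until the walk wraps past one full buffer or hits a
 * zero-sized header, and report that point as the end of the readable data.
 */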
static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
{
        struct perf_event_header *pheader;
        u64 evt_head = *start;
        int size = mask + 1;

        pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
        pheader = (struct perf_event_header *)(buf + (*start & mask));
        while (true) {
                if (evt_head - *start >= (unsigned int)size) {
                        pr_debug("Finished reading overwrite ring buffer: rewind\n");
                        if (evt_head - *start > (unsigned int)size)
                                evt_head -= pheader->size;
                        *end = evt_head;
                        return 0;
                }

                pheader = (struct perf_event_header *)(buf + (evt_head & mask));

                if (pheader->size == 0) {
                        pr_debug("Finished reading overwrite ring buffer: get start\n");
                        *end = evt_head;
                        return 0;
                }

                evt_head += pheader->size;
                pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
        }
        WARN_ONCE(1, "Shouldn't get here\n");
        return -1;
}

/*
 * Report the start and end of the available data in the ring buffer.
 */
static int __perf_mmap__read_init(struct perf_mmap *md)
{
        u64 head = perf_mmap__read_head(md);
        u64 old = md->prev;
        unsigned char *data = md->base + page_size;
        unsigned long size;

        md->start = md->overwrite ? head : old;
        md->end = md->overwrite ? old : head;

        if ((md->end - md->start) < md->flush)
                return -EAGAIN;

        size = md->end - md->start;
        if (size > (unsigned long)(md->mask) + 1) {
                if (!md->overwrite) {
                        WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

                        md->prev = head;
                        perf_mmap__consume(md);
                        return -EAGAIN;
                }

                /*
                 * Backward ring buffer is full. We still have a chance to read
                 * most of the data from it.
                 */
                if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
                        return -EINVAL;
        }

        return 0;
}

int perf_mmap__read_init(struct perf_mmap *map)
{
        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
        if (!refcount_read(&map->refcnt))
                return -ENOENT;

        return __perf_mmap__read_init(map);
}

/*
 * Mandatory for overwrite mode.
 * The direction of overwrite mode is backward.
 * The last perf_mmap__read() will set the tail to map->prev.
 * Need to correct map->prev to head, which is the end of the next read.
 */
void perf_mmap__read_done(struct perf_mmap *map)
{
        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
        if (!refcount_read(&map->refcnt))
                return;

        map->prev = perf_mmap__read_head(map);
}

/* 'end' must point to a good entry. */
static union perf_event *perf_mmap__read(struct perf_mmap *map,
                                         u64 *startp, u64 end)
{
        unsigned char *data = map->base + page_size;
        union perf_event *event = NULL;
        int diff = end - *startp;

        if (diff >= (int)sizeof(event->header)) {
                size_t size;

                event = (union perf_event *)&data[*startp & map->mask];
                size = event->header.size;

                if (size < sizeof(event->header) || diff < (int)size)
                        return NULL;

                /*
                 * Event straddles the mmap boundary -- header should always
                 * be inside due to u64 alignment of output.
                 */
                if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
                        unsigned int offset = *startp;
                        unsigned int len = min(sizeof(*event), size), cpy;
                        void *dst = map->event_copy;

                        do {
                                cpy = min(map->mask + 1 - (offset & map->mask), len);
                                memcpy(dst, &data[offset & map->mask], cpy);
                                offset += cpy;
                                dst += cpy;
                                len -= cpy;
                        } while (len);

                        event = (union perf_event *)map->event_copy;
                }

                *startp += size;
        }

        return event;
}

/*
 * Read events from the ring buffer one at a time; each call returns at most
 * one event.
 *
 * Usage:
 * perf_mmap__read_init()
 * while (event = perf_mmap__read_event()) {
 *      //process the event
 *      perf_mmap__consume()
 * }
 * perf_mmap__read_done()
 *
 * A fuller illustrative example follows the function body below.
 */
union perf_event *perf_mmap__read_event(struct perf_mmap *map)
{
        union perf_event *event;

        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
        if (!refcount_read(&map->refcnt))
                return NULL;

        /* non-overwrite mode doesn't pause the ring buffer */
        if (!map->overwrite)
                map->end = perf_mmap__read_head(map);

        event = perf_mmap__read(map, &map->start, map->end);

        if (!map->overwrite)
                map->prev = map->start;

        return event;
}

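/*
 * Illustrative sketch only: a minimal consumer loop over one mmap'ed ring,
 * assuming @map was set up elsewhere and a caller-supplied handle_event()
 * (hypothetical) processes each record:
 *
 *      union perf_event *event;
 *
 *      if (perf_mmap__read_init(map) < 0)
 *              return;
 *      while ((event = perf_mmap__read_event(map)) != NULL) {
 *              handle_event(event);
 *              perf_mmap__consume(map);
 *      }
 *      perf_mmap__read_done(map);
 */
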
#if defined(__i386__) || defined(__x86_64__)
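/*
 * rdpmc returns the hardware counter selected by ECX in EDX:EAX; rdtsc
 * returns the TSC the same way.
 */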
static u64 read_perf_counter(unsigned int counter)
{
        unsigned int low, high;

        asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));

        return low | ((u64)high) << 32;
}

static u64 read_timestamp(void)
{
        unsigned int low, high;

        asm volatile("rdtsc" : "=a" (low), "=d" (high));

        return low | ((u64)high) << 32;
}
#elif defined(__aarch64__)
#define read_sysreg(r) ({                                               \
        u64 __val;                                                      \
        asm volatile("mrs %0, " __stringify(r) : "=r" (__val));         \
        __val;                                                          \
})

static u64 read_pmccntr(void)
{
        return read_sysreg(pmccntr_el0);
}

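/*
 * Generate one read_pmevcntr_<idx>() helper per PMEVCNTR<idx>_EL0 event
 * counter register so read_perf_counter() can index them by counter number.
 */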
#define PMEVCNTR_READ(idx)                                      \
        static u64 read_pmevcntr_##idx(void) {                  \
                return read_sysreg(pmevcntr##idx##_el0);        \
        }

PMEVCNTR_READ(0);
PMEVCNTR_READ(1);
PMEVCNTR_READ(2);
PMEVCNTR_READ(3);
PMEVCNTR_READ(4);
PMEVCNTR_READ(5);
PMEVCNTR_READ(6);
PMEVCNTR_READ(7);
PMEVCNTR_READ(8);
PMEVCNTR_READ(9);
PMEVCNTR_READ(10);
PMEVCNTR_READ(11);
PMEVCNTR_READ(12);
PMEVCNTR_READ(13);
PMEVCNTR_READ(14);
PMEVCNTR_READ(15);
PMEVCNTR_READ(16);
PMEVCNTR_READ(17);
PMEVCNTR_READ(18);
PMEVCNTR_READ(19);
PMEVCNTR_READ(20);
PMEVCNTR_READ(21);
PMEVCNTR_READ(22);
PMEVCNTR_READ(23);
PMEVCNTR_READ(24);
PMEVCNTR_READ(25);
PMEVCNTR_READ(26);
PMEVCNTR_READ(27);
PMEVCNTR_READ(28);
PMEVCNTR_READ(29);
PMEVCNTR_READ(30);

/*
 * Read a value directly from PMEVCNTR<idx>.
 */
static u64 read_perf_counter(unsigned int counter)
{
        static u64 (* const read_f[])(void) = {
                read_pmevcntr_0,
                read_pmevcntr_1,
                read_pmevcntr_2,
                read_pmevcntr_3,
                read_pmevcntr_4,
                read_pmevcntr_5,
                read_pmevcntr_6,
                read_pmevcntr_7,
                read_pmevcntr_8,
                read_pmevcntr_9,
                read_pmevcntr_10,
                read_pmevcntr_11,
                read_pmevcntr_12,
                read_pmevcntr_13,
                read_pmevcntr_14,
                read_pmevcntr_15,
                read_pmevcntr_16,
                read_pmevcntr_17,
                read_pmevcntr_18,
                read_pmevcntr_19,
                read_pmevcntr_20,
                read_pmevcntr_21,
                read_pmevcntr_22,
                read_pmevcntr_23,
                read_pmevcntr_24,
                read_pmevcntr_25,
                read_pmevcntr_26,
                read_pmevcntr_27,
                read_pmevcntr_28,
                read_pmevcntr_29,
                read_pmevcntr_30,
                read_pmccntr
        };

        if (counter < ARRAY_SIZE(read_f))
                return (read_f[counter])();

        return 0;
}

static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }

#else
static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
static u64 read_timestamp(void) { return 0; }
#endif

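/*
 * Read the counter value for the current thread/CPU directly from user space:
 * spin on the seqlock-style pc->lock, read the hardware counter selected by
 * pc->index, add the kernel-provided pc->offset, and scale time_enabled and
 * time_running from the timestamp when the event was multiplexed.
 */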
int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
{
        struct perf_event_mmap_page *pc = map->base;
        u32 seq, idx, time_mult = 0, time_shift = 0;
        u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;

        if (!pc || !pc->cap_user_rdpmc)
                return -1;

        do {
                seq = READ_ONCE(pc->lock);
                barrier();

                count->ena = READ_ONCE(pc->time_enabled);
                count->run = READ_ONCE(pc->time_running);

                if (pc->cap_user_time && count->ena != count->run) {
                        cyc = read_timestamp();
                        time_mult = READ_ONCE(pc->time_mult);
                        time_shift = READ_ONCE(pc->time_shift);
                        time_offset = READ_ONCE(pc->time_offset);

                        if (pc->cap_user_time_short) {
                                time_cycles = READ_ONCE(pc->time_cycles);
                                time_mask = READ_ONCE(pc->time_mask);
                        }
                }

                idx = READ_ONCE(pc->index);
                cnt = READ_ONCE(pc->offset);
                if (pc->cap_user_rdpmc && idx) {
                        s64 evcnt = read_perf_counter(idx - 1);
                        u16 width = READ_ONCE(pc->pmc_width);

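                        /* Sign-extend the pmc_width-bit counter value to 64 bits. */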
                        evcnt <<= 64 - width;
                        evcnt >>= 64 - width;
                        cnt += evcnt;
                } else
                        return -1;

                barrier();
        } while (READ_ONCE(pc->lock) != seq);

        if (count->ena != count->run) {
                u64 delta;

                /* Adjust for cap_user_time_short; a nop if not set. */
                cyc = time_cycles + ((cyc - time_cycles) & time_mask);

                delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);

                count->ena += delta;
                if (idx)
                        count->run += delta;
        }

        count->val = cnt;

        return 0;
}
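/*
 * Note: callers typically reach this via libperf's perf_evsel__mmap()
 * followed by perf_evsel__read() on the same evsel, which falls back to the
 * read() syscall when a user-space read is not possible.
 */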