linux/tools/lib/perf/mmap.c
// SPDX-License-Identifier: GPL-2.0
#include <sys/mman.h>
#include <inttypes.h>
#include <asm/bug.h>
#include <errno.h>
#include <string.h>
#include <linux/ring_buffer.h>
#include <linux/perf_event.h>
#include <perf/mmap.h>
#include <perf/event.h>
#include <perf/evsel.h>
#include <internal/mmap.h>
#include <internal/lib.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include "internal.h"

void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
		     bool overwrite, libperf_unmap_cb_t unmap_cb)
{
	map->fd = -1;
	map->overwrite = overwrite;
	map->unmap_cb  = unmap_cb;
	refcount_set(&map->refcnt, 0);
	if (prev)
		prev->next = map;
}

size_t perf_mmap__mmap_len(struct perf_mmap *map)
{
	return map->mask + 1 + page_size;
}

int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
		    int fd, int cpu)
{
	map->prev = 0;
	map->mask = mp->mask;
	map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
			 MAP_SHARED, fd, 0);
	if (map->base == MAP_FAILED) {
		map->base = NULL;
		return -1;
	}

	map->fd  = fd;
	map->cpu = cpu;
	return 0;
}
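
/*
 * Illustrative sketch only (kept in a comment, not compiled): one way a
 * caller might map a single event with this API.  The 'pages' count, the
 * 'fd' from perf_event_open() and the error handling are assumptions for
 * the example, not part of this file; 'pages' must be a power of two.
 *
 *	struct perf_mmap map = { 0 };
 *	struct perf_mmap_param mp = {
 *		.prot = PROT_READ | PROT_WRITE,
 *		.mask = pages * page_size - 1,
 *	};
 *
 *	perf_mmap__init(&map, NULL, false, NULL);
 *	if (perf_mmap__mmap(&map, &mp, fd, cpu))
 *		return -1;
 *	perf_mmap__get(&map);
 */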

void perf_mmap__munmap(struct perf_mmap *map)
{
	if (map && map->base != NULL) {
		munmap(map->base, perf_mmap__mmap_len(map));
		map->base = NULL;
		map->fd = -1;
		refcount_set(&map->refcnt, 0);
	}
	if (map && map->unmap_cb)
		map->unmap_cb(map);
}

void perf_mmap__get(struct perf_mmap *map)
{
	refcount_inc(&map->refcnt);
}

void perf_mmap__put(struct perf_mmap *map)
{
	BUG_ON(map->base && refcount_read(&map->refcnt) == 0);

	if (refcount_dec_and_test(&map->refcnt))
		perf_mmap__munmap(map);
}

static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
{
	ring_buffer_write_tail(md->base, tail);
}

u64 perf_mmap__read_head(struct perf_mmap *map)
{
	return ring_buffer_read_head(map->base);
}

static bool perf_mmap__empty(struct perf_mmap *map)
{
	struct perf_event_mmap_page *pc = map->base;

	return perf_mmap__read_head(map) == map->prev && !pc->aux_size;
}

void perf_mmap__consume(struct perf_mmap *map)
{
	if (!map->overwrite) {
		u64 old = map->prev;

		perf_mmap__write_tail(map, old);
	}

	if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
		perf_mmap__put(map);
}

static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = *start;
	int size = mask + 1;

	pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
	pheader = (struct perf_event_header *)(buf + (*start & mask));
	while (true) {
		if (evt_head - *start >= (unsigned int)size) {
			pr_debug("Finished reading overwrite ring buffer: rewind\n");
			if (evt_head - *start > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading overwrite ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}

/*
 * Report the start and end of the available data in the ring buffer.
 */
static int __perf_mmap__read_init(struct perf_mmap *md)
{
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;

	md->start = md->overwrite ? head : old;
	md->end = md->overwrite ? old : head;

	if ((md->end - md->start) < md->flush)
		return -EAGAIN;

	size = md->end - md->start;
	if (size > (unsigned long)(md->mask) + 1) {
		if (!md->overwrite) {
			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

			md->prev = head;
			perf_mmap__consume(md);
			return -EAGAIN;
		}

		/*
		 * Backward ring buffer is full. We still have a chance to read
		 * most of data from it.
		 */
		if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
			return -EINVAL;
	}

	return 0;
}

int perf_mmap__read_init(struct perf_mmap *map)
{
	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return -ENOENT;

	return __perf_mmap__read_init(map);
}

/*
 * Mandatory for overwrite mode.
 * The direction of overwrite mode is backward.
 * The last perf_mmap__read() will set tail to map->prev.
 * Need to correct map->prev to head, which is the end of the next read.
 */
void perf_mmap__read_done(struct perf_mmap *map)
{
	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return;

	map->prev = perf_mmap__read_head(map);
}

/* When check_messup is true, 'end' must point to a good entry */
static union perf_event *perf_mmap__read(struct perf_mmap *map,
					 u64 *startp, u64 end)
{
	unsigned char *data = map->base + page_size;
	union perf_event *event = NULL;
	int diff = end - *startp;

	if (diff >= (int)sizeof(event->header)) {
		size_t size;

		event = (union perf_event *)&data[*startp & map->mask];
		size = event->header.size;

		if (size < sizeof(event->header) || diff < (int)size)
			return NULL;

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
			unsigned int offset = *startp;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = map->event_copy;

			do {
				cpy = min(map->mask + 1 - (offset & map->mask), len);
				memcpy(dst, &data[offset & map->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = (union perf_event *)map->event_copy;
		}

		*startp += size;
	}

	return event;
}

/*
 * Read event from ring buffer one by one.
 * Return one event for each call.
 *
 * Usage:
 * perf_mmap__read_init()
 * while(event = perf_mmap__read_event()) {
 *	//process the event
 *	perf_mmap__consume()
 * }
 * perf_mmap__read_done()
 */
union perf_event *perf_mmap__read_event(struct perf_mmap *map)
{
	union perf_event *event;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!refcount_read(&map->refcnt))
		return NULL;

	/* non-overwrite doesn't pause the ring buffer */
	if (!map->overwrite)
		map->end = perf_mmap__read_head(map);

	event = perf_mmap__read(map, &map->start, map->end);

	if (!map->overwrite)
		map->prev = map->start;

	return event;
}
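
/*
 * Illustrative sketch only (kept in a comment, not compiled): the usage
 * outline above spelled out as a full polling loop.  process_event() is a
 * hypothetical caller-supplied handler; everything else is the API defined
 * in this file.
 *
 *	union perf_event *event;
 *
 *	if (perf_mmap__read_init(map) < 0)
 *		return;
 *
 *	while ((event = perf_mmap__read_event(map)) != NULL) {
 *		process_event(event);
 *		perf_mmap__consume(map);
 *	}
 *
 *	perf_mmap__read_done(map);
 */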

#if defined(__i386__) || defined(__x86_64__)
static u64 read_perf_counter(unsigned int counter)
{
	unsigned int low, high;

	asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));

	return low | ((u64)high) << 32;
}

static u64 read_timestamp(void)
{
	unsigned int low, high;

	asm volatile("rdtsc" : "=a" (low), "=d" (high));

	return low | ((u64)high) << 32;
}
#else
static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
static u64 read_timestamp(void) { return 0; }
#endif

int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
{
	struct perf_event_mmap_page *pc = map->base;
	u32 seq, idx, time_mult = 0, time_shift = 0;
	u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;

	if (!pc || !pc->cap_user_rdpmc)
		return -1;

	do {
		seq = READ_ONCE(pc->lock);
		barrier();

		count->ena = READ_ONCE(pc->time_enabled);
		count->run = READ_ONCE(pc->time_running);

		if (pc->cap_user_time && count->ena != count->run) {
			cyc = read_timestamp();
			time_mult = READ_ONCE(pc->time_mult);
			time_shift = READ_ONCE(pc->time_shift);
			time_offset = READ_ONCE(pc->time_offset);

			if (pc->cap_user_time_short) {
				time_cycles = READ_ONCE(pc->time_cycles);
				time_mask = READ_ONCE(pc->time_mask);
			}
		}

		idx = READ_ONCE(pc->index);
		cnt = READ_ONCE(pc->offset);
		if (pc->cap_user_rdpmc && idx) {
			s64 evcnt = read_perf_counter(idx - 1);
			u16 width = READ_ONCE(pc->pmc_width);

			evcnt <<= 64 - width;
			evcnt >>= 64 - width;
			cnt += evcnt;
		} else
			return -1;

		barrier();
	} while (READ_ONCE(pc->lock) != seq);

	if (count->ena != count->run) {
		u64 delta;

		/* Adjust for cap_user_time_short, a nop if not */
		cyc = time_cycles + ((cyc - time_cycles) & time_mask);

		delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);

		count->ena += delta;
		if (idx)
			count->run += delta;

		cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
	}

	count->val = cnt;

	return 0;
}
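
/*
 * Illustrative sketch only (kept in a comment, not compiled): reading a
 * self-monitored counter via the mmap page.  It assumes the event was opened
 * for the current thread with perf_event_open() and mapped as above; the
 * time_enabled/time_running scaling is done inside perf_mmap__read_self().
 *
 *	struct perf_counts_values counts = { .val = 0 };
 *
 *	if (perf_mmap__read_self(map, &counts))
 *		return -1;
 *
 *	printf("count %" PRIu64 " (enabled %" PRIu64 ", running %" PRIu64 ")\n",
 *	       counts.val, counts.ena, counts.run);
 */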