linux/tools/include/linux/ring_buffer.h
<<
>>
Prefs
   1#ifndef _TOOLS_LINUX_RING_BUFFER_H_
   2#define _TOOLS_LINUX_RING_BUFFER_H_
   3
   4#include <asm/barrier.h>
   5#include <linux/perf_event.h>
   6
   7/*
   8 * Contract with kernel for walking the perf ring buffer from
   9 * user space requires the following barrier pairing (quote
  10 * from kernel/events/ring_buffer.c):
  11 *
  12 *   Since the mmap() consumer (userspace) can run on a
  13 *   different CPU:
  14 *
  15 *   kernel                             user
  16 *
  17 *   if (LOAD ->data_tail) {            LOAD ->data_head
  18 *                      (A)             smp_rmb()       (C)
  19 *      STORE $data                     LOAD $data
  20 *      smp_wmb()       (B)             smp_mb()        (D)
  21 *      STORE ->data_head               STORE ->data_tail
  22 *   }
  23 *
  24 *   Where A pairs with D, and B pairs with C.
  25 *
  26 *   In our case A is a control dependency that separates the
  27 *   load of the ->data_tail and the stores of $data. In case
  28 *   ->data_tail indicates there is no room in the buffer to
  29 *   store $data we do not.
  30 *
  31 *   D needs to be a full barrier since it separates the data
  32 *   READ from the tail WRITE.
  33 *
  34 *   For B a WMB is sufficient since it separates two WRITEs,
  35 *   and for C an RMB is sufficient since it separates two READs.
  36 *
  37 * Note, instead of B, C, D we could also use smp_store_release()
  38 * in B and D as well as smp_load_acquire() in C.
  39 *
  40 * However, this optimization does not make sense for all kernel
  41 * supported architectures since for a fair number it would
  42 * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(),
  43 * and smp_mb() + WRITE_ONCE() pair for smp_store_release().
  44 *
  45 * Thus, for those architectures, smp_wmb() in B and smp_rmb() in C
  46 * would still be less expensive. For the case of D, using
  47 * smp_store_release() has either the same cost or is less expensive;
  48 * for example, due to TSO, x86 can avoid the CPU barrier entirely.
  49 */
  50
  51static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
  52{
  53/*
  54 * Architectures where smp_load_acquire() does not fallback to
  55 * READ_ONCE() + smp_mb() pair.
  56 */
  57#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
  58    defined(__ia64__) || defined(__sparc__) && defined(__arch64__)
  59        return smp_load_acquire(&base->data_head);
  60#else
  61        u64 head = READ_ONCE(base->data_head);
  62
  63        smp_rmb();
  64        return head;
  65#endif
  66}
  67
  68static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base,
  69                                          u64 tail)
  70{
  71        smp_store_release(&base->data_tail, tail);
  72}
  73
  74#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */
  75