linux/mm/mmap_lock.c
// SPDX-License-Identifier: GPL-2.0
#define CREATE_TRACE_POINTS
#include <trace/events/mmap_lock.h>

#include <linux/mm.h>
#include <linux/cgroup.h>
#include <linux/memcontrol.h>
#include <linux/mmap_lock.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/trace_events.h>

EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned);
EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released);

#ifdef CONFIG_MEMCG

/*
 * Our various events all share the same buffer (because we don't want or need
 * to allocate a set of buffers *per event type*), so we need to protect against
 * concurrent _reg() and _unreg() calls, and count how many _reg() calls have
 * been made.
 */
static DEFINE_MUTEX(reg_lock);
static int reg_refcount; /* Protected by reg_lock. */

/*
 * Size of the buffer for memcg path names. Ignoring stack trace support,
 * trace_events_hist.c uses MAX_FILTER_STR_VAL for this, so we also use it.
 */
#define MEMCG_PATH_BUF_SIZE MAX_FILTER_STR_VAL

/*
 * How many contexts our trace events might be called in: normal, softirq, irq,
 * and NMI.
 */
#define CONTEXT_COUNT 4

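/*
 * Per-cpu buffers for the memcg path names: each CPU gets CONTEXT_COUNT slots
 * of MEMCG_PATH_BUF_SIZE bytes, and memcg_path_buf_idx tracks how many slots
 * are currently handed out on this CPU. tmp_bufs is only used while tearing
 * the buffers down under reg_lock.
 */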
static DEFINE_PER_CPU(char __rcu *, memcg_path_buf);
static char **tmp_bufs;
static DEFINE_PER_CPU(int, memcg_path_buf_idx);

/* Called with reg_lock held. */
static void free_memcg_path_bufs(void)
{
        int cpu;
        char **old = tmp_bufs;

        for_each_possible_cpu(cpu) {
                *(old++) = rcu_dereference_protected(
                        per_cpu(memcg_path_buf, cpu),
                        lockdep_is_held(&reg_lock));
                rcu_assign_pointer(per_cpu(memcg_path_buf, cpu), NULL);
        }

        /* Wait for inflight memcg_path_buf users to finish. */
        synchronize_rcu();

        old = tmp_bufs;
        for_each_possible_cpu(cpu) {
                kfree(*(old++));
        }

        kfree(tmp_bufs);
        tmp_bufs = NULL;
}

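/*
 * Called when the first probe is attached to any of the mmap_lock trace
 * events (these are hooked up as the event reg/unreg callbacks in
 * trace/events/mmap_lock.h): allocate the per-cpu path buffers on the 0->1
 * transition of reg_refcount.
 */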
int trace_mmap_lock_reg(void)
{
        int cpu;
        char *new;

        mutex_lock(&reg_lock);

        /* If the refcount is going 0->1, proceed with allocating buffers. */
        if (reg_refcount++)
                goto out;

        tmp_bufs = kmalloc_array(num_possible_cpus(), sizeof(*tmp_bufs),
                                 GFP_KERNEL);
        if (tmp_bufs == NULL)
                goto out_fail;

        for_each_possible_cpu(cpu) {
                new = kmalloc(MEMCG_PATH_BUF_SIZE * CONTEXT_COUNT, GFP_KERNEL);
                if (new == NULL)
                        goto out_fail_free;
                rcu_assign_pointer(per_cpu(memcg_path_buf, cpu), new);
                /* Don't need to wait for inflights, they'd have gotten NULL. */
        }

out:
        mutex_unlock(&reg_lock);
        return 0;

out_fail_free:
        free_memcg_path_bufs();
out_fail:
        /* Since we failed, undo the earlier ref increment. */
        --reg_refcount;

        mutex_unlock(&reg_lock);
        return -ENOMEM;
}

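/*
 * Counterpart to trace_mmap_lock_reg(): on the 1->0 transition of
 * reg_refcount, tear the per-cpu path buffers back down.
 */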
void trace_mmap_lock_unreg(void)
{
        mutex_lock(&reg_lock);

        /* If the refcount is going 1->0, proceed with freeing buffers. */
        if (--reg_refcount)
                goto out;

        free_memcg_path_bufs();

out:
        mutex_unlock(&reg_lock);
}

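/*
 * Reserve one MEMCG_PATH_BUF_SIZE slot from the current CPU's buffer, or
 * return NULL if the buffers are gone (i.e. the events are being
 * unregistered). Bumping memcg_path_buf_idx lets up to CONTEXT_COUNT nested
 * contexts (task, softirq, irq, NMI) each get their own slot. The RCU read
 * lock taken here is dropped in put_memcg_path_buf().
 */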
static inline char *get_memcg_path_buf(void)
{
        char *buf;
        int idx;

        rcu_read_lock();
        buf = rcu_dereference(*this_cpu_ptr(&memcg_path_buf));
        if (buf == NULL) {
                rcu_read_unlock();
                return NULL;
        }
        idx = this_cpu_add_return(memcg_path_buf_idx, MEMCG_PATH_BUF_SIZE) -
              MEMCG_PATH_BUF_SIZE;
        return &buf[idx];
}

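/* Release the slot reserved by get_memcg_path_buf() and leave RCU. */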
static inline void put_memcg_path_buf(void)
{
        this_cpu_sub(memcg_path_buf_idx, MEMCG_PATH_BUF_SIZE);
        rcu_read_unlock();
}

/*
 * Write the given mm_struct's memcg path to a percpu buffer, and return a
 * pointer to it. If the path cannot be determined, or no buffer was available
 * (because the trace event is being unregistered), NULL is returned.
 *
 * Note: buffers are allocated per-cpu to avoid locking, so preemption must be
 * disabled by the caller before calling us, and re-enabled only after the
 * caller is done with the pointer.
 *
 * The caller must call put_memcg_path_buf() once the buffer is no longer
 * needed. This must be done while preemption is still disabled.
 */
static const char *get_mm_memcg_path(struct mm_struct *mm)
{
        char *buf = NULL;
        struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);

        if (memcg == NULL)
                goto out;
        if (unlikely(memcg->css.cgroup == NULL))
                goto out_put;

        buf = get_memcg_path_buf();
        if (buf == NULL)
                goto out_put;

        cgroup_path(memcg->css.cgroup, buf, MEMCG_PATH_BUF_SIZE);

out_put:
        css_put(&memcg->css);
out:
        return buf;
}

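/*
 * Emit the trace_mmap_lock_<type>() tracepoint with the mm's memcg path as
 * its second argument. Preemption is disabled across the lookup so the
 * per-cpu buffer returned by get_mm_memcg_path() stays valid until the
 * tracepoint has consumed it.
 */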
#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
        do {                                                                   \
                const char *memcg_path;                                        \
                preempt_disable();                                             \
                memcg_path = get_mm_memcg_path(mm);                            \
                trace_mmap_lock_##type(mm,                                     \
                                       memcg_path != NULL ? memcg_path : "",   \
                                       ##__VA_ARGS__);                         \
                if (likely(memcg_path != NULL))                                \
                        put_memcg_path_buf();                                  \
                preempt_enable();                                              \
        } while (0)

#else /* !CONFIG_MEMCG */

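/*
 * Without CONFIG_MEMCG there is no cgroup path to report, so the reg/unreg
 * hooks are no-ops and the tracepoints just get an empty string.
 */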
int trace_mmap_lock_reg(void)
{
        return 0;
}

void trace_mmap_lock_unreg(void)
{
}

#define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
        trace_mmap_lock_##type(mm, "", ##__VA_ARGS__)

#endif /* CONFIG_MEMCG */

/*
 * Trace calls must be in a separate file, as otherwise there's a circular
 * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h.
 */

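/*
 * Rough sketch of how these functions are expected to be reached (not part of
 * this file; the inline wrappers live in include/linux/mmap_lock.h and only
 * call in here when the corresponding tracepoint is enabled):
 *
 *   mmap_write_lock(mm)
 *     -> __mmap_lock_do_trace_start_locking(mm, true)
 *     -> down_write(&mm->mmap_lock)
 *     -> __mmap_lock_do_trace_acquire_returned(mm, true, true)
 */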
void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write)
{
        TRACE_MMAP_LOCK_EVENT(start_locking, mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking);

void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
                                           bool success)
{
        TRACE_MMAP_LOCK_EVENT(acquire_returned, mm, write, success);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned);

void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write)
{
        TRACE_MMAP_LOCK_EVENT(released, mm, write);
}
EXPORT_SYMBOL(__mmap_lock_do_trace_released);