linux/kernel/bpf/queue_stack_maps.c
// SPDX-License-Identifier: GPL-2.0
/*
 * queue_stack_maps.c: BPF queue and stack maps
 *
 * Copyright (c) 2018 Politecnico di Torino
 */
#include <linux/bpf.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include "percpu_freelist.h"

#define QUEUE_STACK_CREATE_FLAG_MASK \
        (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

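/*
 * Both map types share one backing structure: a fixed-size ring buffer of
 * max_entries + 1 slots of value_size bytes each. head is the next slot to
 * be written, tail is the oldest valid element; keeping one slot unused
 * lets head == tail unambiguously mean "empty".
 */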
struct bpf_queue_stack {
        struct bpf_map map;
        raw_spinlock_t lock;
        u32 head, tail;
        u32 size; /* max_entries + 1 */

        char elements[0] __aligned(8);
};

static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
{
        return container_of(map, struct bpf_queue_stack, map);
}

static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
{
        return qs->head == qs->tail;
}

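/*
 * Full when advancing head (with wrap-around) would make it collide with
 * tail; this is why one of the size = max_entries + 1 slots always stays
 * unused.
 */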
static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
{
        u32 head = qs->head + 1;

        if (unlikely(head >= qs->size))
                head = 0;

        return head == qs->tail;
}

/* Called from syscall */
static int queue_stack_map_alloc_check(union bpf_attr *attr)
{
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        /* check sanity of attributes */
        if (attr->max_entries == 0 || attr->key_size != 0 ||
            attr->value_size == 0 ||
            attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
                return -EINVAL;

        if (attr->value_size > KMALLOC_MAX_SIZE)
                /* if value_size is bigger, user space won't be able to
                 * access the elements.
                 */
                return -E2BIG;

        return 0;
}

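/*
 * Called from syscall. Sizes the ring buffer (one extra slot for the
 * full/empty distinction), charges the rounded-up page count against the
 * caller's memlock limit, and then allocates the map header plus its
 * element array in one contiguous area.
 */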
static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
{
        int ret, numa_node = bpf_map_attr_numa_node(attr);
        struct bpf_queue_stack *qs;
        u64 size, queue_size, cost;

        size = (u64) attr->max_entries + 1;
        cost = queue_size = sizeof(*qs) + size * attr->value_size;
        if (cost >= U32_MAX - PAGE_SIZE)
                return ERR_PTR(-E2BIG);

        cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

        ret = bpf_map_precharge_memlock(cost);
        if (ret < 0)
                return ERR_PTR(ret);

        qs = bpf_map_area_alloc(queue_size, numa_node);
        if (!qs)
                return ERR_PTR(-ENOMEM);

        memset(qs, 0, sizeof(*qs));

        bpf_map_init_from_attr(&qs->map, attr);

        qs->map.pages = cost;
        qs->size = size;

        raw_spin_lock_init(&qs->lock);

        return &qs->map;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void queue_stack_map_free(struct bpf_map *map)
{
        struct bpf_queue_stack *qs = bpf_queue_stack(map);

        /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
         * so the programs (there can be more than one using this map) have
         * been disconnected from events. Wait for outstanding critical
         * sections in these programs to complete.
         */
        synchronize_rcu();

        bpf_map_area_free(qs);
}

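/*
 * Common FIFO read path, shared by queue peek and pop: copies the element
 * at tail into *value and, when delete is true, advances tail (with
 * wrap-around) so the element is consumed.
 */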
static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
{
        struct bpf_queue_stack *qs = bpf_queue_stack(map);
        unsigned long flags;
        int err = 0;
        void *ptr;

        raw_spin_lock_irqsave(&qs->lock, flags);

        if (queue_stack_map_is_empty(qs)) {
                memset(value, 0, qs->map.value_size);
                err = -ENOENT;
                goto out;
        }

        ptr = &qs->elements[qs->tail * qs->map.value_size];
        memcpy(value, ptr, qs->map.value_size);

        if (delete) {
                if (unlikely(++qs->tail >= qs->size))
                        qs->tail = 0;
        }

out:
        raw_spin_unlock_irqrestore(&qs->lock, flags);
        return err;
}

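/*
 * Common LIFO read path, shared by stack peek and pop: copies the most
 * recently pushed element (the slot just below head, wrapping at 0) into
 * *value and, when delete is true, moves head back onto it.
 */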
static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
{
        struct bpf_queue_stack *qs = bpf_queue_stack(map);
        unsigned long flags;
        int err = 0;
        void *ptr;
        u32 index;

        raw_spin_lock_irqsave(&qs->lock, flags);

        if (queue_stack_map_is_empty(qs)) {
                memset(value, 0, qs->map.value_size);
                err = -ENOENT;
                goto out;
        }

        index = qs->head - 1;
        if (unlikely(index >= qs->size))
                index = qs->size - 1;

        ptr = &qs->elements[index * qs->map.value_size];
        memcpy(value, ptr, qs->map.value_size);

        if (delete)
                qs->head = index;

out:
        raw_spin_unlock_irqrestore(&qs->lock, flags);
        return err;
}

/* Called from syscall or from eBPF program */
static int queue_map_peek_elem(struct bpf_map *map, void *value)
{
        return __queue_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static int stack_map_peek_elem(struct bpf_map *map, void *value)
{
        return __stack_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static int queue_map_pop_elem(struct bpf_map *map, void *value)
{
        return __queue_map_get(map, value, true);
}

/* Called from syscall or from eBPF program */
static int stack_map_pop_elem(struct bpf_map *map, void *value)
{
        return __stack_map_get(map, value, true);
}

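/*
 * Push is shared by both map types: the new element is always written at
 * head. If the map is full and BPF_EXIST was passed, tail is advanced
 * first so the oldest element is overwritten instead of failing with
 * -E2BIG.
 */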
/* Called from syscall or from eBPF program */
static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
                                     u64 flags)
{
        struct bpf_queue_stack *qs = bpf_queue_stack(map);
        unsigned long irq_flags;
        int err = 0;
        void *dst;

        /* BPF_EXIST is used to force making room for a new element in case the
         * map is full
         */
        bool replace = (flags & BPF_EXIST);

        /* Check supported flags for queue and stack maps */
        if (flags & BPF_NOEXIST || flags > BPF_EXIST)
                return -EINVAL;

        raw_spin_lock_irqsave(&qs->lock, irq_flags);

        if (queue_stack_map_is_full(qs)) {
                if (!replace) {
                        err = -E2BIG;
                        goto out;
                }
                /* advance tail pointer to overwrite oldest element */
                if (unlikely(++qs->tail >= qs->size))
                        qs->tail = 0;
        }

        dst = &qs->elements[qs->head * qs->map.value_size];
        memcpy(dst, value, qs->map.value_size);

        if (unlikely(++qs->head >= qs->size))
                qs->head = 0;

out:
        raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
        return err;
}

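/*
 * Queue and stack maps have no keys, so the generic per-key operations
 * below are not supported and only exist to satisfy the bpf_map_ops
 * interface.
 */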
/* Called from syscall or from eBPF program */
static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
{
        return NULL;
}

/* Called from syscall or from eBPF program */
static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
                                       void *value, u64 flags)
{
        return -EINVAL;
}

/* Called from syscall or from eBPF program */
static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
{
        return -EINVAL;
}

/* Called from syscall */
static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
                                        void *next_key)
{
        return -EINVAL;
}

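/* BPF_MAP_TYPE_QUEUE: FIFO ordering, pop/peek return the oldest element */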
const struct bpf_map_ops queue_map_ops = {
        .map_alloc_check = queue_stack_map_alloc_check,
        .map_alloc = queue_stack_map_alloc,
        .map_free = queue_stack_map_free,
        .map_lookup_elem = queue_stack_map_lookup_elem,
        .map_update_elem = queue_stack_map_update_elem,
        .map_delete_elem = queue_stack_map_delete_elem,
        .map_push_elem = queue_stack_map_push_elem,
        .map_pop_elem = queue_map_pop_elem,
        .map_peek_elem = queue_map_peek_elem,
        .map_get_next_key = queue_stack_map_get_next_key,
};

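/* BPF_MAP_TYPE_STACK: LIFO ordering, pop/peek return the most recently pushed element */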
const struct bpf_map_ops stack_map_ops = {
        .map_alloc_check = queue_stack_map_alloc_check,
        .map_alloc = queue_stack_map_alloc,
        .map_free = queue_stack_map_free,
        .map_lookup_elem = queue_stack_map_lookup_elem,
        .map_update_elem = queue_stack_map_update_elem,
        .map_delete_elem = queue_stack_map_delete_elem,
        .map_push_elem = queue_stack_map_push_elem,
        .map_pop_elem = stack_map_pop_elem,
        .map_peek_elem = stack_map_peek_elem,
        .map_get_next_key = queue_stack_map_get_next_key,
};
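
/*
 * Rough usage sketch (not part of this file): from user space the push,
 * pop and peek operations above are reached through the regular map
 * syscall commands with a NULL key; from an eBPF program they correspond
 * to the bpf_map_push_elem()/bpf_map_pop_elem()/bpf_map_peek_elem()
 * helpers. Assuming a BPF_MAP_TYPE_QUEUE with value_size == sizeof(__u32)
 * created via libbpf, user-space code would look roughly like:
 *
 *      __u32 val = 42;
 *
 *      bpf_map_update_elem(map_fd, NULL, &val, 0);             // push
 *      bpf_map_lookup_elem(map_fd, NULL, &val);                // peek
 *      bpf_map_lookup_and_delete_elem(map_fd, NULL, &val);     // pop
 *
 * The wrapper names above are libbpf's; the kernel-side entry points are
 * the ops tables in this file.
 */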