/* linux/kernel/bpf/arraymap.c */
   1/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   2 *
   3 * This program is free software; you can redistribute it and/or
   4 * modify it under the terms of version 2 of the GNU General Public
   5 * License as published by the Free Software Foundation.
   6 *
   7 * This program is distributed in the hope that it will be useful, but
   8 * WITHOUT ANY WARRANTY; without even the implied warranty of
   9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10 * General Public License for more details.
  11 */
  12#include <linux/bpf.h>
  13#include <linux/err.h>
  14#include <linux/vmalloc.h>
  15#include <linux/slab.h>
  16#include <linux/mm.h>
  17#include <linux/filter.h>
  18#include <linux/perf_event.h>
  19
  20/* Called from syscall */
  21static struct bpf_map *array_map_alloc(union bpf_attr *attr)
  22{
  23        struct bpf_array *array;
  24        u32 elem_size, array_size;
  25
  26        /* check sanity of attributes */
  27        if (attr->max_entries == 0 || attr->key_size != 4 ||
  28            attr->value_size == 0)
  29                return ERR_PTR(-EINVAL);
  30
  31        if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
  32                /* if value_size is bigger, the user space won't be able to
  33                 * access the elements.
  34                 */
  35                return ERR_PTR(-E2BIG);
  36
  37        elem_size = round_up(attr->value_size, 8);
  38
  39        /* check round_up into zero and u32 overflow */
  40        if (elem_size == 0 ||
  41            attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size)
  42                return ERR_PTR(-ENOMEM);
  43
  44        array_size = sizeof(*array) + attr->max_entries * elem_size;
  45
  46        /* allocate all map elements and zero-initialize them */
  47        array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
  48        if (!array) {
  49                array = vzalloc(array_size);
  50                if (!array)
  51                        return ERR_PTR(-ENOMEM);
  52        }
  53
  54        /* copy mandatory map attributes */
  55        array->map.key_size = attr->key_size;
  56        array->map.value_size = attr->value_size;
  57        array->map.max_entries = attr->max_entries;
  58        array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
  59        array->elem_size = elem_size;
  60
  61        return &array->map;
  62}
  63
  64/* Called from syscall or from eBPF program */
  65static void *array_map_lookup_elem(struct bpf_map *map, void *key)
  66{
  67        struct bpf_array *array = container_of(map, struct bpf_array, map);
  68        u32 index = *(u32 *)key;
  69
  70        if (index >= array->map.max_entries)
  71                return NULL;
  72
  73        return array->value + array->elem_size * index;
  74}
  75
  76/* Called from syscall */
  77static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
  78{
  79        struct bpf_array *array = container_of(map, struct bpf_array, map);
  80        u32 index = *(u32 *)key;
  81        u32 *next = (u32 *)next_key;
  82
  83        if (index >= array->map.max_entries) {
  84                *next = 0;
  85                return 0;
  86        }
  87
  88        if (index == array->map.max_entries - 1)
  89                return -ENOENT;
  90
  91        *next = index + 1;
  92        return 0;
  93}
  94
  95/* Called from syscall or from eBPF program */
  96static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
  97                                 u64 map_flags)
  98{
  99        struct bpf_array *array = container_of(map, struct bpf_array, map);
 100        u32 index = *(u32 *)key;
 101
 102        if (map_flags > BPF_EXIST)
 103                /* unknown flags */
 104                return -EINVAL;
 105
 106        if (index >= array->map.max_entries)
 107                /* all elements were pre-allocated, cannot insert a new one */
 108                return -E2BIG;
 109
 110        if (map_flags == BPF_NOEXIST)
 111                /* all elements already exist */
 112                return -EEXIST;
 113
 114        memcpy(array->value + array->elem_size * index, value, map->value_size);
 115        return 0;
 116}
 117
 118/* Called from syscall or from eBPF program */
 119static int array_map_delete_elem(struct bpf_map *map, void *key)
 120{
 121        return -EINVAL;
 122}
 123
 124/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
 125static void array_map_free(struct bpf_map *map)
 126{
 127        struct bpf_array *array = container_of(map, struct bpf_array, map);
 128
 129        /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
 130         * so the programs (can be more than one that used this map) were
 131         * disconnected from events. Wait for outstanding programs to complete
 132         * and free the array
 133         */
 134        synchronize_rcu();
 135
 136        kvfree(array);
 137}
 138
 139static const struct bpf_map_ops array_ops = {
 140        .map_alloc = array_map_alloc,
 141        .map_free = array_map_free,
 142        .map_get_next_key = array_map_get_next_key,
 143        .map_lookup_elem = array_map_lookup_elem,
 144        .map_update_elem = array_map_update_elem,
 145        .map_delete_elem = array_map_delete_elem,
 146};
 147
/* binds the array_ops table to the BPF_MAP_TYPE_ARRAY identifier */
static struct bpf_map_type_list array_type __read_mostly = {
	.ops = &array_ops,
	.type = BPF_MAP_TYPE_ARRAY,
};

/* register the array map type late in boot, before userspace starts */
static int __init register_array_map(void)
{
	bpf_register_map_type(&array_type);
	return 0;
}
late_initcall(register_array_map);
 159
 160static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr)
 161{
 162        /* only file descriptors can be stored in this type of map */
 163        if (attr->value_size != sizeof(u32))
 164                return ERR_PTR(-EINVAL);
 165        return array_map_alloc(attr);
 166}
 167
 168static void fd_array_map_free(struct bpf_map *map)
 169{
 170        struct bpf_array *array = container_of(map, struct bpf_array, map);
 171        int i;
 172
 173        synchronize_rcu();
 174
 175        /* make sure it's empty */
 176        for (i = 0; i < array->map.max_entries; i++)
 177                BUG_ON(array->ptrs[i] != NULL);
 178        kvfree(array);
 179}
 180
 181static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
 182{
 183        return NULL;
 184}
 185
 186/* only called from syscall */
 187static int fd_array_map_update_elem(struct bpf_map *map, void *key,
 188                                    void *value, u64 map_flags)
 189{
 190        struct bpf_array *array = container_of(map, struct bpf_array, map);
 191        void *new_ptr, *old_ptr;
 192        u32 index = *(u32 *)key, ufd;
 193
 194        if (map_flags != BPF_ANY)
 195                return -EINVAL;
 196
 197        if (index >= array->map.max_entries)
 198                return -E2BIG;
 199
 200        ufd = *(u32 *)value;
 201        new_ptr = map->ops->map_fd_get_ptr(map, ufd);
 202        if (IS_ERR(new_ptr))
 203                return PTR_ERR(new_ptr);
 204
 205        old_ptr = xchg(array->ptrs + index, new_ptr);
 206        if (old_ptr)
 207                map->ops->map_fd_put_ptr(old_ptr);
 208
 209        return 0;
 210}
 211
 212static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
 213{
 214        struct bpf_array *array = container_of(map, struct bpf_array, map);
 215        void *old_ptr;
 216        u32 index = *(u32 *)key;
 217
 218        if (index >= array->map.max_entries)
 219                return -E2BIG;
 220
 221        old_ptr = xchg(array->ptrs + index, NULL);
 222        if (old_ptr) {
 223                map->ops->map_fd_put_ptr(old_ptr);
 224                return 0;
 225        } else {
 226                return -ENOENT;
 227        }
 228}
 229
 230static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd)
 231{
 232        struct bpf_array *array = container_of(map, struct bpf_array, map);
 233        struct bpf_prog *prog = bpf_prog_get(fd);
 234        if (IS_ERR(prog))
 235                return prog;
 236
 237        if (!bpf_prog_array_compatible(array, prog)) {
 238                bpf_prog_put(prog);
 239                return ERR_PTR(-EINVAL);
 240        }
 241        return prog;
 242}
 243
 244static void prog_fd_array_put_ptr(void *ptr)
 245{
 246        struct bpf_prog *prog = ptr;
 247
 248        bpf_prog_put_rcu(prog);
 249}
 250
 251/* decrement refcnt of all bpf_progs that are stored in this map */
 252void bpf_fd_array_map_clear(struct bpf_map *map)
 253{
 254        struct bpf_array *array = container_of(map, struct bpf_array, map);
 255        int i;
 256
 257        for (i = 0; i < array->map.max_entries; i++)
 258                fd_array_map_delete_elem(map, &i);
 259}
 260
/* operations backing BPF_MAP_TYPE_PROG_ARRAY (tail-call) maps */
static const struct bpf_map_ops prog_array_ops = {
	.map_alloc = fd_array_map_alloc,
	.map_free = fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_update_elem = fd_array_map_update_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
};
 271
/* binds prog_array_ops to the BPF_MAP_TYPE_PROG_ARRAY identifier */
static struct bpf_map_type_list prog_array_type __read_mostly = {
	.ops = &prog_array_ops,
	.type = BPF_MAP_TYPE_PROG_ARRAY,
};

/* register the prog-array map type late in boot */
static int __init register_prog_array_map(void)
{
	bpf_register_map_type(&prog_array_type);
	return 0;
}
late_initcall(register_prog_array_map);
 283
 284static void perf_event_array_map_free(struct bpf_map *map)
 285{
 286        bpf_fd_array_map_clear(map);
 287        fd_array_map_free(map);
 288}
 289
 290static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
 291{
 292        struct perf_event *event;
 293        const struct perf_event_attr *attr;
 294
 295        event = perf_event_get(fd);
 296        if (IS_ERR(event))
 297                return event;
 298
 299        attr = perf_event_attrs(event);
 300        if (IS_ERR(attr))
 301                goto err;
 302
 303        if (attr->inherit)
 304                goto err;
 305
 306        if (attr->type == PERF_TYPE_RAW)
 307                return event;
 308
 309        if (attr->type == PERF_TYPE_HARDWARE)
 310                return event;
 311
 312        if (attr->type == PERF_TYPE_SOFTWARE &&
 313            attr->config == PERF_COUNT_SW_BPF_OUTPUT)
 314                return event;
 315err:
 316        perf_event_release_kernel(event);
 317        return ERR_PTR(-EINVAL);
 318}
 319
 320static void perf_event_fd_array_put_ptr(void *ptr)
 321{
 322        struct perf_event *event = ptr;
 323
 324        perf_event_release_kernel(event);
 325}
 326
/* operations backing BPF_MAP_TYPE_PERF_EVENT_ARRAY maps */
static const struct bpf_map_ops perf_event_array_ops = {
	.map_alloc = fd_array_map_alloc,
	.map_free = perf_event_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_update_elem = fd_array_map_update_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
};
 337
/* binds perf_event_array_ops to BPF_MAP_TYPE_PERF_EVENT_ARRAY */
static struct bpf_map_type_list perf_event_array_type __read_mostly = {
	.ops = &perf_event_array_ops,
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
};

/* register the perf-event-array map type late in boot */
static int __init register_perf_event_array_map(void)
{
	bpf_register_map_type(&perf_event_array_type);
	return 0;
}
late_initcall(register_perf_event_array_map);
 349