linux/kernel/bpf/arraymap.c
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>

static void bpf_array_free_percpu(struct bpf_array *array)
{
        int i;

        for (i = 0; i < array->map.max_entries; i++)
                free_percpu(array->pptrs[i]);
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
        void __percpu *ptr;
        int i;

        for (i = 0; i < array->map.max_entries; i++) {
                /* 8-byte alignment matches the round_up(value_size, 8)
                 * done at map creation time
                 */
                ptr = __alloc_percpu_gfp(array->elem_size, 8,
                                         GFP_USER | __GFP_NOWARN);
                if (!ptr) {
                        bpf_array_free_percpu(array);
                        return -ENOMEM;
                }
                array->pptrs[i] = ptr;
        }

        return 0;
}

/* Called from syscall */
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
        bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
        struct bpf_array *array;
        u64 array_size;
        u32 elem_size;

        /* check sanity of attributes */
        if (attr->max_entries == 0 || attr->key_size != 4 ||
            attr->value_size == 0 || attr->map_flags)
                return ERR_PTR(-EINVAL);

        if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
                /* if value_size is bigger, user space won't be able to
                 * access the elements
                 */
                return ERR_PTR(-E2BIG);

        elem_size = round_up(attr->value_size, 8);

        array_size = sizeof(*array);
        if (percpu)
                array_size += (u64) attr->max_entries * sizeof(void *);
        else
                array_size += (u64) attr->max_entries * elem_size;

        /* make sure there is no u32 overflow later in round_up() */
        if (array_size >= U32_MAX - PAGE_SIZE)
                return ERR_PTR(-ENOMEM);

        /* allocate all map elements and zero-initialize them */
        array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
        if (!array) {
                array = vzalloc(array_size);
                if (!array)
                        return ERR_PTR(-ENOMEM);
        }

        /* copy mandatory map attributes */
        array->map.map_type = attr->map_type;
        array->map.key_size = attr->key_size;
        array->map.value_size = attr->value_size;
        array->map.max_entries = attr->max_entries;
        array->elem_size = elem_size;

        if (!percpu)
                goto out;

        /* per-cpu value areas are allocated separately; they are added to
         * array_size only for the map.pages accounting below
         */
        array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();

        if (array_size >= U32_MAX - PAGE_SIZE ||
            elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
                kvfree(array);
                return ERR_PTR(-ENOMEM);
        }
out:
        array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;

        return &array->map;
}
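
/* A minimal user-space sketch (not part of this file; the sizes are
 * illustrative, and map_flags must be zero as checked above) of how this
 * allocator is reached via the BPF_MAP_CREATE command:
 *
 *        union bpf_attr attr = {
 *                .map_type    = BPF_MAP_TYPE_ARRAY,
 *                .key_size    = 4,
 *                .value_size  = 64,
 *                .max_entries = 256,
 *        };
 *        int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 */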

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;

        if (unlikely(index >= array->map.max_entries))
                return NULL;

        return array->value + array->elem_size * index;
}
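
/* Program-side sketch: the bpf_map_lookup_elem() helper lands in the
 * function above, and after a NULL check the verifier lets the program
 * read and write the value in place ("my_array" is an assumed map of
 * type BPF_MAP_TYPE_ARRAY with 8-byte values):
 *
 *        u32 key = 0;
 *        long *val = bpf_map_lookup_elem(&my_array, &key);
 *
 *        if (val)
 *                __sync_fetch_and_add(val, 1);
 */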

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;

        if (unlikely(index >= array->map.max_entries))
                return NULL;

        return this_cpu_ptr(array->pptrs[index]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;
        void __percpu *pptr;
        int cpu, off = 0;
        u32 size;

        if (unlikely(index >= array->map.max_entries))
                return -ENOENT;

        /* per-cpu areas are zero-filled and bpf programs can only
         * access 'value_size' of them, so copying the rounded areas
         * will not leak any kernel data
         */
        size = round_up(map->value_size, 8);
        rcu_read_lock();
        pptr = array->pptrs[index];
        for_each_possible_cpu(cpu) {
                bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
                off += size;
        }
        rcu_read_unlock();
        return 0;
}
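
/* User-space view of the copy above: a BPF_MAP_LOOKUP_ELEM call on a
 * per-cpu array must supply round_up(value_size, 8) bytes for every
 * possible CPU. A hedged sketch, assuming value_size is 8 and nr_cpus
 * was read from /sys/devices/system/cpu/possible:
 *
 *        __u64 values[nr_cpus];
 *        union bpf_attr attr = {
 *                .map_fd = map_fd,
 *                .key    = (__u64)(unsigned long)&key,
 *                .value  = (__u64)(unsigned long)values,
 *        };
 *
 *        syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 */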

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;
        u32 *next = (u32 *)next_key;

        if (index >= array->map.max_entries) {
                *next = 0;
                return 0;
        }

        if (index == array->map.max_entries - 1)
                return -ENOENT;

        *next = index + 1;
        return 0;
}
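
/* Iteration sketch: starting from an out-of-range key makes the function
 * above hand back key 0, so user space can walk all entries with
 * BPF_MAP_GET_NEXT_KEY until it returns -ENOENT. Assumes a small bpf()
 * wrapper that fills union bpf_attr and invokes the syscall:
 *
 *        u32 key = -1, next;
 *
 *        while (bpf(BPF_MAP_GET_NEXT_KEY, map_fd, &key, &next) == 0) {
 *                ... use entry 'next' ...
 *                key = next;
 *        }
 */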

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
                                 u64 map_flags)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;

        if (unlikely(map_flags > BPF_EXIST))
                /* unknown flags */
                return -EINVAL;

        if (unlikely(index >= array->map.max_entries))
                /* all elements were pre-allocated, cannot insert a new one */
                return -E2BIG;

        if (unlikely(map_flags == BPF_NOEXIST))
                /* all elements already exist */
                return -EEXIST;

        if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                /* only this CPU's copy is written here; syscall updates of
                 * per-cpu maps go through bpf_percpu_array_update() instead
                 */
                memcpy(this_cpu_ptr(array->pptrs[index]),
                       value, map->value_size);
        else
                memcpy(array->value + array->elem_size * index,
                       value, map->value_size);
        return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
                            u64 map_flags)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;
        void __percpu *pptr;
        int cpu, off = 0;
        u32 size;

        if (unlikely(map_flags > BPF_EXIST))
                /* unknown flags */
                return -EINVAL;

        if (unlikely(index >= array->map.max_entries))
                /* all elements were pre-allocated, cannot insert a new one */
                return -E2BIG;

        if (unlikely(map_flags == BPF_NOEXIST))
                /* all elements already exist */
                return -EEXIST;

        /* user space provides round_up(value_size, 8) bytes per possible
         * CPU that are copied into the per-cpu areas. bpf programs can only
         * access value_size of them, and a lookup returns either the same
         * extra bytes or the zeros that percpu_alloc zero-filled, so no
         * kernel data can leak
         */
        size = round_up(map->value_size, 8);
        rcu_read_lock();
        pptr = array->pptrs[index];
        for_each_possible_cpu(cpu) {
                bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
                off += size;
        }
        rcu_read_unlock();
        return 0;
}
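
/* The mirror image of the lookup sketch above: one BPF_MAP_UPDATE_ELEM
 * call from user space writes every CPU's copy at once by passing
 * round_up(value_size, 8) bytes per possible CPU (names as in the
 * earlier sketch):
 *
 *        __u64 values[nr_cpus];
 *
 *        memset(values, 0, sizeof(values));
 *        attr.flags = BPF_ANY;
 *        syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
 */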

/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
        /* all elements are pre-allocated; individual deletion is not
         * supported
         */
        return -EINVAL;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);

        /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
         * so the programs (there can be more than one using this map) were
         * disconnected from events. Wait for outstanding programs to complete
         * and free the array
         */
        synchronize_rcu();

        if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                bpf_array_free_percpu(array);

        kvfree(array);
}

static const struct bpf_map_ops array_ops = {
        .map_alloc = array_map_alloc,
        .map_free = array_map_free,
        .map_get_next_key = array_map_get_next_key,
        .map_lookup_elem = array_map_lookup_elem,
        .map_update_elem = array_map_update_elem,
        .map_delete_elem = array_map_delete_elem,
};

static struct bpf_map_type_list array_type __read_mostly = {
        .ops = &array_ops,
        .type = BPF_MAP_TYPE_ARRAY,
};

static const struct bpf_map_ops percpu_array_ops = {
        .map_alloc = array_map_alloc,
        .map_free = array_map_free,
        .map_get_next_key = array_map_get_next_key,
        .map_lookup_elem = percpu_array_map_lookup_elem,
        .map_update_elem = array_map_update_elem,
        .map_delete_elem = array_map_delete_elem,
};

static struct bpf_map_type_list percpu_array_type __read_mostly = {
        .ops = &percpu_array_ops,
        .type = BPF_MAP_TYPE_PERCPU_ARRAY,
};

static int __init register_array_map(void)
{
        bpf_register_map_type(&array_type);
        bpf_register_map_type(&percpu_array_type);
        return 0;
}
late_initcall(register_array_map);

static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr)
{
        /* only file descriptors can be stored in this type of map */
        if (attr->value_size != sizeof(u32))
                return ERR_PTR(-EINVAL);
        return array_map_alloc(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        int i;

        synchronize_rcu();

        /* make sure it's empty */
        for (i = 0; i < array->map.max_entries; i++)
                BUG_ON(array->ptrs[i] != NULL);
        kvfree(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
        /* the stored pointers are kernel-internal; they are never exposed
         * through lookup, neither to user space nor to programs
         */
        return NULL;
}

/* only called from syscall */
static int fd_array_map_update_elem(struct bpf_map *map, void *key,
                                    void *value, u64 map_flags)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        void *new_ptr, *old_ptr;
        u32 index = *(u32 *)key, ufd;

        if (map_flags != BPF_ANY)
                return -EINVAL;

        if (index >= array->map.max_entries)
                return -E2BIG;

        ufd = *(u32 *)value;
        new_ptr = map->ops->map_fd_get_ptr(map, ufd);
        if (IS_ERR(new_ptr))
                return PTR_ERR(new_ptr);

        old_ptr = xchg(array->ptrs + index, new_ptr);
        if (old_ptr)
                map->ops->map_fd_put_ptr(old_ptr);

        return 0;
}

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        void *old_ptr;
        u32 index = *(u32 *)key;

        if (index >= array->map.max_entries)
                return -E2BIG;

        old_ptr = xchg(array->ptrs + index, NULL);
        if (old_ptr) {
                map->ops->map_fd_put_ptr(old_ptr);
                return 0;
        } else {
                return -ENOENT;
        }
}

static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct bpf_prog *prog = bpf_prog_get(fd);

        if (IS_ERR(prog))
                return prog;

        if (!bpf_prog_array_compatible(array, prog)) {
                bpf_prog_put(prog);
                return ERR_PTR(-EINVAL);
        }
        return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
        struct bpf_prog *prog = ptr;

        bpf_prog_put_rcu(prog);
}

/* decrement refcnt of all bpf_progs that are stored in this map */
void bpf_fd_array_map_clear(struct bpf_map *map)
{
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        int i;

        for (i = 0; i < array->map.max_entries; i++)
                fd_array_map_delete_elem(map, &i);
}

static const struct bpf_map_ops prog_array_ops = {
        .map_alloc = fd_array_map_alloc,
        .map_free = fd_array_map_free,
        .map_get_next_key = array_map_get_next_key,
        .map_lookup_elem = fd_array_map_lookup_elem,
        .map_update_elem = fd_array_map_update_elem,
        .map_delete_elem = fd_array_map_delete_elem,
        .map_fd_get_ptr = prog_fd_array_get_ptr,
        .map_fd_put_ptr = prog_fd_array_put_ptr,
};

static struct bpf_map_type_list prog_array_type __read_mostly = {
        .ops = &prog_array_ops,
        .type = BPF_MAP_TYPE_PROG_ARRAY,
};

static int __init register_prog_array_map(void)
{
        bpf_register_map_type(&prog_array_type);
        return 0;
}
late_initcall(register_prog_array_map);
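
/* A hedged sketch of how a prog array is consumed: user space installs
 * program fds with BPF_MAP_UPDATE_ELEM (routed through
 * fd_array_map_update_elem() above), and a running program jumps through
 * a slot with the bpf_tail_call() helper ("my_prog_array" is an assumed
 * map of type BPF_MAP_TYPE_PROG_ARRAY):
 *
 *        bpf_tail_call(ctx, &my_prog_array, index);
 *
 * Execution falling through to the next statement means the slot was
 * empty or the tail call failed.
 */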

static void perf_event_array_map_free(struct bpf_map *map)
{
        bpf_fd_array_map_clear(map);
        fd_array_map_free(map);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
{
        struct perf_event *event;
        const struct perf_event_attr *attr;
        struct file *file;

        file = perf_event_get(fd);
        if (IS_ERR(file))
                return file;

        event = file->private_data;

        attr = perf_event_attrs(event);
        if (IS_ERR(attr))
                goto err;

        if (attr->inherit)
                goto err;

        if (attr->type == PERF_TYPE_RAW)
                return file;

        if (attr->type == PERF_TYPE_HARDWARE)
                return file;

        if (attr->type == PERF_TYPE_SOFTWARE &&
            attr->config == PERF_COUNT_SW_BPF_OUTPUT)
                return file;
err:
        fput(file);
        return ERR_PTR(-EINVAL);
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
        fput((struct file *)ptr);
}

static const struct bpf_map_ops perf_event_array_ops = {
        .map_alloc = fd_array_map_alloc,
        .map_free = perf_event_array_map_free,
        .map_get_next_key = array_map_get_next_key,
        .map_lookup_elem = fd_array_map_lookup_elem,
        .map_update_elem = fd_array_map_update_elem,
        .map_delete_elem = fd_array_map_delete_elem,
        .map_fd_get_ptr = perf_event_fd_array_get_ptr,
        .map_fd_put_ptr = perf_event_fd_array_put_ptr,
};

static struct bpf_map_type_list perf_event_array_type __read_mostly = {
        .ops = &perf_event_array_ops,
        .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
};

static int __init register_perf_event_array_map(void)
{
        bpf_register_map_type(&perf_event_array_type);
        return 0;
}
late_initcall(register_perf_event_array_map);
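
/* A hedged sketch of the consumer side: a tracing program streams samples
 * to user space through a perf event array with the bpf_perf_event_output()
 * helper. Each slot must hold a perf event fd that passes the checks in
 * perf_event_fd_array_get_ptr() above, e.g. one opened with
 * PERF_COUNT_SW_BPF_OUTPUT ("events" and "data" are assumed names):
 *
 *        struct data_t data = {};
 *
 *        bpf_perf_event_output(ctx, &events, bpf_get_smp_processor_id(),
 *                              &data, sizeof(data));
 */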