linux/kernel/bpf/local_storage.c
//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <uapi/linux/btf.h>

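/*
 * Per-cpu pointers to the storages used by the cgroup program currently
 * running on this cpu, one slot per storage type. They are populated by
 * bpf_cgroup_storage_set() before the attached programs are invoked and
 * read by the get_local_storage() helper.
 */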
DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);

#ifdef CONFIG_CGROUP_BPF

#define LOCAL_STORAGE_CREATE_FLAG_MASK					\
	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)

struct bpf_cgroup_storage_map {
	struct bpf_map map;

	spinlock_t lock;
	struct bpf_prog *prog;
	struct rb_root root;
	struct list_head list;
};

static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
	return container_of(map, struct bpf_cgroup_storage_map, map);
}

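/*
 * Order keys by (cgroup_inode_id, attach_type); this defines the ordering
 * of the per-map rb-tree below.
 */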
static int bpf_cgroup_storage_key_cmp(
	const struct bpf_cgroup_storage_key *key1,
	const struct bpf_cgroup_storage_key *key2)
{
	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
		return -1;
	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
		return 1;
	else if (key1->attach_type < key2->attach_type)
		return -1;
	else if (key1->attach_type > key2->attach_type)
		return 1;
	return 0;
}

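/*
 * Find the storage for @key in the map's rb-tree. @locked tells whether the
 * caller already holds map->lock; otherwise it is taken here.
 */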
static struct bpf_cgroup_storage *cgroup_storage_lookup(
	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
	bool locked)
{
	struct rb_root *root = &map->root;
	struct rb_node *node;

	if (!locked)
		spin_lock_bh(&map->lock);

	node = root->rb_node;
	while (node) {
		struct bpf_cgroup_storage *storage;

		storage = container_of(node, struct bpf_cgroup_storage, node);

		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
		case -1:
			node = node->rb_left;
			break;
		case 1:
			node = node->rb_right;
			break;
		default:
			if (!locked)
				spin_unlock_bh(&map->lock);
			return storage;
		}
	}

	if (!locked)
		spin_unlock_bh(&map->lock);

	return NULL;
}

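/*
 * Insert @storage into the map's rb-tree. Returns -EEXIST if an entry with
 * the same key is already present. Called with map->lock held.
 */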
static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
				 struct bpf_cgroup_storage *storage)
{
	struct rb_root *root = &map->root;
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct bpf_cgroup_storage *this;

		this = container_of(*new, struct bpf_cgroup_storage, node);

		parent = *new;
		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
		case -1:
			new = &((*new)->rb_left);
			break;
		case 1:
			new = &((*new)->rb_right);
			break;
		default:
			return -EEXIST;
		}
	}

	rb_link_node(&storage->node, parent, new);
	rb_insert_color(&storage->node, root);

	return 0;
}

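/*
 * Syscall-side BPF_MAP_LOOKUP_ELEM for shared storage: return a pointer to
 * the data buffer. READ_ONCE() pairs with the xchg() in
 * cgroup_storage_update_elem(), which can replace the buffer concurrently.
 */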
static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;

	storage = cgroup_storage_lookup(map, key, false);
	if (!storage)
		return NULL;

	return &READ_ONCE(storage->buf)->data[0];
}

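/*
 * Syscall-side update of shared storage. BPF_NOEXIST is rejected because
 * elements are created and destroyed only when a program is attached to or
 * detached from a cgroup, never by userspace. With BPF_F_LOCK the value is
 * copied in place under the embedded bpf_spin_lock; otherwise a new buffer
 * is allocated, swapped in with xchg() and the old one is freed after an
 * RCU grace period.
 */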
static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
				      void *value, u64 flags)
{
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	struct bpf_storage_buffer *new;

	if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST)))
		return -EINVAL;

	if (unlikely(flags & BPF_NOEXIST))
		return -EINVAL;

	if (unlikely((flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		return -EINVAL;

	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
					key, false);
	if (!storage)
		return -ENOENT;

	if (flags & BPF_F_LOCK) {
		copy_map_value_locked(map, storage->buf->data, value, false);
		return 0;
	}

	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
			   map->value_size,
			   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
			   map->numa_node);
	if (!new)
		return -ENOMEM;

	memcpy(&new->data[0], value, map->value_size);
	check_and_init_map_lock(map, new->data);

	new = xchg(&storage->buf, new);
	kfree_rcu(new, rcu);

	return 0;
}

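/*
 * Syscall-side lookup for per-cpu storage: concatenate every possible CPU's
 * copy of the value into the flat output buffer.
 */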
int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
				   void *value)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(storage->percpu_buf, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
				     void *value, u64 map_flags)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu, off = 0;
	u32 size;

	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
		return -EINVAL;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map, key, false);
	if (!storage) {
		rcu_read_unlock();
		return -ENOENT;
	}

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(_map->value_size, 8);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
				value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

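/*
 * Iterate the map via its element list under map->lock: with no key return
 * the first element, otherwise return the element following @key.
 */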
static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
				       void *_next_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage_key *next = _next_key;
	struct bpf_cgroup_storage *storage;

	spin_lock_bh(&map->lock);

	if (list_empty(&map->list))
		goto enoent;

	if (key) {
		storage = cgroup_storage_lookup(map, key, true);
		if (!storage)
			goto enoent;

		storage = list_next_entry(storage, list);
		if (!storage)
			goto enoent;
	} else {
		storage = list_first_entry(&map->list,
					   struct bpf_cgroup_storage, list);
	}

	spin_unlock_bh(&map->lock);
	next->attach_type = storage->key.attach_type;
	next->cgroup_inode_id = storage->key.cgroup_inode_id;
	return 0;

enoent:
	spin_unlock_bh(&map->lock);
	return -ENOENT;
}

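/*
 * Validate the map attributes: the key must be struct bpf_cgroup_storage_key,
 * the value size is limited to PAGE_SIZE and max_entries must be 0, since
 * entries come and go with cgroup attachments rather than userspace updates.
 */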
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_cgroup_storage_map *map;
	struct bpf_map_memory mem;
	int ret;

	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
		return ERR_PTR(-EINVAL);

	if (attr->value_size == 0)
		return ERR_PTR(-EINVAL);

	if (attr->value_size > PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return ERR_PTR(-EINVAL);

	if (attr->max_entries)
		/* max_entries is not used and enforced to be 0 */
		return ERR_PTR(-EINVAL);

	ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map));
	if (ret < 0)
		return ERR_PTR(ret);

	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
			   __GFP_ZERO | GFP_USER, numa_node);
	if (!map) {
		bpf_map_charge_finish(&mem);
		return ERR_PTR(-ENOMEM);
	}

	bpf_map_charge_move(&map->map.memory, &mem);

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&map->map, attr);

	spin_lock_init(&map->lock);
	map->root = RB_ROOT;
	INIT_LIST_HEAD(&map->list);

	return &map->map;
}

static void cgroup_storage_map_free(struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	WARN_ON(!RB_EMPTY_ROOT(&map->root));
	WARN_ON(!list_empty(&map->list));

	kfree(map);
}

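/*
 * Elements cannot be deleted through the map API; storage is freed when the
 * owning program is detached from the cgroup.
 */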
static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

static int cgroup_storage_check_btf(const struct bpf_map *map,
				    const struct btf *btf,
				    const struct btf_type *key_type,
				    const struct btf_type *value_type)
{
	struct btf_member *m;
	u32 offset, size;

	/* Key is expected to be of struct bpf_cgroup_storage_key type,
	 * which is:
	 * struct bpf_cgroup_storage_key {
	 *	__u64	cgroup_inode_id;
	 *	__u32	attach_type;
	 * };
	 */

	/*
	 * Key_type must be a structure with two fields.
	 */
	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ||
	    BTF_INFO_VLEN(key_type->info) != 2)
		return -EINVAL;

	/*
	 * The first field must be a 64 bit integer at 0 offset.
	 */
	m = (struct btf_member *)(key_type + 1);
	size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, cgroup_inode_id);
	if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
		return -EINVAL;

	/*
	 * The second field must be a 32 bit integer at 64 bit offset.
	 */
	m++;
	offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
	size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, attach_type);
	if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
		return -EINVAL;

	return 0;
}

static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key,
					 struct seq_file *m)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	int cpu;

	rcu_read_lock();
	storage = cgroup_storage_lookup(map_to_storage(map), key, false);
	if (!storage) {
		rcu_read_unlock();
		return;
	}

	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		seq_puts(m, ": ");
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  &READ_ONCE(storage->buf)->data[0], m);
		seq_puts(m, "\n");
	} else {
		seq_puts(m, ": {\n");
		for_each_possible_cpu(cpu) {
			seq_printf(m, "\tcpu%d: ", cpu);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  per_cpu_ptr(storage->percpu_buf, cpu),
					  m);
			seq_puts(m, "\n");
		}
		seq_puts(m, "}\n");
	}
	rcu_read_unlock();
}

const struct bpf_map_ops cgroup_storage_map_ops = {
	.map_alloc = cgroup_storage_map_alloc,
	.map_free = cgroup_storage_map_free,
	.map_get_next_key = cgroup_storage_get_next_key,
	.map_lookup_elem = cgroup_storage_lookup_elem,
	.map_update_elem = cgroup_storage_update_elem,
	.map_delete_elem = cgroup_storage_delete_elem,
	.map_check_btf = cgroup_storage_check_btf,
	.map_seq_show_elem = cgroup_storage_seq_show_elem,
};

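/*
 * Bind the map to a program: a cgroup storage map may only be used by a
 * single program, and a program may only use one map per storage type.
 */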
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	int ret = -EBUSY;

	spin_lock_bh(&map->lock);

	if (map->prog && map->prog != prog)
		goto unlock;
	if (prog->aux->cgroup_storage[stype] &&
	    prog->aux->cgroup_storage[stype] != _map)
		goto unlock;

	map->prog = prog;
	prog->aux->cgroup_storage[stype] = _map;
	ret = 0;
unlock:
	spin_unlock_bh(&map->lock);

	return ret;
}

void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
{
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	spin_lock_bh(&map->lock);
	if (map->prog == prog) {
		WARN_ON(prog->aux->cgroup_storage[stype] != _map);
		map->prog = NULL;
		prog->aux->cgroup_storage[stype] = NULL;
	}
	spin_unlock_bh(&map->lock);
}

static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
{
	size_t size;

	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
		size = sizeof(struct bpf_storage_buffer) + map->value_size;
		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
				  PAGE_SIZE) >> PAGE_SHIFT;
	} else {
		size = map->value_size;
		*pages = round_up(round_up(size, 8) * num_possible_cpus(),
				  PAGE_SIZE) >> PAGE_SHIFT;
	}

	return size;
}

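/*
 * Allocate (but do not yet link) the storage backing a program's map of the
 * given type: charge the map's memlock, then allocate either the shared
 * buffer or the per-cpu area. Returns NULL if the program uses no map of
 * this type, or ERR_PTR() on failure.
 */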
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
					enum bpf_cgroup_storage_type stype)
{
	struct bpf_cgroup_storage *storage;
	struct bpf_map *map;
	gfp_t flags;
	size_t size;
	u32 pages;

	map = prog->aux->cgroup_storage[stype];
	if (!map)
		return NULL;

	size = bpf_cgroup_storage_calculate_size(map, &pages);

	if (bpf_map_charge_memlock(map, pages))
		return ERR_PTR(-EPERM);

	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
			       __GFP_ZERO | GFP_USER, map->numa_node);
	if (!storage)
		goto enomem;

	flags = __GFP_ZERO | GFP_USER;

	if (stype == BPF_CGROUP_STORAGE_SHARED) {
		storage->buf = kmalloc_node(size, flags, map->numa_node);
		if (!storage->buf)
			goto enomem;
		check_and_init_map_lock(map, storage->buf->data);
	} else {
		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
		if (!storage->percpu_buf)
			goto enomem;
	}

	storage->map = (struct bpf_cgroup_storage_map *)map;

	return storage;

enomem:
	bpf_map_uncharge_memlock(map, pages);
	kfree(storage);
	return ERR_PTR(-ENOMEM);
}

static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	kfree(storage->buf);
	kfree(storage);
}

static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
{
	struct bpf_cgroup_storage *storage =
		container_of(rcu, struct bpf_cgroup_storage, rcu);

	free_percpu(storage->percpu_buf);
	kfree(storage);
}

void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_map *map;
	u32 pages;

	if (!storage)
		return;

	map = &storage->map->map;

	bpf_cgroup_storage_calculate_size(map, &pages);
	bpf_map_uncharge_memlock(map, pages);

	stype = cgroup_storage_type(map);
	if (stype == BPF_CGROUP_STORAGE_SHARED)
		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
	else
		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
}

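/*
 * Attach the storage to a cgroup: fill in the key from the cgroup's kernfs
 * inode id and the attach type, then make it visible by inserting it into
 * the map's rb-tree and element list under map->lock.
 */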
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type)
{
	struct bpf_cgroup_storage_map *map;

	if (!storage)
		return;

	storage->key.attach_type = type;
	storage->key.cgroup_inode_id = cgroup->kn->id.id;

	map = storage->map;

	spin_lock_bh(&map->lock);
	WARN_ON(cgroup_storage_insert(map, storage));
	list_add(&storage->list, &map->list);
	spin_unlock_bh(&map->lock);
}

void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
	struct bpf_cgroup_storage_map *map;
	struct rb_root *root;

	if (!storage)
		return;

	map = storage->map;

	spin_lock_bh(&map->lock);
	root = &map->root;
	rb_erase(&storage->node, root);

	list_del(&storage->list);
	spin_unlock_bh(&map->lock);
}

#endif