linux/kernel/bpf/hashtab.c
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/vmalloc.h>
#include "percpu_freelist.h"

struct bucket {
        struct hlist_head head;
        raw_spinlock_t lock;
};

struct bpf_htab {
        struct bpf_map map;
        struct bucket *buckets;
        void *elems;
        struct pcpu_freelist freelist;
        void __percpu *extra_elems;
        atomic_t count; /* number of elements in this hashtable */
        u32 n_buckets;  /* number of hash buckets */
        u32 elem_size;  /* size of each element in bytes */
};

enum extra_elem_state {
        HTAB_NOT_AN_EXTRA_ELEM = 0,
        HTAB_EXTRA_ELEM_FREE,
        HTAB_EXTRA_ELEM_USED
};

/* each htab element is struct htab_elem + key + value */
struct htab_elem {
        union {
                struct hlist_node hash_node;
                struct bpf_htab *htab;
                struct pcpu_freelist_node fnode;
        };
        union {
                struct rcu_head rcu;
                enum extra_elem_state state;
        };
        u32 hash;
        char key[0] __aligned(8);
};

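/* for per-cpu maps, the pointer to the per-cpu value area is stored
 * right after the key inside the element
 */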
static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
                                     void __percpu *pptr)
{
        *(void __percpu **)(l->key + key_size) = pptr;
}

static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
        return *(void __percpu **)(l->key + key_size);
}

static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
{
        return (struct htab_elem *) (htab->elems + i * htab->elem_size);
}

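/* free the per-cpu value areas (per-cpu maps only) and the preallocated
 * element array
 */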
static void htab_free_elems(struct bpf_htab *htab)
{
        int i;

        if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
                goto free_elems;

        for (i = 0; i < htab->map.max_entries; i++) {
                void __percpu *pptr;

                pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
                                         htab->map.key_size);
                free_percpu(pptr);
        }
free_elems:
        vfree(htab->elems);
}

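/* preallocate all elements up front and seed the freelist with them,
 * so that updates from eBPF programs never have to allocate memory
 */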
static int prealloc_elems_and_freelist(struct bpf_htab *htab)
{
        int err = -ENOMEM, i;

        htab->elems = vzalloc(htab->elem_size * htab->map.max_entries);
        if (!htab->elems)
                return -ENOMEM;

        if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
                goto skip_percpu_elems;

        for (i = 0; i < htab->map.max_entries; i++) {
                u32 size = round_up(htab->map.value_size, 8);
                void __percpu *pptr;

                pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
                if (!pptr)
                        goto free_elems;
                htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
                                  pptr);
        }

skip_percpu_elems:
        err = pcpu_freelist_init(&htab->freelist);
        if (err)
                goto free_elems;

        pcpu_freelist_populate(&htab->freelist, htab->elems, htab->elem_size,
                               htab->map.max_entries);
        return 0;

free_elems:
        htab_free_elems(htab);
        return err;
}

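/* allocate one spare element per possible cpu; it is used when an
 * existing element must be replaced while the table is already full
 */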
static int alloc_extra_elems(struct bpf_htab *htab)
{
        void __percpu *pptr;
        int cpu;

        pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
        if (!pptr)
                return -ENOMEM;

        for_each_possible_cpu(cpu) {
                ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
                        HTAB_EXTRA_ELEM_FREE;
        }
        htab->extra_elems = pptr;
        return 0;
}

/* Called from syscall */
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
        bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_HASH;
        struct bpf_htab *htab;
        int err, i;
        u64 cost;

        if (attr->map_flags & ~BPF_F_NO_PREALLOC)
                /* reserved bits should not be used */
                return ERR_PTR(-EINVAL);

        htab = kzalloc(sizeof(*htab), GFP_USER);
        if (!htab)
                return ERR_PTR(-ENOMEM);

        /* mandatory map attributes */
        htab->map.map_type = attr->map_type;
        htab->map.key_size = attr->key_size;
        htab->map.value_size = attr->value_size;
        htab->map.max_entries = attr->max_entries;
        htab->map.map_flags = attr->map_flags;

        /* check sanity of attributes.
         * value_size == 0 may be allowed in the future to use map as a set
         */
        err = -EINVAL;
        if (htab->map.max_entries == 0 || htab->map.key_size == 0 ||
            htab->map.value_size == 0)
                goto free_htab;

        /* hash table size must be power of 2 */
        htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);

        err = -E2BIG;
        if (htab->map.key_size > MAX_BPF_STACK)
                /* eBPF programs initialize keys on stack, so they cannot be
                 * larger than max stack size
                 */
                goto free_htab;

        if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) -
            MAX_BPF_STACK - sizeof(struct htab_elem))
                /* if value_size is bigger, the user space won't be able to
                 * access the elements via bpf syscall. This check also makes
                 * sure that the elem_size doesn't overflow and it's
                 * kmalloc-able later in htab_map_update_elem()
                 */
                goto free_htab;

        if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE)
                /* make sure the size for pcpu_alloc() is reasonable */
                goto free_htab;

        htab->elem_size = sizeof(struct htab_elem) +
                          round_up(htab->map.key_size, 8);
        if (percpu)
                htab->elem_size += sizeof(void *);
        else
                htab->elem_size += round_up(htab->map.value_size, 8);

        /* prevent zero size kmalloc and check for u32 overflow */
        if (htab->n_buckets == 0 ||
            htab->n_buckets > U32_MAX / sizeof(struct bucket))
                goto free_htab;

        cost = (u64) htab->n_buckets * sizeof(struct bucket) +
               (u64) htab->elem_size * htab->map.max_entries;

        if (percpu)
                cost += (u64) round_up(htab->map.value_size, 8) *
                        num_possible_cpus() * htab->map.max_entries;
        else
                cost += (u64) htab->elem_size * num_possible_cpus();

        if (cost >= U32_MAX - PAGE_SIZE)
                /* make sure page count doesn't overflow */
                goto free_htab;

        htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

        /* if map size is larger than memlock limit, reject it early */
        err = bpf_map_precharge_memlock(htab->map.pages);
        if (err)
                goto free_htab;

        err = -ENOMEM;
        htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct bucket),
                                      GFP_USER | __GFP_NOWARN);

        if (!htab->buckets) {
                htab->buckets = vmalloc(htab->n_buckets * sizeof(struct bucket));
                if (!htab->buckets)
                        goto free_htab;
        }

        for (i = 0; i < htab->n_buckets; i++) {
                INIT_HLIST_HEAD(&htab->buckets[i].head);
                raw_spin_lock_init(&htab->buckets[i].lock);
        }

        if (!percpu) {
                err = alloc_extra_elems(htab);
                if (err)
                        goto free_buckets;
        }

        if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
                err = prealloc_elems_and_freelist(htab);
                if (err)
                        goto free_extra_elems;
        }

        return &htab->map;

free_extra_elems:
        free_percpu(htab->extra_elems);
free_buckets:
        kvfree(htab->buckets);
free_htab:
        kfree(htab);
        return ERR_PTR(err);
}

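/* keys are hashed with jhash; the low bits of the hash select the bucket */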
static inline u32 htab_map_hash(const void *key, u32 key_len)
{
        return jhash(key, key_len, 0);
}

static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
        return &htab->buckets[hash & (htab->n_buckets - 1)];
}

static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
        return &__select_bucket(htab, hash)->head;
}

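/* walk one bucket under RCU, comparing the full hash and the key bytes */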
static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
                                         void *key, u32 key_size)
{
        struct htab_elem *l;

        hlist_for_each_entry_rcu(l, head, hash_node)
                if (l->hash == hash && !memcmp(&l->key, key, key_size))
                        return l;

        return NULL;
}

/* Called from syscall or from eBPF program */
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
        struct hlist_head *head;
        struct htab_elem *l;
        u32 hash, key_size;

        /* Must be called with rcu_read_lock. */
        WARN_ON_ONCE(!rcu_read_lock_held());

        key_size = map->key_size;

        hash = htab_map_hash(key, key_size);

        head = select_bucket(htab, hash);

        l = lookup_elem_raw(head, hash, key, key_size);

        return l;
}

static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
{
        struct htab_elem *l = __htab_map_lookup_elem(map, key);

        if (l)
                return l->key + round_up(map->key_size, 8);

        return NULL;
}

/* Called from syscall */
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
        struct hlist_head *head;
        struct htab_elem *l, *next_l;
        u32 hash, key_size;
        int i;

        WARN_ON_ONCE(!rcu_read_lock_held());

        key_size = map->key_size;

        hash = htab_map_hash(key, key_size);

        head = select_bucket(htab, hash);

        /* lookup the key */
        l = lookup_elem_raw(head, hash, key, key_size);

        if (!l) {
                i = 0;
                goto find_first_elem;
        }

        /* key was found, get next key in the same bucket */
        next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
                                  struct htab_elem, hash_node);

        if (next_l) {
                /* if next elem in this hash list is non-zero, just return it */
                memcpy(next_key, next_l->key, key_size);
                return 0;
        }

        /* no more elements in this hash list, go to the next bucket */
        i = hash & (htab->n_buckets - 1);
        i++;

find_first_elem:
        /* iterate over buckets */
        for (; i < htab->n_buckets; i++) {
                head = select_bucket(htab, i);

                /* pick first element in the bucket */
                next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
                                          struct htab_elem, hash_node);
                if (next_l) {
                        /* if it's not empty, just return it */
                        memcpy(next_key, next_l->key, key_size);
                        return 0;
                }
        }

        /* iterated over all buckets and all elements */
        return -ENOENT;
}

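/* release an element's memory immediately; only safe once the element is
 * no longer reachable by concurrent lookups
 */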
static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
        if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
                free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
        kfree(l);
}

static void htab_elem_free_rcu(struct rcu_head *head)
{
        struct htab_elem *l = container_of(head, struct htab_elem, rcu);
        struct bpf_htab *htab = l->htab;

        /* must increment bpf_prog_active to avoid kprobe+bpf triggering while
         * we're calling kfree, otherwise deadlock is possible if kprobes
         * are placed somewhere inside of slub
         */
        preempt_disable();
        __this_cpu_inc(bpf_prog_active);
        htab_elem_free(htab, l);
        __this_cpu_dec(bpf_prog_active);
        preempt_enable();
}

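/* return an element to where it came from: the per-cpu extra slot, the
 * preallocated freelist, or kfree via call_rcu for BPF_F_NO_PREALLOC maps
 */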
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
        if (l->state == HTAB_EXTRA_ELEM_USED) {
                l->state = HTAB_EXTRA_ELEM_FREE;
                return;
        }

        if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
                pcpu_freelist_push(&htab->freelist, &l->fnode);
        } else {
                atomic_dec(&htab->count);
                l->htab = htab;
                call_rcu(&l->rcu, htab_elem_free_rcu);
        }
}

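/* allocate and fill in a new element; when the table is full and an old
 * element with the same key exists, fall back to this cpu's extra element
 */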
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                                         void *value, u32 key_size, u32 hash,
                                         bool percpu, bool onallcpus,
                                         bool old_elem_exists)
{
        u32 size = htab->map.value_size;
        bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
        struct htab_elem *l_new;
        void __percpu *pptr;
        int err = 0;

        if (prealloc) {
                l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
                if (!l_new)
                        err = -E2BIG;
        } else {
                if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
                        atomic_dec(&htab->count);
                        err = -E2BIG;
                } else {
                        l_new = kmalloc(htab->elem_size,
                                        GFP_ATOMIC | __GFP_NOWARN);
                        if (!l_new)
                                return ERR_PTR(-ENOMEM);
                }
        }

        if (err) {
                if (!old_elem_exists)
                        return ERR_PTR(err);

                /* if we're updating the existing element and the hash table
                 * is full, use per-cpu extra elems
                 */
                l_new = this_cpu_ptr(htab->extra_elems);
                if (l_new->state != HTAB_EXTRA_ELEM_FREE)
                        return ERR_PTR(-E2BIG);
                l_new->state = HTAB_EXTRA_ELEM_USED;
        } else {
                l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
        }

        memcpy(l_new->key, key, key_size);
        if (percpu) {
                /* round up value_size to 8 bytes */
                size = round_up(size, 8);

                if (prealloc) {
                        pptr = htab_elem_get_ptr(l_new, key_size);
                } else {
                        /* alloc_percpu zero-fills */
                        pptr = __alloc_percpu_gfp(size, 8,
                                                  GFP_ATOMIC | __GFP_NOWARN);
                        if (!pptr) {
                                kfree(l_new);
                                return ERR_PTR(-ENOMEM);
                        }
                }

                if (!onallcpus) {
                        /* copy true value_size bytes */
                        memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
                } else {
                        int off = 0, cpu;

                        for_each_possible_cpu(cpu) {
                                bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
                                                value + off, size);
                                off += size;
                        }
                }
                if (!prealloc)
                        htab_elem_set_ptr(l_new, key_size, pptr);
        } else {
                memcpy(l_new->key + round_up(key_size, 8), value, size);
        }

        l_new->hash = hash;
        return l_new;
}

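/* check BPF_NOEXIST/BPF_EXIST update flags against the current element */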
static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
                       u64 map_flags)
{
        if (l_old && map_flags == BPF_NOEXIST)
                /* elem already exists */
                return -EEXIST;

        if (!l_old && map_flags == BPF_EXIST)
                /* elem doesn't exist, cannot update it */
                return -ENOENT;

        return 0;
}

/* Called from syscall or from eBPF program */
static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
                                u64 map_flags)
{
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
        struct htab_elem *l_new = NULL, *l_old;
        struct hlist_head *head;
        unsigned long flags;
        struct bucket *b;
        u32 key_size, hash;
        int ret;

        if (unlikely(map_flags > BPF_EXIST))
                /* unknown flags */
                return -EINVAL;

        WARN_ON_ONCE(!rcu_read_lock_held());

        key_size = map->key_size;

        hash = htab_map_hash(key, key_size);

        b = __select_bucket(htab, hash);
        head = &b->head;

        /* bpf_map_update_elem() can be called in_irq() */
        raw_spin_lock_irqsave(&b->lock, flags);

        l_old = lookup_elem_raw(head, hash, key, key_size);

        ret = check_flags(htab, l_old, map_flags);
        if (ret)
                goto err;

        l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
                                !!l_old);
        if (IS_ERR(l_new)) {
                /* all pre-allocated elements are in use or memory exhausted */
                ret = PTR_ERR(l_new);
                goto err;
        }

        /* add new element to the head of the list, so that
         * concurrent search will find it before old elem
         */
        hlist_add_head_rcu(&l_new->hash_node, head);
        if (l_old) {
                hlist_del_rcu(&l_old->hash_node);
                free_htab_elem(htab, l_old);
        }
        ret = 0;
err:
        raw_spin_unlock_irqrestore(&b->lock, flags);
        return ret;
}

static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
                                         void *value, u64 map_flags,
                                         bool onallcpus)
{
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
        struct htab_elem *l_new = NULL, *l_old;
        struct hlist_head *head;
        unsigned long flags;
        struct bucket *b;
        u32 key_size, hash;
        int ret;

        if (unlikely(map_flags > BPF_EXIST))
                /* unknown flags */
                return -EINVAL;

        WARN_ON_ONCE(!rcu_read_lock_held());

        key_size = map->key_size;

        hash = htab_map_hash(key, key_size);

        b = __select_bucket(htab, hash);
        head = &b->head;

        /* bpf_map_update_elem() can be called in_irq() */
        raw_spin_lock_irqsave(&b->lock, flags);

        l_old = lookup_elem_raw(head, hash, key, key_size);

        ret = check_flags(htab, l_old, map_flags);
        if (ret)
                goto err;

        if (l_old) {
                void __percpu *pptr = htab_elem_get_ptr(l_old, key_size);
                u32 size = htab->map.value_size;

                /* per-cpu hash map can update value in-place */
                if (!onallcpus) {
                        memcpy(this_cpu_ptr(pptr), value, size);
                } else {
                        int off = 0, cpu;

                        size = round_up(size, 8);
                        for_each_possible_cpu(cpu) {
                                bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
                                                value + off, size);
                                off += size;
                        }
                }
        } else {
                l_new = alloc_htab_elem(htab, key, value, key_size,
                                        hash, true, onallcpus, false);
                if (IS_ERR(l_new)) {
                        ret = PTR_ERR(l_new);
                        goto err;
                }
                hlist_add_head_rcu(&l_new->hash_node, head);
        }
        ret = 0;
err:
        raw_spin_unlock_irqrestore(&b->lock, flags);
        return ret;
}

static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
                                       void *value, u64 map_flags)
{
        return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
}

/* Called from syscall or from eBPF program */
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
        struct hlist_head *head;
        struct bucket *b;
        struct htab_elem *l;
        unsigned long flags;
        u32 hash, key_size;
        int ret = -ENOENT;

        WARN_ON_ONCE(!rcu_read_lock_held());

        key_size = map->key_size;

        hash = htab_map_hash(key, key_size);
        b = __select_bucket(htab, hash);
        head = &b->head;

        raw_spin_lock_irqsave(&b->lock, flags);

        l = lookup_elem_raw(head, hash, key, key_size);

        if (l) {
                hlist_del_rcu(&l->hash_node);
                free_htab_elem(htab, l);
                ret = 0;
        }

        raw_spin_unlock_irqrestore(&b->lock, flags);
        return ret;
}

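/* free every element of a BPF_F_NO_PREALLOC map; called from
 * htab_map_free() after the RCU grace period has passed
 */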
static void delete_all_elements(struct bpf_htab *htab)
{
        int i;

        for (i = 0; i < htab->n_buckets; i++) {
                struct hlist_head *head = select_bucket(htab, i);
                struct hlist_node *n;
                struct htab_elem *l;

                hlist_for_each_entry_safe(l, n, head, hash_node) {
                        hlist_del_rcu(&l->hash_node);
                        htab_elem_free(htab, l);
                }
        }
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void htab_map_free(struct bpf_map *map)
{
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);

        /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
         * so the programs (can be more than one that used this map) were
         * disconnected from events. Wait for outstanding critical sections in
         * these programs to complete
         */
        synchronize_rcu();

        /* some of free_htab_elem() callbacks for elements of this map may
         * not have executed. Wait for them.
         */
        rcu_barrier();
        if (htab->map.map_flags & BPF_F_NO_PREALLOC) {
                delete_all_elements(htab);
        } else {
                htab_free_elems(htab);
                pcpu_freelist_destroy(&htab->freelist);
        }
        free_percpu(htab->extra_elems);
        kvfree(htab->buckets);
        kfree(htab);
}

static const struct bpf_map_ops htab_ops = {
        .map_alloc = htab_map_alloc,
        .map_free = htab_map_free,
        .map_get_next_key = htab_map_get_next_key,
        .map_lookup_elem = htab_map_lookup_elem,
        .map_update_elem = htab_map_update_elem,
        .map_delete_elem = htab_map_delete_elem,
};

static struct bpf_map_type_list htab_type __read_mostly = {
        .ops = &htab_ops,
        .type = BPF_MAP_TYPE_HASH,
};

/* Called from eBPF program */
static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
{
        struct htab_elem *l = __htab_map_lookup_elem(map, key);

        if (l)
                return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
        else
                return NULL;
}

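/* Called from syscall: copy the value from all possible cpus into one
 * contiguous buffer, one rounded value_size slot per cpu
 */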
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
{
        struct htab_elem *l;
        void __percpu *pptr;
        int ret = -ENOENT;
        int cpu, off = 0;
        u32 size;

        /* per_cpu areas are zero-filled and bpf programs can only
         * access 'value_size' of them, so copying rounded areas
         * will not leak any kernel data
         */
        size = round_up(map->value_size, 8);
        rcu_read_lock();
        l = __htab_map_lookup_elem(map, key);
        if (!l)
                goto out;
        pptr = htab_elem_get_ptr(l, map->key_size);
        for_each_possible_cpu(cpu) {
                bpf_long_memcpy(value + off,
                                per_cpu_ptr(pptr, cpu), size);
                off += size;
        }
        ret = 0;
out:
        rcu_read_unlock();
        return ret;
}

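/* Called from syscall: update the value on all cpus at once */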
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
                           u64 map_flags)
{
        int ret;

        rcu_read_lock();
        ret = __htab_percpu_map_update_elem(map, key, value, map_flags, true);
        rcu_read_unlock();

        return ret;
}

static const struct bpf_map_ops htab_percpu_ops = {
        .map_alloc = htab_map_alloc,
        .map_free = htab_map_free,
        .map_get_next_key = htab_map_get_next_key,
        .map_lookup_elem = htab_percpu_map_lookup_elem,
        .map_update_elem = htab_percpu_map_update_elem,
        .map_delete_elem = htab_map_delete_elem,
};

static struct bpf_map_type_list htab_percpu_type __read_mostly = {
        .ops = &htab_percpu_ops,
        .type = BPF_MAP_TYPE_PERCPU_HASH,
};

static int __init register_htab_map(void)
{
        bpf_register_map_type(&htab_type);
        bpf_register_map_type(&htab_percpu_type);
        return 0;
}
late_initcall(register_htab_map);