#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/random.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "map_in_map.h"

#define HTAB_CREATE_FLAG_MASK						\
	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
	 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)
20
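/* Wire up the four batch operations shared by all hash map flavours:
 * lookup and lookup_and_delete batches are per-flavour, while update
 * and delete batches use the generic implementations.
 */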
#define BATCH_OPS(_name)			\
	.map_lookup_batch =			\
	_name##_map_lookup_batch,		\
	.map_lookup_and_delete_batch =		\
	_name##_map_lookup_and_delete_batch,	\
	.map_update_batch =			\
	generic_map_update_batch,		\
	.map_delete_batch =			\
	generic_map_delete_batch
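/* Each bucket protects its hlist_nulls list with its own lock.  With
 * preallocated elements (the default) or on !PREEMPT_RT kernels a
 * raw_spinlock_t is used so the bucket can be locked from any context
 * a BPF program may run in; PREEMPT_RT kernels with BPF_F_NO_PREALLOC
 * maps use a regular spinlock_t instead (see htab_use_raw_lock()).
 */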
struct bucket {
	struct hlist_nulls_head head;
	union {
		raw_spinlock_t raw_lock;
		spinlock_t lock;
	};
};

struct bpf_htab {
	struct bpf_map map;
	struct bucket *buckets;
	void *elems;		/* preallocated element area */
	union {
		struct pcpu_freelist freelist;
		struct bpf_lru lru;
	};
	struct htab_elem *__percpu *extra_elems;
	atomic_t count;		/* number of elements in this hashtable */
	u32 n_buckets;		/* number of hash buckets */
	u32 elem_size;		/* size of each element in bytes */
	u32 hashrnd;
};

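/* each htab element is struct htab_elem + key + value */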
struct htab_elem {
	union {
		struct hlist_nulls_node hash_node;
		struct {
			void *padding;
			union {
				struct bpf_htab *htab;
				struct pcpu_freelist_node fnode;
				struct htab_elem *batch_flink;
			};
		};
	};
	union {
		struct rcu_head rcu;
		struct bpf_lru_node lru_node;
	};
	u32 hash;
	char key[] __aligned(8);
};

static inline bool htab_is_prealloc(const struct bpf_htab *htab)
{
	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
}

static inline bool htab_use_raw_lock(const struct bpf_htab *htab)
{
	return (!IS_ENABLED(CONFIG_PREEMPT_RT) || htab_is_prealloc(htab));
}
134
static void htab_init_buckets(struct bpf_htab *htab)
{
	unsigned i;

	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
		if (htab_use_raw_lock(htab))
			raw_spin_lock_init(&htab->buckets[i].raw_lock);
		else
			spin_lock_init(&htab->buckets[i].lock);
	}
}

static inline unsigned long htab_lock_bucket(const struct bpf_htab *htab,
					     struct bucket *b)
{
	unsigned long flags;

	if (htab_use_raw_lock(htab))
		raw_spin_lock_irqsave(&b->raw_lock, flags);
	else
		spin_lock_irqsave(&b->lock, flags);
	return flags;
}

static inline void htab_unlock_bucket(const struct bpf_htab *htab,
				      struct bucket *b,
				      unsigned long flags)
{
	if (htab_use_raw_lock(htab))
		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
	else
		spin_unlock_irqrestore(&b->lock, flags);
}
169
170static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
171
172static bool htab_is_lru(const struct bpf_htab *htab)
173{
174 return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH ||
175 htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
176}
177
178static bool htab_is_percpu(const struct bpf_htab *htab)
179{
180 return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
181 htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
182}
183
184static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
185 void __percpu *pptr)
186{
187 *(void __percpu **)(l->key + key_size) = pptr;
188}
189
190static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
191{
192 return *(void __percpu **)(l->key + key_size);
193}
194
195static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
196{
197 return *(void **)(l->key + roundup(map->key_size, 8));
198}
199
200static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
201{
202 return (struct htab_elem *) (htab->elems + i * htab->elem_size);
203}
204
205static void htab_free_elems(struct bpf_htab *htab)
206{
207 int i;
208
209 if (!htab_is_percpu(htab))
210 goto free_elems;
211
212 for (i = 0; i < htab->map.max_entries; i++) {
213 void __percpu *pptr;
214
215 pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
216 htab->map.key_size);
217 free_percpu(pptr);
218 cond_resched();
219 }
220free_elems:
221 bpf_map_area_free(htab->elems);
222}
223
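/* Pop a free element from the LRU free list for the given hash.  The
 * LRU core may have to evict an in-use element first (via the
 * htab_lru_map_delete_node() callback), which takes the LRU list lock
 * and then the victim's bucket lock; hashtab.c therefore never calls
 * into the LRU core while holding a bucket lock -- bucket locks are
 * always dropped before bpf_lru_push_free().
 */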
235static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
236 u32 hash)
237{
238 struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash);
239 struct htab_elem *l;
240
241 if (node) {
242 l = container_of(node, struct htab_elem, lru_node);
243 memcpy(l->key, key, htab->map.key_size);
244 return l;
245 }
246
247 return NULL;
248}
249
250static int prealloc_init(struct bpf_htab *htab)
251{
252 u32 num_entries = htab->map.max_entries;
253 int err = -ENOMEM, i;
254
255 if (!htab_is_percpu(htab) && !htab_is_lru(htab))
256 num_entries += num_possible_cpus();
257
258 htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries,
259 htab->map.numa_node);
260 if (!htab->elems)
261 return -ENOMEM;
262
263 if (!htab_is_percpu(htab))
264 goto skip_percpu_elems;
265
266 for (i = 0; i < num_entries; i++) {
267 u32 size = round_up(htab->map.value_size, 8);
268 void __percpu *pptr;
269
270 pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
271 if (!pptr)
272 goto free_elems;
273 htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
274 pptr);
275 cond_resched();
276 }
277
278skip_percpu_elems:
279 if (htab_is_lru(htab))
280 err = bpf_lru_init(&htab->lru,
281 htab->map.map_flags & BPF_F_NO_COMMON_LRU,
282 offsetof(struct htab_elem, hash) -
283 offsetof(struct htab_elem, lru_node),
284 htab_lru_map_delete_node,
285 htab);
286 else
287 err = pcpu_freelist_init(&htab->freelist);
288
289 if (err)
290 goto free_elems;
291
292 if (htab_is_lru(htab))
293 bpf_lru_populate(&htab->lru, htab->elems,
294 offsetof(struct htab_elem, lru_node),
295 htab->elem_size, num_entries);
296 else
297 pcpu_freelist_populate(&htab->freelist,
298 htab->elems + offsetof(struct htab_elem, fnode),
299 htab->elem_size, num_entries);
300
301 return 0;
302
303free_elems:
304 htab_free_elems(htab);
305 return err;
306}
307
308static void prealloc_destroy(struct bpf_htab *htab)
309{
310 htab_free_elems(htab);
311
312 if (htab_is_lru(htab))
313 bpf_lru_destroy(&htab->lru);
314 else
315 pcpu_freelist_destroy(&htab->freelist);
316}
317
318static int alloc_extra_elems(struct bpf_htab *htab)
319{
320 struct htab_elem *__percpu *pptr, *l_new;
321 struct pcpu_freelist_node *l;
322 int cpu;
323
324 pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
325 GFP_USER | __GFP_NOWARN);
326 if (!pptr)
327 return -ENOMEM;
328
329 for_each_possible_cpu(cpu) {
330 l = pcpu_freelist_pop(&htab->freelist);
331
332
333
334 l_new = container_of(l, struct htab_elem, fnode);
335 *per_cpu_ptr(pptr, cpu) = l_new;
336 }
337 htab->extra_elems = pptr;
338 return 0;
339}
340
341
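/* Sanity-check map attributes before allocation: reject unknown flags,
 * require preallocation for LRU maps, and bound key/value sizes.  Keys
 * are built on the BPF program stack, so key_size is limited to
 * MAX_BPF_STACK; value_size is capped so a whole element still fits in
 * a kmalloc allocation.
 */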
342static int htab_map_alloc_check(union bpf_attr *attr)
343{
344 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
345 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
346 bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
347 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
348
349
350
351
352
353 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
354 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
355 bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
356 int numa_node = bpf_map_attr_numa_node(attr);
357
358 BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
359 offsetof(struct htab_elem, hash_node.pprev));
360 BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
361 offsetof(struct htab_elem, hash_node.pprev));
362
363 if (lru && !bpf_capable())
364
365
366
367 return -EPERM;
368
369 if (zero_seed && !capable(CAP_SYS_ADMIN))
370
371 return -EPERM;
372
373 if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
374 !bpf_map_flags_access_ok(attr->map_flags))
375 return -EINVAL;
376
377 if (!lru && percpu_lru)
378 return -EINVAL;
379
380 if (lru && !prealloc)
381 return -ENOTSUPP;
382
383 if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
384 return -EINVAL;
385
386
387
388
389 if (attr->max_entries == 0 || attr->key_size == 0 ||
390 attr->value_size == 0)
391 return -EINVAL;
392
393 if (attr->key_size > MAX_BPF_STACK)
394
395
396
397 return -E2BIG;
398
399 if (attr->value_size >= KMALLOC_MAX_SIZE -
400 MAX_BPF_STACK - sizeof(struct htab_elem))
401
402
403
404
405
406 return -E2BIG;
407
408 return 0;
409}
410
411static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
412{
413 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
414 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
415 bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
416 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
417
418
419
420
421
422 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
423 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
424 struct bpf_htab *htab;
425 u64 cost;
426 int err;
427
428 htab = kzalloc(sizeof(*htab), GFP_USER);
429 if (!htab)
430 return ERR_PTR(-ENOMEM);
431
432 bpf_map_init_from_attr(&htab->map, attr);
433
434 if (percpu_lru) {
435
436
437
438
439 htab->map.max_entries = roundup(attr->max_entries,
440 num_possible_cpus());
441 if (htab->map.max_entries < attr->max_entries)
442 htab->map.max_entries = rounddown(attr->max_entries,
443 num_possible_cpus());
444 }
445
446
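	/* hash table size must be a power of 2; the n_buckets checks
	 * below catch u32 overflow of the rounded-up value
	 */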
447 htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
448
449 htab->elem_size = sizeof(struct htab_elem) +
450 round_up(htab->map.key_size, 8);
451 if (percpu)
452 htab->elem_size += sizeof(void *);
453 else
454 htab->elem_size += round_up(htab->map.value_size, 8);
455
456 err = -E2BIG;
457
458 if (htab->n_buckets == 0 ||
459 htab->n_buckets > U32_MAX / sizeof(struct bucket))
460 goto free_htab;
461
462 cost = (u64) htab->n_buckets * sizeof(struct bucket) +
463 (u64) htab->elem_size * htab->map.max_entries;
464
465 if (percpu)
466 cost += (u64) round_up(htab->map.value_size, 8) *
467 num_possible_cpus() * htab->map.max_entries;
468 else
469 cost += (u64) htab->elem_size * num_possible_cpus();
470
471
472 err = bpf_map_charge_init(&htab->map.memory, cost);
473 if (err)
474 goto free_htab;
475
476 err = -ENOMEM;
477 htab->buckets = bpf_map_area_alloc(htab->n_buckets *
478 sizeof(struct bucket),
479 htab->map.numa_node);
480 if (!htab->buckets)
481 goto free_charge;
482
483 if (htab->map.map_flags & BPF_F_ZERO_SEED)
484 htab->hashrnd = 0;
485 else
486 htab->hashrnd = get_random_int();
487
488 htab_init_buckets(htab);
489
490 if (prealloc) {
491 err = prealloc_init(htab);
492 if (err)
493 goto free_buckets;
494
495 if (!percpu && !lru) {
496
497
498
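			/* lru itself can remove the least used element,
			 * so there is no need for an extra elem during
			 * map_update
			 */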
499 err = alloc_extra_elems(htab);
500 if (err)
501 goto free_prealloc;
502 }
503 }
504
505 return &htab->map;
506
507free_prealloc:
508 prealloc_destroy(htab);
509free_buckets:
510 bpf_map_area_free(htab->buckets);
511free_charge:
512 bpf_map_charge_finish(&htab->map.memory);
513free_htab:
514 kfree(htab);
515 return ERR_PTR(err);
516}
517
static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
{
	return jhash(key, key_len, hashrnd);
}

static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &htab->buckets[hash & (htab->n_buckets - 1)];
}

static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &__select_bucket(htab, hash)->head;
}
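/* this lookup function can only be called with bucket lock taken */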
static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
					 void *key, u32 key_size)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	return NULL;
}
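/* can be called without the bucket lock: it restarts the walk in the
 * unlikely case that an element moved to another bucket while the
 * nulls list was being traversed
 */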
static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
					       u32 hash, void *key,
					       u32 key_size, u32 n_buckets)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

again:
	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
		goto again;

	return NULL;
}
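/* Lookup helper shared by the syscall path and BPF programs.  The
 * caller must be in an RCU (or RCU-trace, for sleepable programs)
 * read-side critical section, as asserted below.
 */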
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l;
	u32 hash, key_size;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	head = select_bucket(htab, hash);

	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	return l;
}

static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return l->key + round_up(map->key_size, 8);

	return NULL;
}
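/* The verifier can inline the map lookup for BPF programs: instead of
 * going through the bpf_map_lookup_elem() helper, it emits the
 * instruction sequence below, which calls __htab_map_lookup_elem()
 * directly and converts the returned htab_elem pointer into a pointer
 * to the value (or NULL).
 */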
615static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
616{
617 struct bpf_insn *insn = insn_buf;
618 const int ret = BPF_REG_0;
619
620 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
621 (void *(*)(struct bpf_map *map, void *key))NULL));
622 *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
623 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
624 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
625 offsetof(struct htab_elem, key) +
626 round_up(map->key_size, 8));
627 return insn - insn_buf;
628}
629
630static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
631 void *key, const bool mark)
632{
633 struct htab_elem *l = __htab_map_lookup_elem(map, key);
634
635 if (l) {
636 if (mark)
637 bpf_lru_node_set_ref(&l->lru_node);
638 return l->key + round_up(map->key_size, 8);
639 }
640
641 return NULL;
642}
643
644static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
645{
646 return __htab_lru_map_lookup_elem(map, key, true);
647}
648
649static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
650{
651 return __htab_lru_map_lookup_elem(map, key, false);
652}
653
654static int htab_lru_map_gen_lookup(struct bpf_map *map,
655 struct bpf_insn *insn_buf)
656{
657 struct bpf_insn *insn = insn_buf;
658 const int ret = BPF_REG_0;
659 const int ref_reg = BPF_REG_1;
660
661 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
662 (void *(*)(struct bpf_map *map, void *key))NULL));
663 *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
664 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
665 *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
666 offsetof(struct htab_elem, lru_node) +
667 offsetof(struct bpf_lru_node, ref));
668 *insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1);
669 *insn++ = BPF_ST_MEM(BPF_B, ret,
670 offsetof(struct htab_elem, lru_node) +
671 offsetof(struct bpf_lru_node, ref),
672 1);
673 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
674 offsetof(struct htab_elem, key) +
675 round_up(map->key_size, 8));
676 return insn - insn_buf;
677}
678
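/* LRU eviction callback, registered via bpf_lru_init() in
 * prealloc_init(): unlink the victim element from its hash bucket so
 * its node can be reused.  Returns true if the element was still
 * hashed and is now free to recycle.
 */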
682static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
683{
684 struct bpf_htab *htab = (struct bpf_htab *)arg;
685 struct htab_elem *l = NULL, *tgt_l;
686 struct hlist_nulls_head *head;
687 struct hlist_nulls_node *n;
688 unsigned long flags;
689 struct bucket *b;
690
691 tgt_l = container_of(node, struct htab_elem, lru_node);
692 b = __select_bucket(htab, tgt_l->hash);
693 head = &b->head;
694
695 flags = htab_lock_bucket(htab, b);
696
697 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
698 if (l == tgt_l) {
699 hlist_nulls_del_rcu(&l->hash_node);
700 break;
701 }
702
703 htab_unlock_bucket(htab, b, flags);
704
705 return l == tgt_l;
706}
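/* Called from syscall; returns the key following @key, or the first
 * key in the table when @key is NULL or no longer present.
 */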
709static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
710{
711 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
712 struct hlist_nulls_head *head;
713 struct htab_elem *l, *next_l;
714 u32 hash, key_size;
715 int i = 0;
716
717 WARN_ON_ONCE(!rcu_read_lock_held());
718
719 key_size = map->key_size;
720
721 if (!key)
722 goto find_first_elem;
723
724 hash = htab_map_hash(key, key_size, htab->hashrnd);
725
726 head = select_bucket(htab, hash);
727
728
729 l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
730
731 if (!l)
732 goto find_first_elem;
733
734
735 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
736 struct htab_elem, hash_node);
737
738 if (next_l) {
739
740 memcpy(next_key, next_l->key, key_size);
741 return 0;
742 }
743
744
745 i = hash & (htab->n_buckets - 1);
746 i++;
747
748find_first_elem:
749
750 for (; i < htab->n_buckets; i++) {
751 head = select_bucket(htab, i);
752
753
754 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
755 struct htab_elem, hash_node);
756 if (next_l) {
757
758 memcpy(next_key, next_l->key, key_size);
759 return 0;
760 }
761 }
762
763
764 return -ENOENT;
765}
766
767static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
768{
769 if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
770 free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
771 kfree(l);
772}
773
774static void htab_elem_free_rcu(struct rcu_head *head)
775{
776 struct htab_elem *l = container_of(head, struct htab_elem, rcu);
777 struct bpf_htab *htab = l->htab;
778
779 htab_elem_free(htab, l);
780}
781
782static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
783{
784 struct bpf_map *map = &htab->map;
785 void *ptr;
786
787 if (map->ops->map_fd_put_ptr) {
788 ptr = fd_htab_map_get_ptr(map, l);
789 map->ops->map_fd_put_ptr(ptr);
790 }
791}
792
793static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
794{
795 htab_put_fd_value(htab, l);
796
797 if (htab_is_prealloc(htab)) {
798 __pcpu_freelist_push(&htab->freelist, &l->fnode);
799 } else {
800 atomic_dec(&htab->count);
801 l->htab = htab;
802 call_rcu(&l->rcu, htab_elem_free_rcu);
803 }
804}
805
806static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
807 void *value, bool onallcpus)
808{
809 if (!onallcpus) {
810
811 memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
812 } else {
813 u32 size = round_up(htab->map.value_size, 8);
814 int off = 0, cpu;
815
816 for_each_possible_cpu(cpu) {
817 bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
818 value + off, size);
819 off += size;
820 }
821 }
822}
823
824static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
825 void *value, bool onallcpus)
826{
827
828
829
830
831
832
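	/* When not setting the initial value on all CPUs, zero-fill the
	 * value on the remaining CPUs.  Otherwise a single-CPU update
	 * could leave stale data from a previous user visible on other
	 * CPUs when a preallocated element is re-used.
	 */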
833 if (htab_is_prealloc(htab) && !onallcpus) {
834 u32 size = round_up(htab->map.value_size, 8);
835 int current_cpu = raw_smp_processor_id();
836 int cpu;
837
838 for_each_possible_cpu(cpu) {
839 if (cpu == current_cpu)
840 bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
841 size);
842 else
843 memset(per_cpu_ptr(pptr, cpu), 0, size);
844 }
845 } else {
846 pcpu_copy_value(htab, pptr, value, onallcpus);
847 }
848}
849
850static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
851{
852 return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
853 BITS_PER_LONG == 64;
854}
855
856static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
857 void *value, u32 key_size, u32 hash,
858 bool percpu, bool onallcpus,
859 struct htab_elem *old_elem)
860{
861 u32 size = htab->map.value_size;
862 bool prealloc = htab_is_prealloc(htab);
863 struct htab_elem *l_new, **pl_new;
864 void __percpu *pptr;
865
866 if (prealloc) {
867 if (old_elem) {
868
869
870
871 pl_new = this_cpu_ptr(htab->extra_elems);
872 l_new = *pl_new;
873 htab_put_fd_value(htab, old_elem);
874 *pl_new = old_elem;
875 } else {
876 struct pcpu_freelist_node *l;
877
878 l = __pcpu_freelist_pop(&htab->freelist);
879 if (!l)
880 return ERR_PTR(-E2BIG);
881 l_new = container_of(l, struct htab_elem, fnode);
882 }
883 } else {
884 if (atomic_inc_return(&htab->count) > htab->map.max_entries)
885 if (!old_elem) {
886
887
888
889
890
891 l_new = ERR_PTR(-E2BIG);
892 goto dec_count;
893 }
894 l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
895 htab->map.numa_node);
896 if (!l_new) {
897 l_new = ERR_PTR(-ENOMEM);
898 goto dec_count;
899 }
900 check_and_init_map_lock(&htab->map,
901 l_new->key + round_up(key_size, 8));
902 }
903
904 memcpy(l_new->key, key, key_size);
905 if (percpu) {
906 size = round_up(size, 8);
907 if (prealloc) {
908 pptr = htab_elem_get_ptr(l_new, key_size);
909 } else {
910
911 pptr = __alloc_percpu_gfp(size, 8,
912 GFP_ATOMIC | __GFP_NOWARN);
913 if (!pptr) {
914 kfree(l_new);
915 l_new = ERR_PTR(-ENOMEM);
916 goto dec_count;
917 }
918 }
919
920 pcpu_init_value(htab, pptr, value, onallcpus);
921
922 if (!prealloc)
923 htab_elem_set_ptr(l_new, key_size, pptr);
924 } else if (fd_htab_map_needs_adjust(htab)) {
925 size = round_up(size, 8);
926 memcpy(l_new->key + round_up(key_size, 8), value, size);
927 } else {
928 copy_map_value(&htab->map,
929 l_new->key + round_up(key_size, 8),
930 value);
931 }
932
933 l_new->hash = hash;
934 return l_new;
935dec_count:
936 atomic_dec(&htab->count);
937 return l_new;
938}
939
940static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
941 u64 map_flags)
942{
943 if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
944
945 return -EEXIST;
946
947 if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
948
949 return -ENOENT;
950
951 return 0;
952}
953
954
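/* Called from syscall or from eBPF program */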
955static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
956 u64 map_flags)
957{
958 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
959 struct htab_elem *l_new = NULL, *l_old;
960 struct hlist_nulls_head *head;
961 unsigned long flags;
962 struct bucket *b;
963 u32 key_size, hash;
964 int ret;
965
966 if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
967
968 return -EINVAL;
969
970 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
971
972 key_size = map->key_size;
973
974 hash = htab_map_hash(key, key_size, htab->hashrnd);
975
976 b = __select_bucket(htab, hash);
977 head = &b->head;
978
979 if (unlikely(map_flags & BPF_F_LOCK)) {
980 if (unlikely(!map_value_has_spin_lock(map)))
981 return -EINVAL;
982
983 l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
984 htab->n_buckets);
985 ret = check_flags(htab, l_old, map_flags);
986 if (ret)
987 return ret;
988 if (l_old) {
989
990 copy_map_value_locked(map,
991 l_old->key + round_up(key_size, 8),
992 value, false);
993 return 0;
994 }
995
996
997
998
999 }
1000
1001 flags = htab_lock_bucket(htab, b);
1002
1003 l_old = lookup_elem_raw(head, hash, key, key_size);
1004
1005 ret = check_flags(htab, l_old, map_flags);
1006 if (ret)
1007 goto err;
1008
1009 if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
1010
1011
1012
1013
1014
1015
1016 copy_map_value_locked(map,
1017 l_old->key + round_up(key_size, 8),
1018 value, false);
1019 ret = 0;
1020 goto err;
1021 }
1022
1023 l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
1024 l_old);
1025 if (IS_ERR(l_new)) {
1026
1027 ret = PTR_ERR(l_new);
1028 goto err;
1029 }
1030
1031
1032
1033
1034 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
1035 if (l_old) {
1036 hlist_nulls_del_rcu(&l_old->hash_node);
1037 if (!htab_is_prealloc(htab))
1038 free_htab_elem(htab, l_old);
1039 }
1040 ret = 0;
1041err:
1042 htab_unlock_bucket(htab, b, flags);
1043 return ret;
1044}
1045
1046static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
1047 u64 map_flags)
1048{
1049 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1050 struct htab_elem *l_new, *l_old = NULL;
1051 struct hlist_nulls_head *head;
1052 unsigned long flags;
1053 struct bucket *b;
1054 u32 key_size, hash;
1055 int ret;
1056
1057 if (unlikely(map_flags > BPF_EXIST))
1058
1059 return -EINVAL;
1060
1061 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
1062
1063 key_size = map->key_size;
1064
1065 hash = htab_map_hash(key, key_size, htab->hashrnd);
1066
1067 b = __select_bucket(htab, hash);
1068 head = &b->head;
1069
1070
1071
1072
1073
1074
1075 l_new = prealloc_lru_pop(htab, key, hash);
1076 if (!l_new)
1077 return -ENOMEM;
1078 memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
1079
1080 flags = htab_lock_bucket(htab, b);
1081
1082 l_old = lookup_elem_raw(head, hash, key, key_size);
1083
1084 ret = check_flags(htab, l_old, map_flags);
1085 if (ret)
1086 goto err;
1087
1088
1089
1090
1091 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
1092 if (l_old) {
1093 bpf_lru_node_set_ref(&l_new->lru_node);
1094 hlist_nulls_del_rcu(&l_old->hash_node);
1095 }
1096 ret = 0;
1097
1098err:
1099 htab_unlock_bucket(htab, b, flags);
1100
1101 if (ret)
1102 bpf_lru_push_free(&htab->lru, &l_new->lru_node);
1103 else if (l_old)
1104 bpf_lru_push_free(&htab->lru, &l_old->lru_node);
1105
1106 return ret;
1107}
1108
1109static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
1110 void *value, u64 map_flags,
1111 bool onallcpus)
1112{
1113 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1114 struct htab_elem *l_new = NULL, *l_old;
1115 struct hlist_nulls_head *head;
1116 unsigned long flags;
1117 struct bucket *b;
1118 u32 key_size, hash;
1119 int ret;
1120
1121 if (unlikely(map_flags > BPF_EXIST))
1122
1123 return -EINVAL;
1124
1125 WARN_ON_ONCE(!rcu_read_lock_held());
1126
1127 key_size = map->key_size;
1128
1129 hash = htab_map_hash(key, key_size, htab->hashrnd);
1130
1131 b = __select_bucket(htab, hash);
1132 head = &b->head;
1133
1134 flags = htab_lock_bucket(htab, b);
1135
1136 l_old = lookup_elem_raw(head, hash, key, key_size);
1137
1138 ret = check_flags(htab, l_old, map_flags);
1139 if (ret)
1140 goto err;
1141
1142 if (l_old) {
1143
1144 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
1145 value, onallcpus);
1146 } else {
1147 l_new = alloc_htab_elem(htab, key, value, key_size,
1148 hash, true, onallcpus, NULL);
1149 if (IS_ERR(l_new)) {
1150 ret = PTR_ERR(l_new);
1151 goto err;
1152 }
1153 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
1154 }
1155 ret = 0;
1156err:
1157 htab_unlock_bucket(htab, b, flags);
1158 return ret;
1159}
1160
1161static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
1162 void *value, u64 map_flags,
1163 bool onallcpus)
1164{
1165 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1166 struct htab_elem *l_new = NULL, *l_old;
1167 struct hlist_nulls_head *head;
1168 unsigned long flags;
1169 struct bucket *b;
1170 u32 key_size, hash;
1171 int ret;
1172
1173 if (unlikely(map_flags > BPF_EXIST))
1174
1175 return -EINVAL;
1176
1177 WARN_ON_ONCE(!rcu_read_lock_held());
1178
1179 key_size = map->key_size;
1180
1181 hash = htab_map_hash(key, key_size, htab->hashrnd);
1182
1183 b = __select_bucket(htab, hash);
1184 head = &b->head;
1185
1186
1187
1188
1189
1190
1191 if (map_flags != BPF_EXIST) {
1192 l_new = prealloc_lru_pop(htab, key, hash);
1193 if (!l_new)
1194 return -ENOMEM;
1195 }
1196
1197 flags = htab_lock_bucket(htab, b);
1198
1199 l_old = lookup_elem_raw(head, hash, key, key_size);
1200
1201 ret = check_flags(htab, l_old, map_flags);
1202 if (ret)
1203 goto err;
1204
1205 if (l_old) {
1206 bpf_lru_node_set_ref(&l_old->lru_node);
1207
1208
1209 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
1210 value, onallcpus);
1211 } else {
1212 pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
1213 value, onallcpus);
1214 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
1215 l_new = NULL;
1216 }
1217 ret = 0;
1218err:
1219 htab_unlock_bucket(htab, b, flags);
1220 if (l_new)
1221 bpf_lru_push_free(&htab->lru, &l_new->lru_node);
1222 return ret;
1223}
1224
1225static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
1226 void *value, u64 map_flags)
1227{
1228 return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
1229}
1230
1231static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
1232 void *value, u64 map_flags)
1233{
1234 return __htab_lru_percpu_map_update_elem(map, key, value, map_flags,
1235 false);
1236}
1237
1238
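/* Called from syscall or from eBPF program; removes the element under
 * the bucket lock and frees it (RCU-deferred for non-preallocated
 * maps).
 */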
1239static int htab_map_delete_elem(struct bpf_map *map, void *key)
1240{
1241 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1242 struct hlist_nulls_head *head;
1243 struct bucket *b;
1244 struct htab_elem *l;
1245 unsigned long flags;
1246 u32 hash, key_size;
1247 int ret = -ENOENT;
1248
1249 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
1250
1251 key_size = map->key_size;
1252
1253 hash = htab_map_hash(key, key_size, htab->hashrnd);
1254 b = __select_bucket(htab, hash);
1255 head = &b->head;
1256
1257 flags = htab_lock_bucket(htab, b);
1258
1259 l = lookup_elem_raw(head, hash, key, key_size);
1260
1261 if (l) {
1262 hlist_nulls_del_rcu(&l->hash_node);
1263 free_htab_elem(htab, l);
1264 ret = 0;
1265 }
1266
1267 htab_unlock_bucket(htab, b, flags);
1268 return ret;
1269}
1270
1271static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
1272{
1273 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1274 struct hlist_nulls_head *head;
1275 struct bucket *b;
1276 struct htab_elem *l;
1277 unsigned long flags;
1278 u32 hash, key_size;
1279 int ret = -ENOENT;
1280
1281 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
1282
1283 key_size = map->key_size;
1284
1285 hash = htab_map_hash(key, key_size, htab->hashrnd);
1286 b = __select_bucket(htab, hash);
1287 head = &b->head;
1288
1289 flags = htab_lock_bucket(htab, b);
1290
1291 l = lookup_elem_raw(head, hash, key, key_size);
1292
1293 if (l) {
1294 hlist_nulls_del_rcu(&l->hash_node);
1295 ret = 0;
1296 }
1297
1298 htab_unlock_bucket(htab, b, flags);
1299 if (l)
1300 bpf_lru_push_free(&htab->lru, &l->lru_node);
1301 return ret;
1302}
1303
1304static void delete_all_elements(struct bpf_htab *htab)
1305{
1306 int i;
1307
1308 for (i = 0; i < htab->n_buckets; i++) {
1309 struct hlist_nulls_head *head = select_bucket(htab, i);
1310 struct hlist_nulls_node *n;
1311 struct htab_elem *l;
1312
1313 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
1314 hlist_nulls_del_rcu(&l->hash_node);
1315 htab_elem_free(htab, l);
1316 }
1317 }
1318}
1319
1320
1321static void htab_map_free(struct bpf_map *map)
1322{
1323 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333 rcu_barrier();
1334 if (!htab_is_prealloc(htab))
1335 delete_all_elements(htab);
1336 else
1337 prealloc_destroy(htab);
1338
1339 free_percpu(htab->extra_elems);
1340 bpf_map_area_free(htab->buckets);
1341 kfree(htab);
1342}
1343
1344static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
1345 struct seq_file *m)
1346{
1347 void *value;
1348
1349 rcu_read_lock();
1350
1351 value = htab_map_lookup_elem(map, key);
1352 if (!value) {
1353 rcu_read_unlock();
1354 return;
1355 }
1356
1357 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
1358 seq_puts(m, ": ");
1359 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
1360 seq_puts(m, "\n");
1361
1362 rcu_read_unlock();
1363}
1364
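/* Common implementation behind the batched map syscall commands.
 * Buckets are processed one at a time under the bucket lock:
 * attr->batch.in_batch names the bucket to resume from, out_batch
 * reports where to continue, and batch.count is set to the number of
 * elements copied out.  -ENOSPC is returned if the user buffer cannot
 * hold even one full bucket and nothing has been copied yet; -ENOENT
 * signals that the last bucket has been reached.
 */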
1365static int
1366__htab_map_lookup_and_delete_batch(struct bpf_map *map,
1367 const union bpf_attr *attr,
1368 union bpf_attr __user *uattr,
1369 bool do_delete, bool is_lru_map,
1370 bool is_percpu)
1371{
1372 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1373 u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
1374 void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
1375 void __user *uvalues = u64_to_user_ptr(attr->batch.values);
1376 void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
1377 void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
1378 u32 batch, max_count, size, bucket_size;
1379 struct htab_elem *node_to_free = NULL;
1380 u64 elem_map_flags, map_flags;
1381 struct hlist_nulls_head *head;
1382 struct hlist_nulls_node *n;
1383 unsigned long flags = 0;
1384 bool locked = false;
1385 struct htab_elem *l;
1386 struct bucket *b;
1387 int ret = 0;
1388
1389 elem_map_flags = attr->batch.elem_flags;
1390 if ((elem_map_flags & ~BPF_F_LOCK) ||
1391 ((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
1392 return -EINVAL;
1393
1394 map_flags = attr->batch.flags;
1395 if (map_flags)
1396 return -EINVAL;
1397
1398 max_count = attr->batch.count;
1399 if (!max_count)
1400 return 0;
1401
1402 if (put_user(0, &uattr->batch.count))
1403 return -EFAULT;
1404
1405 batch = 0;
1406 if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
1407 return -EFAULT;
1408
1409 if (batch >= htab->n_buckets)
1410 return -ENOENT;
1411
1412 key_size = htab->map.key_size;
1413 roundup_key_size = round_up(htab->map.key_size, 8);
1414 value_size = htab->map.value_size;
1415 size = round_up(value_size, 8);
1416 if (is_percpu)
1417 value_size = size * num_possible_cpus();
1418 total = 0;
1419
1420
1421
1422 bucket_size = 5;
1423
1424alloc:
1425
1426
1427
1428 keys = kvmalloc(key_size * bucket_size, GFP_USER | __GFP_NOWARN);
1429 values = kvmalloc(value_size * bucket_size, GFP_USER | __GFP_NOWARN);
1430 if (!keys || !values) {
1431 ret = -ENOMEM;
1432 goto after_loop;
1433 }
1434
1435again:
1436 bpf_disable_instrumentation();
1437 rcu_read_lock();
1438again_nocopy:
1439 dst_key = keys;
1440 dst_val = values;
1441 b = &htab->buckets[batch];
1442 head = &b->head;
1443
1444 if (locked)
1445 flags = htab_lock_bucket(htab, b);
1446
1447 bucket_cnt = 0;
1448 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
1449 bucket_cnt++;
1450
1451 if (bucket_cnt && !locked) {
1452 locked = true;
1453 goto again_nocopy;
1454 }
1455
1456 if (bucket_cnt > (max_count - total)) {
1457 if (total == 0)
1458 ret = -ENOSPC;
1459
1460
1461
1462 htab_unlock_bucket(htab, b, flags);
1463 rcu_read_unlock();
1464 bpf_enable_instrumentation();
1465 goto after_loop;
1466 }
1467
1468 if (bucket_cnt > bucket_size) {
1469 bucket_size = bucket_cnt;
1470
1471
1472
1473 htab_unlock_bucket(htab, b, flags);
1474 rcu_read_unlock();
1475 bpf_enable_instrumentation();
1476 kvfree(keys);
1477 kvfree(values);
1478 goto alloc;
1479 }
1480
1481
1482 if (!locked)
1483 goto next_batch;
1484
1485 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
1486 memcpy(dst_key, l->key, key_size);
1487
1488 if (is_percpu) {
1489 int off = 0, cpu;
1490 void __percpu *pptr;
1491
1492 pptr = htab_elem_get_ptr(l, map->key_size);
1493 for_each_possible_cpu(cpu) {
1494 bpf_long_memcpy(dst_val + off,
1495 per_cpu_ptr(pptr, cpu), size);
1496 off += size;
1497 }
1498 } else {
1499 value = l->key + roundup_key_size;
1500 if (elem_map_flags & BPF_F_LOCK)
1501 copy_map_value_locked(map, dst_val, value,
1502 true);
1503 else
1504 copy_map_value(map, dst_val, value);
1505 check_and_init_map_lock(map, dst_val);
1506 }
1507 if (do_delete) {
1508 hlist_nulls_del_rcu(&l->hash_node);
1509
1510
1511
1512
1513
1514
1515 if (is_lru_map) {
1516 l->batch_flink = node_to_free;
1517 node_to_free = l;
1518 } else {
1519 free_htab_elem(htab, l);
1520 }
1521 }
1522 dst_key += key_size;
1523 dst_val += value_size;
1524 }
1525
1526 htab_unlock_bucket(htab, b, flags);
1527 locked = false;
1528
1529 while (node_to_free) {
1530 l = node_to_free;
1531 node_to_free = node_to_free->batch_flink;
1532 bpf_lru_push_free(&htab->lru, &l->lru_node);
1533 }
1534
1535next_batch:
1536
1537
1538
1539 if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
1540 batch++;
1541 goto again_nocopy;
1542 }
1543
1544 rcu_read_unlock();
1545 bpf_enable_instrumentation();
1546 if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
1547 key_size * bucket_cnt) ||
1548 copy_to_user(uvalues + total * value_size, values,
1549 value_size * bucket_cnt))) {
1550 ret = -EFAULT;
1551 goto after_loop;
1552 }
1553
1554 total += bucket_cnt;
1555 batch++;
1556 if (batch >= htab->n_buckets) {
1557 ret = -ENOENT;
1558 goto after_loop;
1559 }
1560 goto again;
1561
1562after_loop:
1563 if (ret == -EFAULT)
1564 goto out;
1565
1566
1567 ubatch = u64_to_user_ptr(attr->batch.out_batch);
1568 if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
1569 put_user(total, &uattr->batch.count))
1570 ret = -EFAULT;
1571
1572out:
1573 kvfree(keys);
1574 kvfree(values);
1575 return ret;
1576}
1577
1578static int
1579htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
1580 union bpf_attr __user *uattr)
1581{
1582 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
1583 false, true);
1584}
1585
1586static int
1587htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
1588 const union bpf_attr *attr,
1589 union bpf_attr __user *uattr)
1590{
1591 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
1592 false, true);
1593}
1594
1595static int
1596htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
1597 union bpf_attr __user *uattr)
1598{
1599 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
1600 false, false);
1601}
1602
1603static int
1604htab_map_lookup_and_delete_batch(struct bpf_map *map,
1605 const union bpf_attr *attr,
1606 union bpf_attr __user *uattr)
1607{
1608 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
1609 false, false);
1610}
1611
1612static int
1613htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
1614 const union bpf_attr *attr,
1615 union bpf_attr __user *uattr)
1616{
1617 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
1618 true, true);
1619}
1620
1621static int
1622htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
1623 const union bpf_attr *attr,
1624 union bpf_attr __user *uattr)
1625{
1626 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
1627 true, true);
1628}
1629
1630static int
1631htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
1632 union bpf_attr __user *uattr)
1633{
1634 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
1635 true, false);
1636}
1637
1638static int
1639htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
1640 const union bpf_attr *attr,
1641 union bpf_attr __user *uattr)
1642{
1643 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
1644 true, false);
1645}
1646
1647struct bpf_iter_seq_hash_map_info {
1648 struct bpf_map *map;
1649 struct bpf_htab *htab;
1650 void *percpu_value_buf;
1651 u32 bucket_id;
1652 u32 skip_elems;
1653};
1654
1655static struct htab_elem *
1656bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
1657 struct htab_elem *prev_elem)
1658{
1659 const struct bpf_htab *htab = info->htab;
1660 u32 skip_elems = info->skip_elems;
1661 u32 bucket_id = info->bucket_id;
1662 struct hlist_nulls_head *head;
1663 struct hlist_nulls_node *n;
1664 struct htab_elem *elem;
1665 struct bucket *b;
1666 u32 i, count;
1667
1668 if (bucket_id >= htab->n_buckets)
1669 return NULL;
1670
1671
1672 if (prev_elem) {
1673
1674
1675
1676 n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node));
1677 elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node);
1678 if (elem)
1679 return elem;
1680
1681
1682 b = &htab->buckets[bucket_id++];
1683 rcu_read_unlock();
1684 skip_elems = 0;
1685 }
1686
1687 for (i = bucket_id; i < htab->n_buckets; i++) {
1688 b = &htab->buckets[i];
1689 rcu_read_lock();
1690
1691 count = 0;
1692 head = &b->head;
1693 hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
1694 if (count >= skip_elems) {
1695 info->bucket_id = i;
1696 info->skip_elems = count;
1697 return elem;
1698 }
1699 count++;
1700 }
1701
1702 rcu_read_unlock();
1703 skip_elems = 0;
1704 }
1705
1706 info->bucket_id = i;
1707 info->skip_elems = 0;
1708 return NULL;
1709}
1710
1711static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos)
1712{
1713 struct bpf_iter_seq_hash_map_info *info = seq->private;
1714 struct htab_elem *elem;
1715
1716 elem = bpf_hash_map_seq_find_next(info, NULL);
1717 if (!elem)
1718 return NULL;
1719
1720 if (*pos == 0)
1721 ++*pos;
1722 return elem;
1723}
1724
1725static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1726{
1727 struct bpf_iter_seq_hash_map_info *info = seq->private;
1728
1729 ++*pos;
1730 ++info->skip_elems;
1731 return bpf_hash_map_seq_find_next(info, v);
1732}
1733
1734static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
1735{
1736 struct bpf_iter_seq_hash_map_info *info = seq->private;
1737 u32 roundup_key_size, roundup_value_size;
1738 struct bpf_iter__bpf_map_elem ctx = {};
1739 struct bpf_map *map = info->map;
1740 struct bpf_iter_meta meta;
1741 int ret = 0, off = 0, cpu;
1742 struct bpf_prog *prog;
1743 void __percpu *pptr;
1744
1745 meta.seq = seq;
1746 prog = bpf_iter_get_info(&meta, elem == NULL);
1747 if (prog) {
1748 ctx.meta = &meta;
1749 ctx.map = info->map;
1750 if (elem) {
1751 roundup_key_size = round_up(map->key_size, 8);
1752 ctx.key = elem->key;
1753 if (!info->percpu_value_buf) {
1754 ctx.value = elem->key + roundup_key_size;
1755 } else {
1756 roundup_value_size = round_up(map->value_size, 8);
1757 pptr = htab_elem_get_ptr(elem, map->key_size);
1758 for_each_possible_cpu(cpu) {
1759 bpf_long_memcpy(info->percpu_value_buf + off,
1760 per_cpu_ptr(pptr, cpu),
1761 roundup_value_size);
1762 off += roundup_value_size;
1763 }
1764 ctx.value = info->percpu_value_buf;
1765 }
1766 }
1767 ret = bpf_iter_run_prog(prog, &ctx);
1768 }
1769
1770 return ret;
1771}
1772
1773static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
1774{
1775 return __bpf_hash_map_seq_show(seq, v);
1776}
1777
1778static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
1779{
1780 if (!v)
1781 (void)__bpf_hash_map_seq_show(seq, NULL);
1782 else
1783 rcu_read_unlock();
1784}
1785
1786static int bpf_iter_init_hash_map(void *priv_data,
1787 struct bpf_iter_aux_info *aux)
1788{
1789 struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
1790 struct bpf_map *map = aux->map;
1791 void *value_buf;
1792 u32 buf_size;
1793
1794 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
1795 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
1796 buf_size = round_up(map->value_size, 8) * num_possible_cpus();
1797 value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
1798 if (!value_buf)
1799 return -ENOMEM;
1800
1801 seq_info->percpu_value_buf = value_buf;
1802 }
1803
1804 seq_info->map = map;
1805 seq_info->htab = container_of(map, struct bpf_htab, map);
1806 return 0;
1807}
1808
1809static void bpf_iter_fini_hash_map(void *priv_data)
1810{
1811 struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
1812
1813 kfree(seq_info->percpu_value_buf);
1814}
1815
1816static const struct seq_operations bpf_hash_map_seq_ops = {
1817 .start = bpf_hash_map_seq_start,
1818 .next = bpf_hash_map_seq_next,
1819 .stop = bpf_hash_map_seq_stop,
1820 .show = bpf_hash_map_seq_show,
1821};
1822
1823static const struct bpf_iter_seq_info iter_seq_info = {
1824 .seq_ops = &bpf_hash_map_seq_ops,
1825 .init_seq_private = bpf_iter_init_hash_map,
1826 .fini_seq_private = bpf_iter_fini_hash_map,
1827 .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
1828};
1829
1830static int htab_map_btf_id;
1831const struct bpf_map_ops htab_map_ops = {
1832 .map_meta_equal = bpf_map_meta_equal,
1833 .map_alloc_check = htab_map_alloc_check,
1834 .map_alloc = htab_map_alloc,
1835 .map_free = htab_map_free,
1836 .map_get_next_key = htab_map_get_next_key,
1837 .map_lookup_elem = htab_map_lookup_elem,
1838 .map_update_elem = htab_map_update_elem,
1839 .map_delete_elem = htab_map_delete_elem,
1840 .map_gen_lookup = htab_map_gen_lookup,
1841 .map_seq_show_elem = htab_map_seq_show_elem,
1842 BATCH_OPS(htab),
1843 .map_btf_name = "bpf_htab",
1844 .map_btf_id = &htab_map_btf_id,
1845 .iter_seq_info = &iter_seq_info,
1846};
1847
1848static int htab_lru_map_btf_id;
1849const struct bpf_map_ops htab_lru_map_ops = {
1850 .map_meta_equal = bpf_map_meta_equal,
1851 .map_alloc_check = htab_map_alloc_check,
1852 .map_alloc = htab_map_alloc,
1853 .map_free = htab_map_free,
1854 .map_get_next_key = htab_map_get_next_key,
1855 .map_lookup_elem = htab_lru_map_lookup_elem,
1856 .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
1857 .map_update_elem = htab_lru_map_update_elem,
1858 .map_delete_elem = htab_lru_map_delete_elem,
1859 .map_gen_lookup = htab_lru_map_gen_lookup,
1860 .map_seq_show_elem = htab_map_seq_show_elem,
1861 BATCH_OPS(htab_lru),
1862 .map_btf_name = "bpf_htab",
1863 .map_btf_id = &htab_lru_map_btf_id,
1864 .iter_seq_info = &iter_seq_info,
1865};
1866
1867
1868static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
1869{
1870 struct htab_elem *l = __htab_map_lookup_elem(map, key);
1871
1872 if (l)
1873 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
1874 else
1875 return NULL;
1876}
1877
1878static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
1879{
1880 struct htab_elem *l = __htab_map_lookup_elem(map, key);
1881
1882 if (l) {
1883 bpf_lru_node_set_ref(&l->lru_node);
1884 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
1885 }
1886
1887 return NULL;
1888}
1889
1890int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
1891{
1892 struct htab_elem *l;
1893 void __percpu *pptr;
1894 int ret = -ENOENT;
1895 int cpu, off = 0;
1896 u32 size;
1897
1898
1899
1900
1901
1902 size = round_up(map->value_size, 8);
1903 rcu_read_lock();
1904 l = __htab_map_lookup_elem(map, key);
1905 if (!l)
1906 goto out;
1907
1908
1909
1910 pptr = htab_elem_get_ptr(l, map->key_size);
1911 for_each_possible_cpu(cpu) {
1912 bpf_long_memcpy(value + off,
1913 per_cpu_ptr(pptr, cpu), size);
1914 off += size;
1915 }
1916 ret = 0;
1917out:
1918 rcu_read_unlock();
1919 return ret;
1920}
1921
1922int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
1923 u64 map_flags)
1924{
1925 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1926 int ret;
1927
1928 rcu_read_lock();
1929 if (htab_is_lru(htab))
1930 ret = __htab_lru_percpu_map_update_elem(map, key, value,
1931 map_flags, true);
1932 else
1933 ret = __htab_percpu_map_update_elem(map, key, value, map_flags,
1934 true);
1935 rcu_read_unlock();
1936
1937 return ret;
1938}
1939
1940static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
1941 struct seq_file *m)
1942{
1943 struct htab_elem *l;
1944 void __percpu *pptr;
1945 int cpu;
1946
1947 rcu_read_lock();
1948
1949 l = __htab_map_lookup_elem(map, key);
1950 if (!l) {
1951 rcu_read_unlock();
1952 return;
1953 }
1954
1955 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
1956 seq_puts(m, ": {\n");
1957 pptr = htab_elem_get_ptr(l, map->key_size);
1958 for_each_possible_cpu(cpu) {
1959 seq_printf(m, "\tcpu%d: ", cpu);
1960 btf_type_seq_show(map->btf, map->btf_value_type_id,
1961 per_cpu_ptr(pptr, cpu), m);
1962 seq_puts(m, "\n");
1963 }
1964 seq_puts(m, "}\n");
1965
1966 rcu_read_unlock();
1967}
1968
1969static int htab_percpu_map_btf_id;
1970const struct bpf_map_ops htab_percpu_map_ops = {
1971 .map_meta_equal = bpf_map_meta_equal,
1972 .map_alloc_check = htab_map_alloc_check,
1973 .map_alloc = htab_map_alloc,
1974 .map_free = htab_map_free,
1975 .map_get_next_key = htab_map_get_next_key,
1976 .map_lookup_elem = htab_percpu_map_lookup_elem,
1977 .map_update_elem = htab_percpu_map_update_elem,
1978 .map_delete_elem = htab_map_delete_elem,
1979 .map_seq_show_elem = htab_percpu_map_seq_show_elem,
1980 BATCH_OPS(htab_percpu),
1981 .map_btf_name = "bpf_htab",
1982 .map_btf_id = &htab_percpu_map_btf_id,
1983 .iter_seq_info = &iter_seq_info,
1984};
1985
1986static int htab_lru_percpu_map_btf_id;
1987const struct bpf_map_ops htab_lru_percpu_map_ops = {
1988 .map_meta_equal = bpf_map_meta_equal,
1989 .map_alloc_check = htab_map_alloc_check,
1990 .map_alloc = htab_map_alloc,
1991 .map_free = htab_map_free,
1992 .map_get_next_key = htab_map_get_next_key,
1993 .map_lookup_elem = htab_lru_percpu_map_lookup_elem,
1994 .map_update_elem = htab_lru_percpu_map_update_elem,
1995 .map_delete_elem = htab_lru_map_delete_elem,
1996 .map_seq_show_elem = htab_percpu_map_seq_show_elem,
1997 BATCH_OPS(htab_lru_percpu),
1998 .map_btf_name = "bpf_htab",
1999 .map_btf_id = &htab_lru_percpu_map_btf_id,
2000 .iter_seq_info = &iter_seq_info,
2001};
2002
2003static int fd_htab_map_alloc_check(union bpf_attr *attr)
2004{
2005 if (attr->value_size != sizeof(u32))
2006 return -EINVAL;
2007 return htab_map_alloc_check(attr);
2008}
2009
2010static void fd_htab_map_free(struct bpf_map *map)
2011{
2012 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
2013 struct hlist_nulls_node *n;
2014 struct hlist_nulls_head *head;
2015 struct htab_elem *l;
2016 int i;
2017
2018 for (i = 0; i < htab->n_buckets; i++) {
2019 head = select_bucket(htab, i);
2020
2021 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
2022 void *ptr = fd_htab_map_get_ptr(map, l);
2023
2024 map->ops->map_fd_put_ptr(ptr);
2025 }
2026 }
2027
2028 htab_map_free(map);
2029}
2030
2031
2032int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
2033{
2034 void **ptr;
2035 int ret = 0;
2036
2037 if (!map->ops->map_fd_sys_lookup_elem)
2038 return -ENOTSUPP;
2039
2040 rcu_read_lock();
2041 ptr = htab_map_lookup_elem(map, key);
2042 if (ptr)
2043 *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
2044 else
2045 ret = -ENOENT;
2046 rcu_read_unlock();
2047
2048 return ret;
2049}
2050
2051
2052int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
2053 void *key, void *value, u64 map_flags)
2054{
2055 void *ptr;
2056 int ret;
2057 u32 ufd = *(u32 *)value;
2058
2059 ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
2060 if (IS_ERR(ptr))
2061 return PTR_ERR(ptr);
2062
2063 ret = htab_map_update_elem(map, key, &ptr, map_flags);
2064 if (ret)
2065 map->ops->map_fd_put_ptr(ptr);
2066
2067 return ret;
2068}
2069
2070static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
2071{
2072 struct bpf_map *map, *inner_map_meta;
2073
2074 inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
2075 if (IS_ERR(inner_map_meta))
2076 return inner_map_meta;
2077
2078 map = htab_map_alloc(attr);
2079 if (IS_ERR(map)) {
2080 bpf_map_meta_free(inner_map_meta);
2081 return map;
2082 }
2083
2084 map->inner_map_meta = inner_map_meta;
2085
2086 return map;
2087}
2088
2089static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
2090{
2091 struct bpf_map **inner_map = htab_map_lookup_elem(map, key);
2092
2093 if (!inner_map)
2094 return NULL;
2095
2096 return READ_ONCE(*inner_map);
2097}
2098
2099static int htab_of_map_gen_lookup(struct bpf_map *map,
2100 struct bpf_insn *insn_buf)
2101{
2102 struct bpf_insn *insn = insn_buf;
2103 const int ret = BPF_REG_0;
2104
2105 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
2106 (void *(*)(struct bpf_map *map, void *key))NULL));
2107 *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
2108 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
2109 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
2110 offsetof(struct htab_elem, key) +
2111 round_up(map->key_size, 8));
2112 *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
2113
2114 return insn - insn_buf;
2115}
2116
2117static void htab_of_map_free(struct bpf_map *map)
2118{
2119 bpf_map_meta_free(map->inner_map_meta);
2120 fd_htab_map_free(map);
2121}
2122
2123static int htab_of_maps_map_btf_id;
2124const struct bpf_map_ops htab_of_maps_map_ops = {
2125 .map_alloc_check = fd_htab_map_alloc_check,
2126 .map_alloc = htab_of_map_alloc,
2127 .map_free = htab_of_map_free,
2128 .map_get_next_key = htab_map_get_next_key,
2129 .map_lookup_elem = htab_of_map_lookup_elem,
2130 .map_delete_elem = htab_map_delete_elem,
2131 .map_fd_get_ptr = bpf_map_fd_get_ptr,
2132 .map_fd_put_ptr = bpf_map_fd_put_ptr,
2133 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
2134 .map_gen_lookup = htab_of_map_gen_lookup,
2135 .map_check_btf = map_check_no_btf,
2136 .map_btf_name = "bpf_htab",
2137 .map_btf_id = &htab_of_maps_map_btf_id,
2138};
2139