// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/random.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "map_in_map.h"

#define HTAB_CREATE_FLAG_MASK						\
	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
	 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)

#define BATCH_OPS(_name)			\
	.map_lookup_batch =			\
	_name##_map_lookup_batch,		\
	.map_lookup_and_delete_batch =		\
	_name##_map_lookup_and_delete_batch,	\
	.map_update_batch =			\
	generic_map_update_batch,		\
	.map_delete_batch =			\
	generic_map_delete_batch

/* The per-bucket lock has two protection scopes:
 *
 * 1) Serializing concurrent operations from BPF programs on different
 *    CPUs
 *
 * 2) Serializing concurrent operations from BPF programs and sys_bpf()
 *
 * BPF programs can be invoked from almost any context, including perf,
 * kprobes and tracing, so the bucket lock has to be usable and deadlock
 * free there.  On PREEMPT_RT a raw spinlock may only be held across
 * sections that do not allocate memory; that is guaranteed for
 * preallocated maps, so those keep using a raw spinlock, while maps that
 * allocate elements at runtime fall back to a regular spinlock (see
 * htab_use_raw_lock()).
 *
 * Recursing on the same bucket, e.g. from a tracing program that runs
 * inside the lock held section and updates the same map, would deadlock.
 * htab_lock_bucket() therefore keeps a per-cpu counter per lock stripe
 * (htab->map_locked[]) and rejects nested acquisition with -EBUSY.
 */
struct bucket {
	struct hlist_nulls_head head;
	union {
		raw_spinlock_t raw_lock;
		spinlock_t     lock;
	};
};

#define HASHTAB_MAP_LOCK_COUNT 8
#define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)

struct bpf_htab {
	struct bpf_map map;
	struct bucket *buckets;
	void *elems;
	union {
		struct pcpu_freelist freelist;
		struct bpf_lru lru;
	};
	struct htab_elem *__percpu *extra_elems;
	atomic_t count;	/* number of elements in this hashtable */
	u32 n_buckets;	/* number of hash buckets */
	u32 elem_size;	/* size of each element in bytes */
	u32 hashrnd;
	struct lock_class_key lockdep_key;
	int __percpu *map_locked[HASHTAB_MAP_LOCK_COUNT];
};

/* each htab element is struct htab_elem + key + value */
struct htab_elem {
	union {
		struct hlist_nulls_node hash_node;
		struct {
			void *padding;
			union {
				struct bpf_htab *htab;
				struct pcpu_freelist_node fnode;
				struct htab_elem *batch_flink;
			};
		};
	};
	union {
		struct rcu_head rcu;
		struct bpf_lru_node lru_node;
	};
	u32 hash;
	char key[] __aligned(8);
};

static inline bool htab_is_prealloc(const struct bpf_htab *htab)
{
	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
}

static inline bool htab_use_raw_lock(const struct bpf_htab *htab)
{
	return (!IS_ENABLED(CONFIG_PREEMPT_RT) || htab_is_prealloc(htab));
}

static void htab_init_buckets(struct bpf_htab *htab)
{
	unsigned int i;

	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
		if (htab_use_raw_lock(htab)) {
			raw_spin_lock_init(&htab->buckets[i].raw_lock);
			lockdep_set_class(&htab->buckets[i].raw_lock,
					  &htab->lockdep_key);
		} else {
			spin_lock_init(&htab->buckets[i].lock);
			lockdep_set_class(&htab->buckets[i].lock,
					  &htab->lockdep_key);
		}
		cond_resched();
	}
}

static inline int htab_lock_bucket(const struct bpf_htab *htab,
				   struct bucket *b, u32 hash,
				   unsigned long *pflags)
{
	unsigned long flags;

	hash = hash & HASHTAB_MAP_LOCK_MASK;

	migrate_disable();
	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
		/* The same lock stripe is already held on this CPU:
		 * refuse to recurse instead of deadlocking.
		 */
		__this_cpu_dec(*(htab->map_locked[hash]));
		migrate_enable();
		return -EBUSY;
	}

	if (htab_use_raw_lock(htab))
		raw_spin_lock_irqsave(&b->raw_lock, flags);
	else
		spin_lock_irqsave(&b->lock, flags);
	*pflags = flags;

	return 0;
}

static inline void htab_unlock_bucket(const struct bpf_htab *htab,
				      struct bucket *b, u32 hash,
				      unsigned long flags)
{
	hash = hash & HASHTAB_MAP_LOCK_MASK;
	if (htab_use_raw_lock(htab))
		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
	else
		spin_unlock_irqrestore(&b->lock, flags);
	__this_cpu_dec(*(htab->map_locked[hash]));
	migrate_enable();
}

static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);

static bool htab_is_lru(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH ||
		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static bool htab_is_percpu(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
				     void __percpu *pptr)
{
	*(void __percpu **)(l->key + key_size) = pptr;
}

static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
	return *(void __percpu **)(l->key + key_size);
}

static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
{
	return *(void **)(l->key + roundup(map->key_size, 8));
}

static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
{
	return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size);
}

static bool htab_has_extra_elems(struct bpf_htab *htab)
{
	return !htab_is_percpu(htab) && !htab_is_lru(htab);
}

static void htab_free_prealloced_timers(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int i;

	if (likely(!map_value_has_timer(&htab->map)))
		return;
	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();

	for (i = 0; i < num_entries; i++) {
		struct htab_elem *elem;

		elem = get_htab_elem(htab, i);
		bpf_timer_cancel_and_free(elem->key +
					  round_up(htab->map.key_size, 8) +
					  htab->map.timer_off);
		cond_resched();
	}
}

static void htab_free_elems(struct bpf_htab *htab)
{
	int i;

	if (!htab_is_percpu(htab))
		goto free_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		void __percpu *pptr;

		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
					 htab->map.key_size);
		free_percpu(pptr);
		cond_resched();
	}
free_elems:
	bpf_map_area_free(htab->elems);
}

/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
 * (bucket_lock). If both locks need to be acquired together, the lock
 * order is always lru_lock -> bucket_lock and this only happens
 * in the bpf_lru_list.c logic. For example, certain code path of
 * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(),
 * will acquire lru_lock first followed by acquiring bucket_lock.
 *
 * In hashtab.c, to avoid deadlock, lock acquisition of
 * bucket_lock followed by lru_lock is not allowed. In such cases,
 * bucket_lock needs to be released first before acquiring lru_lock.
 */
static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
					  u32 hash)
{
	struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash);
	struct htab_elem *l;

	if (node) {
		u32 key_size = htab->map.key_size;

		l = container_of(node, struct htab_elem, lru_node);
		memcpy(l->key, key, key_size);
		check_and_init_map_value(&htab->map,
					 l->key + round_up(key_size, 8));
		return l;
	}

	return NULL;
}
305
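/* Pre-allocate all elements up front (plus per-cpu extra elements for
 * non-LRU, non-percpu maps) and seed either the LRU lists or the per-cpu
 * freelist with them, so element updates never allocate memory at runtime.
 */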
306static int prealloc_init(struct bpf_htab *htab)
307{
308 u32 num_entries = htab->map.max_entries;
309 int err = -ENOMEM, i;
310
311 if (htab_has_extra_elems(htab))
312 num_entries += num_possible_cpus();
313
314 htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries,
315 htab->map.numa_node);
316 if (!htab->elems)
317 return -ENOMEM;
318
319 if (!htab_is_percpu(htab))
320 goto skip_percpu_elems;
321
322 for (i = 0; i < num_entries; i++) {
323 u32 size = round_up(htab->map.value_size, 8);
324 void __percpu *pptr;
325
326 pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
327 GFP_USER | __GFP_NOWARN);
328 if (!pptr)
329 goto free_elems;
330 htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
331 pptr);
332 cond_resched();
333 }
334
335skip_percpu_elems:
336 if (htab_is_lru(htab))
337 err = bpf_lru_init(&htab->lru,
338 htab->map.map_flags & BPF_F_NO_COMMON_LRU,
339 offsetof(struct htab_elem, hash) -
340 offsetof(struct htab_elem, lru_node),
341 htab_lru_map_delete_node,
342 htab);
343 else
344 err = pcpu_freelist_init(&htab->freelist);
345
346 if (err)
347 goto free_elems;
348
349 if (htab_is_lru(htab))
350 bpf_lru_populate(&htab->lru, htab->elems,
351 offsetof(struct htab_elem, lru_node),
352 htab->elem_size, num_entries);
353 else
354 pcpu_freelist_populate(&htab->freelist,
355 htab->elems + offsetof(struct htab_elem, fnode),
356 htab->elem_size, num_entries);
357
358 return 0;
359
360free_elems:
361 htab_free_elems(htab);
362 return err;
363}
364
365static void prealloc_destroy(struct bpf_htab *htab)
366{
367 htab_free_elems(htab);
368
369 if (htab_is_lru(htab))
370 bpf_lru_destroy(&htab->lru);
371 else
372 pcpu_freelist_destroy(&htab->freelist);
373}
374
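/* Stash one preallocated element per CPU.  htab_map_update_elem() uses it
 * when replacing an existing entry, so the replacement does not have to go
 * through the freelist while the bucket lock is held.
 */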
375static int alloc_extra_elems(struct bpf_htab *htab)
376{
377 struct htab_elem *__percpu *pptr, *l_new;
378 struct pcpu_freelist_node *l;
379 int cpu;
380
381 pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8,
382 GFP_USER | __GFP_NOWARN);
383 if (!pptr)
384 return -ENOMEM;
385
386 for_each_possible_cpu(cpu) {
387 l = pcpu_freelist_pop(&htab->freelist);
		/* pop will succeed, since prealloc_init()
		 * preallocated extra num_possible_cpus elements
		 */
391 l_new = container_of(l, struct htab_elem, fnode);
392 *per_cpu_ptr(pptr, cpu) = l_new;
393 }
394 htab->extra_elems = pptr;
395 return 0;
396}

/* Called from syscall */
static int htab_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu.  percpu_lru has
	 * nothing to do with the map's value.
	 */
	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
	bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
	int numa_node = bpf_map_attr_numa_node(attr);

	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
		     offsetof(struct htab_elem, hash_node.pprev));
	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
		     offsetof(struct htab_elem, hash_node.pprev));

	if (lru && !bpf_capable())
		/* The LRU implementation is much more complicated than
		 * other maps.  Hence, limit it to CAP_BPF.
		 */
		return -EPERM;

	if (zero_seed && !capable(CAP_SYS_ADMIN))
		/* Guard against local DoS */
		return -EPERM;

	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return -EINVAL;

	if (!lru && percpu_lru)
		return -EINVAL;

	if (lru && !prealloc)
		return -ENOTSUPP;

	if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
		return -EINVAL;

	/* check sanity of attributes.
	 * value_size == 0 may be allowed in the future to use map as a set
	 */
	if (attr->max_entries == 0 || attr->key_size == 0 ||
	    attr->value_size == 0)
		return -EINVAL;

	if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE -
	    sizeof(struct htab_elem))
		/* if key_size + value_size is bigger, the user space won't be
		 * able to access the elements via bpf syscall. This check
		 * also makes sure that the elem_size doesn't overflow and it's
		 * kmalloc-able later in htab_map_update_elem()
		 */
		return -E2BIG;

	return 0;
}
461
462static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
463{
464 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
465 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
466 bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
467 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu.  percpu_lru has
	 * nothing to do with the map's value.
	 */
473 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
474 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
475 struct bpf_htab *htab;
476 int err, i;
477
478 htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT);
479 if (!htab)
480 return ERR_PTR(-ENOMEM);
481
482 lockdep_register_key(&htab->lockdep_key);
483
484 bpf_map_init_from_attr(&htab->map, attr);
485
486 if (percpu_lru) {
		/* ensure each CPU's lru list has >=1 elements.
		 * since we are at it, make each lru list has the same
		 * number of elements.
		 */
491 htab->map.max_entries = roundup(attr->max_entries,
492 num_possible_cpus());
493 if (htab->map.max_entries < attr->max_entries)
494 htab->map.max_entries = rounddown(attr->max_entries,
495 num_possible_cpus());
496 }

	/* hash table size must be power of 2 */
499 htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
500
501 htab->elem_size = sizeof(struct htab_elem) +
502 round_up(htab->map.key_size, 8);
503 if (percpu)
504 htab->elem_size += sizeof(void *);
505 else
506 htab->elem_size += round_up(htab->map.value_size, 8);
507
508 err = -E2BIG;
	/* prevent zero size kmalloc and check for u32 overflow */
510 if (htab->n_buckets == 0 ||
511 htab->n_buckets > U32_MAX / sizeof(struct bucket))
512 goto free_htab;
513
514 err = -ENOMEM;
515 htab->buckets = bpf_map_area_alloc(htab->n_buckets *
516 sizeof(struct bucket),
517 htab->map.numa_node);
518 if (!htab->buckets)
519 goto free_htab;
520
521 for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
522 htab->map_locked[i] = bpf_map_alloc_percpu(&htab->map,
523 sizeof(int),
524 sizeof(int),
525 GFP_USER);
526 if (!htab->map_locked[i])
527 goto free_map_locked;
528 }
529
530 if (htab->map.map_flags & BPF_F_ZERO_SEED)
531 htab->hashrnd = 0;
532 else
533 htab->hashrnd = get_random_int();
534
535 htab_init_buckets(htab);
536
537 if (prealloc) {
538 err = prealloc_init(htab);
539 if (err)
540 goto free_map_locked;
541
542 if (!percpu && !lru) {
			/* lru itself can remove the least used element, so
			 * there is no need for an extra elem during map_update.
			 */
546 err = alloc_extra_elems(htab);
547 if (err)
548 goto free_prealloc;
549 }
550 }
551
552 return &htab->map;
553
554free_prealloc:
555 prealloc_destroy(htab);
556free_map_locked:
557 for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
558 free_percpu(htab->map_locked[i]);
559 bpf_map_area_free(htab->buckets);
560free_htab:
561 lockdep_unregister_key(&htab->lockdep_key);
562 kfree(htab);
563 return ERR_PTR(err);
564}
565
566static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
567{
568 return jhash(key, key_len, hashrnd);
569}
570
571static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
572{
573 return &htab->buckets[hash & (htab->n_buckets - 1)];
574}
575
576static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
577{
578 return &__select_bucket(htab, hash)->head;
579}

/* this lookup function can only be called with bucket lock taken */
582static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
583 void *key, u32 key_size)
584{
585 struct hlist_nulls_node *n;
586 struct htab_elem *l;
587
588 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
589 if (l->hash == hash && !memcmp(&l->key, key, key_size))
590 return l;
591
592 return NULL;
593}

/* can be called without bucket lock. it will repeat the loop in
 * the unlikely event when elements moved from one bucket into another
 * while link list is being walked
 */
599static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
600 u32 hash, void *key,
601 u32 key_size, u32 n_buckets)
602{
603 struct hlist_nulls_node *n;
604 struct htab_elem *l;
605
606again:
607 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
608 if (l->hash == hash && !memcmp(&l->key, key, key_size))
609 return l;
610
611 if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
612 goto again;
613
614 return NULL;
615}

/* Called from syscall or from eBPF program directly, so
 * arguments have to match bpf_map_lookup_elem() exactly.
 * The return value is adjusted by BPF instructions
 * in htab_map_gen_lookup().
 */
622static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
623{
624 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
625 struct hlist_nulls_head *head;
626 struct htab_elem *l;
627 u32 hash, key_size;
628
629 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
630 !rcu_read_lock_bh_held());
631
632 key_size = map->key_size;
633
634 hash = htab_map_hash(key, key_size, htab->hashrnd);
635
636 head = select_bucket(htab, hash);
637
638 l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
639
640 return l;
641}
642
643static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
644{
645 struct htab_elem *l = __htab_map_lookup_elem(map, key);
646
647 if (l)
648 return l->key + round_up(map->key_size, 8);
649
650 return NULL;
651}

/* inline bpf_map_lookup_elem() call.
 * Instead of:
 * bpf_prog
 *   bpf_map_lookup_elem
 *     map->ops->map_lookup_elem
 *       htab_map_lookup_elem
 *         __htab_map_lookup_elem
 * do:
 * bpf_prog
 *   __htab_map_lookup_elem
 */
664static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
665{
666 struct bpf_insn *insn = insn_buf;
667 const int ret = BPF_REG_0;
668
669 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
670 (void *(*)(struct bpf_map *map, void *key))NULL));
671 *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
672 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
673 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
674 offsetof(struct htab_elem, key) +
675 round_up(map->key_size, 8));
676 return insn - insn_buf;
677}
678
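/* LRU variants of lookup: optionally mark the element as referenced so that
 * the LRU eviction logic prefers to keep recently hit entries.
 */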
679static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
680 void *key, const bool mark)
681{
682 struct htab_elem *l = __htab_map_lookup_elem(map, key);
683
684 if (l) {
685 if (mark)
686 bpf_lru_node_set_ref(&l->lru_node);
687 return l->key + round_up(map->key_size, 8);
688 }
689
690 return NULL;
691}
692
693static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
694{
695 return __htab_lru_map_lookup_elem(map, key, true);
696}
697
698static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
699{
700 return __htab_lru_map_lookup_elem(map, key, false);
701}
702
703static int htab_lru_map_gen_lookup(struct bpf_map *map,
704 struct bpf_insn *insn_buf)
705{
706 struct bpf_insn *insn = insn_buf;
707 const int ret = BPF_REG_0;
708 const int ref_reg = BPF_REG_1;
709
710 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
711 (void *(*)(struct bpf_map *map, void *key))NULL));
712 *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
713 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
714 *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
715 offsetof(struct htab_elem, lru_node) +
716 offsetof(struct bpf_lru_node, ref));
717 *insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1);
718 *insn++ = BPF_ST_MEM(BPF_B, ret,
719 offsetof(struct htab_elem, lru_node) +
720 offsetof(struct bpf_lru_node, ref),
721 1);
722 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
723 offsetof(struct htab_elem, key) +
724 round_up(map->key_size, 8));
725 return insn - insn_buf;
726}
727
728static void check_and_free_timer(struct bpf_htab *htab, struct htab_elem *elem)
729{
730 if (unlikely(map_value_has_timer(&htab->map)))
731 bpf_timer_cancel_and_free(elem->key +
732 round_up(htab->map.key_size, 8) +
733 htab->map.timer_off);
734}

/* It is called from the bpf_lru_list when the LRU needs to delete
 * older elements from the htab.
 */
739static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
740{
741 struct bpf_htab *htab = (struct bpf_htab *)arg;
742 struct htab_elem *l = NULL, *tgt_l;
743 struct hlist_nulls_head *head;
744 struct hlist_nulls_node *n;
745 unsigned long flags;
746 struct bucket *b;
747 int ret;
748
749 tgt_l = container_of(node, struct htab_elem, lru_node);
750 b = __select_bucket(htab, tgt_l->hash);
751 head = &b->head;
752
753 ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags);
754 if (ret)
755 return false;
756
757 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
758 if (l == tgt_l) {
759 hlist_nulls_del_rcu(&l->hash_node);
760 check_and_free_timer(htab, l);
761 break;
762 }
763
764 htab_unlock_bucket(htab, b, tgt_l->hash, flags);
765
766 return l == tgt_l;
767}

/* Called from syscall */
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l, *next_l;
	u32 hash, key_size;
	int i = 0;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	if (!key)
		goto find_first_elem;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	head = select_bucket(htab, hash);

	/* lookup the key */
	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	if (!l)
		goto find_first_elem;

	/* key was found, get next key in the same bucket */
	next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
					struct htab_elem, hash_node);

	if (next_l) {
		/* if next elem in this hash list is non-zero, just return it */
		memcpy(next_key, next_l->key, key_size);
		return 0;
	}

	/* no more elements in this hash list, go to the next bucket */
	i = hash & (htab->n_buckets - 1);
	i++;

find_first_elem:
	/* iterate over buckets */
	for (; i < htab->n_buckets; i++) {
		head = select_bucket(htab, i);

		/* pick first element in the bucket */
		next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
						struct htab_elem, hash_node);
		if (next_l) {
			/* if it's not empty, just return it */
			memcpy(next_key, next_l->key, key_size);
			return 0;
		}
	}

	/* iterated over all buckets and all elements */
	return -ENOENT;
}
827
828static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
829{
830 if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
831 free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
832 check_and_free_timer(htab, l);
833 kfree(l);
834}
835
836static void htab_elem_free_rcu(struct rcu_head *head)
837{
838 struct htab_elem *l = container_of(head, struct htab_elem, rcu);
839 struct bpf_htab *htab = l->htab;
840
841 htab_elem_free(htab, l);
842}
843
844static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
845{
846 struct bpf_map *map = &htab->map;
847 void *ptr;
848
849 if (map->ops->map_fd_put_ptr) {
850 ptr = fd_htab_map_get_ptr(map, l);
851 map->ops->map_fd_put_ptr(ptr);
852 }
853}
854
855static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
856{
857 htab_put_fd_value(htab, l);
858
859 if (htab_is_prealloc(htab)) {
860 check_and_free_timer(htab, l);
861 __pcpu_freelist_push(&htab->freelist, &l->fnode);
862 } else {
863 atomic_dec(&htab->count);
864 l->htab = htab;
865 call_rcu(&l->rcu, htab_elem_free_rcu);
866 }
867}
868
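/* Copy the supplied value into the element's per-cpu area: only this CPU's
 * slot when called from a BPF program, or every possible CPU's slot when
 * called from syscall with a buffer holding one rounded-up value per CPU.
 */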
869static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
870 void *value, bool onallcpus)
871{
872 if (!onallcpus) {
		/* copy true value_size bytes */
874 memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
875 } else {
876 u32 size = round_up(htab->map.value_size, 8);
877 int off = 0, cpu;
878
879 for_each_possible_cpu(cpu) {
880 bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
881 value + off, size);
882 off += size;
883 }
884 }
885}
886
887static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
888 void *value, bool onallcpus)
889{
	/* When using prealloc and not setting the initial value on all cpus,
	 * zero-fill element values for other cpus (just as what happens when
	 * not using prealloc). Otherwise, bpf program has no way to ensure
	 * known initial values for cpus other than current one
	 * (onallcpus=false always when coming from bpf prog).
	 */
896 if (htab_is_prealloc(htab) && !onallcpus) {
897 u32 size = round_up(htab->map.value_size, 8);
898 int current_cpu = raw_smp_processor_id();
899 int cpu;
900
901 for_each_possible_cpu(cpu) {
902 if (cpu == current_cpu)
903 bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
904 size);
905 else
906 memset(per_cpu_ptr(pptr, cpu), 0, size);
907 }
908 } else {
909 pcpu_copy_value(htab, pptr, value, onallcpus);
910 }
911}
912
913static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
914{
915 return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
916 BITS_PER_LONG == 64;
917}
918
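/* Get an element for an update.  For preallocated maps this never allocates
 * memory: it reuses this CPU's extra element when replacing an existing
 * entry, or pops a free element from the per-cpu freelist otherwise.
 */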
919static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
920 void *value, u32 key_size, u32 hash,
921 bool percpu, bool onallcpus,
922 struct htab_elem *old_elem)
923{
924 u32 size = htab->map.value_size;
925 bool prealloc = htab_is_prealloc(htab);
926 struct htab_elem *l_new, **pl_new;
927 void __percpu *pptr;
928
929 if (prealloc) {
930 if (old_elem) {
			/* if we're updating the existing element,
			 * use per-cpu extra elems to avoid freelist_pop/push
			 */
934 pl_new = this_cpu_ptr(htab->extra_elems);
935 l_new = *pl_new;
936 htab_put_fd_value(htab, old_elem);
937 *pl_new = old_elem;
938 } else {
939 struct pcpu_freelist_node *l;
940
941 l = __pcpu_freelist_pop(&htab->freelist);
942 if (!l)
943 return ERR_PTR(-E2BIG);
944 l_new = container_of(l, struct htab_elem, fnode);
945 }
946 } else {
947 if (atomic_inc_return(&htab->count) > htab->map.max_entries)
948 if (!old_elem) {
				/* when map is full and update() is replacing
				 * old element, it's ok to allocate, since
				 * old element will be freed immediately.
				 * Otherwise return an error
				 */
954 l_new = ERR_PTR(-E2BIG);
955 goto dec_count;
956 }
957 l_new = bpf_map_kmalloc_node(&htab->map, htab->elem_size,
958 GFP_ATOMIC | __GFP_NOWARN,
959 htab->map.numa_node);
960 if (!l_new) {
961 l_new = ERR_PTR(-ENOMEM);
962 goto dec_count;
963 }
964 check_and_init_map_value(&htab->map,
965 l_new->key + round_up(key_size, 8));
966 }
967
968 memcpy(l_new->key, key, key_size);
969 if (percpu) {
970 size = round_up(size, 8);
971 if (prealloc) {
972 pptr = htab_elem_get_ptr(l_new, key_size);
973 } else {
			/* alloc_percpu zero-fills */
975 pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
976 GFP_ATOMIC | __GFP_NOWARN);
977 if (!pptr) {
978 kfree(l_new);
979 l_new = ERR_PTR(-ENOMEM);
980 goto dec_count;
981 }
982 }
983
984 pcpu_init_value(htab, pptr, value, onallcpus);
985
986 if (!prealloc)
987 htab_elem_set_ptr(l_new, key_size, pptr);
988 } else if (fd_htab_map_needs_adjust(htab)) {
989 size = round_up(size, 8);
990 memcpy(l_new->key + round_up(key_size, 8), value, size);
991 } else {
992 copy_map_value(&htab->map,
993 l_new->key + round_up(key_size, 8),
994 value);
995 }
996
997 l_new->hash = hash;
998 return l_new;
999dec_count:
1000 atomic_dec(&htab->count);
1001 return l_new;
1002}
1003
1004static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
1005 u64 map_flags)
1006{
1007 if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
		/* elem already exists */
1009 return -EEXIST;
1010
1011 if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
		/* elem doesn't exist, cannot update it */
1013 return -ENOENT;
1014
1015 return 0;
1016}

/* Called from syscall or from eBPF program */
1019static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
1020 u64 map_flags)
1021{
1022 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1023 struct htab_elem *l_new = NULL, *l_old;
1024 struct hlist_nulls_head *head;
1025 unsigned long flags;
1026 struct bucket *b;
1027 u32 key_size, hash;
1028 int ret;
1029
1030 if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
1032 return -EINVAL;
1033
1034 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1035 !rcu_read_lock_bh_held());
1036
1037 key_size = map->key_size;
1038
1039 hash = htab_map_hash(key, key_size, htab->hashrnd);
1040
1041 b = __select_bucket(htab, hash);
1042 head = &b->head;
1043
1044 if (unlikely(map_flags & BPF_F_LOCK)) {
1045 if (unlikely(!map_value_has_spin_lock(map)))
1046 return -EINVAL;
1047
1048 l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
1049 htab->n_buckets);
1050 ret = check_flags(htab, l_old, map_flags);
1051 if (ret)
1052 return ret;
1053 if (l_old) {
			/* grab the element lock and update value in place */
1055 copy_map_value_locked(map,
1056 l_old->key + round_up(key_size, 8),
1057 value, false);
1058 return 0;
1059 }
		/* fall through, grab the bucket lock and lookup again.
		 * 99.9% chance that the element won't be found,
		 * but second lookup under lock has to be done.
		 */
1064 }
1065
1066 ret = htab_lock_bucket(htab, b, hash, &flags);
1067 if (ret)
1068 return ret;
1069
1070 l_old = lookup_elem_raw(head, hash, key, key_size);
1071
1072 ret = check_flags(htab, l_old, map_flags);
1073 if (ret)
1074 goto err;
1075
1076 if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
		/* first lookup without the bucket lock didn't find the element,
		 * but second lookup with the bucket lock found it.
		 * This case is highly unlikely, but has to be dealt with:
		 * grab the element lock in addition to the bucket lock
		 * and update element in place
		 */
1083 copy_map_value_locked(map,
1084 l_old->key + round_up(key_size, 8),
1085 value, false);
1086 ret = 0;
1087 goto err;
1088 }
1089
1090 l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
1091 l_old);
1092 if (IS_ERR(l_new)) {
		/* all pre-allocated elements are in use or memory exhausted */
1094 ret = PTR_ERR(l_new);
1095 goto err;
1096 }
1097
	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
1101 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
1102 if (l_old) {
1103 hlist_nulls_del_rcu(&l_old->hash_node);
1104 if (!htab_is_prealloc(htab))
1105 free_htab_elem(htab, l_old);
1106 else
1107 check_and_free_timer(htab, l_old);
1108 }
1109 ret = 0;
1110err:
1111 htab_unlock_bucket(htab, b, hash, flags);
1112 return ret;
1113}
1114
1115static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
1116{
1117 check_and_free_timer(htab, elem);
1118 bpf_lru_push_free(&htab->lru, &elem->lru_node);
1119}
1120
1121static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
1122 u64 map_flags)
1123{
1124 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1125 struct htab_elem *l_new, *l_old = NULL;
1126 struct hlist_nulls_head *head;
1127 unsigned long flags;
1128 struct bucket *b;
1129 u32 key_size, hash;
1130 int ret;
1131
1132 if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
1134 return -EINVAL;
1135
1136 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1137 !rcu_read_lock_bh_held());
1138
1139 key_size = map->key_size;
1140
1141 hash = htab_map_hash(key, key_size, htab->hashrnd);
1142
1143 b = __select_bucket(htab, hash);
1144 head = &b->head;
1145
	/* For LRU, we need to alloc before taking bucket's
	 * spinlock because getting free nodes from LRU may need
	 * to remove older elements from htab and this removal
	 * operation will need a bucket lock.
	 */
1151 l_new = prealloc_lru_pop(htab, key, hash);
1152 if (!l_new)
1153 return -ENOMEM;
1154 copy_map_value(&htab->map,
1155 l_new->key + round_up(map->key_size, 8), value);
1156
1157 ret = htab_lock_bucket(htab, b, hash, &flags);
1158 if (ret)
1159 return ret;
1160
1161 l_old = lookup_elem_raw(head, hash, key, key_size);
1162
1163 ret = check_flags(htab, l_old, map_flags);
1164 if (ret)
1165 goto err;
1166
	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
1170 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
1171 if (l_old) {
1172 bpf_lru_node_set_ref(&l_new->lru_node);
1173 hlist_nulls_del_rcu(&l_old->hash_node);
1174 }
1175 ret = 0;
1176
1177err:
1178 htab_unlock_bucket(htab, b, hash, flags);
1179
1180 if (ret)
1181 htab_lru_push_free(htab, l_new);
1182 else if (l_old)
1183 htab_lru_push_free(htab, l_old);
1184
1185 return ret;
1186}
1187
1188static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
1189 void *value, u64 map_flags,
1190 bool onallcpus)
1191{
1192 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1193 struct htab_elem *l_new = NULL, *l_old;
1194 struct hlist_nulls_head *head;
1195 unsigned long flags;
1196 struct bucket *b;
1197 u32 key_size, hash;
1198 int ret;
1199
1200 if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
1202 return -EINVAL;
1203
1204 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1205 !rcu_read_lock_bh_held());
1206
1207 key_size = map->key_size;
1208
1209 hash = htab_map_hash(key, key_size, htab->hashrnd);
1210
1211 b = __select_bucket(htab, hash);
1212 head = &b->head;
1213
1214 ret = htab_lock_bucket(htab, b, hash, &flags);
1215 if (ret)
1216 return ret;
1217
1218 l_old = lookup_elem_raw(head, hash, key, key_size);
1219
1220 ret = check_flags(htab, l_old, map_flags);
1221 if (ret)
1222 goto err;
1223
1224 if (l_old) {
		/* per-cpu hash map can update value in-place */
1226 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
1227 value, onallcpus);
1228 } else {
1229 l_new = alloc_htab_elem(htab, key, value, key_size,
1230 hash, true, onallcpus, NULL);
1231 if (IS_ERR(l_new)) {
1232 ret = PTR_ERR(l_new);
1233 goto err;
1234 }
1235 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
1236 }
1237 ret = 0;
1238err:
1239 htab_unlock_bucket(htab, b, hash, flags);
1240 return ret;
1241}
1242
1243static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
1244 void *value, u64 map_flags,
1245 bool onallcpus)
1246{
1247 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1248 struct htab_elem *l_new = NULL, *l_old;
1249 struct hlist_nulls_head *head;
1250 unsigned long flags;
1251 struct bucket *b;
1252 u32 key_size, hash;
1253 int ret;
1254
1255 if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
1257 return -EINVAL;
1258
1259 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1260 !rcu_read_lock_bh_held());
1261
1262 key_size = map->key_size;
1263
1264 hash = htab_map_hash(key, key_size, htab->hashrnd);
1265
1266 b = __select_bucket(htab, hash);
1267 head = &b->head;
1268
	/* For LRU, we need to alloc before taking bucket's
	 * spinlock because LRU's elem alloc may need
	 * to remove older elem from htab and this removal
	 * operation will need a bucket lock.
	 */
1274 if (map_flags != BPF_EXIST) {
1275 l_new = prealloc_lru_pop(htab, key, hash);
1276 if (!l_new)
1277 return -ENOMEM;
1278 }
1279
1280 ret = htab_lock_bucket(htab, b, hash, &flags);
1281 if (ret)
1282 return ret;
1283
1284 l_old = lookup_elem_raw(head, hash, key, key_size);
1285
1286 ret = check_flags(htab, l_old, map_flags);
1287 if (ret)
1288 goto err;
1289
1290 if (l_old) {
1291 bpf_lru_node_set_ref(&l_old->lru_node);

		/* per-cpu hash map can update value in-place */
1294 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
1295 value, onallcpus);
1296 } else {
1297 pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
1298 value, onallcpus);
1299 hlist_nulls_add_head_rcu(&l_new->hash_node, head);
1300 l_new = NULL;
1301 }
1302 ret = 0;
1303err:
1304 htab_unlock_bucket(htab, b, hash, flags);
1305 if (l_new)
1306 bpf_lru_push_free(&htab->lru, &l_new->lru_node);
1307 return ret;
1308}
1309
1310static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
1311 void *value, u64 map_flags)
1312{
1313 return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
1314}
1315
1316static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
1317 void *value, u64 map_flags)
1318{
1319 return __htab_lru_percpu_map_update_elem(map, key, value, map_flags,
1320 false);
1321}

/* Called from syscall or from eBPF program */
1324static int htab_map_delete_elem(struct bpf_map *map, void *key)
1325{
1326 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1327 struct hlist_nulls_head *head;
1328 struct bucket *b;
1329 struct htab_elem *l;
1330 unsigned long flags;
1331 u32 hash, key_size;
1332 int ret;
1333
1334 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1335 !rcu_read_lock_bh_held());
1336
1337 key_size = map->key_size;
1338
1339 hash = htab_map_hash(key, key_size, htab->hashrnd);
1340 b = __select_bucket(htab, hash);
1341 head = &b->head;
1342
1343 ret = htab_lock_bucket(htab, b, hash, &flags);
1344 if (ret)
1345 return ret;
1346
1347 l = lookup_elem_raw(head, hash, key, key_size);
1348
1349 if (l) {
1350 hlist_nulls_del_rcu(&l->hash_node);
1351 free_htab_elem(htab, l);
1352 } else {
1353 ret = -ENOENT;
1354 }
1355
1356 htab_unlock_bucket(htab, b, hash, flags);
1357 return ret;
1358}
1359
1360static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
1361{
1362 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1363 struct hlist_nulls_head *head;
1364 struct bucket *b;
1365 struct htab_elem *l;
1366 unsigned long flags;
1367 u32 hash, key_size;
1368 int ret;
1369
1370 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
1371 !rcu_read_lock_bh_held());
1372
1373 key_size = map->key_size;
1374
1375 hash = htab_map_hash(key, key_size, htab->hashrnd);
1376 b = __select_bucket(htab, hash);
1377 head = &b->head;
1378
1379 ret = htab_lock_bucket(htab, b, hash, &flags);
1380 if (ret)
1381 return ret;
1382
1383 l = lookup_elem_raw(head, hash, key, key_size);
1384
1385 if (l)
1386 hlist_nulls_del_rcu(&l->hash_node);
1387 else
1388 ret = -ENOENT;
1389
1390 htab_unlock_bucket(htab, b, hash, flags);
1391 if (l)
1392 htab_lru_push_free(htab, l);
1393 return ret;
1394}
1395
1396static void delete_all_elements(struct bpf_htab *htab)
1397{
1398 int i;
1399
1400 for (i = 0; i < htab->n_buckets; i++) {
1401 struct hlist_nulls_head *head = select_bucket(htab, i);
1402 struct hlist_nulls_node *n;
1403 struct htab_elem *l;
1404
1405 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
1406 hlist_nulls_del_rcu(&l->hash_node);
1407 htab_elem_free(htab, l);
1408 }
1409 }
1410}
1411
1412static void htab_free_malloced_timers(struct bpf_htab *htab)
1413{
1414 int i;
1415
1416 rcu_read_lock();
1417 for (i = 0; i < htab->n_buckets; i++) {
1418 struct hlist_nulls_head *head = select_bucket(htab, i);
1419 struct hlist_nulls_node *n;
1420 struct htab_elem *l;
1421
1422 hlist_nulls_for_each_entry(l, n, head, hash_node)
1423 check_and_free_timer(htab, l);
1424 cond_resched_rcu();
1425 }
1426 rcu_read_unlock();
1427}
1428
1429static void htab_map_free_timers(struct bpf_map *map)
1430{
1431 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1432
1433 if (likely(!map_value_has_timer(&htab->map)))
1434 return;
1435 if (!htab_is_prealloc(htab))
1436 htab_free_malloced_timers(htab);
1437 else
1438 htab_free_prealloced_timers(htab);
1439}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
1442static void htab_map_free(struct bpf_map *map)
1443{
1444 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1445 int i;
1446
	/* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
	 * bpf_free_used_maps() is called after bpf prog is no longer executing.
	 * There is no need to synchronize_rcu() here to protect map elements.
	 */

	/* some of free_htab_elem() callbacks for elements of this map may
	 * not have executed. Wait for them.
	 */
1455 rcu_barrier();
1456 if (!htab_is_prealloc(htab))
1457 delete_all_elements(htab);
1458 else
1459 prealloc_destroy(htab);
1460
1461 free_percpu(htab->extra_elems);
1462 bpf_map_area_free(htab->buckets);
1463 for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
1464 free_percpu(htab->map_locked[i]);
1465 lockdep_unregister_key(&htab->lockdep_key);
1466 kfree(htab);
1467}
1468
1469static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
1470 struct seq_file *m)
1471{
1472 void *value;
1473
1474 rcu_read_lock();
1475
1476 value = htab_map_lookup_elem(map, key);
1477 if (!value) {
1478 rcu_read_unlock();
1479 return;
1480 }
1481
1482 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
1483 seq_puts(m, ": ");
1484 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
1485 seq_puts(m, "\n");
1486
1487 rcu_read_unlock();
1488}
1489
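/* Common helper for BPF_MAP_LOOKUP_AND_DELETE_ELEM: copy the value out under
 * the bucket lock, then unlink the element and free it (or hand it back to
 * the LRU free list once the bucket lock has been dropped).
 */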
1490static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
1491 void *value, bool is_lru_map,
1492 bool is_percpu, u64 flags)
1493{
1494 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1495 struct hlist_nulls_head *head;
1496 unsigned long bflags;
1497 struct htab_elem *l;
1498 u32 hash, key_size;
1499 struct bucket *b;
1500 int ret;
1501
1502 key_size = map->key_size;
1503
1504 hash = htab_map_hash(key, key_size, htab->hashrnd);
1505 b = __select_bucket(htab, hash);
1506 head = &b->head;
1507
1508 ret = htab_lock_bucket(htab, b, hash, &bflags);
1509 if (ret)
1510 return ret;
1511
1512 l = lookup_elem_raw(head, hash, key, key_size);
1513 if (!l) {
1514 ret = -ENOENT;
1515 } else {
1516 if (is_percpu) {
1517 u32 roundup_value_size = round_up(map->value_size, 8);
1518 void __percpu *pptr;
1519 int off = 0, cpu;
1520
1521 pptr = htab_elem_get_ptr(l, key_size);
1522 for_each_possible_cpu(cpu) {
1523 bpf_long_memcpy(value + off,
1524 per_cpu_ptr(pptr, cpu),
1525 roundup_value_size);
1526 off += roundup_value_size;
1527 }
1528 } else {
1529 u32 roundup_key_size = round_up(map->key_size, 8);
1530
1531 if (flags & BPF_F_LOCK)
1532 copy_map_value_locked(map, value, l->key +
1533 roundup_key_size,
1534 true);
1535 else
1536 copy_map_value(map, value, l->key +
1537 roundup_key_size);
1538 check_and_init_map_value(map, value);
1539 }
1540
1541 hlist_nulls_del_rcu(&l->hash_node);
1542 if (!is_lru_map)
1543 free_htab_elem(htab, l);
1544 }
1545
1546 htab_unlock_bucket(htab, b, hash, bflags);
1547
1548 if (is_lru_map && l)
1549 htab_lru_push_free(htab, l);
1550
1551 return ret;
1552}
1553
1554static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
1555 void *value, u64 flags)
1556{
1557 return __htab_map_lookup_and_delete_elem(map, key, value, false, false,
1558 flags);
1559}
1560
1561static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
1562 void *key, void *value,
1563 u64 flags)
1564{
1565 return __htab_map_lookup_and_delete_elem(map, key, value, false, true,
1566 flags);
1567}
1568
1569static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
1570 void *value, u64 flags)
1571{
1572 return __htab_map_lookup_and_delete_elem(map, key, value, true, false,
1573 flags);
1574}
1575
1576static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
1577 void *key, void *value,
1578 u64 flags)
1579{
1580 return __htab_map_lookup_and_delete_elem(map, key, value, true, true,
1581 flags);
1582}
1583
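/* Common helper for the batched lookup/lookup-and-delete commands: walk the
 * table bucket by bucket, copying up to attr->batch.count elements into the
 * user supplied key/value arrays.  The bucket index serves as the opaque
 * batch cursor handed back to user space via batch.out_batch.
 */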
1584static int
1585__htab_map_lookup_and_delete_batch(struct bpf_map *map,
1586 const union bpf_attr *attr,
1587 union bpf_attr __user *uattr,
1588 bool do_delete, bool is_lru_map,
1589 bool is_percpu)
1590{
1591 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1592 u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
1593 void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
1594 void __user *uvalues = u64_to_user_ptr(attr->batch.values);
1595 void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
1596 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
1597 u32 batch, max_count, size, bucket_size;
1598 struct htab_elem *node_to_free = NULL;
1599 u64 elem_map_flags, map_flags;
1600 struct hlist_nulls_head *head;
1601 struct hlist_nulls_node *n;
1602 unsigned long flags = 0;
1603 bool locked = false;
1604 struct htab_elem *l;
1605 struct bucket *b;
1606 int ret = 0;
1607
1608 elem_map_flags = attr->batch.elem_flags;
1609 if ((elem_map_flags & ~BPF_F_LOCK) ||
1610 ((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
1611 return -EINVAL;
1612
1613 map_flags = attr->batch.flags;
1614 if (map_flags)
1615 return -EINVAL;
1616
1617 max_count = attr->batch.count;
1618 if (!max_count)
1619 return 0;
1620
1621 if (put_user(0, &uattr->batch.count))
1622 return -EFAULT;
1623
1624 batch = 0;
1625 if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
1626 return -EFAULT;
1627
1628 if (batch >= htab->n_buckets)
1629 return -ENOENT;
1630
1631 key_size = htab->map.key_size;
1632 roundup_key_size = round_up(htab->map.key_size, 8);
1633 value_size = htab->map.value_size;
1634 size = round_up(value_size, 8);
1635 if (is_percpu)
1636 value_size = size * num_possible_cpus();
1637 total = 0;
	/* while experimenting with hash tables with sizes ranging from 10 to
	 * 1000, it was observed that a bucket can have up to 5 entries.
	 */
1641 bucket_size = 5;
1642
1643alloc:
	/* We cannot do copy_from_user or copy_to_user inside
	 * the rcu_read_lock. Allocate enough space here.
	 */
1647 keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN);
1648 values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN);
1649 if (!keys || !values) {
1650 ret = -ENOMEM;
1651 goto after_loop;
1652 }
1653
1654again:
1655 bpf_disable_instrumentation();
1656 rcu_read_lock();
1657again_nocopy:
1658 dst_key = keys;
1659 dst_val = values;
1660 b = &htab->buckets[batch];
1661 head = &b->head;
	/* do not grab the lock unless need it (bucket_cnt > 0). */
1663 if (locked) {
1664 ret = htab_lock_bucket(htab, b, batch, &flags);
1665 if (ret)
1666 goto next_batch;
1667 }
1668
1669 bucket_cnt = 0;
1670 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
1671 bucket_cnt++;
1672
1673 if (bucket_cnt && !locked) {
1674 locked = true;
1675 goto again_nocopy;
1676 }
1677
1678 if (bucket_cnt > (max_count - total)) {
1679 if (total == 0)
1680 ret = -ENOSPC;
		/* Note that since bucket_cnt > 0 here, it is implicit
		 * that the lock was grabbed, so release it.
		 */
1684 htab_unlock_bucket(htab, b, batch, flags);
1685 rcu_read_unlock();
1686 bpf_enable_instrumentation();
1687 goto after_loop;
1688 }
1689
1690 if (bucket_cnt > bucket_size) {
1691 bucket_size = bucket_cnt;
		/* Note that since bucket_cnt > 0 here, it is implicit
		 * that the lock was grabbed, so release it.
		 */
1695 htab_unlock_bucket(htab, b, batch, flags);
1696 rcu_read_unlock();
1697 bpf_enable_instrumentation();
1698 kvfree(keys);
1699 kvfree(values);
1700 goto alloc;
1701 }
1702
	/* Next block is only safe to run if you have grabbed the lock */
1704 if (!locked)
1705 goto next_batch;
1706
1707 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
1708 memcpy(dst_key, l->key, key_size);
1709
1710 if (is_percpu) {
1711 int off = 0, cpu;
1712 void __percpu *pptr;
1713
1714 pptr = htab_elem_get_ptr(l, map->key_size);
1715 for_each_possible_cpu(cpu) {
1716 bpf_long_memcpy(dst_val + off,
1717 per_cpu_ptr(pptr, cpu), size);
1718 off += size;
1719 }
1720 } else {
1721 value = l->key + roundup_key_size;
1722 if (elem_map_flags & BPF_F_LOCK)
1723 copy_map_value_locked(map, dst_val, value,
1724 true);
1725 else
1726 copy_map_value(map, dst_val, value);
1727 check_and_init_map_value(map, dst_val);
1728 }
1729 if (do_delete) {
1730 hlist_nulls_del_rcu(&l->hash_node);

			/* bpf_lru_push_free() will acquire lru_lock, which
			 * may cause deadlock. See comments in function
			 * prealloc_lru_pop(). Let us do bpf_lru_push_free()
			 * after releasing the bucket lock.
			 */
1737 if (is_lru_map) {
1738 l->batch_flink = node_to_free;
1739 node_to_free = l;
1740 } else {
1741 free_htab_elem(htab, l);
1742 }
1743 }
1744 dst_key += key_size;
1745 dst_val += value_size;
1746 }
1747
1748 htab_unlock_bucket(htab, b, batch, flags);
1749 locked = false;
1750
1751 while (node_to_free) {
1752 l = node_to_free;
1753 node_to_free = node_to_free->batch_flink;
1754 htab_lru_push_free(htab, l);
1755 }
1756
1757next_batch:
	/* If we are not copying data, we can go to next bucket and avoid
	 * unlocking the rcu.
	 */
1761 if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
1762 batch++;
1763 goto again_nocopy;
1764 }
1765
1766 rcu_read_unlock();
1767 bpf_enable_instrumentation();
1768 if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
1769 key_size * bucket_cnt) ||
1770 copy_to_user(uvalues + total * value_size, values,
1771 value_size * bucket_cnt))) {
1772 ret = -EFAULT;
1773 goto after_loop;
1774 }
1775
1776 total += bucket_cnt;
1777 batch++;
1778 if (batch >= htab->n_buckets) {
1779 ret = -ENOENT;
1780 goto after_loop;
1781 }
1782 goto again;
1783
1784after_loop:
1785 if (ret == -EFAULT)
1786 goto out;

	/* copy # of entries and next batch */
1789 ubatch = u64_to_user_ptr(attr->batch.out_batch);
1790 if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
1791 put_user(total, &uattr->batch.count))
1792 ret = -EFAULT;
1793
1794out:
1795 kvfree(keys);
1796 kvfree(values);
1797 return ret;
1798}
1799
1800static int
1801htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
1802 union bpf_attr __user *uattr)
1803{
1804 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
1805 false, true);
1806}
1807
1808static int
1809htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
1810 const union bpf_attr *attr,
1811 union bpf_attr __user *uattr)
1812{
1813 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
1814 false, true);
1815}
1816
1817static int
1818htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
1819 union bpf_attr __user *uattr)
1820{
1821 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
1822 false, false);
1823}
1824
1825static int
1826htab_map_lookup_and_delete_batch(struct bpf_map *map,
1827 const union bpf_attr *attr,
1828 union bpf_attr __user *uattr)
1829{
1830 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
1831 false, false);
1832}
1833
1834static int
1835htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
1836 const union bpf_attr *attr,
1837 union bpf_attr __user *uattr)
1838{
1839 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
1840 true, true);
1841}
1842
1843static int
1844htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
1845 const union bpf_attr *attr,
1846 union bpf_attr __user *uattr)
1847{
1848 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
1849 true, true);
1850}
1851
1852static int
1853htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
1854 union bpf_attr __user *uattr)
1855{
1856 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
1857 true, false);
1858}
1859
1860static int
1861htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
1862 const union bpf_attr *attr,
1863 union bpf_attr __user *uattr)
1864{
1865 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
1866 true, false);
1867}
1868
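/* bpf_iter seq_file walker state: the bucket currently being walked, how many
 * of its elements were already emitted, and a scratch buffer used to flatten
 * per-cpu values for the iterator program.
 */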
1869struct bpf_iter_seq_hash_map_info {
1870 struct bpf_map *map;
1871 struct bpf_htab *htab;
1872 void *percpu_value_buf;
1873 u32 bucket_id;
1874 u32 skip_elems;
1875};
1876
1877static struct htab_elem *
1878bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
1879 struct htab_elem *prev_elem)
1880{
1881 const struct bpf_htab *htab = info->htab;
1882 u32 skip_elems = info->skip_elems;
1883 u32 bucket_id = info->bucket_id;
1884 struct hlist_nulls_head *head;
1885 struct hlist_nulls_node *n;
1886 struct htab_elem *elem;
1887 struct bucket *b;
1888 u32 i, count;
1889
1890 if (bucket_id >= htab->n_buckets)
1891 return NULL;
1892
	/* try to find next elem in the same bucket */
1894 if (prev_elem) {
		/* no update/deletion on this bucket, prev_elem should be still valid
		 * and we won't skip elements.
		 */
1898 n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node));
1899 elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node);
1900 if (elem)
1901 return elem;
1902
		/* not found, go look in the next bucket */
1904 b = &htab->buckets[bucket_id++];
1905 rcu_read_unlock();
1906 skip_elems = 0;
1907 }
1908
1909 for (i = bucket_id; i < htab->n_buckets; i++) {
1910 b = &htab->buckets[i];
1911 rcu_read_lock();
1912
1913 count = 0;
1914 head = &b->head;
1915 hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
1916 if (count >= skip_elems) {
1917 info->bucket_id = i;
1918 info->skip_elems = count;
1919 return elem;
1920 }
1921 count++;
1922 }
1923
1924 rcu_read_unlock();
1925 skip_elems = 0;
1926 }
1927
1928 info->bucket_id = i;
1929 info->skip_elems = 0;
1930 return NULL;
1931}
1932
1933static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos)
1934{
1935 struct bpf_iter_seq_hash_map_info *info = seq->private;
1936 struct htab_elem *elem;
1937
1938 elem = bpf_hash_map_seq_find_next(info, NULL);
1939 if (!elem)
1940 return NULL;
1941
1942 if (*pos == 0)
1943 ++*pos;
1944 return elem;
1945}
1946
1947static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1948{
1949 struct bpf_iter_seq_hash_map_info *info = seq->private;
1950
1951 ++*pos;
1952 ++info->skip_elems;
1953 return bpf_hash_map_seq_find_next(info, v);
1954}
1955
1956static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
1957{
1958 struct bpf_iter_seq_hash_map_info *info = seq->private;
1959 u32 roundup_key_size, roundup_value_size;
1960 struct bpf_iter__bpf_map_elem ctx = {};
1961 struct bpf_map *map = info->map;
1962 struct bpf_iter_meta meta;
1963 int ret = 0, off = 0, cpu;
1964 struct bpf_prog *prog;
1965 void __percpu *pptr;
1966
1967 meta.seq = seq;
1968 prog = bpf_iter_get_info(&meta, elem == NULL);
1969 if (prog) {
1970 ctx.meta = &meta;
1971 ctx.map = info->map;
1972 if (elem) {
1973 roundup_key_size = round_up(map->key_size, 8);
1974 ctx.key = elem->key;
1975 if (!info->percpu_value_buf) {
1976 ctx.value = elem->key + roundup_key_size;
1977 } else {
1978 roundup_value_size = round_up(map->value_size, 8);
1979 pptr = htab_elem_get_ptr(elem, map->key_size);
1980 for_each_possible_cpu(cpu) {
1981 bpf_long_memcpy(info->percpu_value_buf + off,
1982 per_cpu_ptr(pptr, cpu),
1983 roundup_value_size);
1984 off += roundup_value_size;
1985 }
1986 ctx.value = info->percpu_value_buf;
1987 }
1988 }
1989 ret = bpf_iter_run_prog(prog, &ctx);
1990 }
1991
1992 return ret;
1993}
1994
1995static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
1996{
1997 return __bpf_hash_map_seq_show(seq, v);
1998}
1999
2000static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
2001{
2002 if (!v)
2003 (void)__bpf_hash_map_seq_show(seq, NULL);
2004 else
2005 rcu_read_unlock();
2006}
2007
2008static int bpf_iter_init_hash_map(void *priv_data,
2009 struct bpf_iter_aux_info *aux)
2010{
2011 struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
2012 struct bpf_map *map = aux->map;
2013 void *value_buf;
2014 u32 buf_size;
2015
2016 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
2017 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
2018 buf_size = round_up(map->value_size, 8) * num_possible_cpus();
2019 value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
2020 if (!value_buf)
2021 return -ENOMEM;
2022
2023 seq_info->percpu_value_buf = value_buf;
2024 }
2025
2026 seq_info->map = map;
2027 seq_info->htab = container_of(map, struct bpf_htab, map);
2028 return 0;
2029}
2030
2031static void bpf_iter_fini_hash_map(void *priv_data)
2032{
2033 struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
2034
2035 kfree(seq_info->percpu_value_buf);
2036}
2037
2038static const struct seq_operations bpf_hash_map_seq_ops = {
2039 .start = bpf_hash_map_seq_start,
2040 .next = bpf_hash_map_seq_next,
2041 .stop = bpf_hash_map_seq_stop,
2042 .show = bpf_hash_map_seq_show,
2043};
2044
2045static const struct bpf_iter_seq_info iter_seq_info = {
2046 .seq_ops = &bpf_hash_map_seq_ops,
2047 .init_seq_private = bpf_iter_init_hash_map,
2048 .fini_seq_private = bpf_iter_fini_hash_map,
2049 .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
2050};
2051
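/* Backend of the bpf_for_each_map_elem() helper for hash maps: invoke
 * callback_fn(map, key, value, callback_ctx, 0) for each element and stop
 * early as soon as the callback returns a non-zero value.
 */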
2052static int bpf_for_each_hash_elem(struct bpf_map *map, void *callback_fn,
2053 void *callback_ctx, u64 flags)
2054{
2055 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
2056 struct hlist_nulls_head *head;
2057 struct hlist_nulls_node *n;
2058 struct htab_elem *elem;
2059 u32 roundup_key_size;
2060 int i, num_elems = 0;
2061 void __percpu *pptr;
2062 struct bucket *b;
2063 void *key, *val;
2064 bool is_percpu;
2065 u64 ret = 0;
2066
2067 if (flags != 0)
2068 return -EINVAL;
2069
2070 is_percpu = htab_is_percpu(htab);
2071
2072 roundup_key_size = round_up(map->key_size, 8);
	/* disable migration so percpu value prepared here will be the
	 * same as the one seen by the bpf program with bpf_map_lookup_elem().
	 */
2076 if (is_percpu)
2077 migrate_disable();
2078 for (i = 0; i < htab->n_buckets; i++) {
2079 b = &htab->buckets[i];
2080 rcu_read_lock();
2081 head = &b->head;
2082 hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
2083 key = elem->key;
2084 if (is_percpu) {
				/* current cpu value for percpu map */
2086 pptr = htab_elem_get_ptr(elem, map->key_size);
2087 val = this_cpu_ptr(pptr);
2088 } else {
2089 val = elem->key + roundup_key_size;
2090 }
2091 num_elems++;
2092 ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
2093 (u64)(long)key, (u64)(long)val,
2094 (u64)(long)callback_ctx, 0);
2095
2096 if (ret) {
2097 rcu_read_unlock();
2098 goto out;
2099 }
2100 }
2101 rcu_read_unlock();
2102 }
2103out:
2104 if (is_percpu)
2105 migrate_enable();
2106 return num_elems;
2107}
2108
2109static int htab_map_btf_id;
2110const struct bpf_map_ops htab_map_ops = {
2111 .map_meta_equal = bpf_map_meta_equal,
2112 .map_alloc_check = htab_map_alloc_check,
2113 .map_alloc = htab_map_alloc,
2114 .map_free = htab_map_free,
2115 .map_get_next_key = htab_map_get_next_key,
2116 .map_release_uref = htab_map_free_timers,
2117 .map_lookup_elem = htab_map_lookup_elem,
2118 .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
2119 .map_update_elem = htab_map_update_elem,
2120 .map_delete_elem = htab_map_delete_elem,
2121 .map_gen_lookup = htab_map_gen_lookup,
2122 .map_seq_show_elem = htab_map_seq_show_elem,
2123 .map_set_for_each_callback_args = map_set_for_each_callback_args,
2124 .map_for_each_callback = bpf_for_each_hash_elem,
2125 BATCH_OPS(htab),
2126 .map_btf_name = "bpf_htab",
2127 .map_btf_id = &htab_map_btf_id,
2128 .iter_seq_info = &iter_seq_info,
2129};
2130
2131static int htab_lru_map_btf_id;
2132const struct bpf_map_ops htab_lru_map_ops = {
2133 .map_meta_equal = bpf_map_meta_equal,
2134 .map_alloc_check = htab_map_alloc_check,
2135 .map_alloc = htab_map_alloc,
2136 .map_free = htab_map_free,
2137 .map_get_next_key = htab_map_get_next_key,
2138 .map_release_uref = htab_map_free_timers,
2139 .map_lookup_elem = htab_lru_map_lookup_elem,
2140 .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
2141 .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
2142 .map_update_elem = htab_lru_map_update_elem,
2143 .map_delete_elem = htab_lru_map_delete_elem,
2144 .map_gen_lookup = htab_lru_map_gen_lookup,
2145 .map_seq_show_elem = htab_map_seq_show_elem,
2146 .map_set_for_each_callback_args = map_set_for_each_callback_args,
2147 .map_for_each_callback = bpf_for_each_hash_elem,
2148 BATCH_OPS(htab_lru),
2149 .map_btf_name = "bpf_htab",
2150 .map_btf_id = &htab_lru_map_btf_id,
2151 .iter_seq_info = &iter_seq_info,
2152};
2153
2154
2155static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
2156{
2157 struct htab_elem *l = __htab_map_lookup_elem(map, key);
2158
2159 if (l)
2160 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
2161 else
2162 return NULL;
2163}
2164
2165static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
2166{
2167 struct htab_elem *l = __htab_map_lookup_elem(map, key);
2168
2169 if (l) {
2170 bpf_lru_node_set_ref(&l->lru_node);
2171 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
2172 }
2173
2174 return NULL;
2175}
2176
2177int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
2178{
2179 struct htab_elem *l;
2180 void __percpu *pptr;
2181 int ret = -ENOENT;
2182 int cpu, off = 0;
2183 u32 size;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
2189 size = round_up(map->value_size, 8);
2190 rcu_read_lock();
2191 l = __htab_map_lookup_elem(map, key);
2192 if (!l)
2193 goto out;
	/* We do not mark LRU map element here in order to not mess up
	 * eviction heuristics when user space does a map walk.
	 */
2197 pptr = htab_elem_get_ptr(l, map->key_size);
2198 for_each_possible_cpu(cpu) {
2199 bpf_long_memcpy(value + off,
2200 per_cpu_ptr(pptr, cpu), size);
2201 off += size;
2202 }
2203 ret = 0;
2204out:
2205 rcu_read_unlock();
2206 return ret;
2207}
2208
2209int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
2210 u64 map_flags)
2211{
2212 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
2213 int ret;
2214
2215 rcu_read_lock();
2216 if (htab_is_lru(htab))
2217 ret = __htab_lru_percpu_map_update_elem(map, key, value,
2218 map_flags, true);
2219 else
2220 ret = __htab_percpu_map_update_elem(map, key, value, map_flags,
2221 true);
2222 rcu_read_unlock();
2223
2224 return ret;
2225}
2226
2227static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
2228 struct seq_file *m)
2229{
2230 struct htab_elem *l;
2231 void __percpu *pptr;
2232 int cpu;
2233
2234 rcu_read_lock();
2235
2236 l = __htab_map_lookup_elem(map, key);
2237 if (!l) {
2238 rcu_read_unlock();
2239 return;
2240 }
2241
2242 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
2243 seq_puts(m, ": {\n");
2244 pptr = htab_elem_get_ptr(l, map->key_size);
2245 for_each_possible_cpu(cpu) {
2246 seq_printf(m, "\tcpu%d: ", cpu);
2247 btf_type_seq_show(map->btf, map->btf_value_type_id,
2248 per_cpu_ptr(pptr, cpu), m);
2249 seq_puts(m, "\n");
2250 }
2251 seq_puts(m, "}\n");
2252
2253 rcu_read_unlock();
2254}
2255
2256static int htab_percpu_map_btf_id;
2257const struct bpf_map_ops htab_percpu_map_ops = {
2258 .map_meta_equal = bpf_map_meta_equal,
2259 .map_alloc_check = htab_map_alloc_check,
2260 .map_alloc = htab_map_alloc,
2261 .map_free = htab_map_free,
2262 .map_get_next_key = htab_map_get_next_key,
2263 .map_lookup_elem = htab_percpu_map_lookup_elem,
2264 .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
2265 .map_update_elem = htab_percpu_map_update_elem,
2266 .map_delete_elem = htab_map_delete_elem,
2267 .map_seq_show_elem = htab_percpu_map_seq_show_elem,
2268 .map_set_for_each_callback_args = map_set_for_each_callback_args,
2269 .map_for_each_callback = bpf_for_each_hash_elem,
2270 BATCH_OPS(htab_percpu),
2271 .map_btf_name = "bpf_htab",
2272 .map_btf_id = &htab_percpu_map_btf_id,
2273 .iter_seq_info = &iter_seq_info,
2274};
2275
2276static int htab_lru_percpu_map_btf_id;
2277const struct bpf_map_ops htab_lru_percpu_map_ops = {
2278 .map_meta_equal = bpf_map_meta_equal,
2279 .map_alloc_check = htab_map_alloc_check,
2280 .map_alloc = htab_map_alloc,
2281 .map_free = htab_map_free,
2282 .map_get_next_key = htab_map_get_next_key,
2283 .map_lookup_elem = htab_lru_percpu_map_lookup_elem,
2284 .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
2285 .map_update_elem = htab_lru_percpu_map_update_elem,
2286 .map_delete_elem = htab_lru_map_delete_elem,
2287 .map_seq_show_elem = htab_percpu_map_seq_show_elem,
2288 .map_set_for_each_callback_args = map_set_for_each_callback_args,
2289 .map_for_each_callback = bpf_for_each_hash_elem,
2290 BATCH_OPS(htab_lru_percpu),
2291 .map_btf_name = "bpf_htab",
2292 .map_btf_id = &htab_lru_percpu_map_btf_id,
2293 .iter_seq_info = &iter_seq_info,
2294};
2295
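/* "fd" hash maps (used for hash-of-maps) store kernel pointers that were
 * resolved from user supplied file descriptors; from user space the value is
 * always a u32.
 */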
2296static int fd_htab_map_alloc_check(union bpf_attr *attr)
2297{
2298 if (attr->value_size != sizeof(u32))
2299 return -EINVAL;
2300 return htab_map_alloc_check(attr);
2301}
2302
2303static void fd_htab_map_free(struct bpf_map *map)
2304{
2305 struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
2306 struct hlist_nulls_node *n;
2307 struct hlist_nulls_head *head;
2308 struct htab_elem *l;
2309 int i;
2310
2311 for (i = 0; i < htab->n_buckets; i++) {
2312 head = select_bucket(htab, i);
2313
2314 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
2315 void *ptr = fd_htab_map_get_ptr(map, l);
2316
2317 map->ops->map_fd_put_ptr(ptr);
2318 }
2319 }
2320
2321 htab_map_free(map);
2322}
2323
/* only called from syscall */
2325int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
2326{
2327 void **ptr;
2328 int ret = 0;
2329
2330 if (!map->ops->map_fd_sys_lookup_elem)
2331 return -ENOTSUPP;
2332
2333 rcu_read_lock();
2334 ptr = htab_map_lookup_elem(map, key);
2335 if (ptr)
2336 *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
2337 else
2338 ret = -ENOENT;
2339 rcu_read_unlock();
2340
2341 return ret;
2342}
2343
/* only called from syscall */
2345int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
2346 void *key, void *value, u64 map_flags)
2347{
2348 void *ptr;
2349 int ret;
2350 u32 ufd = *(u32 *)value;
2351
2352 ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
2353 if (IS_ERR(ptr))
2354 return PTR_ERR(ptr);
2355
2356 ret = htab_map_update_elem(map, key, &ptr, map_flags);
2357 if (ret)
2358 map->ops->map_fd_put_ptr(ptr);
2359
2360 return ret;
2361}
2362
2363static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
2364{
2365 struct bpf_map *map, *inner_map_meta;
2366
2367 inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
2368 if (IS_ERR(inner_map_meta))
2369 return inner_map_meta;
2370
2371 map = htab_map_alloc(attr);
2372 if (IS_ERR(map)) {
2373 bpf_map_meta_free(inner_map_meta);
2374 return map;
2375 }
2376
2377 map->inner_map_meta = inner_map_meta;
2378
2379 return map;
2380}
2381
2382static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
2383{
2384 struct bpf_map **inner_map = htab_map_lookup_elem(map, key);
2385
2386 if (!inner_map)
2387 return NULL;
2388
2389 return READ_ONCE(*inner_map);
2390}
2391
2392static int htab_of_map_gen_lookup(struct bpf_map *map,
2393 struct bpf_insn *insn_buf)
2394{
2395 struct bpf_insn *insn = insn_buf;
2396 const int ret = BPF_REG_0;
2397
2398 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
2399 (void *(*)(struct bpf_map *map, void *key))NULL));
2400 *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
2401 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
2402 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
2403 offsetof(struct htab_elem, key) +
2404 round_up(map->key_size, 8));
2405 *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
2406
2407 return insn - insn_buf;
2408}
2409
2410static void htab_of_map_free(struct bpf_map *map)
2411{
2412 bpf_map_meta_free(map->inner_map_meta);
2413 fd_htab_map_free(map);
2414}
2415
2416static int htab_of_maps_map_btf_id;
2417const struct bpf_map_ops htab_of_maps_map_ops = {
2418 .map_alloc_check = fd_htab_map_alloc_check,
2419 .map_alloc = htab_of_map_alloc,
2420 .map_free = htab_of_map_free,
2421 .map_get_next_key = htab_map_get_next_key,
2422 .map_lookup_elem = htab_of_map_lookup_elem,
2423 .map_delete_elem = htab_map_delete_elem,
2424 .map_fd_get_ptr = bpf_map_fd_get_ptr,
2425 .map_fd_put_ptr = bpf_map_fd_put_ptr,
2426 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
2427 .map_gen_lookup = htab_of_map_gen_lookup,
2428 .map_check_btf = map_check_no_btf,
2429 .map_btf_name = "bpf_htab",
2430 .map_btf_id = &htab_of_maps_map_btf_id,
2431};
2432