// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
	 BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
					   GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

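/* Called from syscall */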
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags) ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
	    attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
	    attr->map_flags & BPF_F_PRESERVE_ELEMS)
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool bypass_spec_v1 = bpf_bypass_spec_v1();
	u64 array_size, mask64;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
	 * upper most bit set in u32 space is undefined behavior due to
	 * resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
	if (!bypass_spec_v1) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}

	array_size = sizeof(*array);
	if (percpu) {
		array_size += (u64) max_entries * sizeof(void *);
	} else {
		/* rely on vmalloc() to return page-aligned memory and
		 * ensure array->value is exactly page-aligned
		 */
		if (attr->map_flags & BPF_F_MMAPABLE) {
			array_size = PAGE_ALIGN(array_size);
			array_size += PAGE_ALIGN((u64) max_entries * elem_size);
		} else {
			array_size += (u64) max_entries * elem_size;
		}
	}

	/* allocate all map elements and zero-initialize them */
	if (attr->map_flags & BPF_F_MMAPABLE) {
		void *data;

		/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
		data = bpf_map_area_mmapable_alloc(array_size, numa_node);
		if (!data)
			return ERR_PTR(-ENOMEM);
		array = data + PAGE_ALIGN(sizeof(struct bpf_array))
			- offsetof(struct bpf_array, value);
	} else {
		array = bpf_map_area_alloc(array_size, numa_node);
	}
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.bypass_spec_v1 = bypass_spec_v1;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}

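/* Called from syscall or from eBPF program */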
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + array->elem_size * (index & array->index_mask);
}

static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
				       u32 off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (off >= map->value_size)
		return -EINVAL;

	*imm = (unsigned long)array->value;
	return 0;
}

static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
				       u32 *off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u64 base = (unsigned long)array->value;
	u64 range = array->elem_size;

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (imm < base || imm >= base + range)
		return -ENOENT;

	*off = imm - base;
	return 0;
}

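/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */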
static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = round_up(map->value_size, 8);
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	if (map->map_flags & BPF_F_INNER_MAP)
		return -EOPNOTSUPP;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}

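/* Called from eBPF program */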
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

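/* Called from syscall */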
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}

static void check_and_free_timer_in_array(struct bpf_array *arr, void *val)
{
	if (unlikely(map_value_has_timer(&arr->map)))
		bpf_timer_cancel_and_free(val + arr->map.timer_off);
}

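/* Called from syscall or from eBPF program */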
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	char *val;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags & BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		return -EINVAL;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
	} else {
		val = array->value +
			array->elem_size * (index & array->index_mask);
		if (map_flags & BPF_F_LOCK)
			copy_map_value_locked(map, val, value, false);
		else
			copy_map_value(map, val, value);
		check_and_free_timer_in_array(array, val);
	}
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

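/* Called from syscall or from eBPF program */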
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

static void *array_map_vmalloc_addr(struct bpf_array *array)
{
	return (void *)round_down((unsigned long)array, PAGE_SIZE);
}

static void array_map_free_timers(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	if (likely(!map_value_has_timer(map)))
		return;

	for (i = 0; i < array->map.max_entries; i++)
		bpf_timer_cancel_and_free(array->value + array->elem_size * i +
					  map->timer_off);
}

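/* Called when map->refcnt goes to zero, either from workqueue or from syscall */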
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	if (array->map.map_flags & BPF_F_MMAPABLE)
		bpf_map_area_free(array_map_vmalloc_addr(array));
	else
		bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	if (map->btf_key_type_id)
		seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
					   struct seq_file *m)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	seq_printf(m, "%u: {\n", *(u32 *)key);
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_puts(m, "\n");
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf *btf,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	u32 int_data;

	/* One exception for keyless BTF: .bss/.data/.rodata map */
	if (btf_type_is_void(key_type)) {
		if (map->map_type != BPF_MAP_TYPE_ARRAY ||
		    map->max_entries != 1)
			return -EINVAL;

		if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
			return -EINVAL;

		return 0;
	}

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	/* bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}

static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;

	if (!(map->map_flags & BPF_F_MMAPABLE))
		return -EINVAL;

	if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
	    PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
		return -EINVAL;

	return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
				   vma->vm_pgoff + pgoff);
}

static bool array_map_meta_equal(const struct bpf_map *meta0,
				 const struct bpf_map *meta1)
{
	if (!bpf_map_meta_equal(meta0, meta1))
		return false;
	return meta0->map_flags & BPF_F_INNER_MAP ? true :
	       meta0->max_entries == meta1->max_entries;
}

struct bpf_iter_seq_array_map_info {
	struct bpf_map *map;
	void *percpu_value_buf;
	u32 index;
};

static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	if (info->index >= map->max_entries)
		return NULL;

	if (*pos == 0)
		++*pos;
	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array->value + array->elem_size * index;
}

static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	++*pos;
	++info->index;
	if (info->index >= map->max_entries)
		return NULL;

	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array->value + array->elem_size * index;
}

static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_iter__bpf_map_elem ctx = {};
	struct bpf_map *map = info->map;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int off = 0, cpu = 0;
	void __percpu **pptr;
	u32 size;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, v == NULL);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.map = info->map;
	if (v) {
		ctx.key = &info->index;

		if (!info->percpu_value_buf) {
			ctx.value = v;
		} else {
			pptr = v;
			size = round_up(map->value_size, 8);
			for_each_possible_cpu(cpu) {
				bpf_long_memcpy(info->percpu_value_buf + off,
						per_cpu_ptr(pptr, cpu),
						size);
				off += size;
			}
			ctx.value = info->percpu_value_buf;
		}
	}

	return bpf_iter_run_prog(prog, &ctx);
}

static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_array_map_seq_show(seq, v);
}

static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__bpf_array_map_seq_show(seq, NULL);
}

static int bpf_iter_init_array_map(void *priv_data,
				   struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;
	struct bpf_map *map = aux->map;
	void *value_buf;
	u32 buf_size;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		buf_size = round_up(map->value_size, 8) * num_possible_cpus();
		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
		if (!value_buf)
			return -ENOMEM;

		seq_info->percpu_value_buf = value_buf;
	}

	seq_info->map = map;
	return 0;
}

static void bpf_iter_fini_array_map(void *priv_data)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;

	kfree(seq_info->percpu_value_buf);
}

static const struct seq_operations bpf_array_map_seq_ops = {
	.start = bpf_array_map_seq_start,
	.next = bpf_array_map_seq_next,
	.stop = bpf_array_map_seq_stop,
	.show = bpf_array_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_array_map_seq_ops,
	.init_seq_private = bpf_iter_init_array_map,
	.fini_seq_private = bpf_iter_fini_array_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};

static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
				   void *callback_ctx, u64 flags)
{
	u32 i, key, num_elems = 0;
	struct bpf_array *array;
	bool is_percpu;
	u64 ret = 0;
	void *val;

	if (flags != 0)
		return -EINVAL;

	is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	array = container_of(map, struct bpf_array, map);
	if (is_percpu)
		migrate_disable();
	for (i = 0; i < map->max_entries; i++) {
		if (is_percpu)
			val = this_cpu_ptr(array->pptrs[i]);
		else
			val = array->value + array->elem_size * i;
		num_elems++;
		key = i;
		ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
					(u64)(long)&key, (u64)(long)val,
					(u64)(long)callback_ctx, 0);
		/* return value: 0 - continue, 1 - stop and return */
		if (ret)
			break;
	}

	if (is_percpu)
		migrate_enable();
	return num_elems;
}

static int array_map_btf_id;
const struct bpf_map_ops array_map_ops = {
	.map_meta_equal = array_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_release_uref = array_map_free_timers,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_direct_value_addr = array_map_direct_value_addr,
	.map_direct_value_meta = array_map_direct_value_meta,
	.map_mmap = array_map_mmap,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_btf_name = "bpf_array",
	.map_btf_id = &array_map_btf_id,
	.iter_seq_info = &iter_seq_info,
};

static int percpu_array_map_btf_id;
const struct bpf_map_ops percpu_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_btf_name = "bpf_array",
	.map_btf_id = &percpu_array_map_btf_id,
	.iter_seq_info = &iter_seq_info,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	/* Program read-only/write-only not supported for special maps yet. */
	if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

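/* only called from syscall */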
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

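/* only called from syscall */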
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, new_ptr);
		map->ops->map_poke_run(map, index, old_ptr, new_ptr);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, new_ptr);
	}

	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);
	return 0;
}

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, NULL);
		map->ops->map_poke_run(map, index, old_ptr, NULL);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, NULL);
	}

	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_array_compatible(array, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

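/* decrement refcnt of all bpf_progs that are stored in this map */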
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	void **elem, *ptr;
	u32 prog_id;

	rcu_read_lock();

	elem = array_map_lookup_elem(map, key);
	if (elem) {
		ptr = READ_ONCE(*elem);
		if (ptr) {
			seq_printf(m, "%u: ", *(u32 *)key);
			prog_id = prog_fd_array_sys_lookup_elem(ptr);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  &prog_id, m);
			seq_puts(m, "\n");
		}
	}

	rcu_read_unlock();
}

struct prog_poke_elem {
	struct list_head list;
	struct bpf_prog_aux *aux;
};

static int prog_array_map_poke_track(struct bpf_map *map,
				     struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;
	int ret = 0;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry(elem, &aux->poke_progs, list) {
		if (elem->aux == prog_aux)
			goto out;
	}

	elem = kmalloc(sizeof(*elem), GFP_KERNEL);
	if (!elem) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&elem->list);
	/* We must track the program's aux info at this point in time
	 * since the program pointer itself may not be stable yet, see
	 * anything below in __bpf_prog_get().
	 */
	elem->aux = prog_aux;

	list_add_tail(&elem->list, &aux->poke_progs);
out:
	mutex_unlock(&aux->poke_mutex);
	return ret;
}

static void prog_array_map_poke_untrack(struct bpf_map *map,
					struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		if (elem->aux == prog_aux) {
			list_del_init(&elem->list);
			kfree(elem);
			break;
		}
	}
	mutex_unlock(&aux->poke_mutex);
}

static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
				    struct bpf_prog *old,
				    struct bpf_prog *new)
{
	u8 *old_addr, *new_addr, *old_bypass_addr;
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));

	list_for_each_entry(elem, &aux->poke_progs, list) {
		struct bpf_jit_poke_descriptor *poke;
		int i, ret;

		for (i = 0; i < elem->aux->size_poke_tab; i++) {
			poke = &elem->aux->poke_tab[i];

			/* Few things to be aware of:
			 *
			 * 1) We can only ever access aux in this context, but
			 *    not aux->prog since it might not be stable yet and
			 *    there could be danger of use after free otherwise.
			 * 2) Initially when we start tracking aux, the program
			 *    is not JITed yet and also does not have a kallsyms
			 *    entry. We skip these as poke->tailcall_target_stable
			 *    is not active yet. The JIT will do the final fixup
			 *    before setting it stable.
			 * 3) On program teardown, the program's kallsym entry gets
			 *    removed out of RCU callback, but we can only untrack
			 *    from sleepable context, therefore bpf_arch_text_poke()
			 *    might not see that this is in BPF text section and
			 *    bails out with -EINVAL. As these are unreachable since
			 *    RCU grace period already passed, we simply skip them.
			 * 4) Programs reaching refcount of zero while patching is
			 *    in progress is okay since we're protected under
			 *    poke_mutex and untrack the programs before the JIT
			 *    buffer is freed.
			 * 5) Any other error from bpf_arch_text_poke() below is
			 *    an unexpected bug.
			 */
			if (!READ_ONCE(poke->tailcall_target_stable))
				continue;
			if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
				continue;
			if (poke->tail_call.map != map ||
			    poke->tail_call.key != key)
				continue;

			old_bypass_addr = old ? NULL : poke->bypass_addr;
			old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
			new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;

			if (new) {
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, new_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);
				if (!old) {
					ret = bpf_arch_text_poke(poke->tailcall_bypass,
								 BPF_MOD_JUMP,
								 poke->bypass_addr,
								 NULL);
					BUG_ON(ret < 0 && ret != -EINVAL);
				}
			} else {
				ret = bpf_arch_text_poke(poke->tailcall_bypass,
							 BPF_MOD_JUMP,
							 old_bypass_addr,
							 poke->bypass_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);
				/* let other CPUs finish the execution of program
				 * so that it will not possible to expose them
				 * to invalid nop, stack unwind, nop state
				 */
				if (!ret)
					synchronize_rcu();
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, NULL);
				BUG_ON(ret < 0 && ret != -EINVAL);
			}
		}
	}
}

static void prog_array_map_clear_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_array_aux,
					   work)->map;
	bpf_fd_array_map_clear(map);
	bpf_map_put(map);
}

static void prog_array_map_clear(struct bpf_map *map)
{
	struct bpf_array_aux *aux = container_of(map, struct bpf_array,
						 map)->aux;
	bpf_map_inc(map);
	schedule_work(&aux->work);
}

static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
{
	struct bpf_array_aux *aux;
	struct bpf_map *map;

	aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
	if (!aux)
		return ERR_PTR(-ENOMEM);

	INIT_WORK(&aux->work, prog_array_map_clear_deferred);
	INIT_LIST_HEAD(&aux->poke_progs);
	mutex_init(&aux->poke_mutex);
	spin_lock_init(&aux->owner.lock);

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		kfree(aux);
		return map;
	}

	container_of(map, struct bpf_array, map)->aux = aux;
	aux->map = map;

	return map;
}

static void prog_array_map_free(struct bpf_map *map)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		list_del_init(&elem->list);
		kfree(elem);
	}
	kfree(aux);
	fd_array_map_free(map);
}

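/* prog_array->aux->{type,jited} is a runtime binding.
 * Doing static check alone in the verifier is not enough.
 * Thus, prog_array_map cannot be used as an inner_map
 * and map_meta_equal is not implemented.
 */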
static int prog_array_map_btf_id;
const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = prog_array_map_alloc,
	.map_free = prog_array_map_free,
	.map_poke_track = prog_array_map_poke_track,
	.map_poke_untrack = prog_array_map_poke_untrack,
	.map_poke_run = prog_array_map_poke_run,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = prog_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
	.map_btf_name = "bpf_array",
	.map_btf_id = &prog_array_map_btf_id,
};

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		return;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

static void perf_event_fd_array_map_free(struct bpf_map *map)
{
	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static int perf_event_array_map_btf_id;
const struct bpf_map_ops perf_event_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = perf_event_fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &perf_event_array_map_btf_id,
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put free cgrp after a rcu grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static int cgroup_array_map_btf_id;
const struct bpf_map_ops cgroup_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &cgroup_array_map_btf_id,
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* map_meta_free will release the inner_map_meta; the fd array
	 * entries still hold references on their inner maps, so drop
	 * those before freeing the array itself.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

static int array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = round_up(map->value_size, 8);
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}

static int array_of_maps_map_btf_id;
const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &array_of_maps_map_btf_id,
};