#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
	 BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
					   GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}
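
/* Note on the two helpers above: each per-cpu element is allocated with
 * 8-byte alignment so bpf_long_memcpy() can copy values in u64 chunks,
 * and cond_resched() between per-element allocations keeps maps with a
 * huge max_entries from monopolizing the CPU during creation or teardown.
 */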

/* Called from syscall */
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags) ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
	    attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
	    attr->map_flags & BPF_F_PRESERVE_ELEMS)
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool bypass_spec_v1 = bpf_bypass_spec_v1();
	u64 array_size, mask64;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
	 * upper most bit set in u32 space is undefined behavior due to
	 * resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
	if (!bypass_spec_v1) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}

	array_size = sizeof(*array);
	if (percpu) {
		array_size += (u64) max_entries * sizeof(void *);
	} else {
		/* rely on vmalloc() to return page-aligned memory and
		 * ensure array->value is exactly page-aligned
		 */
		if (attr->map_flags & BPF_F_MMAPABLE) {
			array_size = PAGE_ALIGN(array_size);
			array_size += PAGE_ALIGN((u64) max_entries * elem_size);
		} else {
			array_size += (u64) max_entries * elem_size;
		}
	}

	/* allocate all map elements and zero-initialize them */
	if (attr->map_flags & BPF_F_MMAPABLE) {
		void *data;

		/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
		data = bpf_map_area_mmapable_alloc(array_size, numa_node);
		if (!data)
			return ERR_PTR(-ENOMEM);
		array = data + PAGE_ALIGN(sizeof(struct bpf_array))
			- offsetof(struct bpf_array, value);
	} else {
		array = bpf_map_area_alloc(array_size, numa_node);
	}
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.bypass_spec_v1 = bypass_spec_v1;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}
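
/* For BPF_F_MMAPABLE arrays the value area starts on its own page, so user
 * space can map it straight through the map fd. A minimal user-space sketch
 * (map_fd is assumed to refer to a BPF_F_MMAPABLE array; "page-align" below
 * means rounding the length up to the page size):
 *
 *	size_t len = page-align((u64)max_entries * round_up(value_size, 8));
 *	void *base = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			  map_fd, 0);
 *
 * Offset 0 of the mapping corresponds to element 0 of the array.
 */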

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + array->elem_size * (index & array->index_mask);
}
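
/* The "& array->index_mask" above is kept even though index was already
 * bounds-checked: unless the loader may bypass Spectre v1 mitigations
 * (bpf_bypass_spec_v1()), array_map_alloc() rounded max_entries up to a
 * power of two so this mask clamps speculative out-of-bounds accesses.
 */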

static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
				       u32 off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (off >= map->value_size)
		return -EINVAL;

	*imm = (unsigned long)array->value;
	return 0;
}

static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
				       u32 *off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u64 base = (unsigned long)array->value;
	u64 range = array->elem_size;

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (imm < base || imm >= base + range)
		return -ENOENT;

	*off = imm - base;
	return 0;
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = round_up(map->value_size, 8);
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	if (map->map_flags & BPF_F_INNER_MAP)
		return -EOPNOTSUPP;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}
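
/* Roughly, the sequence emitted above corresponds to:
 *
 *	value = map_ptr + offsetof(struct bpf_array, value);
 *	index = *(u32 *)index_ptr;
 *	if (index >= max_entries)
 *		return NULL;
 *	index &= index_mask;	// only without bypass_spec_v1
 *	return value + index * elem_size;
 *
 * The differing jump offsets (4 vs 3) account for the extra BPF_AND
 * instruction in the Spectre-mitigated variant.
 */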

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}
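
/* User space walks an array map by repeatedly issuing BPF_MAP_GET_NEXT_KEY.
 * A minimal sketch using the libbpf wrapper (map_fd is assumed):
 *
 *	__u32 key, next;
 *	int err = bpf_map_get_next_key(map_fd, NULL, &next);	// yields 0
 *	while (!err) {
 *		key = next;
 *		// ... look up "key" here ...
 *		err = bpf_map_get_next_key(map_fd, &key, &next);
 *	}
 *	// iteration stops with ENOENT past the last index (exact return
 *	// convention depends on the libbpf version)
 */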

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	char *val;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags & BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		return -EINVAL;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
	} else {
		val = array->value +
			array->elem_size * (index & array->index_mask);
		if (map_flags & BPF_F_LOCK)
			copy_map_value_locked(map, val, value, false);
		else
			copy_map_value(map, val, value);
	}
	return 0;
}
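
/* With BPF_F_LOCK, copy_map_value_locked() holds the element's embedded
 * bpf_spin_lock while copying, so readers using the same flag observe a
 * consistent value; both copy helpers skip over the spin lock field itself
 * rather than overwriting it.
 */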

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

static void *array_map_vmalloc_addr(struct bpf_array *array)
{
	return (void *)round_down((unsigned long)array, PAGE_SIZE);
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	if (array->map.map_flags & BPF_F_MMAPABLE)
		bpf_map_area_free(array_map_vmalloc_addr(array));
	else
		bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	if (map->btf_key_type_id)
		seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
					   struct seq_file *m)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	seq_printf(m, "%u: {\n", *(u32 *)key);
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_puts(m, "\n");
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf *btf,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	u32 int_data;

	/* One exception for keyless BTF: .bss/.data/.rodata map */
	if (btf_type_is_void(key_type)) {
		if (map->map_type != BPF_MAP_TYPE_ARRAY ||
		    map->max_entries != 1)
			return -EINVAL;

		if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
			return -EINVAL;

		return 0;
	}

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	/* bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}

static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;

	if (!(map->map_flags & BPF_F_MMAPABLE))
		return -EINVAL;

	if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
	    PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
		return -EINVAL;

	return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
				   vma->vm_pgoff + pgoff);
}
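
/* The pgoff shift above hides the pages holding struct bpf_array itself:
 * user-visible offset 0 lands on the first page of the value area, which
 * array_map_alloc() placed on its own page boundary for this case.
 */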

static bool array_map_meta_equal(const struct bpf_map *meta0,
				 const struct bpf_map *meta1)
{
	if (!bpf_map_meta_equal(meta0, meta1))
		return false;
	return meta0->map_flags & BPF_F_INNER_MAP ? true :
	       meta0->max_entries == meta1->max_entries;
}
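
/* Inner maps normally must match the declared meta map's max_entries so
 * lookups can be inlined with fixed constants. BPF_F_INNER_MAP relaxes
 * that: array_map_gen_lookup() refuses to inline such maps, so arrays of
 * differing sizes may be inserted at runtime.
 */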

struct bpf_iter_seq_array_map_info {
	struct bpf_map *map;
	void *percpu_value_buf;
	u32 index;
};

static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	if (info->index >= map->max_entries)
		return NULL;

	if (*pos == 0)
		++*pos;
	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array->value + array->elem_size * index;
}

static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	++*pos;
	++info->index;
	if (info->index >= map->max_entries)
		return NULL;

	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array->value + array->elem_size * index;
}

static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_iter__bpf_map_elem ctx = {};
	struct bpf_map *map = info->map;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int off = 0, cpu = 0;
	void __percpu **pptr;
	u32 size;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, v == NULL);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.map = info->map;
	if (v) {
		ctx.key = &info->index;

		if (!info->percpu_value_buf) {
			ctx.value = v;
		} else {
			pptr = v;
			size = round_up(map->value_size, 8);
			for_each_possible_cpu(cpu) {
				bpf_long_memcpy(info->percpu_value_buf + off,
						per_cpu_ptr(pptr, cpu),
						size);
				off += size;
			}
			ctx.value = info->percpu_value_buf;
		}
	}

	return bpf_iter_run_prog(prog, &ctx);
}

static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_array_map_seq_show(seq, v);
}

static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__bpf_array_map_seq_show(seq, NULL);
}

static int bpf_iter_init_array_map(void *priv_data,
				   struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;
	struct bpf_map *map = aux->map;
	void *value_buf;
	u32 buf_size;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		buf_size = round_up(map->value_size, 8) * num_possible_cpus();
		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
		if (!value_buf)
			return -ENOMEM;

		seq_info->percpu_value_buf = value_buf;
	}

	seq_info->map = map;
	return 0;
}

static void bpf_iter_fini_array_map(void *priv_data)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;

	kfree(seq_info->percpu_value_buf);
}

static const struct seq_operations bpf_array_map_seq_ops = {
	.start = bpf_array_map_seq_start,
	.next = bpf_array_map_seq_next,
	.stop = bpf_array_map_seq_stop,
	.show = bpf_array_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_array_map_seq_ops,
	.init_seq_private = bpf_iter_init_array_map,
	.fini_seq_private = bpf_iter_fini_array_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};

static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
				   void *callback_ctx, u64 flags)
{
	u32 i, key, num_elems = 0;
	struct bpf_array *array;
	bool is_percpu;
	u64 ret = 0;
	void *val;

	if (flags != 0)
		return -EINVAL;

	is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	array = container_of(map, struct bpf_array, map);
	if (is_percpu)
		migrate_disable();
	for (i = 0; i < map->max_entries; i++) {
		if (is_percpu)
			val = this_cpu_ptr(array->pptrs[i]);
		else
			val = array->value + array->elem_size * i;
		num_elems++;
		key = i;
		ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
					(u64)(long)&key, (u64)(long)val,
					(u64)(long)callback_ctx, 0);
		/* return value: 0 - continue, 1 - stop and return */
		if (ret)
			break;
	}

	if (is_percpu)
		migrate_enable();
	return num_elems;
}
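
/* This backs the bpf_for_each_map_elem() helper. A sketch of the BPF
 * program side (names here are illustrative, not part of this file):
 *
 *	static long sum_cb(struct bpf_map *map, u32 *key, u64 *val, void *ctx)
 *	{
 *		*(u64 *)ctx += *val;
 *		return 0;	// 0 - continue, 1 - stop iterating
 *	}
 *	...
 *	u64 sum = 0;
 *	bpf_for_each_map_elem(&my_array, sum_cb, &sum, 0);
 */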

static int array_map_btf_id;
const struct bpf_map_ops array_map_ops = {
	.map_meta_equal = array_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_direct_value_addr = array_map_direct_value_addr,
	.map_direct_value_meta = array_map_direct_value_meta,
	.map_mmap = array_map_mmap,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_btf_name = "bpf_array",
	.map_btf_id = &array_map_btf_id,
	.iter_seq_info = &iter_seq_info,
};

static int percpu_array_map_btf_id;
const struct bpf_map_ops percpu_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_btf_name = "bpf_array",
	.map_btf_id = &percpu_array_map_btf_id,
	.iter_seq_info = &iter_seq_info,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	/* Program read-only/write-only not supported for special maps yet */
	if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, new_ptr);
		map->ops->map_poke_run(map, index, old_ptr, new_ptr);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, new_ptr);
	}

	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);
	return 0;
}

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, NULL);
		map->ops->map_poke_run(map, index, old_ptr, NULL);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, NULL);
	}

	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_array_compatible(array, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	void **elem, *ptr;
	u32 prog_id;

	rcu_read_lock();

	elem = array_map_lookup_elem(map, key);
	if (elem) {
		ptr = READ_ONCE(*elem);
		if (ptr) {
			seq_printf(m, "%u: ", *(u32 *)key);
			prog_id = prog_fd_array_sys_lookup_elem(ptr);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  &prog_id, m);
			seq_puts(m, "\n");
		}
	}

	rcu_read_unlock();
}

struct prog_poke_elem {
	struct list_head list;
	struct bpf_prog_aux *aux;
};

static int prog_array_map_poke_track(struct bpf_map *map,
				     struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;
	int ret = 0;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry(elem, &aux->poke_progs, list) {
		if (elem->aux == prog_aux)
			goto out;
	}

	elem = kmalloc(sizeof(*elem), GFP_KERNEL);
	if (!elem) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&elem->list);
	/* We must track the program's aux info at this point in time
	 * since the program pointer itself may not be stable yet, see
	 * also comment in prog_array_map_poke_run().
	 */
	elem->aux = prog_aux;

	list_add_tail(&elem->list, &aux->poke_progs);
out:
	mutex_unlock(&aux->poke_mutex);
	return ret;
}

static void prog_array_map_poke_untrack(struct bpf_map *map,
					struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		if (elem->aux == prog_aux) {
			list_del_init(&elem->list);
			kfree(elem);
			break;
		}
	}
	mutex_unlock(&aux->poke_mutex);
}

static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
				    struct bpf_prog *old,
				    struct bpf_prog *new)
{
	u8 *old_addr, *new_addr, *old_bypass_addr;
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));

	list_for_each_entry(elem, &aux->poke_progs, list) {
		struct bpf_jit_poke_descriptor *poke;
		int i, ret;

		for (i = 0; i < elem->aux->size_poke_tab; i++) {
			poke = &elem->aux->poke_tab[i];

			/* Few things to be aware of here:
			 *
			 * 1) We can only ever access aux in this context, but
			 *    not aux->prog since it might not be stable yet and
			 *    there could be danger of use after free otherwise.
			 * 2) Initially when we start tracking aux, the program
			 *    is not JITed yet and also does not have a kallsyms
			 *    entry; such entries are skipped since
			 *    poke->tailcall_target_stable is not yet set. The
			 *    JIT performs the final fixup before setting it
			 *    stable.
			 * 3) bpf_arch_text_poke() may fail with -EINVAL when
			 *    the poked location is not (or no longer) part of
			 *    kernel text, e.g. during program teardown; such
			 *    transitions are expected, hence the
			 *    BUG_ON(ret < 0 && ret != -EINVAL) checks below.
			 */
			if (!READ_ONCE(poke->tailcall_target_stable))
				continue;
			if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
				continue;
			if (poke->tail_call.map != map ||
			    poke->tail_call.key != key)
				continue;

			old_bypass_addr = old ? NULL : poke->bypass_addr;
			old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
			new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;

			if (new) {
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, new_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);
				if (!old) {
					ret = bpf_arch_text_poke(poke->tailcall_bypass,
								 BPF_MOD_JUMP,
								 poke->bypass_addr,
								 NULL);
					BUG_ON(ret < 0 && ret != -EINVAL);
				}
			} else {
				ret = bpf_arch_text_poke(poke->tailcall_bypass,
							 BPF_MOD_JUMP,
							 old_bypass_addr,
							 poke->bypass_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);
				/* let other CPUs finish the execution of program
				 * so that it will not possible to expose them
				 * to invalid nop, stack unwind, nop state
				 */
				if (!ret)
					synchronize_rcu();
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, NULL);
				BUG_ON(ret < 0 && ret != -EINVAL);
			}
		}
	}
}

static void prog_array_map_clear_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_array_aux,
					   work)->map;
	bpf_fd_array_map_clear(map);
	bpf_map_put(map);
}

static void prog_array_map_clear(struct bpf_map *map)
{
	struct bpf_array_aux *aux = container_of(map, struct bpf_array,
						 map)->aux;
	bpf_map_inc(map);
	schedule_work(&aux->work);
}

static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
{
	struct bpf_array_aux *aux;
	struct bpf_map *map;

	aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
	if (!aux)
		return ERR_PTR(-ENOMEM);

	INIT_WORK(&aux->work, prog_array_map_clear_deferred);
	INIT_LIST_HEAD(&aux->poke_progs);
	mutex_init(&aux->poke_mutex);

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		kfree(aux);
		return map;
	}

	container_of(map, struct bpf_array, map)->aux = aux;
	aux->map = map;

	return map;
}

static void prog_array_map_free(struct bpf_map *map)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		list_del_init(&elem->list);
		kfree(elem);
	}
	kfree(aux);
	fd_array_map_free(map);
}

/* prog_array->aux->{type,jited} is a runtime binding.
 * Doing static check alone in the verifier is not enough.
 * Thus, prog_array_map cannot be used as an inner_map
 * and map_meta_equal is not implemented.
 */
static int prog_array_map_btf_id;
const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = prog_array_map_alloc,
	.map_free = prog_array_map_free,
	.map_poke_track = prog_array_map_poke_track,
	.map_poke_untrack = prog_array_map_poke_untrack,
	.map_poke_run = prog_array_map_poke_run,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = prog_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
	.map_btf_name = "bpf_array",
	.map_btf_id = &prog_array_map_btf_id,
};

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		return;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

static void perf_event_fd_array_map_free(struct bpf_map *map)
{
	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static int perf_event_array_map_btf_id;
const struct bpf_map_ops perf_event_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = perf_event_fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &perf_event_array_map_btf_id,
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put free cgrp after a rcu grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static int cgroup_array_map_btf_id;
const struct bpf_map_ops cgroup_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &cgroup_array_map_btf_id,
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* Release the inner map template first; clearing the fd array
	 * then drops the references on any inner maps still stored
	 * before the array itself is freed.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

static int array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = round_up(map->value_size, 8);
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}
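
/* Same inlined lookup as array_map_gen_lookup(), with one extra step: each
 * element stores a pointer to an inner map, so the sequence ends with a
 * 64-bit load plus a NULL check, roughly:
 *
 *	if (index >= max_entries)
 *		return NULL;
 *	inner_map = *(struct bpf_map **)(value_base + index * elem_size);
 *	return inner_map;	// may be NULL if the slot is empty
 */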

static int array_of_maps_map_btf_id;
const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &array_of_maps_map_btf_id,
};