// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
	 BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)

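/* Release the per-CPU storage backing every element. cond_resched() keeps
 * the loop preemptible when tearing down maps with many entries.
 */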
static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = __alloc_percpu_gfp(array->elem_size, 8,
					 GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

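/* Called from syscall */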
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags) ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
	    attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
	    attr->map_flags & BPF_F_PRESERVE_ELEMS)
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}

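/* When Spectre v1 mitigation is active, max_entries is rounded up to the next
 * power of two so that masking the index with index_mask keeps even
 * speculative accesses inside the allocation. For BPF_F_MMAPABLE maps the
 * value area is placed in its own page-aligned vmalloc region so it can be
 * mapped into user space.
 */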
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int ret, numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool bypass_spec_v1 = bpf_bypass_spec_v1();
	u64 cost, array_size, mask64;
	struct bpf_map_memory mem;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
	 * upper most bit set in u32 space is undefined behavior due to
	 * resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
	if (!bypass_spec_v1) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}

	array_size = sizeof(*array);
	if (percpu) {
		array_size += (u64) max_entries * sizeof(void *);
	} else {
		/* rely on vmalloc() to return page-aligned memory and
		 * ensure array->value is exactly page-aligned
		 */
		if (attr->map_flags & BPF_F_MMAPABLE) {
			array_size = PAGE_ALIGN(array_size);
			array_size += PAGE_ALIGN((u64) max_entries * elem_size);
		} else {
			array_size += (u64) max_entries * elem_size;
		}
	}

	/* make sure there is no u32 overflow later in round_up() */
	cost = array_size;
	if (percpu)
		cost += (u64)attr->max_entries * elem_size * num_possible_cpus();

	ret = bpf_map_charge_init(&mem, cost);
	if (ret < 0)
		return ERR_PTR(ret);

	/* allocate all map elements and zero-initialize them */
	if (attr->map_flags & BPF_F_MMAPABLE) {
		void *data;

		/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
		data = bpf_map_area_mmapable_alloc(array_size, numa_node);
		if (!data) {
			bpf_map_charge_finish(&mem);
			return ERR_PTR(-ENOMEM);
		}
		array = data + PAGE_ALIGN(sizeof(struct bpf_array))
			- offsetof(struct bpf_array, value);
	} else {
		array = bpf_map_area_alloc(array_size, numa_node);
	}
	if (!array) {
		bpf_map_charge_finish(&mem);
		return ERR_PTR(-ENOMEM);
	}
	array->index_mask = index_mask;
	array->map.bypass_spec_v1 = bypass_spec_v1;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	bpf_map_charge_move(&array->map.memory, &mem);
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_charge_finish(&array->map.memory);
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}

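/* Called from syscall or from eBPF program */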
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + array->elem_size * (index & array->index_mask);
}

static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
				       u32 off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (off >= map->value_size)
		return -EINVAL;

	*imm = (unsigned long)array->value;
	return 0;
}

static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
				       u32 *off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u64 base = (unsigned long)array->value;
	u64 range = array->elem_size;

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (imm < base || imm >= base + range)
		return -ENOENT;

	*off = imm - base;
	return 0;
}

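/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */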
static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = round_up(map->value_size, 8);
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	if (map->map_flags & BPF_F_INNER_MAP)
		return -EOPNOTSUPP;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}

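/* Called from eBPF program */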
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * won't leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

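/* Called from syscall */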
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}

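/* Called from syscall or from eBPF program */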
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	char *val;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags & BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		return -EINVAL;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
	} else {
		val = array->value +
			array->elem_size * (index & array->index_mask);
		if (map_flags & BPF_F_LOCK)
			copy_map_value_locked(map, val, value, false);
		else
			copy_map_value(map, val, value);
	}
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

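/* Called from syscall or from eBPF program */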
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

static void *array_map_vmalloc_addr(struct bpf_array *array)
{
	return (void *)round_down((unsigned long)array, PAGE_SIZE);
}

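/* Called when map->refcnt goes to zero, either from workqueue or from syscall */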
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	if (array->map.map_flags & BPF_F_MMAPABLE)
		bpf_map_area_free(array_map_vmalloc_addr(array));
	else
		bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	if (map->btf_key_type_id)
		seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
					   struct seq_file *m)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	seq_printf(m, "%u: {\n", *(u32 *)key);
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_puts(m, "\n");
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf *btf,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	u32 int_data;

	/* One exception for keyless BTF: .bss/.data/.rodata map */
	if (btf_type_is_void(key_type)) {
		if (map->map_type != BPF_MAP_TYPE_ARRAY ||
		    map->max_entries != 1)
			return -EINVAL;

		if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
			return -EINVAL;

		return 0;
	}

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	/* bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}

static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;

	if (!(map->map_flags & BPF_F_MMAPABLE))
		return -EINVAL;

	if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
	    PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
		return -EINVAL;

	return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
				   vma->vm_pgoff + pgoff);
}

static bool array_map_meta_equal(const struct bpf_map *meta0,
				 const struct bpf_map *meta1)
{
	if (!bpf_map_meta_equal(meta0, meta1))
		return false;
	return meta0->map_flags & BPF_F_INNER_MAP ? true :
	       meta0->max_entries == meta1->max_entries;
}

struct bpf_iter_seq_array_map_info {
	struct bpf_map *map;
	void *percpu_value_buf;
	u32 index;
};

static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	if (info->index >= map->max_entries)
		return NULL;

	if (*pos == 0)
		++*pos;
	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array->value + array->elem_size * index;
}

static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	++*pos;
	++info->index;
	if (info->index >= map->max_entries)
		return NULL;

	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array->value + array->elem_size * index;
}

static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_iter__bpf_map_elem ctx = {};
	struct bpf_map *map = info->map;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int off = 0, cpu = 0;
	void __percpu **pptr;
	u32 size;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, v == NULL);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.map = info->map;
	if (v) {
		ctx.key = &info->index;

		if (!info->percpu_value_buf) {
			ctx.value = v;
		} else {
			pptr = v;
			size = round_up(map->value_size, 8);
			for_each_possible_cpu(cpu) {
				bpf_long_memcpy(info->percpu_value_buf + off,
						per_cpu_ptr(pptr, cpu),
						size);
				off += size;
			}
			ctx.value = info->percpu_value_buf;
		}
	}

	return bpf_iter_run_prog(prog, &ctx);
}

static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_array_map_seq_show(seq, v);
}

static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__bpf_array_map_seq_show(seq, NULL);
}

static int bpf_iter_init_array_map(void *priv_data,
				   struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;
	struct bpf_map *map = aux->map;
	void *value_buf;
	u32 buf_size;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		buf_size = round_up(map->value_size, 8) * num_possible_cpus();
		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
		if (!value_buf)
			return -ENOMEM;

		seq_info->percpu_value_buf = value_buf;
	}

	seq_info->map = map;
	return 0;
}

static void bpf_iter_fini_array_map(void *priv_data)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;

	kfree(seq_info->percpu_value_buf);
}

static const struct seq_operations bpf_array_map_seq_ops = {
	.start = bpf_array_map_seq_start,
	.next = bpf_array_map_seq_next,
	.stop = bpf_array_map_seq_stop,
	.show = bpf_array_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_array_map_seq_ops,
	.init_seq_private = bpf_iter_init_array_map,
	.fini_seq_private = bpf_iter_fini_array_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};

static int array_map_btf_id;
const struct bpf_map_ops array_map_ops = {
	.map_meta_equal = array_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_direct_value_addr = array_map_direct_value_addr,
	.map_direct_value_meta = array_map_direct_value_meta,
	.map_mmap = array_map_mmap,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_btf_name = "bpf_array",
	.map_btf_id = &array_map_btf_id,
	.iter_seq_info = &iter_seq_info,
};

static int percpu_array_map_btf_id;
const struct bpf_map_ops percpu_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &percpu_array_map_btf_id,
	.iter_seq_info = &iter_seq_info,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	/* Program read-only/write-only not supported for special maps yet. */
	if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

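/* only called from syscall */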
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

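/* only called from syscall */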
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, new_ptr);
		map->ops->map_poke_run(map, index, old_ptr, new_ptr);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, new_ptr);
	}

	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);
	return 0;
}

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, NULL);
		map->ops->map_poke_run(map, index, old_ptr, NULL);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, NULL);
	}

	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_array_compatible(array, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

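/* decrement refcnt of all bpf_progs that are stored in this map */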
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	void **elem, *ptr;
	u32 prog_id;

	rcu_read_lock();

	elem = array_map_lookup_elem(map, key);
	if (elem) {
		ptr = READ_ONCE(*elem);
		if (ptr) {
			seq_printf(m, "%u: ", *(u32 *)key);
			prog_id = prog_fd_array_sys_lookup_elem(ptr);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  &prog_id, m);
			seq_puts(m, "\n");
		}
	}

	rcu_read_unlock();
}

struct prog_poke_elem {
	struct list_head list;
	struct bpf_prog_aux *aux;
};

static int prog_array_map_poke_track(struct bpf_map *map,
				     struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;
	int ret = 0;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry(elem, &aux->poke_progs, list) {
		if (elem->aux == prog_aux)
			goto out;
	}

	elem = kmalloc(sizeof(*elem), GFP_KERNEL);
	if (!elem) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&elem->list);
	/* We must track the program's aux info at this point in time
	 * since the program pointer itself may not be stable yet, see
	 * also comment in prog_array_map_poke_run().
	 */
	elem->aux = prog_aux;

	list_add_tail(&elem->list, &aux->poke_progs);
out:
	mutex_unlock(&aux->poke_mutex);
	return ret;
}

static void prog_array_map_poke_untrack(struct bpf_map *map,
					struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		if (elem->aux == prog_aux) {
			list_del_init(&elem->list);
			kfree(elem);
			break;
		}
	}
	mutex_unlock(&aux->poke_mutex);
}

static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
				    struct bpf_prog *old,
				    struct bpf_prog *new)
{
	u8 *old_addr, *new_addr, *old_bypass_addr;
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));

	list_for_each_entry(elem, &aux->poke_progs, list) {
		struct bpf_jit_poke_descriptor *poke;
		int i, ret;

		for (i = 0; i < elem->aux->size_poke_tab; i++) {
			poke = &elem->aux->poke_tab[i];

			/* Notes on the checks below:
			 *
			 * 1) Only elem->aux may be accessed in this context;
			 *    the owning program pointer might not be stable
			 *    yet and must not be dereferenced.
			 * 2) Entries whose tailcall_target_stable flag is not
			 *    yet set are skipped: the JIT has not finished its
			 *    final fixup for them and will patch in the
			 *    current target itself once it marks them stable.
			 * 3) During program teardown bpf_arch_text_poke() may
			 *    no longer find the text section and fail with
			 *    -EINVAL, which is why -EINVAL is tolerated in the
			 *    BUG_ON() checks below.
			 */
			if (!READ_ONCE(poke->tailcall_target_stable))
				continue;
			if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
				continue;
			if (poke->tail_call.map != map ||
			    poke->tail_call.key != key)
				continue;

			old_bypass_addr = old ? NULL : poke->bypass_addr;
			old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
			new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;

			if (new) {
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, new_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);
				if (!old) {
					ret = bpf_arch_text_poke(poke->tailcall_bypass,
								 BPF_MOD_JUMP,
								 poke->bypass_addr,
								 NULL);
					BUG_ON(ret < 0 && ret != -EINVAL);
				}
			} else {
				ret = bpf_arch_text_poke(poke->tailcall_bypass,
							 BPF_MOD_JUMP,
							 old_bypass_addr,
							 poke->bypass_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);
				/* let other CPUs finish the execution of program
				 * so that it will not possible to expose them
				 * to invalid nop, stack unwind, nop state
				 */
				if (!ret)
					synchronize_rcu();
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, NULL);
				BUG_ON(ret < 0 && ret != -EINVAL);
			}
		}
	}
}

static void prog_array_map_clear_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_array_aux,
					   work)->map;
	bpf_fd_array_map_clear(map);
	bpf_map_put(map);
}

static void prog_array_map_clear(struct bpf_map *map)
{
	struct bpf_array_aux *aux = container_of(map, struct bpf_array,
						 map)->aux;
	bpf_map_inc(map);
	schedule_work(&aux->work);
}

static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
{
	struct bpf_array_aux *aux;
	struct bpf_map *map;

	aux = kzalloc(sizeof(*aux), GFP_KERNEL);
	if (!aux)
		return ERR_PTR(-ENOMEM);

	INIT_WORK(&aux->work, prog_array_map_clear_deferred);
	INIT_LIST_HEAD(&aux->poke_progs);
	mutex_init(&aux->poke_mutex);

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		kfree(aux);
		return map;
	}

	container_of(map, struct bpf_array, map)->aux = aux;
	aux->map = map;

	return map;
}

static void prog_array_map_free(struct bpf_map *map)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		list_del_init(&elem->list);
		kfree(elem);
	}
	kfree(aux);
	fd_array_map_free(map);
}

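/* prog_array->aux->{type,jited} is a runtime binding.
 * Doing static check alone in the verifier is not enough.
 * Thus, prog_array_map cannot be used as an inner_map
 * and map_meta_equal is not implemented.
 */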
static int prog_array_map_btf_id;
const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = prog_array_map_alloc,
	.map_free = prog_array_map_free,
	.map_poke_track = prog_array_map_poke_track,
	.map_poke_untrack = prog_array_map_poke_untrack,
	.map_poke_run = prog_array_map_poke_run,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = prog_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
	.map_btf_name = "bpf_array",
	.map_btf_id = &prog_array_map_btf_id,
};

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		return;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

static void perf_event_fd_array_map_free(struct bpf_map *map)
{
	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static int perf_event_array_map_btf_id;
const struct bpf_map_ops perf_event_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = perf_event_fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &perf_event_array_map_btf_id,
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put free cgrp after a rcu grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static int cgroup_array_map_btf_id;
const struct bpf_map_ops cgroup_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &cgroup_array_map_btf_id,
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* map->inner_map_meta is only accessed by syscall which
	 * is protected by fdget/fdput.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

static int array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = round_up(map->value_size, 8);
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}

static int array_of_maps_map_btf_id;
const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &array_of_maps_map_btf_id,
};