// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
	 BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
					   GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

/* Called from syscall */
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags) ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
	    attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
	    attr->map_flags & BPF_F_PRESERVE_ELEMS)
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool bypass_spec_v1 = bpf_bypass_spec_v1();
	u64 array_size, mask64;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
	 * upper most bit set in u32 space is undefined behavior due to
	 * resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
	if (!bypass_spec_v1) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}

	array_size = sizeof(*array);
	if (percpu) {
		array_size += (u64) max_entries * sizeof(void *);
	} else {
		/* rely on vmalloc() to return page-aligned memory and
		 * ensure array->value is exactly page-aligned
		 */
		if (attr->map_flags & BPF_F_MMAPABLE) {
			array_size = PAGE_ALIGN(array_size);
			array_size += PAGE_ALIGN((u64) max_entries * elem_size);
		} else {
			array_size += (u64) max_entries * elem_size;
		}
	}

	/* allocate all map elements and zero-initialize them */
	if (attr->map_flags & BPF_F_MMAPABLE) {
		void *data;

		/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
		data = bpf_map_area_mmapable_alloc(array_size, numa_node);
		if (!data)
			return ERR_PTR(-ENOMEM);
		array = data + PAGE_ALIGN(sizeof(struct bpf_array))
			- offsetof(struct bpf_array, value);
	} else {
		array = bpf_map_area_alloc(array_size, numa_node);
	}
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.bypass_spec_v1 = bypass_spec_v1;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + array->elem_size * (index & array->index_mask);
}

static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
				       u32 off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (off >= map->value_size)
		return -EINVAL;

	*imm = (unsigned long)array->value;
	return 0;
}

static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
				       u32 *off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u64 base = (unsigned long)array->value;
	u64 range = array->elem_size;

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (imm < base || imm >= base + range)
		return -ENOENT;

	*off = imm - base;
	return 0;
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = round_up(map->value_size, 8);
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	if (map->map_flags & BPF_F_INNER_MAP)
		return -EOPNOTSUPP;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * won't leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	char *val;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags & BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		return -EINVAL;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
	} else {
		val = array->value +
			array->elem_size * (index & array->index_mask);
		if (map_flags & BPF_F_LOCK)
			copy_map_value_locked(map, val, value, false);
		else
			copy_map_value(map, val, value);
	}
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

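/* For BPF_F_MMAPABLE maps, array_map_alloc() places struct bpf_array just
 * before the page-aligned value area inside the vmalloc'ed region, so
 * rounding the struct address down to the page boundary recovers the
 * address that bpf_map_area_mmapable_alloc() originally returned.
 */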
static void *array_map_vmalloc_addr(struct bpf_array *array)
{
	return (void *)round_down((unsigned long)array, PAGE_SIZE);
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	if (array->map.map_flags & BPF_F_MMAPABLE)
		bpf_map_area_free(array_map_vmalloc_addr(array));
	else
		bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	if (map->btf_key_type_id)
		seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}
411
412static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
413 struct seq_file *m)
414{
415 struct bpf_array *array = container_of(map, struct bpf_array, map);
416 u32 index = *(u32 *)key;
417 void __percpu *pptr;
418 int cpu;
419
420 rcu_read_lock();
421
422 seq_printf(m, "%u: {\n", *(u32 *)key);
423 pptr = array->pptrs[index & array->index_mask];
424 for_each_possible_cpu(cpu) {
425 seq_printf(m, "\tcpu%d: ", cpu);
426 btf_type_seq_show(map->btf, map->btf_value_type_id,
427 per_cpu_ptr(pptr, cpu), m);
428 seq_puts(m, "\n");
429 }
430 seq_puts(m, "}\n");
431
432 rcu_read_unlock();
433}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf *btf,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	u32 int_data;

	/* One exception for keyless BTF: .bss/.data/.rodata map */
	if (btf_type_is_void(key_type)) {
		if (map->map_type != BPF_MAP_TYPE_ARRAY ||
		    map->max_entries != 1)
			return -EINVAL;

		if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
			return -EINVAL;

		return 0;
	}

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	/* bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}

static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;

	if (!(map->map_flags & BPF_F_MMAPABLE))
		return -EINVAL;

	if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
	    PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
		return -EINVAL;

	return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
				   vma->vm_pgoff + pgoff);
}

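/* With BPF_F_INNER_MAP, inner maps may differ from the outer map's meta in
 * max_entries: such maps are never inlined by the verifier (see the
 * -EOPNOTSUPP in array_map_gen_lookup()), so lookups always go through the
 * helper and are bounds-checked against the actual map at runtime.
 */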
static bool array_map_meta_equal(const struct bpf_map *meta0,
				 const struct bpf_map *meta1)
{
	if (!bpf_map_meta_equal(meta0, meta1))
		return false;
	return meta0->map_flags & BPF_F_INNER_MAP ? true :
	       meta0->max_entries == meta1->max_entries;
}

struct bpf_iter_seq_array_map_info {
	struct bpf_map *map;
	void *percpu_value_buf;
	u32 index;
};

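/* percpu_value_buf is only allocated for BPF_MAP_TYPE_PERCPU_ARRAY iterators
 * (see bpf_iter_init_array_map()); seq_start()/seq_next() also use it as the
 * flag that selects between the per-cpu pointer slot and the value area.
 */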
static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	if (info->index >= map->max_entries)
		return NULL;

	if (*pos == 0)
		++*pos;
	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array->value + array->elem_size * index;
}

static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	++*pos;
	++info->index;
	if (info->index >= map->max_entries)
		return NULL;

	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array->value + array->elem_size * index;
}

static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_iter__bpf_map_elem ctx = {};
	struct bpf_map *map = info->map;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int off = 0, cpu = 0;
	void __percpu **pptr;
	u32 size;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, v == NULL);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.map = info->map;
	if (v) {
		ctx.key = &info->index;

		if (!info->percpu_value_buf) {
			ctx.value = v;
		} else {
			pptr = v;
			size = round_up(map->value_size, 8);
			for_each_possible_cpu(cpu) {
				bpf_long_memcpy(info->percpu_value_buf + off,
						per_cpu_ptr(pptr, cpu),
						size);
				off += size;
			}
			ctx.value = info->percpu_value_buf;
		}
	}

	return bpf_iter_run_prog(prog, &ctx);
}

static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_array_map_seq_show(seq, v);
}

static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__bpf_array_map_seq_show(seq, NULL);
}

static int bpf_iter_init_array_map(void *priv_data,
				   struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;
	struct bpf_map *map = aux->map;
	void *value_buf;
	u32 buf_size;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		buf_size = round_up(map->value_size, 8) * num_possible_cpus();
		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
		if (!value_buf)
			return -ENOMEM;

		seq_info->percpu_value_buf = value_buf;
	}

	seq_info->map = map;
	return 0;
}

static void bpf_iter_fini_array_map(void *priv_data)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;

	kfree(seq_info->percpu_value_buf);
}

static const struct seq_operations bpf_array_map_seq_ops = {
	.start	= bpf_array_map_seq_start,
	.next	= bpf_array_map_seq_next,
	.stop	= bpf_array_map_seq_stop,
	.show	= bpf_array_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops		= &bpf_array_map_seq_ops,
	.init_seq_private	= bpf_iter_init_array_map,
	.fini_seq_private	= bpf_iter_fini_array_map,
	.seq_priv_size		= sizeof(struct bpf_iter_seq_array_map_info),
};

static int array_map_btf_id;
const struct bpf_map_ops array_map_ops = {
	.map_meta_equal = array_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_direct_value_addr = array_map_direct_value_addr,
	.map_direct_value_meta = array_map_direct_value_meta,
	.map_mmap = array_map_mmap,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_btf_name = "bpf_array",
	.map_btf_id = &array_map_btf_id,
	.iter_seq_info = &iter_seq_info,
};

static int percpu_array_map_btf_id;
const struct bpf_map_ops percpu_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &percpu_array_map_btf_id,
	.iter_seq_info = &iter_seq_info,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	/* Program read-only/write-only not supported for special maps yet. */
	if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, new_ptr);
		map->ops->map_poke_run(map, index, old_ptr, new_ptr);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, new_ptr);
	}

	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);
	return 0;
}

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, NULL);
		map->ops->map_poke_run(map, index, old_ptr, NULL);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, NULL);
	}

	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_array_compatible(array, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	void **elem, *ptr;
	u32 prog_id;

	rcu_read_lock();

	elem = array_map_lookup_elem(map, key);
	if (elem) {
		ptr = READ_ONCE(*elem);
		if (ptr) {
			seq_printf(m, "%u: ", *(u32 *)key);
			prog_id = prog_fd_array_sys_lookup_elem(ptr);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  &prog_id, m);
			seq_puts(m, "\n");
		}
	}

	rcu_read_unlock();
}

struct prog_poke_elem {
	struct list_head list;
	struct bpf_prog_aux *aux;
};

static int prog_array_map_poke_track(struct bpf_map *map,
				     struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;
	int ret = 0;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry(elem, &aux->poke_progs, list) {
		if (elem->aux == prog_aux)
			goto out;
	}

	elem = kmalloc(sizeof(*elem), GFP_KERNEL);
	if (!elem) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&elem->list);
	/* We must track the program's aux info at this point in time
	 * since the program pointer itself may not be stable yet, see
	 * also comment in prog_array_map_poke_run().
	 */
	elem->aux = prog_aux;

	list_add_tail(&elem->list, &aux->poke_progs);
out:
	mutex_unlock(&aux->poke_mutex);
	return ret;
}

static void prog_array_map_poke_untrack(struct bpf_map *map,
					struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		if (elem->aux == prog_aux) {
			list_del_init(&elem->list);
			kfree(elem);
			break;
		}
	}
	mutex_unlock(&aux->poke_mutex);
}

static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
				    struct bpf_prog *old,
				    struct bpf_prog *new)
{
	u8 *old_addr, *new_addr, *old_bypass_addr;
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));

	list_for_each_entry(elem, &aux->poke_progs, list) {
		struct bpf_jit_poke_descriptor *poke;
		int i, ret;

		for (i = 0; i < elem->aux->size_poke_tab; i++) {
			poke = &elem->aux->poke_tab[i];

			/* Few things to be aware of:
			 *
			 * 1) We can only ever access aux in this context, but
			 *    not aux->prog since it might not be stable yet and
			 *    there could be danger of use after free otherwise.
			 * 2) Initially when we start tracking aux, the program
			 *    is not JITed yet and also does not have a kallsyms
			 *    entry. We skip these as poke->tailcall_target_stable
			 *    is not active yet. The JIT will do the final fixup
			 *    before setting it stable. The various
			 *    poke->tailcall_target_stable are successively
			 *    activated, so tail call updates can arrive from here
			 *    while JIT is still finishing its final fixup for
			 *    non-activated poke entries.
			 * 3) On program teardown, the program's kallsym entry gets
			 *    removed out of RCU callback, but we can only untrack
			 *    from sleepable context, therefore bpf_arch_text_poke()
			 *    might not see that this is in BPF text section and
			 *    bails out with -EINVAL. As these are unreachable since
			 *    RCU grace period already passed, we simply skip them.
			 * 4) Also programs reaching refcount of zero while patching
			 *    is in progress is okay since we're protected under
			 *    poke_mutex and untrack the programs before the JIT
			 *    buffer is freed. When we're still in the middle of
			 *    patching and suddenly kallsyms entry of the program
			 *    gets evicted, we just skip the rest which is fine due
			 *    to point 3).
			 * 5) Any other error happening below from bpf_arch_text_poke()
			 *    is a unexpected bug.
			 */
			if (!READ_ONCE(poke->tailcall_target_stable))
				continue;
			if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
				continue;
			if (poke->tail_call.map != map ||
			    poke->tail_call.key != key)
				continue;

			old_bypass_addr = old ? NULL : poke->bypass_addr;
			old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
			new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;

			if (new) {
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, new_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);
				if (!old) {
					ret = bpf_arch_text_poke(poke->tailcall_bypass,
								 BPF_MOD_JUMP,
								 poke->bypass_addr,
								 NULL);
					BUG_ON(ret < 0 && ret != -EINVAL);
				}
			} else {
				ret = bpf_arch_text_poke(poke->tailcall_bypass,
							 BPF_MOD_JUMP,
							 old_bypass_addr,
							 poke->bypass_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);
				/* let other CPUs finish the execution of program
				 * so that it will not possible to expose them
				 * to invalid nop, stack unwind, nop state
				 */
				if (!ret)
					synchronize_rcu();
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, NULL);
				BUG_ON(ret < 0 && ret != -EINVAL);
			}
		}
	}
}

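/* Clearing the prog array is deferred to a workqueue: prog_array_map_clear()
 * takes an extra map reference so the map stays alive until the worker has
 * dropped all program entries, then releases it via bpf_map_put().
 */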
static void prog_array_map_clear_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_array_aux,
					   work)->map;
	bpf_fd_array_map_clear(map);
	bpf_map_put(map);
}

static void prog_array_map_clear(struct bpf_map *map)
{
	struct bpf_array_aux *aux = container_of(map, struct bpf_array,
						 map)->aux;
	bpf_map_inc(map);
	schedule_work(&aux->work);
}

static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
{
	struct bpf_array_aux *aux;
	struct bpf_map *map;

	aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
	if (!aux)
		return ERR_PTR(-ENOMEM);

	INIT_WORK(&aux->work, prog_array_map_clear_deferred);
	INIT_LIST_HEAD(&aux->poke_progs);
	mutex_init(&aux->poke_mutex);

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		kfree(aux);
		return map;
	}

	container_of(map, struct bpf_array, map)->aux = aux;
	aux->map = map;

	return map;
}

static void prog_array_map_free(struct bpf_map *map)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		list_del_init(&elem->list);
		kfree(elem);
	}
	kfree(aux);
	fd_array_map_free(map);
}

/* prog_array->aux->{type,jited} is a runtime binding.
 * Doing static check alone in the verifier is not enough.
 * Thus, prog_array_map cannot be used as an inner_map
 * and map_meta_equal is not implemented.
 */
static int prog_array_map_btf_id;
const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = prog_array_map_alloc,
	.map_free = prog_array_map_free,
	.map_poke_track = prog_array_map_poke_track,
	.map_poke_untrack = prog_array_map_poke_untrack,
	.map_poke_run = prog_array_map_poke_run,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = prog_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
	.map_btf_name = "bpf_array",
	.map_btf_id = &prog_array_map_btf_id,
};

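/* A bpf_event_entry pins the perf event's struct file for the lifetime of
 * the map slot (fput() happens in __bpf_event_entry_free()); entries are
 * freed through call_rcu() so that lookups running under RCU can still
 * safely dereference a just-removed entry.
 */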
static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

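/* Runs when a map file is released: entries installed through that file are
 * dropped, unless BPF_F_PRESERVE_ELEMS is set, in which case entries outlive
 * the fd and are only cleared in perf_event_fd_array_map_free().
 */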
static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		return;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

static void perf_event_fd_array_map_free(struct bpf_map *map)
{
	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static int perf_event_array_map_btf_id;
const struct bpf_map_ops perf_event_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = perf_event_fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &perf_event_array_map_btf_id,
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put free cgrp after a rcu grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static int cgroup_array_map_btf_id;
const struct bpf_map_ops cgroup_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &cgroup_array_map_btf_id,
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* map_meta_free will release
	 * inner_map_meta.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

static int array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = round_up(map->value_size, 8);
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}

static int array_of_maps_map_btf_id;
const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_array",
	.map_btf_id = &array_of_maps_map_btf_id,
};