1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <stdio.h>
15#include <stdlib.h>
16#include <unistd.h>
17#include <string.h>
18#include <stdbool.h>
19#include <stdint.h>
20#include <errno.h>
21#include <fcntl.h>
22#include <stdarg.h>
23#include <limits.h>
24#include <assert.h>
25
26#ifdef HAVE_ELF
27#include <libelf.h>
28#include <gelf.h>
29#endif
30
31#include <sys/types.h>
32#include <sys/stat.h>
33#include <sys/un.h>
34#include <sys/vfs.h>
35#include <sys/mount.h>
36#include <sys/syscall.h>
37#include <sys/sendfile.h>
38#include <sys/resource.h>
39
40#include <arpa/inet.h>
41
42#include "utils.h"
43#include "json_print.h"
44
45#include "bpf_util.h"
46#include "bpf_elf.h"
47#include "bpf_scm.h"
48
/* Per-program-type metadata table entry:
 *  - type:           textual name accepted on the command line ("cls", ...)
 *  - subdir:         pinning subdirectory under the bpf fs working dir
 *  - section:        default ELF section to load when none is given
 *  - may_uds_export: whether "export"/BPF_ENV_UDS is honored for this type
 */
struct bpf_prog_meta {
	const char *type;
	const char *subdir;
	const char *section;
	bool may_uds_export;
};
55
/* Program types that get a pinning directory under the bpf fs mount.
 * The first entry becomes the "master" directory; the remaining ones
 * are created as slaves linking to it (see bpf_gen_hierarchy()).
 */
static const enum bpf_prog_type __bpf_types[] = {
	BPF_PROG_TYPE_SCHED_CLS,
	BPF_PROG_TYPE_SCHED_ACT,
	BPF_PROG_TYPE_XDP,
	BPF_PROG_TYPE_LWT_IN,
	BPF_PROG_TYPE_LWT_OUT,
	BPF_PROG_TYPE_LWT_XMIT,
};
64
/* Metadata indexed directly by enum bpf_prog_type. Entries without a
 * .type are considered unsupported by the command-line "type" parser.
 * Only tc programs (cls/act) may be exported over a Unix domain socket.
 */
static const struct bpf_prog_meta __bpf_prog_meta[] = {
	[BPF_PROG_TYPE_SCHED_CLS] = {
		.type = "cls",
		.subdir = "tc",
		.section = ELF_SECTION_CLASSIFIER,
		.may_uds_export = true,
	},
	[BPF_PROG_TYPE_SCHED_ACT] = {
		.type = "act",
		.subdir = "tc",
		.section = ELF_SECTION_ACTION,
		.may_uds_export = true,
	},
	[BPF_PROG_TYPE_XDP] = {
		.type = "xdp",
		.subdir = "xdp",
		.section = ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_IN] = {
		.type = "lwt_in",
		.subdir = "ip",
		.section = ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_OUT] = {
		.type = "lwt_out",
		.subdir = "ip",
		.section = ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_XMIT] = {
		.type = "lwt_xmit",
		.subdir = "ip",
		.section = ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_SEG6LOCAL] = {
		.type = "lwt_seg6local",
		.subdir = "ip",
		.section = ELF_SECTION_PROG,
	},
};
104
105static const char *bpf_prog_to_subdir(enum bpf_prog_type type)
106{
107 assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
108 __bpf_prog_meta[type].subdir);
109 return __bpf_prog_meta[type].subdir;
110}
111
112const char *bpf_prog_to_default_section(enum bpf_prog_type type)
113{
114 assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
115 __bpf_prog_meta[type].section);
116 return __bpf_prog_meta[type].section;
117}
118
#ifdef HAVE_ELF
/* Implemented further below in the libelf-based loader. */
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, __u32 ifindex, bool verbose);
#else
/* Stub for builds without libelf: eBPF object files cannot be loaded,
 * so report ENOSYS.
 */
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, __u32 ifindex, bool verbose)
{
	fprintf(stderr, "No ELF library support compiled in.\n");
	errno = ENOSYS;
	return -1;
}
#endif
131
/* Pack a host pointer into the 64-bit field format used by union
 * bpf_attr (pointer -> unsigned long -> __u64, avoiding sign extension).
 */
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	unsigned long val = (unsigned long)ptr;

	return (__u64)val;
}
136
/* Thin wrapper around the bpf(2) syscall. Falls back to an ENOSYS error
 * when the kernel headers used at build time did not define __NR_bpf.
 */
static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
#ifdef __NR_bpf
	return syscall(__NR_bpf, cmd, attr, size);
#else
	fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
	errno = ENOSYS;
	return -1;
#endif
}
147
148static int bpf_map_update(int fd, const void *key, const void *value,
149 uint64_t flags)
150{
151 union bpf_attr attr = {};
152
153 attr.map_fd = fd;
154 attr.key = bpf_ptr_to_u64(key);
155 attr.value = bpf_ptr_to_u64(value);
156 attr.flags = flags;
157
158 return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
159}
160
161static int bpf_prog_fd_by_id(uint32_t id)
162{
163 union bpf_attr attr = {};
164
165 attr.prog_id = id;
166
167 return bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
168}
169
170static int bpf_prog_info_by_fd(int fd, struct bpf_prog_info *info,
171 uint32_t *info_len)
172{
173 union bpf_attr attr = {};
174 int ret;
175
176 attr.info.bpf_fd = fd;
177 attr.info.info = bpf_ptr_to_u64(info);
178 attr.info.info_len = *info_len;
179
180 *info_len = 0;
181 ret = bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
182 if (!ret)
183 *info_len = attr.info.info_len;
184
185 return ret;
186}
187
/* Dump id/name/tag/jited (plus load_time, created_by_uid and btf_id when
 * show_details is set) for the program with kernel id @id, in both plain
 * text and JSON form. Returns 1 if the full info could be fetched and
 * printed, 0 if only the id was emitted (e.g. fd lookup or info query
 * failed).
 */
int bpf_dump_prog_info(FILE *f, uint32_t id)
{
	struct bpf_prog_info info = {};
	uint32_t len = sizeof(info);
	int fd, ret, dump_ok = 0;
	SPRINT_BUF(tmp);

	open_json_object("prog");
	print_uint(PRINT_ANY, "id", "id %u ", id);

	fd = bpf_prog_fd_by_id(id);
	if (fd < 0)
		goto out;

	ret = bpf_prog_info_by_fd(fd, &info, &len);
	if (!ret && len) {
		/* Nonzero jited_prog_len implies a JIT-compiled image. */
		int jited = !!info.jited_prog_len;

		print_string(PRINT_ANY, "name", "name %s ", info.name);
		print_string(PRINT_ANY, "tag", "tag %s ",
			     hexstring_n2a(info.tag, sizeof(info.tag),
					   tmp, sizeof(tmp)));
		/* In plain mode "jited" is printed as a bare flag. */
		print_uint(PRINT_JSON, "jited", NULL, jited);
		if (jited && !is_json_context())
			fprintf(f, "jited ");

		if (show_details) {
			/* load_time == 0 means the kernel did not report
			 * these fields; skip both in that case.
			 */
			if (info.load_time) {

				print_lluint(PRINT_ANY, "load_time",
					     "load_time %llu ", info.load_time);

				print_luint(PRINT_ANY, "created_by_uid",
					    "created_by_uid %lu ",
					    info.created_by_uid);
			}

			if (info.btf_id)
				print_luint(PRINT_ANY, "btf_id", "btf_id %lu ",
					    info.btf_id);
		}

		dump_ok = 1;
	}

	close(fd);
out:
	close_json_object();
	return dump_ok;
}
238
/* Obtain a cBPF program string either directly from @arg or, when
 * @from_file is set, from the file named by @arg (newlines are turned
 * into ',' separators, runs of blanks collapsed). On success *bpf_string
 * points at a "<len><separator><insn>,..." string, *bpf_len holds the
 * decoded length prefix, and *need_release tells the caller whether
 * *bpf_string was allocated here and must be freed.
 *
 * Returns 0 on success, or -ENOMEM/-ENOENT/-E2BIG/-EINVAL.
 */
static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
			    char **bpf_string, bool *need_release,
			    const char separator)
{
	char sp;

	if (from_file) {
		size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
		char *tmp_string, *pos, c_prev = ' ';
		FILE *fp;
		int c;

		tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
		/* Allocate one extra byte: when the file exactly fills
		 * tmp_len bytes and then hits EOF, the NUL written via
		 * *pos below lands at index tmp_len and must stay in
		 * bounds (the original tmp_len-sized buffer overflowed
		 * by one byte in that case).
		 */
		tmp_string = pos = calloc(1, tmp_len + 1);
		if (tmp_string == NULL)
			return -ENOMEM;

		fp = fopen(arg, "r");
		if (fp == NULL) {
			perror("Cannot fopen");
			free(tmp_string);
			return -ENOENT;
		}

		/* Normalize: '\n' becomes the instruction separator ','
		 * (deduplicated), spaces/tabs collapse to single spaces.
		 */
		while ((c = fgetc(fp)) != EOF) {
			switch (c) {
			case '\n':
				if (c_prev != ',')
					*(pos++) = ',';
				c_prev = ',';
				break;
			case ' ':
			case '\t':
				if (c_prev != ' ')
					*(pos++) = c;
				c_prev = ' ';
				break;
			default:
				*(pos++) = c;
				c_prev = c;
			}
			if (pos - tmp_string == tmp_len)
				break;
		}

		/* Buffer filled before EOF: input program too large. */
		if (!feof(fp)) {
			free(tmp_string);
			fclose(fp);
			return -E2BIG;
		}

		fclose(fp);
		*pos = 0;

		*need_release = true;
		*bpf_string = tmp_string;
	} else {
		*need_release = false;
		*bpf_string = arg;
	}

	/* The string must begin with "<len><separator>". */
	if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 ||
	    sp != separator) {
		if (*need_release)
			free(*bpf_string);
		return -EINVAL;
	}

	return 0;
}
309
/* Parse classic BPF opcodes from argv[0] (inline, or from a file when
 * @from_file is set) into the caller-provided @bpf_ops array. The input
 * has the form "<len>,<code> <jt> <jf> <k>,<code> <jt> <jf> <k>,...".
 * Returns the number of parsed instructions (== <len>) on success,
 * negative error on malformed input or length mismatch.
 */
static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
			 bool from_file)
{
	char *bpf_string, *token, separator = ',';
	int ret = 0, i = 0;
	bool need_release;
	__u16 bpf_len = 0;

	if (argc < 1)
		return -EINVAL;
	if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
			     &need_release, separator))
		return -EINVAL;
	if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
		ret = -EINVAL;
		goto out;
	}

	/* Each ',' marks the start of the next instruction; the loop
	 * stops at a trailing separator with nothing after it.
	 */
	token = bpf_string;
	while ((token = strchr(token, separator)) && (++token)[0]) {
		if (i >= bpf_len) {
			fprintf(stderr, "Real program length exceeds encoded length parameter!\n");
			ret = -EINVAL;
			goto out;
		}

		if (sscanf(token, "%hu %hhu %hhu %u,",
			   &bpf_ops[i].code, &bpf_ops[i].jt,
			   &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
			fprintf(stderr, "Error at instruction %d!\n", i);
			ret = -EINVAL;
			goto out;
		}

		i++;
	}

	/* The encoded length prefix must match the actual count. */
	if (i != bpf_len) {
		fprintf(stderr, "Parsed program length is less than encoded length parameter!\n");
		ret = -EINVAL;
		goto out;
	}
	ret = bpf_len;
out:
	if (need_release)
		free(bpf_string);

	return ret;
}
359
/* Print @len classic BPF instructions from the netlink attribute
 * @bpf_ops. Plain output mimics the input syntax
 * ("bytecode '<len>,<code> <jt> <jf> <k>,...'"); JSON output emits an
 * "insns" array of objects.
 */
void bpf_print_ops(struct rtattr *bpf_ops, __u16 len)
{
	struct sock_filter *ops = RTA_DATA(bpf_ops);
	int i;

	if (len == 0)
		return;

	open_json_object("bytecode");
	print_uint(PRINT_ANY, "length", "bytecode \'%u,", len);
	open_json_array(PRINT_JSON, "insns");

	for (i = 0; i < len; i++) {
		open_json_object(NULL);
		print_hu(PRINT_ANY, "code", "%hu ", ops[i].code);
		print_hhu(PRINT_ANY, "jt", "%hhu ", ops[i].jt);
		print_hhu(PRINT_ANY, "jf", "%hhu ", ops[i].jf);
		/* Last instruction closes the quoted plain-text string. */
		if (i == len - 1)
			print_uint(PRINT_ANY, "k", "%u\'", ops[i].k);
		else
			print_uint(PRINT_ANY, "k", "%u,", ops[i].k);
		close_json_object();
	}

	close_json_array(PRINT_JSON, NULL);
	close_json_object();
}
387
388static void bpf_map_pin_report(const struct bpf_elf_map *pin,
389 const struct bpf_elf_map *obj)
390{
391 fprintf(stderr, "Map specification differs from pinned file!\n");
392
393 if (obj->type != pin->type)
394 fprintf(stderr, " - Type: %u (obj) != %u (pin)\n",
395 obj->type, pin->type);
396 if (obj->size_key != pin->size_key)
397 fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n",
398 obj->size_key, pin->size_key);
399 if (obj->size_value != pin->size_value)
400 fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n",
401 obj->size_value, pin->size_value);
402 if (obj->max_elem != pin->max_elem)
403 fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n",
404 obj->max_elem, pin->max_elem);
405 if (obj->flags != pin->flags)
406 fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n",
407 obj->flags, pin->flags);
408
409 fprintf(stderr, "\n");
410}
411
/* Owner program properties as reported via /proc fdinfo for a map. */
struct bpf_prog_data {
	unsigned int type;
	unsigned int jited;
};

/* Extra map attributes not covered by struct bpf_elf_map: owning
 * program data plus the BTF type ids for key and value.
 */
struct bpf_map_ext {
	struct bpf_prog_data owner;
	unsigned int btf_id_key;
	unsigned int btf_id_val;
};
422
/* Reconstruct a map specification for an already-open map @fd by
 * scraping /proc/<pid>/fdinfo/<fd>. Fields missing from fdinfo stay
 * zero. When @ext is non-NULL it receives the owner program type and
 * jited flag (zero if not present). Returns 0 on success, -EIO if the
 * fdinfo file cannot be opened.
 */
static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map,
					  struct bpf_map_ext *ext)
{
	unsigned int val, owner_type = 0, owner_jited = 0;
	char file[PATH_MAX], buff[4096];
	FILE *fp;

	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
	memset(map, 0, sizeof(*map));

	fp = fopen(file, "r");
	if (!fp) {
		fprintf(stderr, "No procfs support?!\n");
		return -EIO;
	}

	/* One key per line; each sscanf matches at most one of them. */
	while (fgets(buff, sizeof(buff), fp)) {
		if (sscanf(buff, "map_type:\t%u", &val) == 1)
			map->type = val;
		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
			map->size_key = val;
		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
			map->size_value = val;
		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
			map->max_elem = val;
		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
			map->flags = val;
		else if (sscanf(buff, "owner_prog_type:\t%i", &val) == 1)
			owner_type = val;
		else if (sscanf(buff, "owner_jited:\t%i", &val) == 1)
			owner_jited = val;
	}

	fclose(fp);
	if (ext) {
		memset(ext, 0, sizeof(*ext));
		ext->owner.type = owner_type;
		ext->owner.jited = owner_jited;
	}

	return 0;
}
465
/* Verify that the pinned map behind @fd matches the object's expected
 * specification @map over the first @length bytes of struct
 * bpf_elf_map. @ext is filled from fdinfo as a side effect. Returns 0
 * when the specs match (or cannot be compared), -EINVAL on mismatch.
 */
static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
				    struct bpf_map_ext *ext, int length,
				    enum bpf_prog_type type)
{
	struct bpf_elf_map tmp, zero = {};
	int ret;

	ret = bpf_derive_elf_map_from_fdinfo(fd, &tmp, ext);
	if (ret < 0)
		return ret;

	/* A differing owner program type is reported but not treated as
	 * fatal here; the kernel rejects incompatible tail calls itself.
	 */
	if (ext->owner.type && ext->owner.type != type)
		fprintf(stderr, "Program array map owner types differ: %u (obj) != %u (pin)\n",
			type, ext->owner.type);

	if (!memcmp(&tmp, map, length)) {
		return 0;
	} else {
		/* An all-zero spec means fdinfo had none of the map
		 * fields (old kernel); nothing to compare against, so
		 * accept the pinned map as-is.
		 */
		if (!memcmp(&tmp, &zero, length))
			return 0;

		bpf_map_pin_report(&tmp, map);
		return -EINVAL;
	}
}
499
/* Mount a bpf filesystem instance at @target. The mount point is first
 * made private (self-bind-mounting once if needed so that a non-mount-
 * point directory can be marked private), then "bpf" is mounted with
 * mode 0700. Returns 0 on success, -1 with a message on failure.
 */
static int bpf_mnt_fs(const char *target)
{
	bool bind_done = false;

	while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
		/* EINVAL means @target is not yet a mount point: bind it
		 * onto itself once, then retry making it private.
		 */
		if (errno != EINVAL || bind_done) {
			fprintf(stderr, "mount --make-private %s failed: %s\n",
				target, strerror(errno));
			return -1;
		}

		if (mount(target, target, "none", MS_BIND, NULL)) {
			fprintf(stderr, "mount --bind %s %s failed: %s\n",
				target, target, strerror(errno));
			return -1;
		}

		bind_done = true;
	}

	if (mount("bpf", target, "bpf", 0, "mode=0700")) {
		fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
			target, strerror(errno));
		return -1;
	}

	return 0;
}
528
/* Ensure the mount target directory exists. Returns 0 when it was
 * created or already present, otherwise mkdir()'s error result after
 * printing a diagnostic.
 */
static int bpf_mnt_check_target(const char *target)
{
	int err = mkdir(target, S_IRWXU);

	if (!err || errno == EEXIST)
		return 0;

	fprintf(stderr, "mkdir %s failed: %s\n", target, strerror(errno));
	return err;
}
543
/* Check whether @mnt is a mounted filesystem whose statfs f_type equals
 * @magic. Returns 0 on a match, -ENOENT otherwise.
 */
static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
{
	struct statfs fs;

	if (statfs(mnt, &fs) < 0 ||
	    (unsigned long)fs.f_type != magic)
		return -ENOENT;

	return 0;
}
555
556static const char *bpf_find_mntpt_single(unsigned long magic, char *mnt,
557 int len, const char *mntpt)
558{
559 int ret;
560
561 ret = bpf_valid_mntpt(mntpt, magic);
562 if (!ret) {
563 strlcpy(mnt, mntpt, len);
564 return mnt;
565 }
566
567 return NULL;
568}
569
570static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
571 char *mnt, int len,
572 const char * const *known_mnts)
573{
574 const char * const *ptr;
575 char type[100];
576 FILE *fp;
577
578 if (known_mnts) {
579 ptr = known_mnts;
580 while (*ptr) {
581 if (bpf_find_mntpt_single(magic, mnt, len, *ptr))
582 return mnt;
583 ptr++;
584 }
585 }
586
587 if (len != PATH_MAX)
588 return NULL;
589
590 fp = fopen("/proc/mounts", "r");
591 if (fp == NULL)
592 return NULL;
593
594 while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
595 mnt, type) == 2) {
596 if (strcmp(type, fstype) == 0)
597 break;
598 }
599
600 fclose(fp);
601 if (strcmp(type, fstype) != 0)
602 return NULL;
603
604 return mnt;
605}
606
/* Stream the kernel's trace_pipe (bpf_trace_printk() output) to stderr
 * until read/write fails or the user interrupts. Locates tracefs via a
 * list of well-known mount points. Always returns -1 (the loop only
 * terminates on error or EOF).
 */
int bpf_trace_pipe(void)
{
	char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
	static const char * const tracefs_known_mnts[] = {
		TRACE_DIR_MNT,
		"/sys/kernel/debug/tracing",
		"/tracing",
		"/trace",
		0,
	};
	int fd_in, fd_out = STDERR_FILENO;
	char tpipe[PATH_MAX];
	const char *mnt;

	mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
			     sizeof(tracefs_mnt), tracefs_known_mnts);
	if (!mnt) {
		fprintf(stderr, "tracefs not mounted?\n");
		return -1;
	}

	snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);

	fd_in = open(tpipe, O_RDONLY);
	if (fd_in < 0)
		return -1;

	fprintf(stderr, "Running! Hang up with ^C!\n\n");
	/* Copy loop: exits on read error/EOF or short write. */
	while (1) {
		static char buff[4096];
		ssize_t ret;

		ret = read(fd_in, buff, sizeof(buff));
		if (ret > 0 && write(fd_out, buff, ret) == ret)
			continue;
		break;
	}

	close(fd_in);
	return -1;
}
648
649static int bpf_gen_global(const char *bpf_sub_dir)
650{
651 char bpf_glo_dir[PATH_MAX];
652 int ret;
653
654 snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s/",
655 bpf_sub_dir, BPF_DIR_GLOBALS);
656
657 ret = mkdir(bpf_glo_dir, S_IRWXU);
658 if (ret && errno != EEXIST) {
659 fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir,
660 strerror(errno));
661 return ret;
662 }
663
664 return 0;
665}
666
667static int bpf_gen_master(const char *base, const char *name)
668{
669 char bpf_sub_dir[PATH_MAX + NAME_MAX + 1];
670 int ret;
671
672 snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s/", base, name);
673
674 ret = mkdir(bpf_sub_dir, S_IRWXU);
675 if (ret && errno != EEXIST) {
676 fprintf(stderr, "mkdir %s failed: %s\n", bpf_sub_dir,
677 strerror(errno));
678 return ret;
679 }
680
681 return bpf_gen_global(bpf_sub_dir);
682}
683
/* Fallback used when symlink() is not permitted: create the slave
 * directory and bind-mount the master directory onto it. The directory
 * is removed again if the bind mount fails.
 */
static int bpf_slave_via_bind_mnt(const char *full_name,
				  const char *full_link)
{
	int err = mkdir(full_name, S_IRWXU);

	if (err) {
		/* Caller only takes this path for a non-existing dir. */
		assert(errno != EEXIST);
		fprintf(stderr, "mkdir %s failed: %s\n", full_name,
			strerror(errno));
		return err;
	}

	err = mount(full_link, full_name, "none", MS_BIND, NULL);
	if (err) {
		rmdir(full_name);
		fprintf(stderr, "mount --bind %s %s failed: %s\n",
			full_link, full_name, strerror(errno));
	}

	return err;
}
706
/* Create <base><name> as a slave of <base><link>/: preferably a symlink
 * to the master directory, falling back to a bind mount when symlinks
 * are not permitted (EPERM). If something already exists at the target,
 * accept a symlink as-is; for a real directory, (re)create the globals
 * subdirectory inside it.
 */
static int bpf_gen_slave(const char *base, const char *name,
			 const char *link)
{
	char bpf_lnk_dir[PATH_MAX + NAME_MAX + 1];
	char bpf_sub_dir[PATH_MAX + NAME_MAX];
	struct stat sb = {};
	int ret;

	snprintf(bpf_lnk_dir, sizeof(bpf_lnk_dir), "%s%s/", base, link);
	snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s", base, name);

	ret = symlink(bpf_lnk_dir, bpf_sub_dir);
	if (ret) {
		if (errno != EEXIST) {
			/* Any failure other than EPERM is fatal. */
			if (errno != EPERM) {
				fprintf(stderr, "symlink %s failed: %s\n",
					bpf_sub_dir, strerror(errno));
				return ret;
			}

			/* Symlinks forbidden: bind-mount instead. */
			return bpf_slave_via_bind_mnt(bpf_sub_dir,
						      bpf_lnk_dir);
		}

		/* Target exists: inspect what it actually is. */
		ret = lstat(bpf_sub_dir, &sb);
		if (ret) {
			fprintf(stderr, "lstat %s failed: %s\n",
				bpf_sub_dir, strerror(errno));
			return ret;
		}

		/* A plain directory (e.g. earlier bind-mount setup):
		 * make sure the globals subdir exists inside it.
		 */
		if ((sb.st_mode & S_IFMT) != S_IFLNK)
			return bpf_gen_global(bpf_sub_dir);
	}

	return 0;
}
744
745static int bpf_gen_hierarchy(const char *base)
746{
747 int ret, i;
748
749 ret = bpf_gen_master(base, bpf_prog_to_subdir(__bpf_types[0]));
750 for (i = 1; i < ARRAY_SIZE(__bpf_types) && !ret; i++)
751 ret = bpf_gen_slave(base,
752 bpf_prog_to_subdir(__bpf_types[i]),
753 bpf_prog_to_subdir(__bpf_types[0]));
754 return ret;
755}
756
/* Return the bpf fs working directory, mounting the fs and generating
 * the pinning hierarchy on first use; the result is cached for the
 * lifetime of the process. With a nonzero @type, the returned path has
 * the type's subdirectory appended (in a static buffer that is
 * overwritten by the next call). BPF_ENV_MNT overrides the probed mount
 * locations. Returns NULL if no mount could be found or created.
 */
static const char *bpf_get_work_dir(enum bpf_prog_type type)
{
	static char bpf_tmp[PATH_MAX] = BPF_DIR_MNT;
	static char bpf_wrk_dir[PATH_MAX];
	static const char *mnt;
	static bool bpf_mnt_cached;
	const char *mnt_env = getenv(BPF_ENV_MNT);
	static const char * const bpf_known_mnts[] = {
		BPF_DIR_MNT,
		"/bpf",
		0,
	};
	int ret;

	/* Fast path: mount point (or failure) already determined. */
	if (bpf_mnt_cached) {
		const char *out = mnt;

		if (out && type) {
			snprintf(bpf_tmp, sizeof(bpf_tmp), "%s%s/",
				 out, bpf_prog_to_subdir(type));
			out = bpf_tmp;
		}
		return out;
	}

	if (mnt_env)
		mnt = bpf_find_mntpt_single(BPF_FS_MAGIC, bpf_tmp,
					    sizeof(bpf_tmp), mnt_env);
	else
		mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp,
				     sizeof(bpf_tmp), bpf_known_mnts);
	if (!mnt) {
		/* Nothing mounted yet: try to mount it ourselves at the
		 * env-provided path or the default location.
		 */
		mnt = mnt_env ? : BPF_DIR_MNT;
		ret = bpf_mnt_check_target(mnt);
		if (!ret)
			ret = bpf_mnt_fs(mnt);
		if (ret) {
			mnt = NULL;
			goto out;
		}
	}

	ret = snprintf(bpf_wrk_dir, sizeof(bpf_wrk_dir), "%s/", mnt);
	if (ret < 0 || ret >= sizeof(bpf_wrk_dir)) {
		mnt = NULL;
		goto out;
	}

	ret = bpf_gen_hierarchy(bpf_wrk_dir);
	if (ret) {
		mnt = NULL;
		goto out;
	}

	mnt = bpf_wrk_dir;
out:
	/* Cache even a failed probe so we do not retry every call. */
	bpf_mnt_cached = true;
	return mnt;
}
816
817static int bpf_obj_get(const char *pathname, enum bpf_prog_type type)
818{
819 union bpf_attr attr = {};
820 char tmp[PATH_MAX];
821
822 if (strlen(pathname) > 2 && pathname[0] == 'm' &&
823 pathname[1] == ':' && bpf_get_work_dir(type)) {
824 snprintf(tmp, sizeof(tmp), "%s/%s",
825 bpf_get_work_dir(type), pathname + 2);
826 pathname = tmp;
827 }
828
829 attr.pathname = bpf_ptr_to_u64(pathname);
830
831 return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
832}
833
834static int bpf_obj_pinned(const char *pathname, enum bpf_prog_type type)
835{
836 int prog_fd = bpf_obj_get(pathname, type);
837
838 if (prog_fd < 0)
839 fprintf(stderr, "Couldn\'t retrieve pinned program \'%s\': %s\n",
840 pathname, strerror(errno));
841 return prog_fd;
842}
843
/* Parse the common BPF command-line grammar into @cfg. Accepted modes
 * (each only when enabled in @opt_tbl):
 *
 *   bytecode|bc <ops>                          classic BPF inline
 *   bytecode-file|bcf <file>                   classic BPF from file
 *   object-file|obj <file> [type T] [section S] [export UDS] [verbose]
 *   object-pinned|pinned|fd <path>
 *
 * On success returns >= 0 (instruction count for cBPF, prog fd for
 * pinned objects, 0 for ELF objects) and fills cfg->object, section,
 * uds, verbose plus the advanced argc/argv; returns negative on error.
 */
static int bpf_do_parse(struct bpf_cfg_in *cfg, const bool *opt_tbl)
{
	const char *file, *section, *uds_name;
	bool verbose = false;
	int i, ret, argc;
	char **argv;

	argv = cfg->argv;
	argc = cfg->argc;

	/* First token selects the parse/load mode. */
	if (opt_tbl[CBPF_BYTECODE] &&
	    (matches(*argv, "bytecode") == 0 ||
	     strcmp(*argv, "bc") == 0)) {
		cfg->mode = CBPF_BYTECODE;
	} else if (opt_tbl[CBPF_FILE] &&
		   (matches(*argv, "bytecode-file") == 0 ||
		    strcmp(*argv, "bcf") == 0)) {
		cfg->mode = CBPF_FILE;
	} else if (opt_tbl[EBPF_OBJECT] &&
		   (matches(*argv, "object-file") == 0 ||
		    strcmp(*argv, "obj") == 0)) {
		cfg->mode = EBPF_OBJECT;
	} else if (opt_tbl[EBPF_PINNED] &&
		   (matches(*argv, "object-pinned") == 0 ||
		    matches(*argv, "pinned") == 0 ||
		    matches(*argv, "fd") == 0)) {
		cfg->mode = EBPF_PINNED;
	} else {
		fprintf(stderr, "What mode is \"%s\"?\n", *argv);
		return -1;
	}

	NEXT_ARG();
	file = section = uds_name = NULL;
	if (cfg->mode == EBPF_OBJECT || cfg->mode == EBPF_PINNED) {
		file = *argv;
		NEXT_ARG_FWD();

		/* Optional "type <name>"; defaults to sched_cls when the
		 * caller left the type unspecified.
		 */
		if (cfg->type == BPF_PROG_TYPE_UNSPEC) {
			if (argc > 0 && matches(*argv, "type") == 0) {
				NEXT_ARG();
				for (i = 0; i < ARRAY_SIZE(__bpf_prog_meta);
				     i++) {
					if (!__bpf_prog_meta[i].type)
						continue;
					if (!matches(*argv,
						     __bpf_prog_meta[i].type)) {
						cfg->type = i;
						break;
					}
				}

				if (cfg->type == BPF_PROG_TYPE_UNSPEC) {
					fprintf(stderr, "What type is \"%s\"?\n",
						*argv);
					return -1;
				}
				NEXT_ARG_FWD();
			} else {
				cfg->type = BPF_PROG_TYPE_SCHED_CLS;
			}
		}

		/* Optional "section <name>"; else the type default. */
		section = bpf_prog_to_default_section(cfg->type);
		if (argc > 0 && matches(*argv, "section") == 0) {
			NEXT_ARG();
			section = *argv;
			NEXT_ARG_FWD();
		}

		/* Optional UDS export target; the environment variable
		 * takes precedence over the "export" keyword.
		 */
		if (__bpf_prog_meta[cfg->type].may_uds_export) {
			uds_name = getenv(BPF_ENV_UDS);
			if (argc > 0 && !uds_name &&
			    matches(*argv, "export") == 0) {
				NEXT_ARG();
				uds_name = *argv;
				NEXT_ARG_FWD();
			}
		}

		if (argc > 0 && matches(*argv, "verbose") == 0) {
			verbose = true;
			NEXT_ARG_FWD();
		}

		/* Step back so the caller sees the last consumed arg. */
		PREV_ARG();
	}

	if (cfg->mode == CBPF_BYTECODE || cfg->mode == CBPF_FILE) {
		ret = bpf_ops_parse(argc, argv, cfg->opcodes,
				    cfg->mode == CBPF_FILE);
		cfg->n_opcodes = ret;
	} else if (cfg->mode == EBPF_OBJECT) {
		ret = 0;
	} else if (cfg->mode == EBPF_PINNED) {
		ret = bpf_obj_pinned(file, cfg->type);
		cfg->prog_fd = ret;
	} else {
		return -1;
	}

	cfg->object = file;
	cfg->section = section;
	cfg->uds = uds_name;
	cfg->argc = argc;
	cfg->argv = argv;
	cfg->verbose = verbose;

	return ret;
}
954
/* Load the program described by @cfg if it is an ELF object (preferring
 * libbpf when compiled in, else the legacy loader below); other modes
 * were already resolved during parsing. Stores and returns the prog fd,
 * or 0 when nothing needed loading.
 */
static int bpf_do_load(struct bpf_cfg_in *cfg)
{
	if (cfg->mode == EBPF_OBJECT) {
#ifdef HAVE_LIBBPF
		return iproute2_load_libbpf(cfg);
#endif
		cfg->prog_fd = bpf_obj_open(cfg->object, cfg->type,
					    cfg->section, cfg->ifindex,
					    cfg->verbose);
		return cfg->prog_fd;
	}
	return 0;
}
968
/* Load the parsed program and hand it to the caller's netlink callback:
 * cbpf_cb for classic BPF opcodes, ebpf_cb for eBPF fds (with an
 * annotation of the form "object:[section]" or "object:[*fsobj]" for
 * pinned programs). Returns 0 on success, negative load error.
 */
int bpf_load_common(struct bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops,
		    void *nl)
{
	char annotation[256];
	int ret;

	ret = bpf_do_load(cfg);
	if (ret < 0)
		return ret;

	if (cfg->mode == CBPF_BYTECODE || cfg->mode == CBPF_FILE)
		ops->cbpf_cb(nl, cfg->opcodes, cfg->n_opcodes);
	if (cfg->mode == EBPF_OBJECT || cfg->mode == EBPF_PINNED) {
		snprintf(annotation, sizeof(annotation), "%s:[%s]",
			 basename(cfg->object), cfg->mode == EBPF_PINNED ?
			 "*fsobj" : cfg->section);
		ops->ebpf_cb(nl, cfg->prog_fd, annotation);
	}

	return 0;
}
990
991int bpf_parse_common(struct bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops)
992{
993 bool opt_tbl[BPF_MODE_MAX] = {};
994
995 if (ops->cbpf_cb) {
996 opt_tbl[CBPF_BYTECODE] = true;
997 opt_tbl[CBPF_FILE] = true;
998 }
999
1000 if (ops->ebpf_cb) {
1001 opt_tbl[EBPF_OBJECT] = true;
1002 opt_tbl[EBPF_PINNED] = true;
1003 }
1004
1005 return bpf_do_parse(cfg, opt_tbl);
1006}
1007
/* Convenience wrapper: parse the command line, then load and pass the
 * result to the caller's callbacks. Returns the first failing step's
 * error, or the load result.
 */
int bpf_parse_and_load_common(struct bpf_cfg_in *cfg,
			      const struct bpf_cfg_ops *ops, void *nl)
{
	int err = bpf_parse_common(cfg, ops);

	return err < 0 ? err : bpf_load_common(cfg, ops, nl);
}
1019
/* Graft a program into a pinned prog-array map: parse/load the program
 * from @argv, open the map at @map_path, self-check that it really is a
 * prog array with int-sized keys/values, then store the prog fd at slot
 * @key (or a key inferred from the "<x>/<key>" section name when @key is
 * NULL). Returns 0 on success, negative error otherwise.
 */
int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
{
	const bool opt_tbl[BPF_MODE_MAX] = {
		[EBPF_OBJECT]	= true,
		[EBPF_PINNED]	= true,
	};
	const struct bpf_elf_map test = {
		.type		= BPF_MAP_TYPE_PROG_ARRAY,
		.size_key	= sizeof(int),
		.size_value	= sizeof(int),
	};
	struct bpf_cfg_in cfg = {
		.type		= BPF_PROG_TYPE_UNSPEC,
		.argc		= argc,
		.argv		= argv,
	};
	struct bpf_map_ext ext = {};
	int ret, prog_fd, map_fd;
	uint32_t map_key;

	ret = bpf_do_parse(&cfg, opt_tbl);
	if (ret < 0)
		return ret;

	ret = bpf_do_load(&cfg);
	if (ret < 0)
		return ret;

	prog_fd = cfg.prog_fd;

	/* Key either given explicitly or encoded in the section name. */
	if (key) {
		map_key = *key;
	} else {
		ret = sscanf(cfg.section, "%*i/%i", &map_key);
		if (ret != 1) {
			fprintf(stderr, "Couldn't infer map key from section name! Please provide 'key' argument!\n");
			ret = -EINVAL;
			goto out_prog;
		}
	}

	map_fd = bpf_obj_get(map_path, cfg.type);
	if (map_fd < 0) {
		fprintf(stderr, "Couldn't retrieve pinned map '%s': %s\n",
			map_path, strerror(errno));
		ret = map_fd;
		goto out_prog;
	}

	/* Only compare up to max_elem: array length may differ. */
	ret = bpf_map_selfcheck_pinned(map_fd, &test, &ext,
				       offsetof(struct bpf_elf_map, max_elem),
				       cfg.type);
	if (ret < 0) {
		fprintf(stderr, "Map '%s' self-check failed!\n", map_path);
		goto out_map;
	}

	ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
	if (ret < 0)
		fprintf(stderr, "Map update failed: %s\n", strerror(errno));
out_map:
	close(map_fd);
out_prog:
	close(prog_fd);
	return ret;
}
1086
1087int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type)
1088{
1089 union bpf_attr attr = {};
1090
1091 attr.target_fd = target_fd;
1092 attr.attach_bpf_fd = prog_fd;
1093 attr.attach_type = type;
1094
1095 return bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
1096}
1097
1098int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type)
1099{
1100 union bpf_attr attr = {};
1101
1102 attr.target_fd = target_fd;
1103 attr.attach_type = type;
1104
1105 return bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
1106}
1107
1108int bpf_prog_load_dev(enum bpf_prog_type type, const struct bpf_insn *insns,
1109 size_t size_insns, const char *license, __u32 ifindex,
1110 char *log, size_t size_log)
1111{
1112 union bpf_attr attr = {};
1113
1114 attr.prog_type = type;
1115 attr.insns = bpf_ptr_to_u64(insns);
1116 attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
1117 attr.license = bpf_ptr_to_u64(license);
1118 attr.prog_ifindex = ifindex;
1119
1120 if (size_log > 0) {
1121 attr.log_buf = bpf_ptr_to_u64(log);
1122 attr.log_size = size_log;
1123 attr.log_level = 1;
1124 }
1125
1126 return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
1127}
1128
1129#ifdef HAVE_ELF
/* One program extracted from an ELF section: raw instructions plus the
 * license string needed at BPF_PROG_LOAD time.
 */
struct bpf_elf_prog {
	enum bpf_prog_type type;
	struct bpf_insn *insns;
	unsigned int insns_num;
	size_t size;
	const char *license;
};

/* Chained hash entry mapping a custom pinning id to a user subpath. */
struct bpf_hash_entry {
	unsigned int pinning;
	const char *subpath;
	struct bpf_hash_entry *next;
};

/* Kernel configuration probed at load time. */
struct bpf_config {
	unsigned int jit_enabled;
};

/* Parsed in-memory view of the object's .BTF section. */
struct bpf_btf {
	const struct btf_header *hdr;
	const void *raw;
	const char *strings;
	const struct btf_type **types;
	int types_num;
};

/* State for loading one ELF object: libelf handles, symbol/string
 * tables, collected maps with their fds and extensions, the ".text"
 * program for bpf-to-bpf calls, BTF data, section bookkeeping, license,
 * target prog type/ifindex, the custom-pinning hash table and the
 * (growable) verifier log buffer.
 */
struct bpf_elf_ctx {
	struct bpf_config cfg;
	Elf *elf_fd;
	GElf_Ehdr elf_hdr;
	Elf_Data *sym_tab;
	Elf_Data *str_tab;
	Elf_Data *btf_data;
	char obj_uid[64];
	int obj_fd;
	int btf_fd;
	int map_fds[ELF_MAX_MAPS];
	struct bpf_elf_map maps[ELF_MAX_MAPS];
	struct bpf_map_ext maps_ext[ELF_MAX_MAPS];
	struct bpf_elf_prog prog_text;
	struct bpf_btf btf;
	int sym_num;
	int map_num;
	int map_len;
	bool *sec_done;
	int sec_maps;
	int sec_text;
	int sec_btf;
	char license[ELF_MAX_LICENSE_LEN];
	enum bpf_prog_type type;
	__u32 ifindex;
	bool verbose;
	bool noafalg;
	struct bpf_elf_st stat;
	struct bpf_hash_entry *ht[256];
	char *log;
	size_t log_size;
};

/* One ELF section: header, payload and resolved name. */
struct bpf_elf_sec_data {
	GElf_Shdr sec_hdr;
	Elf_Data *sec_data;
	const char *sec_name;
};

/* Map data handed to external callers (UDS export). */
struct bpf_map_data {
	int *fds;
	const char *obj;
	struct bpf_elf_st *st;
	struct bpf_elf_map *ent;
};
1201
1202static bool bpf_log_has_data(struct bpf_elf_ctx *ctx)
1203{
1204 return ctx->log && ctx->log[0];
1205}
1206
/* Print a printf-style error message followed by the kernel verifier
 * log, if any. In non-verbose mode only the last BPF_MAX_LOG bytes of
 * the log are shown. The log buffer is cleared afterwards so stale
 * output is not repeated on the next failure.
 */
static __check_format_string(2, 3) void
bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
{
	va_list vl;

	va_start(vl, format);
	vfprintf(stderr, format, vl);
	va_end(vl);

	if (bpf_log_has_data(ctx)) {
		if (ctx->verbose) {
			fprintf(stderr, "%s\n", ctx->log);
		} else {
			unsigned int off = 0, len = strlen(ctx->log);

			/* Show only the tail; that is where the verifier
			 * states the actual rejection reason.
			 */
			if (len > BPF_MAX_LOG) {
				off = len - BPF_MAX_LOG;
				fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
					off);
			}
			fprintf(stderr, "%s\n", ctx->log + off);
		}

		memset(ctx->log, 0, ctx->log_size);
	}
}
1233
1234static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
1235{
1236 const size_t log_max = UINT_MAX >> 8;
1237 size_t log_size = ctx->log_size;
1238 char *ptr;
1239
1240 if (!ctx->log) {
1241 log_size = 65536;
1242 } else if (log_size < log_max) {
1243 log_size <<= 1;
1244 if (log_size > log_max)
1245 log_size = log_max;
1246 } else {
1247 return -EINVAL;
1248 }
1249
1250 ptr = realloc(ctx->log, log_size);
1251 if (!ptr)
1252 return -ENOMEM;
1253
1254 ptr[0] = 0;
1255 ctx->log = ptr;
1256 ctx->log_size = log_size;
1257
1258 return 0;
1259}
1260
1261static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
1262 uint32_t size_value, uint32_t max_elem,
1263 uint32_t flags, int inner_fd, int btf_fd,
1264 uint32_t ifindex, uint32_t btf_id_key,
1265 uint32_t btf_id_val)
1266{
1267 union bpf_attr attr = {};
1268
1269 attr.map_type = type;
1270 attr.key_size = size_key;
1271 attr.value_size = inner_fd ? sizeof(int) : size_value;
1272 attr.max_entries = max_elem;
1273 attr.map_flags = flags;
1274 attr.inner_map_fd = inner_fd;
1275 attr.map_ifindex = ifindex;
1276 attr.btf_fd = btf_fd;
1277 attr.btf_key_type_id = btf_id_key;
1278 attr.btf_value_type_id = btf_id_val;
1279
1280 return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
1281}
1282
1283static int bpf_btf_load(void *btf, size_t size_btf,
1284 char *log, size_t size_log)
1285{
1286 union bpf_attr attr = {};
1287
1288 attr.btf = bpf_ptr_to_u64(btf);
1289 attr.btf_size = size_btf;
1290
1291 if (size_log > 0) {
1292 attr.btf_log_buf = bpf_ptr_to_u64(log);
1293 attr.btf_log_size = size_log;
1294 attr.btf_log_level = 1;
1295 }
1296
1297 return bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
1298}
1299
1300static int bpf_obj_pin(int fd, const char *pathname)
1301{
1302 union bpf_attr attr = {};
1303
1304 attr.pathname = bpf_ptr_to_u64(pathname);
1305 attr.bpf_fd = fd;
1306
1307 return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
1308}
1309
/* Compute the SHA-1 digest of the file @object via the kernel AF_ALG
 * interface, storing the 20-byte result in @out. @len must be exactly
 * 20. Returns 0 on success, a negative value or -1 on error.
 */
static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
{
	struct sockaddr_alg alg = {
		.salg_family	= AF_ALG,
		.salg_type	= "hash",
		.salg_name	= "sha1",
	};
	int ret, cfd, ofd, ffd;
	struct stat stbuff;
	ssize_t size;

	if (!object || len != 20)
		return -EINVAL;

	cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (cfd < 0)
		return cfd;

	ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
	if (ret < 0)
		goto out_cfd;

	/* accept() yields the operation fd that hashing I/O goes to. */
	ofd = accept(cfd, NULL, 0);
	if (ofd < 0) {
		ret = ofd;
		goto out_cfd;
	}

	ffd = open(object, O_RDONLY);
	if (ffd < 0) {
		fprintf(stderr, "Error opening object %s: %s\n",
			object, strerror(errno));
		ret = ffd;
		goto out_ofd;
	}

	ret = fstat(ffd, &stbuff);
	if (ret < 0) {
		fprintf(stderr, "Error doing fstat: %s\n",
			strerror(errno));
		goto out_ffd;
	}

	/* Stream the whole object file into the hash socket. */
	size = sendfile(ofd, ffd, NULL, stbuff.st_size);
	if (size != stbuff.st_size) {
		/* st_size is off_t, not size_t: print via intmax_t so
		 * the format specifier is correct on every ABI.
		 */
		fprintf(stderr, "Error from sendfile (%zd vs %jd bytes): %s\n",
			size, (intmax_t)stbuff.st_size, strerror(errno));
		ret = -1;
		goto out_ffd;
	}

	/* Reading back from the op socket returns the digest. */
	size = read(ofd, out, len);
	if (size != (ssize_t)len) {
		fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
			size, len, strerror(errno));
		ret = -1;
	} else {
		ret = 0;
	}
out_ffd:
	close(ffd);
out_ofd:
	close(ofd);
out_cfd:
	close(cfd);
	return ret;
}
1377
1378static void bpf_init_env(void)
1379{
1380 struct rlimit limit = {
1381 .rlim_cur = RLIM_INFINITY,
1382 .rlim_max = RLIM_INFINITY,
1383 };
1384
1385
1386 setrlimit(RLIMIT_MEMLOCK, &limit);
1387
1388 if (!bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC))
1389 fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n");
1390}
1391
1392static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
1393 uint32_t pinning)
1394{
1395 struct bpf_hash_entry *entry;
1396
1397 entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
1398 while (entry && entry->pinning != pinning)
1399 entry = entry->next;
1400
1401 return entry ? entry->subpath : NULL;
1402}
1403
1404static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
1405 uint32_t pinning)
1406{
1407 switch (pinning) {
1408 case PIN_OBJECT_NS:
1409 case PIN_GLOBAL_NS:
1410 return false;
1411 case PIN_NONE:
1412 return true;
1413 default:
1414 return !bpf_custom_pinning(ctx, pinning);
1415 }
1416}
1417
1418static void bpf_make_pathname(char *pathname, size_t len, const char *name,
1419 const struct bpf_elf_ctx *ctx, uint32_t pinning)
1420{
1421 switch (pinning) {
1422 case PIN_OBJECT_NS:
1423 snprintf(pathname, len, "%s/%s/%s",
1424 bpf_get_work_dir(ctx->type),
1425 ctx->obj_uid, name);
1426 break;
1427 case PIN_GLOBAL_NS:
1428 snprintf(pathname, len, "%s/%s/%s",
1429 bpf_get_work_dir(ctx->type),
1430 BPF_DIR_GLOBALS, name);
1431 break;
1432 default:
1433 snprintf(pathname, len, "%s/../%s/%s",
1434 bpf_get_work_dir(ctx->type),
1435 bpf_custom_pinning(ctx, pinning), name);
1436 break;
1437 }
1438}
1439
1440static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
1441 uint32_t pinning)
1442{
1443 char pathname[PATH_MAX];
1444
1445 if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
1446 return 0;
1447
1448 bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
1449 return bpf_obj_get(pathname, ctx->type);
1450}
1451
1452static int bpf_make_obj_path(const struct bpf_elf_ctx *ctx)
1453{
1454 char tmp[PATH_MAX];
1455 int ret;
1456
1457 snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_work_dir(ctx->type),
1458 ctx->obj_uid);
1459
1460 ret = mkdir(tmp, S_IRWXU);
1461 if (ret && errno != EEXIST) {
1462 fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno));
1463 return ret;
1464 }
1465
1466 return 0;
1467}
1468
1469static int bpf_make_custom_path(const struct bpf_elf_ctx *ctx,
1470 const char *todo)
1471{
1472 char tmp[PATH_MAX], rem[PATH_MAX], *sub;
1473 int ret;
1474
1475 snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_work_dir(ctx->type));
1476 snprintf(rem, sizeof(rem), "%s/", todo);
1477 sub = strtok(rem, "/");
1478
1479 while (sub) {
1480 if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX)
1481 return -EINVAL;
1482
1483 strcat(tmp, sub);
1484 strcat(tmp, "/");
1485
1486 ret = mkdir(tmp, S_IRWXU);
1487 if (ret && errno != EEXIST) {
1488 fprintf(stderr, "mkdir %s failed: %s\n", tmp,
1489 strerror(errno));
1490 return ret;
1491 }
1492
1493 sub = strtok(NULL, "/");
1494 }
1495
1496 return 0;
1497}
1498
/* Pin @fd as @name under the pathname derived from @pinning, creating
 * missing parent directories first. Returns 0 when the object needs no
 * pinning (or no bpf fs is mounted), the bpf_obj_pin() result otherwise.
 */
static int bpf_place_pinned(int fd, const char *name,
			    const struct bpf_elf_ctx *ctx, uint32_t pinning)
{
	char pathname[PATH_MAX];
	const char *tmp;
	int ret = 0;

	if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
		return 0;

	/* PIN_OBJECT_NS needs its per-object dir, custom pinnings their
	 * registered subpath tree; PIN_GLOBAL_NS needs no preparation.
	 */
	if (pinning == PIN_OBJECT_NS)
		ret = bpf_make_obj_path(ctx);
	else if ((tmp = bpf_custom_pinning(ctx, pinning)))
		ret = bpf_make_custom_path(ctx, tmp);
	if (ret < 0)
		return ret;

	bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
	return bpf_obj_pin(fd, pathname);
}
1519
1520static void bpf_prog_report(int fd, const char *section,
1521 const struct bpf_elf_prog *prog,
1522 struct bpf_elf_ctx *ctx)
1523{
1524 unsigned int insns = prog->size / sizeof(struct bpf_insn);
1525
1526 fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section,
1527 fd < 0 ? "rejected: " : "loaded",
1528 fd < 0 ? strerror(errno) : "",
1529 fd < 0 ? errno : fd);
1530
1531 fprintf(stderr, " - Type: %u\n", prog->type);
1532 fprintf(stderr, " - Instructions: %u (%u over limit)\n",
1533 insns, insns > BPF_MAXINSNS ? insns - BPF_MAXINSNS : 0);
1534 fprintf(stderr, " - License: %s\n\n", prog->license);
1535
1536 bpf_dump_error(ctx, "Verifier analysis:\n\n");
1537}
1538
/* Load one program section into the kernel. On ENOSPC the verifier log
 * buffer was too small, so it is grown (bpf_log_realloc) and the load
 * retried, up to 10 times. Returns the program fd or a negative value.
 */
static int bpf_prog_attach(const char *section,
			   const struct bpf_elf_prog *prog,
			   struct bpf_elf_ctx *ctx)
{
	int tries = 0, fd;
retry:
	/* Clear errno so a stale value can't fake an ENOSPC retry. */
	errno = 0;
	fd = bpf_prog_load_dev(prog->type, prog->insns, prog->size,
			       prog->license, ctx->ifindex,
			       ctx->log, ctx->log_size);
	if (fd < 0 || ctx->verbose) {
		/* The verifier log is only of interest on failure or in
		 * verbose mode; ENOSPC specifically means the log buffer
		 * could not hold the full verifier output.
		 */
		if (fd < 0 && errno == ENOSPC) {
			if (tries++ < 10 && !bpf_log_realloc(ctx))
				goto retry;

			fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
				ctx->log_size, tries);
			return fd;
		}

		bpf_prog_report(fd, section, prog, ctx);
	}

	return fd;
}
1569
1570static void bpf_map_report(int fd, const char *name,
1571 const struct bpf_elf_map *map,
1572 struct bpf_elf_ctx *ctx, int inner_fd)
1573{
1574 fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
1575 fd < 0 ? "rejected: " : "loaded",
1576 fd < 0 ? strerror(errno) : "",
1577 fd < 0 ? errno : fd);
1578
1579 fprintf(stderr, " - Type: %u\n", map->type);
1580 fprintf(stderr, " - Identifier: %u\n", map->id);
1581 fprintf(stderr, " - Pinning: %u\n", map->pinning);
1582 fprintf(stderr, " - Size key: %u\n", map->size_key);
1583 fprintf(stderr, " - Size value: %u\n",
1584 inner_fd ? (int)sizeof(int) : map->size_value);
1585 fprintf(stderr, " - Max elems: %u\n", map->max_elem);
1586 fprintf(stderr, " - Flags: %#x\n\n", map->flags);
1587}
1588
1589static int bpf_find_map_id(const struct bpf_elf_ctx *ctx, uint32_t id)
1590{
1591 int i;
1592
1593 for (i = 0; i < ctx->map_num; i++) {
1594 if (ctx->maps[i].id != id)
1595 continue;
1596 if (ctx->map_fds[i] < 0)
1597 return -EINVAL;
1598
1599 return ctx->map_fds[i];
1600 }
1601
1602 return -ENOENT;
1603}
1604
1605static void bpf_report_map_in_map(int outer_fd, uint32_t idx)
1606{
1607 struct bpf_elf_map outer_map;
1608 int ret;
1609
1610 fprintf(stderr, "Cannot insert map into map! ");
1611
1612 ret = bpf_derive_elf_map_from_fdinfo(outer_fd, &outer_map, NULL);
1613 if (!ret) {
1614 if (idx >= outer_map.max_elem &&
1615 outer_map.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
1616 fprintf(stderr, "Outer map has %u elements, index %u is invalid!\n",
1617 outer_map.max_elem, idx);
1618 return;
1619 }
1620 }
1621
1622 fprintf(stderr, "Different map specs used for outer and inner map?\n");
1623}
1624
1625static bool bpf_is_map_in_map_type(const struct bpf_elf_map *map)
1626{
1627 return map->type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
1628 map->type == BPF_MAP_TYPE_HASH_OF_MAPS;
1629}
1630
/* Maps that never take an offload ifindex (host-side only). */
static bool bpf_map_offload_neutral(enum bpf_map_type type)
{
	switch (type) {
	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
		return true;
	default:
		return false;
	}
}
1635
1636static int bpf_map_attach(const char *name, struct bpf_elf_ctx *ctx,
1637 const struct bpf_elf_map *map, struct bpf_map_ext *ext,
1638 int *have_map_in_map)
1639{
1640 int fd, ifindex, ret, map_inner_fd = 0;
1641 bool retried = false;
1642
1643probe:
1644 fd = bpf_probe_pinned(name, ctx, map->pinning);
1645 if (fd > 0) {
1646 ret = bpf_map_selfcheck_pinned(fd, map, ext,
1647 offsetof(struct bpf_elf_map,
1648 id), ctx->type);
1649 if (ret < 0) {
1650 close(fd);
1651 fprintf(stderr, "Map \'%s\' self-check failed!\n",
1652 name);
1653 return ret;
1654 }
1655 if (ctx->verbose)
1656 fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
1657 name);
1658 return fd;
1659 }
1660
1661 if (have_map_in_map && bpf_is_map_in_map_type(map)) {
1662 (*have_map_in_map)++;
1663 if (map->inner_id)
1664 return 0;
1665 fprintf(stderr, "Map \'%s\' cannot be created since no inner map ID defined!\n",
1666 name);
1667 return -EINVAL;
1668 }
1669
1670 if (!have_map_in_map && bpf_is_map_in_map_type(map)) {
1671 map_inner_fd = bpf_find_map_id(ctx, map->inner_id);
1672 if (map_inner_fd < 0) {
1673 fprintf(stderr, "Map \'%s\' cannot be loaded. Inner map with ID %u not found!\n",
1674 name, map->inner_id);
1675 return -EINVAL;
1676 }
1677 }
1678
1679 ifindex = bpf_map_offload_neutral(map->type) ? 0 : ctx->ifindex;
1680 errno = 0;
1681 fd = bpf_map_create(map->type, map->size_key, map->size_value,
1682 map->max_elem, map->flags, map_inner_fd, ctx->btf_fd,
1683 ifindex, ext->btf_id_key, ext->btf_id_val);
1684
1685 if (fd < 0 || ctx->verbose) {
1686 bpf_map_report(fd, name, map, ctx, map_inner_fd);
1687 if (fd < 0)
1688 return fd;
1689 }
1690
1691 ret = bpf_place_pinned(fd, name, ctx, map->pinning);
1692 if (ret < 0) {
1693 close(fd);
1694 if (!retried && errno == EEXIST) {
1695 retried = true;
1696 goto probe;
1697 }
1698 fprintf(stderr, "Could not pin %s map: %s\n", name,
1699 strerror(errno));
1700 return ret;
1701 }
1702
1703 return fd;
1704}
1705
1706static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
1707 const GElf_Sym *sym)
1708{
1709 return ctx->str_tab->d_buf + sym->st_name;
1710}
1711
1712static int bpf_btf_find(struct bpf_elf_ctx *ctx, const char *name)
1713{
1714 const struct btf_type *type;
1715 const char *res;
1716 int id;
1717
1718 for (id = 1; id < ctx->btf.types_num; id++) {
1719 type = ctx->btf.types[id];
1720 if (type->name_off >= ctx->btf.hdr->str_len)
1721 continue;
1722 res = &ctx->btf.strings[type->name_off];
1723 if (!strcmp(res, name))
1724 return id;
1725 }
1726
1727 return -ENOENT;
1728}
1729
/* Look up BTF key/value type ids for map @name via its companion
 * "____btf_map_<name>" struct, which must be exactly
 * struct { <key_type> key; <val_type> value; }.
 * Returns 0 and fills @id_key/@id_val, a negative error otherwise.
 */
static int bpf_btf_find_kv(struct bpf_elf_ctx *ctx, const struct bpf_elf_map *map,
			   const char *name, uint32_t *id_key, uint32_t *id_val)
{
	const struct btf_member *key, *val;
	const struct btf_type *type;
	char btf_name[512];
	const char *res;
	int id;

	snprintf(btf_name, sizeof(btf_name), "____btf_map_%s", name);
	id = bpf_btf_find(ctx, btf_name);
	if (id < 0)
		return id;

	/* Must be a struct with exactly two members. */
	type = ctx->btf.types[id];
	if (BTF_INFO_KIND(type->info) != BTF_KIND_STRUCT)
		return -EINVAL;
	if (BTF_INFO_VLEN(type->info) != 2)
		return -EINVAL;

	/* The btf_member array follows the btf_type header directly. */
	key = ((void *) type) + sizeof(*type);
	val = key + 1;
	/* Both member type ids must reference valid, non-void entries. */
	if (!key->type || key->type >= ctx->btf.types_num ||
	    !val->type || val->type >= ctx->btf.types_num)
		return -EINVAL;

	if (key->name_off >= ctx->btf.hdr->str_len ||
	    val->name_off >= ctx->btf.hdr->str_len)
		return -EINVAL;

	/* Members must be literally named "key" and "value". */
	res = &ctx->btf.strings[key->name_off];
	if (strcmp(res, "key"))
		return -EINVAL;

	res = &ctx->btf.strings[val->name_off];
	if (strcmp(res, "value"))
		return -EINVAL;

	*id_key = key->type;
	*id_val = val->type;
	return 0;
}
1772
1773static void bpf_btf_annotate(struct bpf_elf_ctx *ctx, int which, const char *name)
1774{
1775 uint32_t id_key = 0, id_val = 0;
1776
1777 if (!bpf_btf_find_kv(ctx, &ctx->maps[which], name, &id_key, &id_val)) {
1778 ctx->maps_ext[which].btf_id_key = id_key;
1779 ctx->maps_ext[which].btf_id_val = id_val;
1780 }
1781}
1782
/* Find the symbol name for map slot @which by scanning the symbol
 * table for a global object in the maps section whose offset selects
 * that slot; BTF key/value ids are attached opportunistically.
 * Returns the name, or NULL when no symbol matches.
 */
static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
{
	const char *name;
	GElf_Sym sym;
	int i;

	for (i = 0; i < ctx->sym_num; i++) {
		int type;

		if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
			continue;

		/* Only global STT_NOTYPE/STT_OBJECT symbols in the maps
		 * section count; st_value / map_len maps the symbol's
		 * byte offset to its map slot index.
		 */
		type = GELF_ST_TYPE(sym.st_info);
		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
		    (type != STT_NOTYPE && type != STT_OBJECT) ||
		    sym.st_shndx != ctx->sec_maps ||
		    sym.st_value / ctx->map_len != which)
			continue;

		name = bpf_str_tab_name(ctx, &sym);
		bpf_btf_annotate(ctx, which, name);
		return name;
	}

	return NULL;
}
1809
/* Create or fetch all maps of the object in up to three passes:
 *   1) create everything, deferring map-in-map outer maps (their fd
 *      slot is parked at -1) since inner fds do not exist yet,
 *   2) create the deferred outer maps now that inner fds exist,
 *   3) insert each inner map's fd into every outer map referencing
 *      its id, at the configured inner_idx.
 * Returns 0 on success, a negative error otherwise.
 */
static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
{
	int i, j, ret, fd, inner_fd, inner_idx, have_map_in_map = 0;
	const char *map_name;

	for (i = 0; i < ctx->map_num; i++) {
		/* PIN_OBJECT_NS needs the AF_ALG object hash as uid. */
		if (ctx->maps[i].pinning == PIN_OBJECT_NS &&
		    ctx->noafalg) {
			fprintf(stderr, "Missing kernel AF_ALG support for PIN_OBJECT_NS!\n");
			return -ENOTSUP;
		}

		map_name = bpf_map_fetch_name(ctx, i);
		if (!map_name)
			return -EIO;

		fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
				    &ctx->maps_ext[i], &have_map_in_map);
		if (fd < 0)
			return fd;

		/* fd == 0 marks a deferred map-in-map outer map. */
		ctx->map_fds[i] = !fd ? -1 : fd;
	}

	for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
		if (ctx->map_fds[i] >= 0)
			continue;

		map_name = bpf_map_fetch_name(ctx, i);
		if (!map_name)
			return -EIO;

		/* Second pass: inner map fds are resolvable now. */
		fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
				    &ctx->maps_ext[i], NULL);
		if (fd < 0)
			return fd;

		ctx->map_fds[i] = fd;
	}

	for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
		/* Candidates for insertion: maps that carry an id, are
		 * not outer maps themselves, and request a slot.
		 */
		if (!ctx->maps[i].id ||
		    ctx->maps[i].inner_id ||
		    ctx->maps[i].inner_idx == -1)
			continue;

		inner_fd = ctx->map_fds[i];
		inner_idx = ctx->maps[i].inner_idx;

		for (j = 0; j < ctx->map_num; j++) {
			if (!bpf_is_map_in_map_type(&ctx->maps[j]))
				continue;
			if (ctx->maps[j].inner_id != ctx->maps[i].id)
				continue;

			ret = bpf_map_update(ctx->map_fds[j], &inner_idx,
					     &inner_fd, BPF_ANY);
			if (ret < 0) {
				bpf_report_map_in_map(ctx->map_fds[j],
						      inner_idx);
				return ret;
			}
		}
	}

	return 0;
}
1877
1878static int bpf_map_num_sym(struct bpf_elf_ctx *ctx)
1879{
1880 int i, num = 0;
1881 GElf_Sym sym;
1882
1883 for (i = 0; i < ctx->sym_num; i++) {
1884 int type;
1885
1886 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
1887 continue;
1888
1889 type = GELF_ST_TYPE(sym.st_info);
1890 if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
1891 (type != STT_NOTYPE && type != STT_OBJECT) ||
1892 sym.st_shndx != ctx->sec_maps)
1893 continue;
1894 num++;
1895 }
1896
1897 return num;
1898}
1899
/* Resolve ELF section index @section into @data (header, name and
 * payload). Returns 0 on success; negative error when the section is
 * missing, empty, unnamed or its payload cannot be read.
 */
static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
				 struct bpf_elf_sec_data *data)
{
	Elf_Data *sec_edata;
	GElf_Shdr sec_hdr;
	Elf_Scn *sec_fd;
	char *sec_name;

	memset(data, 0, sizeof(*data));

	sec_fd = elf_getscn(ctx->elf_fd, section);
	if (!sec_fd)
		return -EINVAL;
	if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
		return -EIO;

	sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
			      sec_hdr.sh_name);
	if (!sec_name || !sec_hdr.sh_size)
		return -ENOENT;

	/* Expect the payload as a single Elf_Data chunk; a second chunk
	 * would indicate a layout this loader does not handle.
	 */
	sec_edata = elf_getdata(sec_fd, NULL);
	if (!sec_edata || elf_getdata(sec_fd, sec_edata))
		return -EIO;

	memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));

	data->sec_name = sec_name;
	data->sec_data = sec_edata;
	return 0;
}
1931
/* Minimal, oldest supported layout of struct bpf_elf_map; serves as
 * the lower size bound when fixing up objects that were built against
 * older headers (see bpf_fetch_maps_end()).
 */
struct bpf_elf_map_min {
	__u32 type;
	__u32 size_key;
	__u32 size_value;
	__u32 max_elem;
};
1938
1939static int bpf_fetch_maps_begin(struct bpf_elf_ctx *ctx, int section,
1940 struct bpf_elf_sec_data *data)
1941{
1942 ctx->map_num = data->sec_data->d_size;
1943 ctx->sec_maps = section;
1944 ctx->sec_done[section] = true;
1945
1946 if (ctx->map_num > sizeof(ctx->maps)) {
1947 fprintf(stderr, "Too many BPF maps in ELF section!\n");
1948 return -ENOMEM;
1949 }
1950
1951 memcpy(ctx->maps, data->sec_data->d_buf, ctx->map_num);
1952 return 0;
1953}
1954
/* Verify that every map slot boundary (each multiple of map_len up to
 * @end) is claimed by a global map symbol; this confirms map_len is
 * the actual struct size the object was compiled with.
 * Returns 0 when all offsets check out, -1 otherwise.
 */
static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end)
{
	GElf_Sym sym;
	int off, i;

	for (off = 0; off < end; off += ctx->map_len) {
		/* Search for a global maps-section symbol sitting exactly
		 * at offset @off; reaching the last symbol table entry
		 * without a hit means the layout is inconsistent.
		 */
		for (i = 0; i < ctx->sym_num; i++) {
			int type;

			if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
				continue;

			type = GELF_ST_TYPE(sym.st_info);
			if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
			    (type != STT_NOTYPE && type != STT_OBJECT) ||
			    sym.st_shndx != ctx->sec_maps)
				continue;
			if (sym.st_value == off)
				break;
			if (i == ctx->sym_num - 1)
				return -1;
		}
	}

	return off == end ? 0 : -1;
}
1984
/* Finalize the maps section: derive the per-entry size from the symbol
 * count, convert ctx->map_num from bytes to a map count, and — when
 * the object was built against an older, smaller struct bpf_elf_map —
 * fix each entry up by copying it into a zeroed full-sized slot.
 */
static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx)
{
	struct bpf_elf_map fixup[ARRAY_SIZE(ctx->maps)] = {};
	int i, sym_num = bpf_map_num_sym(ctx);
	__u8 *buff;

	if (sym_num == 0 || sym_num > ARRAY_SIZE(ctx->maps)) {
		fprintf(stderr, "%u maps not supported in current map section!\n",
			sym_num);
		return -EINVAL;
	}

	/* The section size must divide evenly among the map symbols
	 * and be word-aligned, otherwise the layout is inconsistent.
	 */
	if (ctx->map_num % sym_num != 0 ||
	    ctx->map_num % sizeof(__u32) != 0) {
		fprintf(stderr, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n");
		return -EINVAL;
	}

	ctx->map_len = ctx->map_num / sym_num;
	if (bpf_map_verify_all_offs(ctx, ctx->map_num)) {
		fprintf(stderr, "Different struct bpf_elf_map in use!\n");
		return -EINVAL;
	}

	if (ctx->map_len == sizeof(struct bpf_elf_map)) {
		/* Exact size match with our struct: no fixup needed. */
		ctx->map_num = sym_num;
		return 0;
	} else if (ctx->map_len > sizeof(struct bpf_elf_map)) {
		fprintf(stderr, "struct bpf_elf_map not supported, coming from future version?\n");
		return -EINVAL;
	} else if (ctx->map_len < sizeof(struct bpf_elf_map_min)) {
		fprintf(stderr, "struct bpf_elf_map too small, not supported!\n");
		return -EINVAL;
	}

	ctx->map_num = sym_num;
	for (i = 0, buff = (void *)ctx->maps; i < ctx->map_num;
	     i++, buff += ctx->map_len) {
		/* Copy the shorter on-disk entry into a zero-initialized
		 * full-sized slot; fields the old layout lacked stay 0.
		 */
		memcpy(&fixup[i], buff, ctx->map_len);
	}

	memcpy(ctx->maps, fixup, sizeof(fixup));
	if (ctx->verbose)
		printf("%zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n",
		       sizeof(struct bpf_elf_map) - ctx->map_len);
	return 0;
}
2036
2037static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
2038 struct bpf_elf_sec_data *data)
2039{
2040 if (data->sec_data->d_size > sizeof(ctx->license))
2041 return -ENOMEM;
2042
2043 memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
2044 ctx->sec_done[section] = true;
2045 return 0;
2046}
2047
2048static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
2049 struct bpf_elf_sec_data *data)
2050{
2051 ctx->sym_tab = data->sec_data;
2052 ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
2053 ctx->sec_done[section] = true;
2054 return 0;
2055}
2056
2057static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
2058 struct bpf_elf_sec_data *data)
2059{
2060 ctx->str_tab = data->sec_data;
2061 ctx->sec_done[section] = true;
2062 return 0;
2063}
2064
2065static int bpf_fetch_text(struct bpf_elf_ctx *ctx, int section,
2066 struct bpf_elf_sec_data *data)
2067{
2068 ctx->sec_text = section;
2069 ctx->sec_done[section] = true;
2070 return 0;
2071}
2072
2073static void bpf_btf_report(int fd, struct bpf_elf_ctx *ctx)
2074{
2075 fprintf(stderr, "\nBTF debug data section \'.BTF\' %s%s (%d)!\n",
2076 fd < 0 ? "rejected: " : "loaded",
2077 fd < 0 ? strerror(errno) : "",
2078 fd < 0 ? errno : fd);
2079
2080 fprintf(stderr, " - Length: %zu\n", ctx->btf_data->d_size);
2081
2082 bpf_dump_error(ctx, "Verifier analysis:\n\n");
2083}
2084
/* Load the raw .BTF section into the kernel, growing the verifier log
 * buffer and retrying on ENOSPC (same scheme as bpf_prog_attach()).
 * Returns the BTF fd or a negative value.
 */
static int bpf_btf_attach(struct bpf_elf_ctx *ctx)
{
	int tries = 0, fd;
retry:
	/* Clear errno so a stale value can't fake an ENOSPC retry. */
	errno = 0;
	fd = bpf_btf_load(ctx->btf_data->d_buf, ctx->btf_data->d_size,
			  ctx->log, ctx->log_size);
	if (fd < 0 || ctx->verbose) {
		if (fd < 0 && errno == ENOSPC) {
			if (tries++ < 10 && !bpf_log_realloc(ctx))
				goto retry;

			fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
				ctx->log_size, tries);
			return fd;
		}

		/* Unlike program loads, only report when the kernel
		 * actually produced log output.
		 */
		if (bpf_log_has_data(ctx))
			bpf_btf_report(fd, ctx);
	}

	return fd;
}
2108
2109static int bpf_fetch_btf_begin(struct bpf_elf_ctx *ctx, int section,
2110 struct bpf_elf_sec_data *data)
2111{
2112 ctx->btf_data = data->sec_data;
2113 ctx->sec_btf = section;
2114 ctx->sec_done[section] = true;
2115 return 0;
2116}
2117
/* Sanity-check the raw .BTF section header and record pointers to the
 * type area and string table. Returns 0 when the layout is sound.
 */
static int bpf_btf_check_header(struct bpf_elf_ctx *ctx)
{
	const struct btf_header *hdr = ctx->btf_data->d_buf;
	const char *str_start, *str_end;
	unsigned int data_len;

	if (hdr->magic != BTF_MAGIC) {
		fprintf(stderr, "Object has wrong BTF magic: %x, expected: %x!\n",
			hdr->magic, BTF_MAGIC);
		return -EINVAL;
	}

	if (hdr->version != BTF_VERSION) {
		fprintf(stderr, "Object has wrong BTF version: %u, expected: %u!\n",
			hdr->version, BTF_VERSION);
		return -EINVAL;
	}

	if (hdr->flags) {
		fprintf(stderr, "Object has unsupported BTF flags %x!\n",
			hdr->flags);
		return -EINVAL;
	}

	/* Type data must abut the string data, both fully contained in
	 * the section, and type_off must be 4-byte aligned.
	 * NOTE(review): type_len + str_len is 32-bit arithmetic and
	 * could wrap for a hostile section — confirm the adjacency
	 * checks below make that harmless.
	 */
	data_len = ctx->btf_data->d_size - sizeof(*hdr);
	if (data_len < hdr->type_off ||
	    data_len < hdr->str_off ||
	    data_len < hdr->type_len + hdr->str_len ||
	    hdr->type_off >= hdr->str_off ||
	    hdr->type_off + hdr->type_len != hdr->str_off ||
	    hdr->str_off + hdr->str_len != data_len ||
	    (hdr->type_off & (sizeof(uint32_t) - 1))) {
		fprintf(stderr, "Object has malformed BTF data!\n");
		return -EINVAL;
	}

	ctx->btf.hdr = hdr;
	ctx->btf.raw = hdr + 1;

	/* String table must be non-empty, start and end with NUL. */
	str_start = ctx->btf.raw + hdr->str_off;
	str_end = str_start + hdr->str_len;
	if (!hdr->str_len ||
	    hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
	    str_start[0] || str_end[-1]) {
		fprintf(stderr, "Object has malformed BTF string data!\n");
		return -EINVAL;
	}

	ctx->btf.strings = str_start;
	return 0;
}
2169
2170static int bpf_btf_register_type(struct bpf_elf_ctx *ctx,
2171 const struct btf_type *type)
2172{
2173 int cur = ctx->btf.types_num, num = cur + 1;
2174 const struct btf_type **types;
2175
2176 types = realloc(ctx->btf.types, num * sizeof(type));
2177 if (!types) {
2178 free(ctx->btf.types);
2179 ctx->btf.types = NULL;
2180 ctx->btf.types_num = 0;
2181 return -ENOMEM;
2182 }
2183
2184 ctx->btf.types = types;
2185 ctx->btf.types[cur] = type;
2186 ctx->btf.types_num = num;
2187 return 0;
2188}
2189
/* All-zero placeholder registered as BTF type id 0 ("void") so that
 * real type ids line up with their table index (see
 * bpf_btf_prep_type_data()).
 */
static struct btf_type btf_type_void;
2191
/* Walk the BTF type area and register a pointer to every type record,
 * skipping each kind's variable-length payload. Slot 0 is filled with
 * the synthetic void type so BTF ids equal table indices.
 * Returns 0 on success, negative error on unknown kinds or OOM.
 */
static int bpf_btf_prep_type_data(struct bpf_elf_ctx *ctx)
{
	const void *type_cur = ctx->btf.raw + ctx->btf.hdr->type_off;
	const void *type_end = ctx->btf.raw + ctx->btf.hdr->str_off;
	const struct btf_type *type;
	uint16_t var_len;
	int ret, kind;

	ret = bpf_btf_register_type(ctx, &btf_type_void);
	if (ret < 0)
		return ret;

	while (type_cur < type_end) {
		type = type_cur;
		type_cur += sizeof(*type);

		var_len = BTF_INFO_VLEN(type->info);
		kind = BTF_INFO_KIND(type->info);

		/* Advance past the kind-specific data that follows the
		 * fixed struct btf_type header.
		 */
		switch (kind) {
		case BTF_KIND_INT:
			type_cur += sizeof(int);
			break;
		case BTF_KIND_ARRAY:
			type_cur += sizeof(struct btf_array);
			break;
		case BTF_KIND_STRUCT:
		case BTF_KIND_UNION:
			type_cur += var_len * sizeof(struct btf_member);
			break;
		case BTF_KIND_ENUM:
			type_cur += var_len * sizeof(struct btf_enum);
			break;
		case BTF_KIND_FUNC_PROTO:
			type_cur += var_len * sizeof(struct btf_param);
			break;
		case BTF_KIND_TYPEDEF:
		case BTF_KIND_PTR:
		case BTF_KIND_FWD:
		case BTF_KIND_VOLATILE:
		case BTF_KIND_CONST:
		case BTF_KIND_RESTRICT:
		case BTF_KIND_FUNC:
			/* No payload beyond the header for these kinds. */
			break;
		default:
			fprintf(stderr, "Object has unknown BTF type: %u!\n", kind);
			return -EINVAL;
		}

		ret = bpf_btf_register_type(ctx, type);
		if (ret < 0)
			return ret;
	}

	return 0;
}
2248
/* Validate the BTF header and, when sound, index all type records. */
static int bpf_btf_prep_data(struct bpf_elf_ctx *ctx)
{
	int ret = bpf_btf_check_header(ctx);

	if (ret)
		return ret;

	return bpf_btf_prep_type_data(ctx);
}
2257
2258static void bpf_fetch_btf_end(struct bpf_elf_ctx *ctx)
2259{
2260 int fd = bpf_btf_attach(ctx);
2261
2262 if (fd < 0)
2263 return;
2264 ctx->btf_fd = fd;
2265 if (bpf_btf_prep_data(ctx) < 0) {
2266 close(ctx->btf_fd);
2267 ctx->btf_fd = 0;
2268 }
2269}
2270
2271static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
2272{
2273 return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
2274}
2275
2276static bool bpf_has_btf_data(const struct bpf_elf_ctx *ctx)
2277{
2278 return ctx->sec_btf;
2279}
2280
2281static bool bpf_has_call_data(const struct bpf_elf_ctx *ctx)
2282{
2283 return ctx->sec_text;
2284}
2285
/* First scan over all ELF sections: collect maps, license, .text,
 * symbol/string tables and .BTF, then load BTF and create all maps.
 * Program sections themselves are loaded later via
 * bpf_fetch_prog_sec(). Returns 0 on success, negative error otherwise.
 */
static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx, bool check_text_sec)
{
	struct bpf_elf_sec_data data;
	int i, ret = -1;

	/* Section 0 is the reserved NULL section; start at 1. */
	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
		ret = bpf_fill_section_data(ctx, i, &data);
		if (ret < 0)
			continue;

		if (data.sec_hdr.sh_type == SHT_PROGBITS &&
		    !strcmp(data.sec_name, ELF_SECTION_MAPS))
			ret = bpf_fetch_maps_begin(ctx, i, &data);
		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
			 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
			ret = bpf_fetch_license(ctx, i, &data);
		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
			 (data.sec_hdr.sh_flags & SHF_EXECINSTR) &&
			 !strcmp(data.sec_name, ".text") &&
			 check_text_sec)
			ret = bpf_fetch_text(ctx, i, &data);
		else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
			 !strcmp(data.sec_name, ".symtab"))
			ret = bpf_fetch_symtab(ctx, i, &data);
		else if (data.sec_hdr.sh_type == SHT_STRTAB &&
			 !strcmp(data.sec_name, ".strtab"))
			ret = bpf_fetch_strtab(ctx, i, &data);
		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
			 !strcmp(data.sec_name, ".BTF"))
			ret = bpf_fetch_btf_begin(ctx, i, &data);
		if (ret < 0) {
			fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
				i);
			return ret;
		}
	}

	/* BTF must be in the kernel before maps so map creation can
	 * reference the BTF fd and key/value type ids.
	 */
	if (bpf_has_btf_data(ctx))
		bpf_fetch_btf_end(ctx);
	if (bpf_has_map_data(ctx)) {
		ret = bpf_fetch_maps_end(ctx);
		if (ret < 0) {
			fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
			return ret;
		}

		ret = bpf_maps_attach_all(ctx);
		if (ret < 0) {
			fprintf(stderr, "Error loading maps into kernel!\n");
			return ret;
		}
	}

	return ret;
}
2341
/* Directly load the program found in @section — used when there are no
 * relocations, so insns can point straight into the ELF buffer.
 * Sets *sseen when the section exists at all, letting the caller
 * distinguish "not found" from "failed to load".
 * Returns the program fd or a negative error.
 */
static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
			  bool *sseen)
{
	struct bpf_elf_sec_data data;
	struct bpf_elf_prog prog;
	int ret, i, fd = -1;

	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
		if (ctx->sec_done[i])
			continue;

		/* Candidate sections are executable PROGBITS matching
		 * the requested section name.
		 */
		ret = bpf_fill_section_data(ctx, i, &data);
		if (ret < 0 ||
		    !(data.sec_hdr.sh_type == SHT_PROGBITS &&
		      (data.sec_hdr.sh_flags & SHF_EXECINSTR) &&
		      !strcmp(data.sec_name, section)))
			continue;

		*sseen = true;

		memset(&prog, 0, sizeof(prog));
		prog.type = ctx->type;
		prog.license = ctx->license;
		prog.size = data.sec_data->d_size;
		prog.insns_num = prog.size / sizeof(struct bpf_insn);
		prog.insns = data.sec_data->d_buf;

		fd = bpf_prog_attach(section, &prog, ctx);
		if (fd < 0)
			return fd;

		ctx->sec_done[i] = true;
		break;
	}

	return fd;
}
2379
/* Bookkeeping collected while applying relocations to one program. */
struct bpf_relo_props {
	/* Tail-call (prog array) map statistics, used to explain JIT
	 * owner mismatches when a program load fails.
	 */
	struct bpf_tail_call {
		unsigned int total;
		unsigned int jited;
	} tc;
	/* Instruction count of the main program before the shared .text
	 * image was appended (0 while not yet appended); serves as the
	 * base offset of subprogram code.
	 */
	int main_num;
};
2387
/* Apply one map relocation: rewrite the ld64 (BPF_LD | BPF_IMM |
 * BPF_DW) instruction at the relo offset to reference the created
 * map's fd, and collect tail-call stats for prog-array maps.
 */
static int bpf_apply_relo_map(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog,
			      GElf_Rel *relo, GElf_Sym *sym,
			      struct bpf_relo_props *props)
{
	unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn);
	unsigned int map_idx = sym->st_value / ctx->map_len;

	if (insn_off >= prog->insns_num)
		return -EINVAL;
	if (prog->insns[insn_off].code != (BPF_LD | BPF_IMM | BPF_DW)) {
		fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
			insn_off);
		return -EINVAL;
	}

	if (map_idx >= ARRAY_SIZE(ctx->map_fds))
		return -EINVAL;
	if (!ctx->map_fds[map_idx])
		return -EINVAL;
	if (ctx->maps[map_idx].type == BPF_MAP_TYPE_PROG_ARRAY) {
		props->tc.total++;
		/* owner.type == 0: no owner yet, so assume the current
		 * JIT setting will apply to whoever claims it.
		 */
		if (ctx->maps_ext[map_idx].owner.jited ||
		    (ctx->maps_ext[map_idx].owner.type == 0 &&
		     ctx->cfg.jit_enabled))
			props->tc.jited++;
	}

	/* Tell the kernel the immediate is a map fd, not a constant. */
	prog->insns[insn_off].src_reg = BPF_PSEUDO_MAP_FD;
	prog->insns[insn_off].imm = ctx->map_fds[map_idx];
	return 0;
}
2419
/* Apply one bpf-to-bpf call relocation: append the shared .text image
 * once behind the main program, then rebase the call instruction's
 * immediate to be relative to its own position in the merged image.
 */
static int bpf_apply_relo_call(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog,
			       GElf_Rel *relo, GElf_Sym *sym,
			       struct bpf_relo_props *props)
{
	unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn);
	struct bpf_elf_prog *prog_text = &ctx->prog_text;

	if (insn_off >= prog->insns_num)
		return -EINVAL;
	/* NOTE(review): this only rejects when BOTH opcode and src_reg
	 * mismatch (&&); verify that an instruction with matching
	 * opcode but non-pseudo src_reg really should pass here.
	 */
	if (prog->insns[insn_off].code != (BPF_JMP | BPF_CALL) &&
	    prog->insns[insn_off].src_reg != BPF_PSEUDO_CALL) {
		fprintf(stderr, "ELF contains relo data for non call instruction at offset %u! Compiler bug?!\n",
			insn_off);
		return -EINVAL;
	}

	if (!props->main_num) {
		/* First call relo of this program: grow its insn buffer
		 * and append the whole relocated .text once. On realloc
		 * failure the original buffer stays valid for the caller
		 * to free.
		 */
		struct bpf_insn *insns = realloc(prog->insns,
						 prog->size + prog_text->size);
		if (!insns)
			return -ENOMEM;

		memcpy(insns + prog->insns_num, prog_text->insns,
		       prog_text->size);
		props->main_num = prog->insns_num;
		prog->insns = insns;
		prog->insns_num += prog_text->insns_num;
		prog->size += prog_text->size;
	}

	/* imm held a .text-relative target; make it call-relative. */
	prog->insns[insn_off].imm += props->main_num - insn_off;
	return 0;
}
2453
/* Apply every relocation entry in @data_relo to @prog, dispatching on
 * whether the referenced symbol lives in the maps section (map fd
 * relocation) or in .text (bpf-to-bpf call relocation).
 */
static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
			       struct bpf_elf_sec_data *data_relo,
			       struct bpf_elf_prog *prog,
			       struct bpf_relo_props *props)
{
	GElf_Shdr *rhdr = &data_relo->sec_hdr;
	int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;

	for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
		GElf_Rel relo;
		GElf_Sym sym;
		int ret = -EIO;

		if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
			return -EIO;
		if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
			return -EIO;

		if (sym.st_shndx == ctx->sec_maps)
			ret = bpf_apply_relo_map(ctx, prog, &relo, &sym, props);
		else if (sym.st_shndx == ctx->sec_text)
			ret = bpf_apply_relo_call(ctx, prog, &relo, &sym, props);
		else
			/* Unknown target section: warn and fail via the
			 * still-negative ret below.
			 */
			fprintf(stderr, "ELF contains non-{map,call} related relo data in entry %u pointing to section %u! Compiler bug?!\n",
				relo_ent, sym.st_shndx);
		if (ret < 0)
			return ret;
	}

	return 0;
}
2485
/* Find the relocation section belonging to @section, relocate a copy
 * of the program's instructions and load the result. When @section is
 * .text itself, the relocated copy is kept in @prog (return value 0)
 * for later call relocations instead of being loaded. *lderr marks
 * genuine load/relocation errors as opposed to "section not present";
 * *sseen (optional) marks that the section exists.
 * Returns the program fd, 0 for .text, or a negative error.
 */
static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
			       bool *lderr, bool *sseen, struct bpf_elf_prog *prog)
{
	struct bpf_elf_sec_data data_relo, data_insn;
	int ret, idx, i, fd = -1;

	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
		struct bpf_relo_props props = {};

		ret = bpf_fill_section_data(ctx, i, &data_relo);
		if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
			continue;

		/* sh_info of a REL section names the section it patches. */
		idx = data_relo.sec_hdr.sh_info;

		ret = bpf_fill_section_data(ctx, idx, &data_insn);
		if (ret < 0 ||
		    !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
		      (data_insn.sec_hdr.sh_flags & SHF_EXECINSTR) &&
		      !strcmp(data_insn.sec_name, section)))
			continue;
		if (sseen)
			*sseen = true;

		memset(prog, 0, sizeof(*prog));
		prog->type = ctx->type;
		prog->license = ctx->license;
		prog->size = data_insn.sec_data->d_size;
		prog->insns_num = prog->size / sizeof(struct bpf_insn);
		prog->insns = malloc(prog->size);
		if (!prog->insns) {
			*lderr = true;
			return -ENOMEM;
		}

		/* Relocate on a private copy; the ELF data stays pristine. */
		memcpy(prog->insns, data_insn.sec_data->d_buf, prog->size);

		ret = bpf_apply_relo_data(ctx, &data_relo, prog, &props);
		if (ret < 0) {
			*lderr = true;
			/* For .text, @prog is ctx->prog_text whose insns
			 * are owned (and freed) by the caller's context.
			 */
			if (ctx->sec_text != idx)
				free(prog->insns);
			return ret;
		}
		if (ctx->sec_text == idx) {
			/* .text is only relocated here, never loaded. */
			fd = 0;
			goto out;
		}

		fd = bpf_prog_attach(section, prog, ctx);
		free(prog->insns);
		if (fd < 0) {
			*lderr = true;
			/* Explain tail-call maps whose owner JIT state
			 * conflicts with the current JIT setting.
			 */
			if (props.tc.total) {
				if (ctx->cfg.jit_enabled &&
				    props.tc.total != props.tc.jited)
					fprintf(stderr, "JIT enabled, but only %u/%u tail call maps in the program have JITed owner!\n",
						props.tc.jited, props.tc.total);
				if (!ctx->cfg.jit_enabled &&
				    props.tc.jited)
					fprintf(stderr, "JIT disabled, but %u/%u tail call maps in the program have JITed owner!\n",
						props.tc.jited, props.tc.total);
			}
			return fd;
		}
out:
		ctx->sec_done[i] = true;
		ctx->sec_done[idx] = true;
		break;
	}

	return fd;
}
2559
2560static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
2561{
2562 bool lderr = false, sseen = false;
2563 struct bpf_elf_prog prog;
2564 int ret = -1;
2565
2566 if (bpf_has_call_data(ctx)) {
2567 ret = bpf_fetch_prog_relo(ctx, ".text", &lderr, NULL,
2568 &ctx->prog_text);
2569 if (ret < 0)
2570 return ret;
2571 }
2572
2573 if (bpf_has_map_data(ctx) || bpf_has_call_data(ctx))
2574 ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen, &prog);
2575 if (ret < 0 && !lderr)
2576 ret = bpf_fetch_prog(ctx, section, &sseen);
2577 if (ret < 0 && !sseen)
2578 fprintf(stderr, "Program section \'%s\' not found in ELF file!\n",
2579 section);
2580 return ret;
2581}
2582
2583static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
2584{
2585 int i;
2586
2587 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++)
2588 if (ctx->map_fds[i] && ctx->maps[i].id == id &&
2589 ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
2590 return i;
2591 return -1;
2592}
2593
/* Diagnostic bundle for a failed tail call map update: the two fds
 * involved plus the prog/map attributes derived from procfs fdinfo,
 * used to explain owner type/JIT mismatches to the user.
 */
struct bpf_jited_aux {
	int prog_fd;			/* prog that failed to be inserted */
	int map_fd;			/* prog array map it was inserted into */
	struct bpf_prog_data prog;	/* prog type/jited info from fdinfo */
	struct bpf_map_ext map;		/* map owner info from fdinfo */
};
2600
2601static int bpf_derive_prog_from_fdinfo(int fd, struct bpf_prog_data *prog)
2602{
2603 char file[PATH_MAX], buff[4096];
2604 unsigned int val;
2605 FILE *fp;
2606
2607 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
2608 memset(prog, 0, sizeof(*prog));
2609
2610 fp = fopen(file, "r");
2611 if (!fp) {
2612 fprintf(stderr, "No procfs support?!\n");
2613 return -EIO;
2614 }
2615
2616 while (fgets(buff, sizeof(buff), fp)) {
2617 if (sscanf(buff, "prog_type:\t%u", &val) == 1)
2618 prog->type = val;
2619 else if (sscanf(buff, "prog_jited:\t%u", &val) == 1)
2620 prog->jited = val;
2621 }
2622
2623 fclose(fp);
2624 return 0;
2625}
2626
2627static int bpf_tail_call_get_aux(struct bpf_jited_aux *aux)
2628{
2629 struct bpf_elf_map tmp;
2630 int ret;
2631
2632 ret = bpf_derive_elf_map_from_fdinfo(aux->map_fd, &tmp, &aux->map);
2633 if (!ret)
2634 ret = bpf_derive_prog_from_fdinfo(aux->prog_fd, &aux->prog);
2635
2636 return ret;
2637}
2638
/*
 * Populate tail call (prog array) maps: ELF sections named
 * "<map-id>/<key>" hold programs that get loaded and inserted at
 * <key> into the prog array map declared with id <map-id>. When the
 * kernel rejects the update, fdinfo of map and prog is compared to
 * hint at owner type or JIT mismatches. Returns 0 on success.
 */
static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
{
	struct bpf_elf_sec_data data;
	uint32_t map_id, key_id;
	int fd, i, ret, idx;

	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
		if (ctx->sec_done[i])
			continue;

		ret = bpf_fill_section_data(ctx, i, &data);
		if (ret < 0)
			continue;

		/* Only sections named "<num>/<num>" are of interest.
		 * NOTE(review): "%i" expects int *, but map_id/key_id
		 * are uint32_t - works on common ABIs, formally a
		 * format mismatch; confirm before changing.
		 */
		ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id);
		if (ret != 2)
			continue;

		idx = bpf_find_map_by_id(ctx, map_id);
		if (idx < 0)
			continue;

		fd = bpf_fetch_prog_sec(ctx, data.sec_name);
		if (fd < 0)
			return -EIO;

		ret = bpf_map_update(ctx->map_fds[idx], &key_id,
				     &fd, BPF_ANY);
		if (ret < 0) {
			struct bpf_jited_aux aux = {};

			/* Capture errno before any further calls. */
			ret = -errno;
			if (errno == E2BIG) {
				fprintf(stderr, "Tail call key %u for map %u out of bounds?\n",
					key_id, map_id);
				return ret;
			}

			aux.map_fd = ctx->map_fds[idx];
			aux.prog_fd = fd;

			/* Best-effort diagnostics via procfs fdinfo;
			 * on any failure just return the update error.
			 */
			if (bpf_tail_call_get_aux(&aux))
				return ret;
			if (!aux.map.owner.type)
				return ret;

			if (aux.prog.type != aux.map.owner.type)
				fprintf(stderr, "Tail call map owned by prog type %u, but prog type is %u!\n",
					aux.map.owner.type, aux.prog.type);
			if (aux.prog.jited != aux.map.owner.jited)
				fprintf(stderr, "Tail call map %s jited, but prog %s!\n",
					aux.map.owner.jited ? "is" : "not",
					aux.prog.jited ? "is" : "not");
			return ret;
		}

		ctx->sec_done[i] = true;
	}

	return 0;
}
2700
2701static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
2702{
2703 struct stat st;
2704 int ret;
2705
2706 memset(&ctx->stat, 0, sizeof(ctx->stat));
2707
2708 ret = fstat(ctx->obj_fd, &st);
2709 if (ret < 0) {
2710 fprintf(stderr, "Stat of elf file failed: %s\n",
2711 strerror(errno));
2712 return;
2713 }
2714
2715 ctx->stat.st_dev = st.st_dev;
2716 ctx->stat.st_ino = st.st_ino;
2717}
2718
2719static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
2720{
2721 char buff[PATH_MAX];
2722
2723 while (fgets(buff, sizeof(buff), fp)) {
2724 char *ptr = buff;
2725
2726 while (*ptr == ' ' || *ptr == '\t')
2727 ptr++;
2728
2729 if (*ptr == '#' || *ptr == '\n' || *ptr == 0)
2730 continue;
2731
2732 if (sscanf(ptr, "%i %s\n", id, path) != 2 &&
2733 sscanf(ptr, "%i %s #", id, path) != 2) {
2734 strcpy(path, ptr);
2735 return -1;
2736 }
2737
2738 return 1;
2739 }
2740
2741 return 0;
2742}
2743
2744static bool bpf_pinning_reserved(uint32_t pinning)
2745{
2746 switch (pinning) {
2747 case PIN_NONE:
2748 case PIN_OBJECT_NS:
2749 case PIN_GLOBAL_NS:
2750 return true;
2751 default:
2752 return false;
2753 }
2754}
2755
2756static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
2757{
2758 struct bpf_hash_entry *entry;
2759 char subpath[PATH_MAX] = {};
2760 uint32_t pinning;
2761 FILE *fp;
2762 int ret;
2763
2764 fp = fopen(db_file, "r");
2765 if (!fp)
2766 return;
2767
2768 while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) {
2769 if (ret == -1) {
2770 fprintf(stderr, "Database %s is corrupted at: %s\n",
2771 db_file, subpath);
2772 fclose(fp);
2773 return;
2774 }
2775
2776 if (bpf_pinning_reserved(pinning)) {
2777 fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n",
2778 db_file, pinning);
2779 continue;
2780 }
2781
2782 entry = malloc(sizeof(*entry));
2783 if (!entry) {
2784 fprintf(stderr, "No memory left for db entry!\n");
2785 continue;
2786 }
2787
2788 entry->pinning = pinning;
2789 entry->subpath = strdup(subpath);
2790 if (!entry->subpath) {
2791 fprintf(stderr, "No memory left for db entry!\n");
2792 free(entry);
2793 continue;
2794 }
2795
2796 entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
2797 ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry;
2798 }
2799
2800 fclose(fp);
2801}
2802
2803static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
2804{
2805 struct bpf_hash_entry *entry;
2806 int i;
2807
2808 for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) {
2809 while ((entry = ctx->ht[i]) != NULL) {
2810 ctx->ht[i] = entry->next;
2811 free((char *)entry->subpath);
2812 free(entry);
2813 }
2814 }
2815}
2816
2817static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
2818{
2819 if (ctx->elf_hdr.e_type != ET_REL ||
2820 (ctx->elf_hdr.e_machine != EM_NONE &&
2821 ctx->elf_hdr.e_machine != EM_BPF) ||
2822 ctx->elf_hdr.e_version != EV_CURRENT) {
2823 fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
2824 return -EINVAL;
2825 }
2826
2827 switch (ctx->elf_hdr.e_ident[EI_DATA]) {
2828 default:
2829 fprintf(stderr, "ELF format error, wrong endianness info?\n");
2830 return -EINVAL;
2831 case ELFDATA2LSB:
2832 if (htons(1) == 1) {
2833 fprintf(stderr,
2834 "We are big endian, eBPF object is little endian!\n");
2835 return -EIO;
2836 }
2837 break;
2838 case ELFDATA2MSB:
2839 if (htons(1) != 1) {
2840 fprintf(stderr,
2841 "We are little endian, eBPF object is big endian!\n");
2842 return -EIO;
2843 }
2844 break;
2845 }
2846
2847 return 0;
2848}
2849
2850static void bpf_get_cfg(struct bpf_elf_ctx *ctx)
2851{
2852 static const char *path_jit = "/proc/sys/net/core/bpf_jit_enable";
2853 int fd;
2854
2855 fd = open(path_jit, O_RDONLY);
2856 if (fd >= 0) {
2857 char tmp[16] = {};
2858
2859 if (read(fd, tmp, sizeof(tmp)) > 0)
2860 ctx->cfg.jit_enabled = atoi(tmp);
2861 close(fd);
2862 }
2863}
2864
2865static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
2866 enum bpf_prog_type type, __u32 ifindex,
2867 bool verbose)
2868{
2869 uint8_t tmp[20];
2870 int ret;
2871
2872 if (elf_version(EV_CURRENT) == EV_NONE)
2873 return -EINVAL;
2874
2875 bpf_init_env();
2876
2877 memset(ctx, 0, sizeof(*ctx));
2878 bpf_get_cfg(ctx);
2879
2880 ret = bpf_obj_hash(pathname, tmp, sizeof(tmp));
2881 if (ret)
2882 ctx->noafalg = true;
2883 else
2884 hexstring_n2a(tmp, sizeof(tmp), ctx->obj_uid,
2885 sizeof(ctx->obj_uid));
2886
2887 ctx->verbose = verbose;
2888 ctx->type = type;
2889 ctx->ifindex = ifindex;
2890
2891 ctx->obj_fd = open(pathname, O_RDONLY);
2892 if (ctx->obj_fd < 0)
2893 return ctx->obj_fd;
2894
2895 ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
2896 if (!ctx->elf_fd) {
2897 ret = -EINVAL;
2898 goto out_fd;
2899 }
2900
2901 if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
2902 ret = -EINVAL;
2903 goto out_fd;
2904 }
2905
2906 if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
2907 &ctx->elf_hdr) {
2908 ret = -EIO;
2909 goto out_elf;
2910 }
2911
2912 ret = bpf_elf_check_ehdr(ctx);
2913 if (ret < 0)
2914 goto out_elf;
2915
2916 ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
2917 sizeof(*(ctx->sec_done)));
2918 if (!ctx->sec_done) {
2919 ret = -ENOMEM;
2920 goto out_elf;
2921 }
2922
2923 if (ctx->verbose && bpf_log_realloc(ctx)) {
2924 ret = -ENOMEM;
2925 goto out_free;
2926 }
2927
2928 bpf_save_finfo(ctx);
2929 bpf_hash_init(ctx, CONFDIR "/bpf_pinning");
2930
2931 return 0;
2932out_free:
2933 free(ctx->sec_done);
2934out_elf:
2935 elf_end(ctx->elf_fd);
2936out_fd:
2937 close(ctx->obj_fd);
2938 return ret;
2939}
2940
2941static int bpf_maps_count(struct bpf_elf_ctx *ctx)
2942{
2943 int i, count = 0;
2944
2945 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
2946 if (!ctx->map_fds[i])
2947 break;
2948 count++;
2949 }
2950
2951 return count;
2952}
2953
2954static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
2955{
2956 int i;
2957
2958 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
2959 if (ctx->map_fds[i])
2960 close(ctx->map_fds[i]);
2961 }
2962
2963 if (ctx->btf_fd)
2964 close(ctx->btf_fd);
2965 free(ctx->btf.types);
2966}
2967
2968static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
2969{
2970 if (failure)
2971 bpf_maps_teardown(ctx);
2972
2973 bpf_hash_destroy(ctx);
2974
2975 free(ctx->prog_text.insns);
2976 free(ctx->sec_done);
2977 free(ctx->log);
2978
2979 elf_end(ctx->elf_fd);
2980 close(ctx->obj_fd);
2981}
2982
2983static struct bpf_elf_ctx __ctx;
2984
2985static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
2986 const char *section, __u32 ifindex, bool verbose)
2987{
2988 struct bpf_elf_ctx *ctx = &__ctx;
2989 int fd = 0, ret;
2990
2991 ret = bpf_elf_ctx_init(ctx, pathname, type, ifindex, verbose);
2992 if (ret < 0) {
2993 fprintf(stderr, "Cannot initialize ELF context!\n");
2994 return ret;
2995 }
2996
2997 ret = bpf_fetch_ancillary(ctx, strcmp(section, ".text"));
2998 if (ret < 0) {
2999 fprintf(stderr, "Error fetching ELF ancillary data!\n");
3000 goto out;
3001 }
3002
3003 fd = bpf_fetch_prog_sec(ctx, section);
3004 if (fd < 0) {
3005 fprintf(stderr, "Error fetching program/map!\n");
3006 ret = fd;
3007 goto out;
3008 }
3009
3010 ret = bpf_fill_prog_arrays(ctx);
3011 if (ret < 0)
3012 fprintf(stderr, "Error filling program arrays!\n");
3013out:
3014 bpf_elf_ctx_destroy(ctx, ret < 0);
3015 if (ret < 0) {
3016 if (fd >= 0)
3017 close(fd);
3018 return ret;
3019 }
3020
3021 return fd;
3022}
3023
/*
 * Send all map fds described by @aux over the connected unix socket
 * @fd, in datagrams carrying at most BPF_SCM_MAX_FDS fds each via
 * SCM_RIGHTS, alongside the per-map metadata entries. Returns 0 on
 * success, otherwise the failing sendmsg() result (or -1 for 0).
 */
static int
bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
		 const struct bpf_map_data *aux, unsigned int entries)
{
	struct bpf_map_set_msg msg = {
		.aux.uds_ver = BPF_SCM_AUX_VER,
		.aux.num_ent = entries,
	};
	int *cmsg_buf, min_fd;
	char *amsg_buf;
	int i;

	strlcpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
	memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));

	/* cmsg_buf points into the SCM_RIGHTS fd payload of the message,
	 * amsg_buf at the per-map metadata entry area.
	 */
	cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
	amsg_buf = (char *)msg.aux.ent;

	for (i = 0; i < entries; i += min_fd) {
		int ret;

		/* Chunk size: up to BPF_SCM_MAX_FDS fds per datagram. */
		min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
		bpf_map_set_init_single(&msg, min_fd);

		memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
		memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);

		ret = sendmsg(fd, &msg.hdr, 0);
		if (ret <= 0)
			return ret ? : -1;
	}

	return 0;
}
3058
/*
 * Receive map fds plus their bpf_map_aux over the bound unix socket
 * @fd, the counterpart of bpf_map_set_send(). The real number of
 * entries is learned from the first datagram's aux.num_ent (capped at
 * @entries); each datagram's fd count is derived from its SCM_RIGHTS
 * control message length. Returns 0 on success.
 */
static int
bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
		 unsigned int entries)
{
	struct bpf_map_set_msg msg;
	int *cmsg_buf, min_fd;
	char *amsg_buf, *mmsg_buf;
	unsigned int needed = 1;
	int i;

	/* cmsg_buf: SCM_RIGHTS fd payload; amsg_buf: per-map entries;
	 * mmsg_buf: start of the aux header inside the message.
	 */
	cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
	amsg_buf = (char *)msg.aux.ent;
	mmsg_buf = (char *)&msg.aux;

	/* needed starts at 1 so the first datagram is always read, then
	 * tracks the sender's announced entry count.
	 */
	for (i = 0; i < min(entries, needed); i += min_fd) {
		struct cmsghdr *cmsg;
		int ret;

		min_fd = min(entries, entries - i);
		bpf_map_set_init_single(&msg, min_fd);

		ret = recvmsg(fd, &msg.hdr, 0);
		if (ret <= 0)
			return ret ? : -1;

		cmsg = CMSG_FIRSTHDR(&msg.hdr);
		if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
			return -EINVAL;
		if (msg.hdr.msg_flags & MSG_CTRUNC)
			return -EIO;
		if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
			return -ENOSYS;

		/* Number of fds actually delivered in this datagram. */
		min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
		if (min_fd > entries || min_fd <= 0)
			return -EINVAL;

		memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
		memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
		memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));

		needed = aux->num_ent;
	}

	return 0;
}
3105
3106int bpf_send_map_fds(const char *path, const char *obj)
3107{
3108 struct bpf_elf_ctx *ctx = &__ctx;
3109 struct sockaddr_un addr = { .sun_family = AF_UNIX };
3110 struct bpf_map_data bpf_aux = {
3111 .fds = ctx->map_fds,
3112 .ent = ctx->maps,
3113 .st = &ctx->stat,
3114 .obj = obj,
3115 };
3116 int fd, ret = -1;
3117
3118 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
3119 if (fd < 0) {
3120 fprintf(stderr, "Cannot open socket: %s\n",
3121 strerror(errno));
3122 goto out;
3123 }
3124
3125 strlcpy(addr.sun_path, path, sizeof(addr.sun_path));
3126
3127 ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
3128 if (ret < 0) {
3129 fprintf(stderr, "Cannot connect to %s: %s\n",
3130 path, strerror(errno));
3131 goto out;
3132 }
3133
3134 ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
3135 bpf_maps_count(ctx));
3136 if (ret < 0)
3137 fprintf(stderr, "Cannot send fds to %s: %s\n",
3138 path, strerror(errno));
3139
3140 bpf_maps_teardown(ctx);
3141out:
3142 if (fd >= 0)
3143 close(fd);
3144 return ret;
3145}
3146
3147int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
3148 unsigned int entries)
3149{
3150 struct sockaddr_un addr = { .sun_family = AF_UNIX };
3151 int fd, ret = -1;
3152
3153 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
3154 if (fd < 0) {
3155 fprintf(stderr, "Cannot open socket: %s\n",
3156 strerror(errno));
3157 goto out;
3158 }
3159
3160 strlcpy(addr.sun_path, path, sizeof(addr.sun_path));
3161
3162 ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
3163 if (ret < 0) {
3164 fprintf(stderr, "Cannot bind to socket: %s\n",
3165 strerror(errno));
3166 goto out;
3167 }
3168
3169 ret = bpf_map_set_recv(fd, fds, aux, entries);
3170 if (ret < 0)
3171 fprintf(stderr, "Cannot recv fds from %s: %s\n",
3172 path, strerror(errno));
3173
3174 unlink(addr.sun_path);
3175
3176out:
3177 if (fd >= 0)
3178 close(fd);
3179 return ret;
3180}
3181
3182#ifdef HAVE_LIBBPF
3183
3184
3185
3186
3187int iproute2_bpf_elf_ctx_init(struct bpf_cfg_in *cfg)
3188{
3189 struct bpf_elf_ctx *ctx = &__ctx;
3190
3191 return bpf_elf_ctx_init(ctx, cfg->object, cfg->type, cfg->ifindex, cfg->verbose);
3192}
3193
3194int iproute2_bpf_fetch_ancillary(void)
3195{
3196 struct bpf_elf_ctx *ctx = &__ctx;
3197 struct bpf_elf_sec_data data;
3198 int i, ret = 0;
3199
3200 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
3201 ret = bpf_fill_section_data(ctx, i, &data);
3202 if (ret < 0)
3203 continue;
3204
3205 if (data.sec_hdr.sh_type == SHT_PROGBITS &&
3206 !strcmp(data.sec_name, ELF_SECTION_MAPS))
3207 ret = bpf_fetch_maps_begin(ctx, i, &data);
3208 else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
3209 !strcmp(data.sec_name, ".symtab"))
3210 ret = bpf_fetch_symtab(ctx, i, &data);
3211 else if (data.sec_hdr.sh_type == SHT_STRTAB &&
3212 !strcmp(data.sec_name, ".strtab"))
3213 ret = bpf_fetch_strtab(ctx, i, &data);
3214 if (ret < 0) {
3215 fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
3216 i);
3217 return ret;
3218 }
3219 }
3220
3221 if (bpf_has_map_data(ctx)) {
3222 ret = bpf_fetch_maps_end(ctx);
3223 if (ret < 0) {
3224 fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
3225 return ret;
3226 }
3227 }
3228
3229 return ret;
3230}
3231
3232int iproute2_get_root_path(char *root_path, size_t len)
3233{
3234 struct bpf_elf_ctx *ctx = &__ctx;
3235 int ret = 0;
3236
3237 snprintf(root_path, len, "%s/%s",
3238 bpf_get_work_dir(ctx->type), BPF_DIR_GLOBALS);
3239
3240 ret = mkdir(root_path, S_IRWXU);
3241 if (ret && errno != EEXIST) {
3242 fprintf(stderr, "mkdir %s failed: %s\n", root_path, strerror(errno));
3243 return ret;
3244 }
3245
3246 return 0;
3247}
3248
/*
 * libbpf glue: decide whether the map named @libbpf_map_name was
 * declared with iproute2-style pinning and, if so, build the pin path
 * into @pathname (expected to be PATH_MAX bytes) after creating the
 * needed directories. Returns true when the map should be pinned at
 * @pathname, false otherwise (including on any error).
 */
bool iproute2_is_pin_map(const char *libbpf_map_name, char *pathname)
{
	struct bpf_elf_ctx *ctx = &__ctx;
	const char *map_name, *tmp;
	unsigned int pinning;
	int i, ret = 0;

	for (i = 0; i < ctx->map_num; i++) {
		/* PIN_OBJECT_NS pin names need the object hash, which
		 * requires kernel AF_ALG support.
		 */
		if (ctx->maps[i].pinning == PIN_OBJECT_NS &&
		    ctx->noafalg) {
			fprintf(stderr, "Missing kernel AF_ALG support for PIN_OBJECT_NS!\n");
			return false;
		}

		map_name = bpf_map_fetch_name(ctx, i);
		if (!map_name) {
			return false;
		}

		if (strcmp(libbpf_map_name, map_name))
			continue;

		pinning = ctx->maps[i].pinning;

		if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
			return false;

		/* Create the per-object or custom pin directory. */
		if (pinning == PIN_OBJECT_NS)
			ret = bpf_make_obj_path(ctx);
		else if ((tmp = bpf_custom_pinning(ctx, pinning)))
			ret = bpf_make_custom_path(ctx, tmp);
		if (ret < 0)
			return false;

		bpf_make_pathname(pathname, PATH_MAX, map_name, ctx, pinning);

		return true;
	}

	return false;
}
3290
/*
 * libbpf glue: check whether @libbpf_map_name is an inner map of some
 * map-in-map declaration. On a match, copy the inner map into @imap,
 * its outer map into @omap, and the outer map's name (NUL-terminated)
 * into @omap_name. Returns true only when a complete inner/outer pair
 * was found.
 */
bool iproute2_is_map_in_map(const char *libbpf_map_name, struct bpf_elf_map *imap,
			    struct bpf_elf_map *omap, char *omap_name)
{
	struct bpf_elf_ctx *ctx = &__ctx;
	const char *inner_map_name, *outer_map_name;
	int i, j;

	for (i = 0; i < ctx->map_num; i++) {
		inner_map_name = bpf_map_fetch_name(ctx, i);
		if (!inner_map_name) {
			return false;
		}

		if (strcmp(libbpf_map_name, inner_map_name))
			continue;

		/* Inner map candidates carry an id of their own but no
		 * inner_id (that marks outer maps).
		 */
		if (!ctx->maps[i].id ||
		    ctx->maps[i].inner_id)
			continue;

		*imap = ctx->maps[i];

		/* Find the outer map referencing this inner map's id. */
		for (j = 0; j < ctx->map_num; j++) {
			if (!bpf_is_map_in_map_type(&ctx->maps[j]))
				continue;
			if (ctx->maps[j].inner_id != ctx->maps[i].id)
				continue;

			*omap = ctx->maps[j];
			outer_map_name = bpf_map_fetch_name(ctx, j);
			if (!outer_map_name)
				return false;

			/* Copy including the NUL terminator. */
			memcpy(omap_name, outer_map_name, strlen(outer_map_name) + 1);

			return true;
		}
	}

	return false;
}
3332
3333int iproute2_find_map_name_by_id(unsigned int map_id, char *name)
3334{
3335 struct bpf_elf_ctx *ctx = &__ctx;
3336 const char *map_name;
3337 int i, idx = -1;
3338
3339 for (i = 0; i < ctx->map_num; i++) {
3340 if (ctx->maps[i].id == map_id &&
3341 ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) {
3342 idx = i;
3343 break;
3344 }
3345 }
3346
3347 if (idx < 0)
3348 return -1;
3349
3350 map_name = bpf_map_fetch_name(ctx, idx);
3351 if (!map_name)
3352 return -1;
3353
3354 memcpy(name, map_name, strlen(map_name) + 1);
3355 return 0;
3356}
3357#endif
3358#endif
3359