#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <limits.h>
#include <assert.h>

#ifdef HAVE_ELF
#include <libelf.h>
#include <gelf.h>
#endif

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <sys/vfs.h>
#include <sys/mount.h>
#include <sys/syscall.h>
#include <sys/sendfile.h>
#include <sys/resource.h>

#include <arpa/inet.h>

#include "utils.h"
#include "json_print.h"

#include "bpf_util.h"
#include "bpf_elf.h"
#include "bpf_scm.h"

struct bpf_prog_meta {
	const char *type;
	const char *subdir;
	const char *section;
	bool may_uds_export;
};

static const enum bpf_prog_type __bpf_types[] = {
	BPF_PROG_TYPE_SCHED_CLS,
	BPF_PROG_TYPE_SCHED_ACT,
	BPF_PROG_TYPE_XDP,
	BPF_PROG_TYPE_LWT_IN,
	BPF_PROG_TYPE_LWT_OUT,
	BPF_PROG_TYPE_LWT_XMIT,
};

static const struct bpf_prog_meta __bpf_prog_meta[] = {
	[BPF_PROG_TYPE_SCHED_CLS] = {
		.type = "cls",
		.subdir = "tc",
		.section = ELF_SECTION_CLASSIFIER,
		.may_uds_export = true,
	},
	[BPF_PROG_TYPE_SCHED_ACT] = {
		.type = "act",
		.subdir = "tc",
		.section = ELF_SECTION_ACTION,
		.may_uds_export = true,
	},
	[BPF_PROG_TYPE_XDP] = {
		.type = "xdp",
		.subdir = "xdp",
		.section = ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_IN] = {
		.type = "lwt_in",
		.subdir = "ip",
		.section = ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_OUT] = {
		.type = "lwt_out",
		.subdir = "ip",
		.section = ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_XMIT] = {
		.type = "lwt_xmit",
		.subdir = "ip",
		.section = ELF_SECTION_PROG,
	},
	[BPF_PROG_TYPE_LWT_SEG6LOCAL] = {
		.type = "lwt_seg6local",
		.subdir = "ip",
		.section = ELF_SECTION_PROG,
	},
};

static const char *bpf_prog_to_subdir(enum bpf_prog_type type)
{
	assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
	       __bpf_prog_meta[type].subdir);
	return __bpf_prog_meta[type].subdir;
}

const char *bpf_prog_to_default_section(enum bpf_prog_type type)
{
	assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
	       __bpf_prog_meta[type].section);
	return __bpf_prog_meta[type].section;
}

#ifdef HAVE_ELF
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, __u32 ifindex, bool verbose);
#else
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, __u32 ifindex, bool verbose)
{
	fprintf(stderr, "No ELF library support compiled in.\n");
	errno = ENOSYS;
	return -1;
}
#endif

static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	return (__u64)(unsigned long)ptr;
}

static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
#ifdef __NR_bpf
	return syscall(__NR_bpf, cmd, attr, size);
#else
	fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
	errno = ENOSYS;
	return -1;
#endif
}

static int bpf_map_update(int fd, const void *key, const void *value,
			  uint64_t flags)
{
	union bpf_attr attr = {};

	attr.map_fd = fd;
	attr.key = bpf_ptr_to_u64(key);
	attr.value = bpf_ptr_to_u64(value);
	attr.flags = flags;

	return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}

static int bpf_prog_fd_by_id(uint32_t id)
{
	union bpf_attr attr = {};

	attr.prog_id = id;

	return bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
}

static int bpf_prog_info_by_fd(int fd, struct bpf_prog_info *info,
			       uint32_t *info_len)
{
	union bpf_attr attr = {};
	int ret;

	attr.info.bpf_fd = fd;
	attr.info.info = bpf_ptr_to_u64(info);
	attr.info.info_len = *info_len;

	*info_len = 0;
	ret = bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
	if (!ret)
		*info_len = attr.info.info_len;

	return ret;
}

int bpf_dump_prog_info(FILE *f, uint32_t id)
{
	struct bpf_prog_info info = {};
	uint32_t len = sizeof(info);
	int fd, ret, dump_ok = 0;
	SPRINT_BUF(tmp);

	open_json_object("prog");
	print_uint(PRINT_ANY, "id", "id %u ", id);

	fd = bpf_prog_fd_by_id(id);
	if (fd < 0)
		goto out;

	ret = bpf_prog_info_by_fd(fd, &info, &len);
	if (!ret && len) {
		int jited = !!info.jited_prog_len;

		print_string(PRINT_ANY, "tag", "tag %s ",
			     hexstring_n2a(info.tag, sizeof(info.tag),
					   tmp, sizeof(tmp)));
		print_uint(PRINT_JSON, "jited", NULL, jited);
		if (jited && !is_json_context())
			fprintf(f, "jited ");
		dump_ok = 1;
	}

	close(fd);
out:
	close_json_object();
	return dump_ok;
}

static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
			    char **bpf_string, bool *need_release,
			    const char separator)
{
	char sp;

	if (from_file) {
		size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
		char *tmp_string, *pos, c_prev = ' ';
		FILE *fp;
		int c;

		tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
		tmp_string = pos = calloc(1, tmp_len);
		if (tmp_string == NULL)
			return -ENOMEM;

		fp = fopen(arg, "r");
		if (fp == NULL) {
			perror("Cannot fopen");
			free(tmp_string);
			return -ENOENT;
		}

		while ((c = fgetc(fp)) != EOF) {
			switch (c) {
			case '\n':
				if (c_prev != ',')
					*(pos++) = ',';
				c_prev = ',';
				break;
			case ' ':
			case '\t':
				if (c_prev != ' ')
					*(pos++) = c;
				c_prev = ' ';
				break;
			default:
				*(pos++) = c;
				c_prev = c;
			}
			if (pos - tmp_string == tmp_len)
				break;
		}

		if (!feof(fp)) {
			free(tmp_string);
			fclose(fp);
			return -E2BIG;
		}

		fclose(fp);
		*pos = 0;

		*need_release = true;
		*bpf_string = tmp_string;
	} else {
		*need_release = false;
		*bpf_string = arg;
	}

	if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 ||
	    sp != separator) {
		if (*need_release)
			free(*bpf_string);
		return -EINVAL;
	}

	return 0;
}

static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
			 bool from_file)
{
	char *bpf_string, *token, separator = ',';
	int ret = 0, i = 0;
	bool need_release;
	__u16 bpf_len = 0;

	if (argc < 1)
		return -EINVAL;
	if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
			     &need_release, separator))
		return -EINVAL;
	if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
		ret = -EINVAL;
		goto out;
	}

	token = bpf_string;
	while ((token = strchr(token, separator)) && (++token)[0]) {
		if (i >= bpf_len) {
			fprintf(stderr, "Real program length exceeds encoded length parameter!\n");
			ret = -EINVAL;
			goto out;
		}

		if (sscanf(token, "%hu %hhu %hhu %u,",
			   &bpf_ops[i].code, &bpf_ops[i].jt,
			   &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
			fprintf(stderr, "Error at instruction %d!\n", i);
			ret = -EINVAL;
			goto out;
		}

		i++;
	}

	if (i != bpf_len) {
		fprintf(stderr, "Parsed program length is less than encoded length parameter!\n");
		ret = -EINVAL;
		goto out;
	}
	ret = bpf_len;
out:
	if (need_release)
		free(bpf_string);

	return ret;
}

void bpf_print_ops(struct rtattr *bpf_ops, __u16 len)
{
	struct sock_filter *ops = RTA_DATA(bpf_ops);
	int i;

	if (len == 0)
		return;

	open_json_object("bytecode");
	print_uint(PRINT_ANY, "length", "bytecode \'%u,", len);
	open_json_array(PRINT_JSON, "insns");

	for (i = 0; i < len; i++) {
		open_json_object(NULL);
		print_hu(PRINT_ANY, "code", "%hu ", ops[i].code);
		print_hhu(PRINT_ANY, "jt", "%hhu ", ops[i].jt);
		print_hhu(PRINT_ANY, "jf", "%hhu ", ops[i].jf);
		if (i == len - 1)
			print_uint(PRINT_ANY, "k", "%u\'", ops[i].k);
		else
			print_uint(PRINT_ANY, "k", "%u,", ops[i].k);
		close_json_object();
	}

	close_json_array(PRINT_JSON, NULL);
	close_json_object();
}

static void bpf_map_pin_report(const struct bpf_elf_map *pin,
			       const struct bpf_elf_map *obj)
{
	fprintf(stderr, "Map specification differs from pinned file!\n");

	if (obj->type != pin->type)
		fprintf(stderr, " - Type: %u (obj) != %u (pin)\n",
			obj->type, pin->type);
	if (obj->size_key != pin->size_key)
		fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n",
			obj->size_key, pin->size_key);
	if (obj->size_value != pin->size_value)
		fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n",
			obj->size_value, pin->size_value);
	if (obj->max_elem != pin->max_elem)
		fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n",
			obj->max_elem, pin->max_elem);
	if (obj->flags != pin->flags)
		fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n",
			obj->flags, pin->flags);

	fprintf(stderr, "\n");
}

struct bpf_prog_data {
	unsigned int type;
	unsigned int jited;
};

struct bpf_map_ext {
	struct bpf_prog_data owner;
	unsigned int btf_id_key;
	unsigned int btf_id_val;
};

static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map,
					  struct bpf_map_ext *ext)
{
	unsigned int val, owner_type = 0, owner_jited = 0;
	char file[PATH_MAX], buff[4096];
	FILE *fp;

	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
	memset(map, 0, sizeof(*map));

	fp = fopen(file, "r");
	if (!fp) {
		fprintf(stderr, "No procfs support?!\n");
		return -EIO;
	}

	while (fgets(buff, sizeof(buff), fp)) {
		if (sscanf(buff, "map_type:\t%u", &val) == 1)
			map->type = val;
		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
			map->size_key = val;
		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
			map->size_value = val;
		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
			map->max_elem = val;
		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
			map->flags = val;
		else if (sscanf(buff, "owner_prog_type:\t%i", &val) == 1)
			owner_type = val;
		else if (sscanf(buff, "owner_jited:\t%i", &val) == 1)
			owner_jited = val;
	}

	fclose(fp);
	if (ext) {
		memset(ext, 0, sizeof(*ext));
		ext->owner.type = owner_type;
		ext->owner.jited = owner_jited;
	}

	return 0;
}

static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
				    struct bpf_map_ext *ext, int length,
				    enum bpf_prog_type type)
{
	struct bpf_elf_map tmp, zero = {};
	int ret;

	ret = bpf_derive_elf_map_from_fdinfo(fd, &tmp, ext);
	if (ret < 0)
		return ret;

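	/* A pinned program array map may already be owned by a program of a
	 * different type; only warn here, the mismatch is not treated as
	 * fatal at this point.
	 */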
	if (ext->owner.type && ext->owner.type != type)
		fprintf(stderr, "Program array map owner types differ: %u (obj) != %u (pin)\n",
			type, ext->owner.type);

	if (!memcmp(&tmp, map, length)) {
		return 0;
	} else {
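		/* If the fdinfo-derived spec is all zeroes, the kernel likely
		 * lacks eBPF fdinfo support, so there is nothing meaningful
		 * to compare against; accept the pinned map as-is.
		 */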
		if (!memcmp(&tmp, &zero, length))
			return 0;

		bpf_map_pin_report(&tmp, map);
		return -EINVAL;
	}
}

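/* Mount a bpf filesystem at the given target. The mount point is first made
 * private (bind-mounting it onto itself once if needed so that the
 * MS_PRIVATE remount can succeed) before mounting bpf with mode 0700.
 */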
static int bpf_mnt_fs(const char *target)
{
	bool bind_done = false;

	while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
		if (errno != EINVAL || bind_done) {
			fprintf(stderr, "mount --make-private %s failed: %s\n",
				target, strerror(errno));
			return -1;
		}

		if (mount(target, target, "none", MS_BIND, NULL)) {
			fprintf(stderr, "mount --bind %s %s failed: %s\n",
				target, target, strerror(errno));
			return -1;
		}

		bind_done = true;
	}

	if (mount("bpf", target, "bpf", 0, "mode=0700")) {
		fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
			target, strerror(errno));
		return -1;
	}

	return 0;
}

static int bpf_mnt_check_target(const char *target)
{
	int ret;

	ret = mkdir(target, S_IRWXU);
	if (ret) {
		if (errno == EEXIST)
			return 0;
		fprintf(stderr, "mkdir %s failed: %s\n", target,
			strerror(errno));
	}

	return ret;
}

static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
{
	struct statfs st_fs;

	if (statfs(mnt, &st_fs) < 0)
		return -ENOENT;
	if ((unsigned long)st_fs.f_type != magic)
		return -ENOENT;

	return 0;
}

static const char *bpf_find_mntpt_single(unsigned long magic, char *mnt,
					 int len, const char *mntpt)
{
	int ret;

	ret = bpf_valid_mntpt(mntpt, magic);
	if (!ret) {
		strlcpy(mnt, mntpt, len);
		return mnt;
	}

	return NULL;
}

static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
				  char *mnt, int len,
				  const char * const *known_mnts)
{
	const char * const *ptr;
	char type[100];
	FILE *fp;

	if (known_mnts) {
		ptr = known_mnts;
		while (*ptr) {
			if (bpf_find_mntpt_single(magic, mnt, len, *ptr))
				return mnt;
			ptr++;
		}
	}

	if (len != PATH_MAX)
		return NULL;

	fp = fopen("/proc/mounts", "r");
	if (fp == NULL)
		return NULL;

	while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
		      mnt, type) == 2) {
		if (strcmp(type, fstype) == 0)
			break;
	}

	fclose(fp);
	if (strcmp(type, fstype) != 0)
		return NULL;

	return mnt;
}

int bpf_trace_pipe(void)
{
	char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
	static const char * const tracefs_known_mnts[] = {
		TRACE_DIR_MNT,
		"/sys/kernel/debug/tracing",
		"/tracing",
		"/trace",
		0,
	};
	int fd_in, fd_out = STDERR_FILENO;
	char tpipe[PATH_MAX];
	const char *mnt;

	mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
			     sizeof(tracefs_mnt), tracefs_known_mnts);
	if (!mnt) {
		fprintf(stderr, "tracefs not mounted?\n");
		return -1;
	}

	snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);

	fd_in = open(tpipe, O_RDONLY);
	if (fd_in < 0)
		return -1;

	fprintf(stderr, "Running! Hang up with ^C!\n\n");
	while (1) {
		static char buff[4096];
		ssize_t ret;

		ret = read(fd_in, buff, sizeof(buff));
		if (ret > 0 && write(fd_out, buff, ret) == ret)
			continue;
		break;
	}

	close(fd_in);
	return -1;
}

static int bpf_gen_global(const char *bpf_sub_dir)
{
	char bpf_glo_dir[PATH_MAX];
	int ret;

	snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s/",
		 bpf_sub_dir, BPF_DIR_GLOBALS);

	ret = mkdir(bpf_glo_dir, S_IRWXU);
	if (ret && errno != EEXIST) {
		fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir,
			strerror(errno));
		return ret;
	}

	return 0;
}

static int bpf_gen_master(const char *base, const char *name)
{
	char bpf_sub_dir[PATH_MAX + NAME_MAX + 1];
	int ret;

	snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s/", base, name);

	ret = mkdir(bpf_sub_dir, S_IRWXU);
	if (ret && errno != EEXIST) {
		fprintf(stderr, "mkdir %s failed: %s\n", bpf_sub_dir,
			strerror(errno));
		return ret;
	}

	return bpf_gen_global(bpf_sub_dir);
}

static int bpf_slave_via_bind_mnt(const char *full_name,
				  const char *full_link)
{
	int ret;

	ret = mkdir(full_name, S_IRWXU);
	if (ret) {
		assert(errno != EEXIST);
		fprintf(stderr, "mkdir %s failed: %s\n", full_name,
			strerror(errno));
		return ret;
	}

	ret = mount(full_link, full_name, "none", MS_BIND, NULL);
	if (ret) {
		rmdir(full_name);
		fprintf(stderr, "mount --bind %s %s failed: %s\n",
			full_link, full_name, strerror(errno));
	}

	return ret;
}

static int bpf_gen_slave(const char *base, const char *name,
			 const char *link)
{
	char bpf_lnk_dir[PATH_MAX + NAME_MAX + 1];
	char bpf_sub_dir[PATH_MAX + NAME_MAX];
	struct stat sb = {};
	int ret;

	snprintf(bpf_lnk_dir, sizeof(bpf_lnk_dir), "%s%s/", base, link);
	snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s", base, name);

	ret = symlink(bpf_lnk_dir, bpf_sub_dir);
	if (ret) {
		if (errno != EEXIST) {
			if (errno != EPERM) {
				fprintf(stderr, "symlink %s failed: %s\n",
					bpf_sub_dir, strerror(errno));
				return ret;
			}

			return bpf_slave_via_bind_mnt(bpf_sub_dir,
						      bpf_lnk_dir);
		}

		ret = lstat(bpf_sub_dir, &sb);
		if (ret) {
			fprintf(stderr, "lstat %s failed: %s\n",
				bpf_sub_dir, strerror(errno));
			return ret;
		}

		if ((sb.st_mode & S_IFMT) != S_IFLNK)
			return bpf_gen_global(bpf_sub_dir);
	}

	return 0;
}

static int bpf_gen_hierarchy(const char *base)
{
	int ret, i;

	ret = bpf_gen_master(base, bpf_prog_to_subdir(__bpf_types[0]));
	for (i = 1; i < ARRAY_SIZE(__bpf_types) && !ret; i++)
		ret = bpf_gen_slave(base,
				    bpf_prog_to_subdir(__bpf_types[i]),
				    bpf_prog_to_subdir(__bpf_types[0]));
	return ret;
}

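/* Resolve (and cache) the bpf fs working directory. The mount point is taken
 * from the BPF_ENV_MNT environment variable or probed from known locations;
 * if none is found, a bpf fs is mounted and the per-type subdirectory
 * hierarchy is created below it.
 */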
static const char *bpf_get_work_dir(enum bpf_prog_type type)
{
	static char bpf_tmp[PATH_MAX] = BPF_DIR_MNT;
	static char bpf_wrk_dir[PATH_MAX];
	static const char *mnt;
	static bool bpf_mnt_cached;
	const char *mnt_env = getenv(BPF_ENV_MNT);
	static const char * const bpf_known_mnts[] = {
		BPF_DIR_MNT,
		"/bpf",
		0,
	};
	int ret;

	if (bpf_mnt_cached) {
		const char *out = mnt;

		if (out && type) {
			snprintf(bpf_tmp, sizeof(bpf_tmp), "%s%s/",
				 out, bpf_prog_to_subdir(type));
			out = bpf_tmp;
		}
		return out;
	}

	if (mnt_env)
		mnt = bpf_find_mntpt_single(BPF_FS_MAGIC, bpf_tmp,
					    sizeof(bpf_tmp), mnt_env);
	else
		mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp,
				     sizeof(bpf_tmp), bpf_known_mnts);
	if (!mnt) {
		mnt = mnt_env ? : BPF_DIR_MNT;
		ret = bpf_mnt_check_target(mnt);
		if (!ret)
			ret = bpf_mnt_fs(mnt);
		if (ret) {
			mnt = NULL;
			goto out;
		}
	}

	ret = snprintf(bpf_wrk_dir, sizeof(bpf_wrk_dir), "%s/", mnt);
	if (ret < 0 || ret >= sizeof(bpf_wrk_dir)) {
		mnt = NULL;
		goto out;
	}

	ret = bpf_gen_hierarchy(bpf_wrk_dir);
	if (ret) {
		mnt = NULL;
		goto out;
	}

	mnt = bpf_wrk_dir;
out:
	bpf_mnt_cached = true;
	return mnt;
}

static int bpf_obj_get(const char *pathname, enum bpf_prog_type type)
{
	union bpf_attr attr = {};
	char tmp[PATH_MAX];

	if (strlen(pathname) > 2 && pathname[0] == 'm' &&
	    pathname[1] == ':' && bpf_get_work_dir(type)) {
		snprintf(tmp, sizeof(tmp), "%s/%s",
			 bpf_get_work_dir(type), pathname + 2);
		pathname = tmp;
	}

	attr.pathname = bpf_ptr_to_u64(pathname);

	return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
}

static int bpf_obj_pinned(const char *pathname, enum bpf_prog_type type)
{
	int prog_fd = bpf_obj_get(pathname, type);

	if (prog_fd < 0)
		fprintf(stderr, "Couldn\'t retrieve pinned program \'%s\': %s\n",
			pathname, strerror(errno));
	return prog_fd;
}

static int bpf_do_parse(struct bpf_cfg_in *cfg, const bool *opt_tbl)
{
	const char *file, *section, *uds_name;
	bool verbose = false;
	int i, ret, argc;
	char **argv;

	argv = cfg->argv;
	argc = cfg->argc;

	if (opt_tbl[CBPF_BYTECODE] &&
	    (matches(*argv, "bytecode") == 0 ||
	     strcmp(*argv, "bc") == 0)) {
		cfg->mode = CBPF_BYTECODE;
	} else if (opt_tbl[CBPF_FILE] &&
		   (matches(*argv, "bytecode-file") == 0 ||
		    strcmp(*argv, "bcf") == 0)) {
		cfg->mode = CBPF_FILE;
	} else if (opt_tbl[EBPF_OBJECT] &&
		   (matches(*argv, "object-file") == 0 ||
		    strcmp(*argv, "obj") == 0)) {
		cfg->mode = EBPF_OBJECT;
	} else if (opt_tbl[EBPF_PINNED] &&
		   (matches(*argv, "object-pinned") == 0 ||
		    matches(*argv, "pinned") == 0 ||
		    matches(*argv, "fd") == 0)) {
		cfg->mode = EBPF_PINNED;
	} else {
		fprintf(stderr, "What mode is \"%s\"?\n", *argv);
		return -1;
	}

	NEXT_ARG();
	file = section = uds_name = NULL;
	if (cfg->mode == EBPF_OBJECT || cfg->mode == EBPF_PINNED) {
		file = *argv;
		NEXT_ARG_FWD();

		if (cfg->type == BPF_PROG_TYPE_UNSPEC) {
			if (argc > 0 && matches(*argv, "type") == 0) {
				NEXT_ARG();
				for (i = 0; i < ARRAY_SIZE(__bpf_prog_meta);
				     i++) {
					if (!__bpf_prog_meta[i].type)
						continue;
					if (!matches(*argv,
						     __bpf_prog_meta[i].type)) {
						cfg->type = i;
						break;
					}
				}

				if (cfg->type == BPF_PROG_TYPE_UNSPEC) {
					fprintf(stderr, "What type is \"%s\"?\n",
						*argv);
					return -1;
				}
				NEXT_ARG_FWD();
			} else {
				cfg->type = BPF_PROG_TYPE_SCHED_CLS;
			}
		}

		section = bpf_prog_to_default_section(cfg->type);
		if (argc > 0 && matches(*argv, "section") == 0) {
			NEXT_ARG();
			section = *argv;
			NEXT_ARG_FWD();
		}

		if (__bpf_prog_meta[cfg->type].may_uds_export) {
			uds_name = getenv(BPF_ENV_UDS);
			if (argc > 0 && !uds_name &&
			    matches(*argv, "export") == 0) {
				NEXT_ARG();
				uds_name = *argv;
				NEXT_ARG_FWD();
			}
		}

		if (argc > 0 && matches(*argv, "verbose") == 0) {
			verbose = true;
			NEXT_ARG_FWD();
		}

		PREV_ARG();
	}

	if (cfg->mode == CBPF_BYTECODE || cfg->mode == CBPF_FILE) {
		ret = bpf_ops_parse(argc, argv, cfg->opcodes,
				    cfg->mode == CBPF_FILE);
		cfg->n_opcodes = ret;
	} else if (cfg->mode == EBPF_OBJECT) {
		ret = 0;
	} else if (cfg->mode == EBPF_PINNED) {
		ret = bpf_obj_pinned(file, cfg->type);
		cfg->prog_fd = ret;
	} else {
		return -1;
	}

	cfg->object = file;
	cfg->section = section;
	cfg->uds = uds_name;
	cfg->argc = argc;
	cfg->argv = argv;
	cfg->verbose = verbose;

	return ret;
}

static int bpf_do_load(struct bpf_cfg_in *cfg)
{
	if (cfg->mode == EBPF_OBJECT) {
#ifdef HAVE_LIBBPF
		return iproute2_load_libbpf(cfg);
#endif
		cfg->prog_fd = bpf_obj_open(cfg->object, cfg->type,
					    cfg->section, cfg->ifindex,
					    cfg->verbose);
		return cfg->prog_fd;
	}
	return 0;
}

int bpf_load_common(struct bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops,
		    void *nl)
{
	char annotation[256];
	int ret;

	ret = bpf_do_load(cfg);
	if (ret < 0)
		return ret;

	if (cfg->mode == CBPF_BYTECODE || cfg->mode == CBPF_FILE)
		ops->cbpf_cb(nl, cfg->opcodes, cfg->n_opcodes);
	if (cfg->mode == EBPF_OBJECT || cfg->mode == EBPF_PINNED) {
		snprintf(annotation, sizeof(annotation), "%s:[%s]",
			 basename(cfg->object), cfg->mode == EBPF_PINNED ?
			 "*fsobj" : cfg->section);
		ops->ebpf_cb(nl, cfg->prog_fd, annotation);
	}

	return 0;
}

int bpf_parse_common(struct bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops)
{
	bool opt_tbl[BPF_MODE_MAX] = {};

	if (ops->cbpf_cb) {
		opt_tbl[CBPF_BYTECODE] = true;
		opt_tbl[CBPF_FILE] = true;
	}

	if (ops->ebpf_cb) {
		opt_tbl[EBPF_OBJECT] = true;
		opt_tbl[EBPF_PINNED] = true;
	}

	return bpf_do_parse(cfg, opt_tbl);
}

int bpf_parse_and_load_common(struct bpf_cfg_in *cfg,
			      const struct bpf_cfg_ops *ops, void *nl)
{
	int ret;

	ret = bpf_parse_common(cfg, ops);
	if (ret < 0)
		return ret;

	return bpf_load_common(cfg, ops, nl);
}

int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
{
	const bool opt_tbl[BPF_MODE_MAX] = {
		[EBPF_OBJECT] = true,
		[EBPF_PINNED] = true,
	};
	const struct bpf_elf_map test = {
		.type = BPF_MAP_TYPE_PROG_ARRAY,
		.size_key = sizeof(int),
		.size_value = sizeof(int),
	};
	struct bpf_cfg_in cfg = {
		.type = BPF_PROG_TYPE_UNSPEC,
		.argc = argc,
		.argv = argv,
	};
	struct bpf_map_ext ext = {};
	int ret, prog_fd, map_fd;
	uint32_t map_key;

	ret = bpf_do_parse(&cfg, opt_tbl);
	if (ret < 0)
		return ret;

	ret = bpf_do_load(&cfg);
	if (ret < 0)
		return ret;

	prog_fd = cfg.prog_fd;

	if (key) {
		map_key = *key;
	} else {
		ret = sscanf(cfg.section, "%*i/%i", &map_key);
		if (ret != 1) {
			fprintf(stderr, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n");
			ret = -EINVAL;
			goto out_prog;
		}
	}

	map_fd = bpf_obj_get(map_path, cfg.type);
	if (map_fd < 0) {
		fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n",
			map_path, strerror(errno));
		ret = map_fd;
		goto out_prog;
	}

	ret = bpf_map_selfcheck_pinned(map_fd, &test, &ext,
				       offsetof(struct bpf_elf_map, max_elem),
				       cfg.type);
	if (ret < 0) {
		fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path);
		goto out_map;
	}

	ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
	if (ret < 0)
		fprintf(stderr, "Map update failed: %s\n", strerror(errno));
out_map:
	close(map_fd);
out_prog:
	close(prog_fd);
	return ret;
}

int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type)
{
	union bpf_attr attr = {};

	attr.target_fd = target_fd;
	attr.attach_bpf_fd = prog_fd;
	attr.attach_type = type;

	return bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
}

int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type)
{
	union bpf_attr attr = {};

	attr.target_fd = target_fd;
	attr.attach_type = type;

	return bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
}

int bpf_prog_load_dev(enum bpf_prog_type type, const struct bpf_insn *insns,
		      size_t size_insns, const char *license, __u32 ifindex,
		      char *log, size_t size_log)
{
	union bpf_attr attr = {};

	attr.prog_type = type;
	attr.insns = bpf_ptr_to_u64(insns);
	attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
	attr.license = bpf_ptr_to_u64(license);
	attr.prog_ifindex = ifindex;

	if (size_log > 0) {
		attr.log_buf = bpf_ptr_to_u64(log);
		attr.log_size = size_log;
		attr.log_level = 1;
	}

	return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}

#ifdef HAVE_ELF
struct bpf_elf_prog {
	enum bpf_prog_type type;
	struct bpf_insn *insns;
	unsigned int insns_num;
	size_t size;
	const char *license;
};

struct bpf_hash_entry {
	unsigned int pinning;
	const char *subpath;
	struct bpf_hash_entry *next;
};

struct bpf_config {
	unsigned int jit_enabled;
};

struct bpf_btf {
	const struct btf_header *hdr;
	const void *raw;
	const char *strings;
	const struct btf_type **types;
	int types_num;
};

struct bpf_elf_ctx {
	struct bpf_config cfg;
	Elf *elf_fd;
	GElf_Ehdr elf_hdr;
	Elf_Data *sym_tab;
	Elf_Data *str_tab;
	Elf_Data *btf_data;
	char obj_uid[64];
	int obj_fd;
	int btf_fd;
	int map_fds[ELF_MAX_MAPS];
	struct bpf_elf_map maps[ELF_MAX_MAPS];
	struct bpf_map_ext maps_ext[ELF_MAX_MAPS];
	struct bpf_elf_prog prog_text;
	struct bpf_btf btf;
	int sym_num;
	int map_num;
	int map_len;
	bool *sec_done;
	int sec_maps;
	int sec_text;
	int sec_btf;
	char license[ELF_MAX_LICENSE_LEN];
	enum bpf_prog_type type;
	__u32 ifindex;
	bool verbose;
	bool noafalg;
	struct bpf_elf_st stat;
	struct bpf_hash_entry *ht[256];
	char *log;
	size_t log_size;
};

struct bpf_elf_sec_data {
	GElf_Shdr sec_hdr;
	Elf_Data *sec_data;
	const char *sec_name;
};

struct bpf_map_data {
	int *fds;
	const char *obj;
	struct bpf_elf_st *st;
	struct bpf_elf_map *ent;
};

static bool bpf_log_has_data(struct bpf_elf_ctx *ctx)
{
	return ctx->log && ctx->log[0];
}

static __check_format_string(2, 3) void
bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
{
	va_list vl;

	va_start(vl, format);
	vfprintf(stderr, format, vl);
	va_end(vl);

	if (bpf_log_has_data(ctx)) {
		if (ctx->verbose) {
			fprintf(stderr, "%s\n", ctx->log);
		} else {
			unsigned int off = 0, len = strlen(ctx->log);

			if (len > BPF_MAX_LOG) {
				off = len - BPF_MAX_LOG;
				fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
					off);
			}
			fprintf(stderr, "%s\n", ctx->log + off);
		}

		memset(ctx->log, 0, ctx->log_size);
	}
}

static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
{
	const size_t log_max = UINT_MAX >> 8;
	size_t log_size = ctx->log_size;
	char *ptr;

	if (!ctx->log) {
		log_size = 65536;
	} else if (log_size < log_max) {
		log_size <<= 1;
		if (log_size > log_max)
			log_size = log_max;
	} else {
		return -EINVAL;
	}

	ptr = realloc(ctx->log, log_size);
	if (!ptr)
		return -ENOMEM;

	ptr[0] = 0;
	ctx->log = ptr;
	ctx->log_size = log_size;

	return 0;
}

static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
			  uint32_t size_value, uint32_t max_elem,
			  uint32_t flags, int inner_fd, int btf_fd,
			  uint32_t ifindex, uint32_t btf_id_key,
			  uint32_t btf_id_val)
{
	union bpf_attr attr = {};

	attr.map_type = type;
	attr.key_size = size_key;
	attr.value_size = inner_fd ? sizeof(int) : size_value;
	attr.max_entries = max_elem;
	attr.map_flags = flags;
	attr.inner_map_fd = inner_fd;
	attr.map_ifindex = ifindex;
	attr.btf_fd = btf_fd;
	attr.btf_key_type_id = btf_id_key;
	attr.btf_value_type_id = btf_id_val;

	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}

static int bpf_btf_load(void *btf, size_t size_btf,
			char *log, size_t size_log)
{
	union bpf_attr attr = {};

	attr.btf = bpf_ptr_to_u64(btf);
	attr.btf_size = size_btf;

	if (size_log > 0) {
		attr.btf_log_buf = bpf_ptr_to_u64(log);
		attr.btf_log_size = size_log;
		attr.btf_log_level = 1;
	}

	return bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
}

static int bpf_obj_pin(int fd, const char *pathname)
{
	union bpf_attr attr = {};

	attr.pathname = bpf_ptr_to_u64(pathname);
	attr.bpf_fd = fd;

	return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
}

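/* Compute a SHA-1 digest of the ELF object via the kernel crypto API
 * (AF_ALG): bind a hash socket, sendfile() the object into it and read
 * back the 20 byte digest.
 */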
static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
{
	struct sockaddr_alg alg = {
		.salg_family = AF_ALG,
		.salg_type = "hash",
		.salg_name = "sha1",
	};
	int ret, cfd, ofd, ffd;
	struct stat stbuff;
	ssize_t size;

	if (!object || len != 20)
		return -EINVAL;

	cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (cfd < 0)
		return cfd;

	ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
	if (ret < 0)
		goto out_cfd;

	ofd = accept(cfd, NULL, 0);
	if (ofd < 0) {
		ret = ofd;
		goto out_cfd;
	}

	ffd = open(object, O_RDONLY);
	if (ffd < 0) {
		fprintf(stderr, "Error opening object %s: %s\n",
			object, strerror(errno));
		ret = ffd;
		goto out_ofd;
	}

	ret = fstat(ffd, &stbuff);
	if (ret < 0) {
		fprintf(stderr, "Error doing fstat: %s\n",
			strerror(errno));
		goto out_ffd;
	}

	size = sendfile(ofd, ffd, NULL, stbuff.st_size);
	if (size != stbuff.st_size) {
		fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n",
			size, stbuff.st_size, strerror(errno));
		ret = -1;
		goto out_ffd;
	}

	size = read(ofd, out, len);
	if (size != len) {
		fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
			size, len, strerror(errno));
		ret = -1;
	} else {
		ret = 0;
	}
out_ffd:
	close(ffd);
out_ofd:
	close(ofd);
out_cfd:
	close(cfd);
	return ret;
}

static void bpf_init_env(void)
{
	struct rlimit limit = {
		.rlim_cur = RLIM_INFINITY,
		.rlim_max = RLIM_INFINITY,
	};

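	/* Best effort: bump RLIMIT_MEMLOCK so that map and program
	 * allocations are not rejected; a failure here is ignored.
	 */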
	setrlimit(RLIMIT_MEMLOCK, &limit);

	if (!bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC))
		fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n");
}

static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
				      uint32_t pinning)
{
	struct bpf_hash_entry *entry;

	entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
	while (entry && entry->pinning != pinning)
		entry = entry->next;

	return entry ? entry->subpath : NULL;
}

static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
			   uint32_t pinning)
{
	switch (pinning) {
	case PIN_OBJECT_NS:
	case PIN_GLOBAL_NS:
		return false;
	case PIN_NONE:
		return true;
	default:
		return !bpf_custom_pinning(ctx, pinning);
	}
}

static void bpf_make_pathname(char *pathname, size_t len, const char *name,
			      const struct bpf_elf_ctx *ctx, uint32_t pinning)
{
	switch (pinning) {
	case PIN_OBJECT_NS:
		snprintf(pathname, len, "%s/%s/%s",
			 bpf_get_work_dir(ctx->type),
			 ctx->obj_uid, name);
		break;
	case PIN_GLOBAL_NS:
		snprintf(pathname, len, "%s/%s/%s",
			 bpf_get_work_dir(ctx->type),
			 BPF_DIR_GLOBALS, name);
		break;
	default:
		snprintf(pathname, len, "%s/../%s/%s",
			 bpf_get_work_dir(ctx->type),
			 bpf_custom_pinning(ctx, pinning), name);
		break;
	}
}

static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
			    uint32_t pinning)
{
	char pathname[PATH_MAX];

	if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
		return 0;

	bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
	return bpf_obj_get(pathname, ctx->type);
}

static int bpf_make_obj_path(const struct bpf_elf_ctx *ctx)
{
	char tmp[PATH_MAX];
	int ret;

	snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_work_dir(ctx->type),
		 ctx->obj_uid);

	ret = mkdir(tmp, S_IRWXU);
	if (ret && errno != EEXIST) {
		fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno));
		return ret;
	}

	return 0;
}

static int bpf_make_custom_path(const struct bpf_elf_ctx *ctx,
				const char *todo)
{
	char tmp[PATH_MAX], rem[PATH_MAX], *sub;
	int ret;

	snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_work_dir(ctx->type));
	snprintf(rem, sizeof(rem), "%s/", todo);
	sub = strtok(rem, "/");

	while (sub) {
		if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX)
			return -EINVAL;

		strcat(tmp, sub);
		strcat(tmp, "/");

		ret = mkdir(tmp, S_IRWXU);
		if (ret && errno != EEXIST) {
			fprintf(stderr, "mkdir %s failed: %s\n", tmp,
				strerror(errno));
			return ret;
		}

		sub = strtok(NULL, "/");
	}

	return 0;
}

static int bpf_place_pinned(int fd, const char *name,
			    const struct bpf_elf_ctx *ctx, uint32_t pinning)
{
	char pathname[PATH_MAX];
	const char *tmp;
	int ret = 0;

	if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
		return 0;

	if (pinning == PIN_OBJECT_NS)
		ret = bpf_make_obj_path(ctx);
	else if ((tmp = bpf_custom_pinning(ctx, pinning)))
		ret = bpf_make_custom_path(ctx, tmp);
	if (ret < 0)
		return ret;

	bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
	return bpf_obj_pin(fd, pathname);
}

static void bpf_prog_report(int fd, const char *section,
			    const struct bpf_elf_prog *prog,
			    struct bpf_elf_ctx *ctx)
{
	unsigned int insns = prog->size / sizeof(struct bpf_insn);

	fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section,
		fd < 0 ? "rejected: " : "loaded",
		fd < 0 ? strerror(errno) : "",
		fd < 0 ? errno : fd);

	fprintf(stderr, " - Type:         %u\n", prog->type);
	fprintf(stderr, " - Instructions: %u (%u over limit)\n",
		insns, insns > BPF_MAXINSNS ? insns - BPF_MAXINSNS : 0);
	fprintf(stderr, " - License:      %s\n\n", prog->license);

	bpf_dump_error(ctx, "Verifier analysis:\n\n");
}

static int bpf_prog_attach(const char *section,
			   const struct bpf_elf_prog *prog,
			   struct bpf_elf_ctx *ctx)
{
	int tries = 0, fd;
retry:
	errno = 0;
	fd = bpf_prog_load_dev(prog->type, prog->insns, prog->size,
			       prog->license, ctx->ifindex,
			       ctx->log, ctx->log_size);
	if (fd < 0 || ctx->verbose) {
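		/* The verifier log can be larger than the current buffer; on
		 * ENOSPC grow the buffer and retry a few times so the user
		 * still gets a usable error log.
		 */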
		if (fd < 0 && errno == ENOSPC) {
			if (tries++ < 10 && !bpf_log_realloc(ctx))
				goto retry;

			fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
				ctx->log_size, tries);
			return fd;
		}

		bpf_prog_report(fd, section, prog, ctx);
	}

	return fd;
}

static void bpf_map_report(int fd, const char *name,
			   const struct bpf_elf_map *map,
			   struct bpf_elf_ctx *ctx, int inner_fd)
{
	fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
		fd < 0 ? "rejected: " : "loaded",
		fd < 0 ? strerror(errno) : "",
		fd < 0 ? errno : fd);

	fprintf(stderr, " - Type:         %u\n", map->type);
	fprintf(stderr, " - Identifier:   %u\n", map->id);
	fprintf(stderr, " - Pinning:      %u\n", map->pinning);
	fprintf(stderr, " - Size key:     %u\n", map->size_key);
	fprintf(stderr, " - Size value:   %u\n",
		inner_fd ? (int)sizeof(int) : map->size_value);
	fprintf(stderr, " - Max elems:    %u\n", map->max_elem);
	fprintf(stderr, " - Flags:        %#x\n\n", map->flags);
}

static int bpf_find_map_id(const struct bpf_elf_ctx *ctx, uint32_t id)
{
	int i;

	for (i = 0; i < ctx->map_num; i++) {
		if (ctx->maps[i].id != id)
			continue;
		if (ctx->map_fds[i] < 0)
			return -EINVAL;

		return ctx->map_fds[i];
	}

	return -ENOENT;
}

static void bpf_report_map_in_map(int outer_fd, uint32_t idx)
{
	struct bpf_elf_map outer_map;
	int ret;

	fprintf(stderr, "Cannot insert map into map! ");

	ret = bpf_derive_elf_map_from_fdinfo(outer_fd, &outer_map, NULL);
	if (!ret) {
		if (idx >= outer_map.max_elem &&
		    outer_map.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
			fprintf(stderr, "Outer map has %u elements, index %u is invalid!\n",
				outer_map.max_elem, idx);
			return;
		}
	}

	fprintf(stderr, "Different map specs used for outer and inner map?\n");
}

static bool bpf_is_map_in_map_type(const struct bpf_elf_map *map)
{
	return map->type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
	       map->type == BPF_MAP_TYPE_HASH_OF_MAPS;
}

static bool bpf_map_offload_neutral(enum bpf_map_type type)
{
	return type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
}

static int bpf_map_attach(const char *name, struct bpf_elf_ctx *ctx,
			  const struct bpf_elf_map *map, struct bpf_map_ext *ext,
			  int *have_map_in_map)
{
	int fd, ifindex, ret, map_inner_fd = 0;
	bool retried = false;

probe:
	fd = bpf_probe_pinned(name, ctx, map->pinning);
	if (fd > 0) {
		ret = bpf_map_selfcheck_pinned(fd, map, ext,
					       offsetof(struct bpf_elf_map,
							id), ctx->type);
		if (ret < 0) {
			close(fd);
			fprintf(stderr, "Map \'%s\' self-check failed!\n",
				name);
			return ret;
		}
		if (ctx->verbose)
			fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
				name);
		return fd;
	}

	if (have_map_in_map && bpf_is_map_in_map_type(map)) {
		(*have_map_in_map)++;
		if (map->inner_id)
			return 0;
		fprintf(stderr, "Map \'%s\' cannot be created since no inner map ID defined!\n",
			name);
		return -EINVAL;
	}

	if (!have_map_in_map && bpf_is_map_in_map_type(map)) {
		map_inner_fd = bpf_find_map_id(ctx, map->inner_id);
		if (map_inner_fd < 0) {
			fprintf(stderr, "Map \'%s\' cannot be loaded. Inner map with ID %u not found!\n",
				name, map->inner_id);
			return -EINVAL;
		}
	}

	ifindex = bpf_map_offload_neutral(map->type) ? 0 : ctx->ifindex;
	errno = 0;
	fd = bpf_map_create(map->type, map->size_key, map->size_value,
			    map->max_elem, map->flags, map_inner_fd, ctx->btf_fd,
			    ifindex, ext->btf_id_key, ext->btf_id_val);

	if (fd < 0 || ctx->verbose) {
		bpf_map_report(fd, name, map, ctx, map_inner_fd);
		if (fd < 0)
			return fd;
	}

	ret = bpf_place_pinned(fd, name, ctx, map->pinning);
	if (ret < 0) {
		close(fd);
		if (!retried && errno == EEXIST) {
			retried = true;
			goto probe;
		}
		fprintf(stderr, "Could not pin %s map: %s\n", name,
			strerror(errno));
		return ret;
	}

	return fd;
}

static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
				    const GElf_Sym *sym)
{
	return ctx->str_tab->d_buf + sym->st_name;
}

static int bpf_btf_find(struct bpf_elf_ctx *ctx, const char *name)
{
	const struct btf_type *type;
	const char *res;
	int id;

	for (id = 1; id < ctx->btf.types_num; id++) {
		type = ctx->btf.types[id];
		if (type->name_off >= ctx->btf.hdr->str_len)
			continue;
		res = &ctx->btf.strings[type->name_off];
		if (!strcmp(res, name))
			return id;
	}

	return -ENOENT;
}

static int bpf_btf_find_kv(struct bpf_elf_ctx *ctx, const struct bpf_elf_map *map,
			   const char *name, uint32_t *id_key, uint32_t *id_val)
{
	const struct btf_member *key, *val;
	const struct btf_type *type;
	char btf_name[512];
	const char *res;
	int id;

	snprintf(btf_name, sizeof(btf_name), "____btf_map_%s", name);
	id = bpf_btf_find(ctx, btf_name);
	if (id < 0)
		return id;

	type = ctx->btf.types[id];
	if (BTF_INFO_KIND(type->info) != BTF_KIND_STRUCT)
		return -EINVAL;
	if (BTF_INFO_VLEN(type->info) != 2)
		return -EINVAL;

	key = ((void *) type) + sizeof(*type);
	val = key + 1;
	if (!key->type || key->type >= ctx->btf.types_num ||
	    !val->type || val->type >= ctx->btf.types_num)
		return -EINVAL;

	if (key->name_off >= ctx->btf.hdr->str_len ||
	    val->name_off >= ctx->btf.hdr->str_len)
		return -EINVAL;

	res = &ctx->btf.strings[key->name_off];
	if (strcmp(res, "key"))
		return -EINVAL;

	res = &ctx->btf.strings[val->name_off];
	if (strcmp(res, "value"))
		return -EINVAL;

	*id_key = key->type;
	*id_val = val->type;
	return 0;
}

static void bpf_btf_annotate(struct bpf_elf_ctx *ctx, int which, const char *name)
{
	uint32_t id_key = 0, id_val = 0;

	if (!bpf_btf_find_kv(ctx, &ctx->maps[which], name, &id_key, &id_val)) {
		ctx->maps_ext[which].btf_id_key = id_key;
		ctx->maps_ext[which].btf_id_val = id_val;
	}
}

static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
{
	const char *name;
	GElf_Sym sym;
	int i;

	for (i = 0; i < ctx->sym_num; i++) {
		int type;

		if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
			continue;

		type = GELF_ST_TYPE(sym.st_info);
		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
		    (type != STT_NOTYPE && type != STT_OBJECT) ||
		    sym.st_shndx != ctx->sec_maps ||
		    sym.st_value / ctx->map_len != which)
			continue;

		name = bpf_str_tab_name(ctx, &sym);
		bpf_btf_annotate(ctx, which, name);
		return name;
	}

	return NULL;
}

static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
{
	int i, j, ret, fd, inner_fd, inner_idx, have_map_in_map = 0;
	const char *map_name;

	for (i = 0; i < ctx->map_num; i++) {
		if (ctx->maps[i].pinning == PIN_OBJECT_NS &&
		    ctx->noafalg) {
			fprintf(stderr, "Missing kernel AF_ALG support for PIN_OBJECT_NS!\n");
			return -ENOTSUP;
		}

		map_name = bpf_map_fetch_name(ctx, i);
		if (!map_name)
			return -EIO;

		fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
				    &ctx->maps_ext[i], &have_map_in_map);
		if (fd < 0)
			return fd;

		ctx->map_fds[i] = !fd ? -1 : fd;
	}

	for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
		if (ctx->map_fds[i] >= 0)
			continue;

		map_name = bpf_map_fetch_name(ctx, i);
		if (!map_name)
			return -EIO;

		fd = bpf_map_attach(map_name, ctx, &ctx->maps[i],
				    &ctx->maps_ext[i], NULL);
		if (fd < 0)
			return fd;

		ctx->map_fds[i] = fd;
	}

	for (i = 0; have_map_in_map && i < ctx->map_num; i++) {
		if (!ctx->maps[i].id ||
		    ctx->maps[i].inner_id ||
		    ctx->maps[i].inner_idx == -1)
			continue;

		inner_fd = ctx->map_fds[i];
		inner_idx = ctx->maps[i].inner_idx;

		for (j = 0; j < ctx->map_num; j++) {
			if (!bpf_is_map_in_map_type(&ctx->maps[j]))
				continue;
			if (ctx->maps[j].inner_id != ctx->maps[i].id)
				continue;

			ret = bpf_map_update(ctx->map_fds[j], &inner_idx,
					     &inner_fd, BPF_ANY);
			if (ret < 0) {
				bpf_report_map_in_map(ctx->map_fds[j],
						      inner_idx);
				return ret;
			}
		}
	}

	return 0;
}

static int bpf_map_num_sym(struct bpf_elf_ctx *ctx)
{
	int i, num = 0;
	GElf_Sym sym;

	for (i = 0; i < ctx->sym_num; i++) {
		int type;

		if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
			continue;

		type = GELF_ST_TYPE(sym.st_info);
		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
		    (type != STT_NOTYPE && type != STT_OBJECT) ||
		    sym.st_shndx != ctx->sec_maps)
			continue;
		num++;
	}

	return num;
}

static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
				 struct bpf_elf_sec_data *data)
{
	Elf_Data *sec_edata;
	GElf_Shdr sec_hdr;
	Elf_Scn *sec_fd;
	char *sec_name;

	memset(data, 0, sizeof(*data));

	sec_fd = elf_getscn(ctx->elf_fd, section);
	if (!sec_fd)
		return -EINVAL;
	if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
		return -EIO;

	sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
			      sec_hdr.sh_name);
	if (!sec_name || !sec_hdr.sh_size)
		return -ENOENT;

	sec_edata = elf_getdata(sec_fd, NULL);
	if (!sec_edata || elf_getdata(sec_fd, sec_edata))
		return -EIO;

	memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));

	data->sec_name = sec_name;
	data->sec_data = sec_edata;
	return 0;
}

struct bpf_elf_map_min {
	__u32 type;
	__u32 size_key;
	__u32 size_value;
	__u32 max_elem;
};

static int bpf_fetch_maps_begin(struct bpf_elf_ctx *ctx, int section,
				struct bpf_elf_sec_data *data)
{
	ctx->map_num = data->sec_data->d_size;
	ctx->sec_maps = section;
	ctx->sec_done[section] = true;

	if (ctx->map_num > sizeof(ctx->maps)) {
		fprintf(stderr, "Too many BPF maps in ELF section!\n");
		return -ENOMEM;
	}

	memcpy(ctx->maps, data->sec_data->d_buf, ctx->map_num);
	return 0;
}

static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end)
{
	GElf_Sym sym;
	int off, i;

	for (off = 0; off < end; off += ctx->map_len) {
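		/* Symbol order is not guaranteed to be linear, so rescan the
		 * symbol table for a map symbol at each expected offset.
		 */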
		for (i = 0; i < ctx->sym_num; i++) {
			int type;

			if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
				continue;

			type = GELF_ST_TYPE(sym.st_info);
			if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
			    (type != STT_NOTYPE && type != STT_OBJECT) ||
			    sym.st_shndx != ctx->sec_maps)
				continue;
			if (sym.st_value == off)
				break;
			if (i == ctx->sym_num - 1)
				return -1;
		}
	}

	return off == end ? 0 : -1;
}

static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx)
{
	struct bpf_elf_map fixup[ARRAY_SIZE(ctx->maps)] = {};
	int i, sym_num = bpf_map_num_sym(ctx);
	__u8 *buff;

	if (sym_num == 0 || sym_num > ARRAY_SIZE(ctx->maps)) {
		fprintf(stderr, "%u maps not supported in current map section!\n",
			sym_num);
		return -EINVAL;
	}

	if (ctx->map_num % sym_num != 0 ||
	    ctx->map_num % sizeof(__u32) != 0) {
		fprintf(stderr, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n");
		return -EINVAL;
	}

	ctx->map_len = ctx->map_num / sym_num;
	if (bpf_map_verify_all_offs(ctx, ctx->map_num)) {
		fprintf(stderr, "Different struct bpf_elf_map in use!\n");
		return -EINVAL;
	}

	if (ctx->map_len == sizeof(struct bpf_elf_map)) {
		ctx->map_num = sym_num;
		return 0;
	} else if (ctx->map_len > sizeof(struct bpf_elf_map)) {
		fprintf(stderr, "struct bpf_elf_map not supported, coming from future version?\n");
		return -EINVAL;
	} else if (ctx->map_len < sizeof(struct bpf_elf_map_min)) {
		fprintf(stderr, "struct bpf_elf_map too small, not supported!\n");
		return -EINVAL;
	}

	ctx->map_num = sym_num;
	for (i = 0, buff = (void *)ctx->maps; i < ctx->map_num;
	     i++, buff += ctx->map_len) {
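		/* Copy only the smaller, older struct bpf_elf_map layout;
		 * remaining members of the fixup entry stay zeroed.
		 */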
2009 memcpy(&fixup[i], buff, ctx->map_len);
2010 }
2011
2012 memcpy(ctx->maps, fixup, sizeof(fixup));
2013 if (ctx->verbose)
2014 printf("%zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n",
2015 sizeof(struct bpf_elf_map) - ctx->map_len);
2016 return 0;
2017}
2018
2019static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
2020 struct bpf_elf_sec_data *data)
2021{
2022 if (data->sec_data->d_size > sizeof(ctx->license))
2023 return -ENOMEM;
2024
2025 memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
2026 ctx->sec_done[section] = true;
2027 return 0;
2028}
2029
2030static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
2031 struct bpf_elf_sec_data *data)
2032{
2033 ctx->sym_tab = data->sec_data;
2034 ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
2035 ctx->sec_done[section] = true;
2036 return 0;
2037}
2038
2039static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
2040 struct bpf_elf_sec_data *data)
2041{
2042 ctx->str_tab = data->sec_data;
2043 ctx->sec_done[section] = true;
2044 return 0;
2045}
2046
2047static int bpf_fetch_text(struct bpf_elf_ctx *ctx, int section,
2048 struct bpf_elf_sec_data *data)
2049{
2050 ctx->sec_text = section;
2051 ctx->sec_done[section] = true;
2052 return 0;
2053}
2054
2055static void bpf_btf_report(int fd, struct bpf_elf_ctx *ctx)
2056{
2057 fprintf(stderr, "\nBTF debug data section \'.BTF\' %s%s (%d)!\n",
2058 fd < 0 ? "rejected: " : "loaded",
2059 fd < 0 ? strerror(errno) : "",
2060 fd < 0 ? errno : fd);
2061
2062 fprintf(stderr, " - Length: %zu\n", ctx->btf_data->d_size);
2063
2064 bpf_dump_error(ctx, "Verifier analysis:\n\n");
2065}
2066
2067static int bpf_btf_attach(struct bpf_elf_ctx *ctx)
2068{
2069 int tries = 0, fd;
2070retry:
2071 errno = 0;
2072 fd = bpf_btf_load(ctx->btf_data->d_buf, ctx->btf_data->d_size,
2073 ctx->log, ctx->log_size);
2074 if (fd < 0 || ctx->verbose) {
2075 if (fd < 0 && errno == ENOSPC) {
2076 if (tries++ < 10 && !bpf_log_realloc(ctx))
2077 goto retry;
2078
2079 fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
2080 ctx->log_size, tries);
2081 return fd;
2082 }
2083
2084 if (bpf_log_has_data(ctx))
2085 bpf_btf_report(fd, ctx);
2086 }
2087
2088 return fd;
2089}
2090
2091static int bpf_fetch_btf_begin(struct bpf_elf_ctx *ctx, int section,
2092 struct bpf_elf_sec_data *data)
2093{
2094 ctx->btf_data = data->sec_data;
2095 ctx->sec_btf = section;
2096 ctx->sec_done[section] = true;
2097 return 0;
2098}
2099
2100static int bpf_btf_check_header(struct bpf_elf_ctx *ctx)
2101{
2102 const struct btf_header *hdr = ctx->btf_data->d_buf;
2103 const char *str_start, *str_end;
2104 unsigned int data_len;
2105
2106 if (hdr->magic != BTF_MAGIC) {
2107 fprintf(stderr, "Object has wrong BTF magic: %x, expected: %x!\n",
2108 hdr->magic, BTF_MAGIC);
2109 return -EINVAL;
2110 }
2111
2112 if (hdr->version != BTF_VERSION) {
2113 fprintf(stderr, "Object has wrong BTF version: %u, expected: %u!\n",
2114 hdr->version, BTF_VERSION);
2115 return -EINVAL;
2116 }
2117
2118 if (hdr->flags) {
2119 fprintf(stderr, "Object has unsupported BTF flags %x!\n",
2120 hdr->flags);
2121 return -EINVAL;
2122 }
2123
2124 data_len = ctx->btf_data->d_size - sizeof(*hdr);
2125 if (data_len < hdr->type_off ||
2126 data_len < hdr->str_off ||
2127 data_len < hdr->type_len + hdr->str_len ||
2128 hdr->type_off >= hdr->str_off ||
2129 hdr->type_off + hdr->type_len != hdr->str_off ||
2130 hdr->str_off + hdr->str_len != data_len ||
2131 (hdr->type_off & (sizeof(uint32_t) - 1))) {
2132 fprintf(stderr, "Object has malformed BTF data!\n");
2133 return -EINVAL;
2134 }
2135
2136 ctx->btf.hdr = hdr;
2137 ctx->btf.raw = hdr + 1;
2138
2139 str_start = ctx->btf.raw + hdr->str_off;
2140 str_end = str_start + hdr->str_len;
2141 if (!hdr->str_len ||
2142 hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
2143 str_start[0] || str_end[-1]) {
2144 fprintf(stderr, "Object has malformed BTF string data!\n");
2145 return -EINVAL;
2146 }
2147
2148 ctx->btf.strings = str_start;
2149 return 0;
2150}
2151
2152static int bpf_btf_register_type(struct bpf_elf_ctx *ctx,
2153 const struct btf_type *type)
2154{
2155 int cur = ctx->btf.types_num, num = cur + 1;
2156 const struct btf_type **types;
2157
2158 types = realloc(ctx->btf.types, num * sizeof(type));
2159 if (!types) {
2160 free(ctx->btf.types);
2161 ctx->btf.types = NULL;
2162 ctx->btf.types_num = 0;
2163 return -ENOMEM;
2164 }
2165
2166 ctx->btf.types = types;
2167 ctx->btf.types[cur] = type;
2168 ctx->btf.types_num = num;
2169 return 0;
2170}
2171
2172static struct btf_type btf_type_void;
2173
2174static int bpf_btf_prep_type_data(struct bpf_elf_ctx *ctx)
2175{
2176 const void *type_cur = ctx->btf.raw + ctx->btf.hdr->type_off;
2177 const void *type_end = ctx->btf.raw + ctx->btf.hdr->str_off;
2178 const struct btf_type *type;
2179 uint16_t var_len;
2180 int ret, kind;
2181
2182 ret = bpf_btf_register_type(ctx, &btf_type_void);
2183 if (ret < 0)
2184 return ret;
2185
2186 while (type_cur < type_end) {
2187 type = type_cur;
2188 type_cur += sizeof(*type);
2189
2190 var_len = BTF_INFO_VLEN(type->info);
2191 kind = BTF_INFO_KIND(type->info);
2192
2193 switch (kind) {
2194 case BTF_KIND_INT:
2195 type_cur += sizeof(int);
2196 break;
2197 case BTF_KIND_ARRAY:
2198 type_cur += sizeof(struct btf_array);
2199 break;
2200 case BTF_KIND_STRUCT:
2201 case BTF_KIND_UNION:
2202 type_cur += var_len * sizeof(struct btf_member);
2203 break;
2204 case BTF_KIND_ENUM:
2205 type_cur += var_len * sizeof(struct btf_enum);
2206 break;
2207 case BTF_KIND_FUNC_PROTO:
2208 type_cur += var_len * sizeof(struct btf_param);
2209 break;
2210 case BTF_KIND_TYPEDEF:
2211 case BTF_KIND_PTR:
2212 case BTF_KIND_FWD:
2213 case BTF_KIND_VOLATILE:
2214 case BTF_KIND_CONST:
2215 case BTF_KIND_RESTRICT:
2216 case BTF_KIND_FUNC:
2217 break;
2218 default:
2219 fprintf(stderr, "Object has unknown BTF type: %u!\n", kind);
2220 return -EINVAL;
2221 }
2222
2223 ret = bpf_btf_register_type(ctx, type);
2224 if (ret < 0)
2225 return ret;
2226 }
2227
2228 return 0;
2229}
2230
2231static int bpf_btf_prep_data(struct bpf_elf_ctx *ctx)
2232{
2233 int ret = bpf_btf_check_header(ctx);
2234
2235 if (!ret)
2236 return bpf_btf_prep_type_data(ctx);
2237 return ret;
2238}
2239
2240static void bpf_fetch_btf_end(struct bpf_elf_ctx *ctx)
2241{
2242 int fd = bpf_btf_attach(ctx);
2243
2244 if (fd < 0)
2245 return;
2246 ctx->btf_fd = fd;
2247 if (bpf_btf_prep_data(ctx) < 0) {
2248 close(ctx->btf_fd);
2249 ctx->btf_fd = 0;
2250 }
2251}
2252
2253static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
2254{
2255 return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
2256}
2257
2258static bool bpf_has_btf_data(const struct bpf_elf_ctx *ctx)
2259{
2260 return ctx->sec_btf;
2261}
2262
2263static bool bpf_has_call_data(const struct bpf_elf_ctx *ctx)
2264{
2265 return ctx->sec_text;
2266}
2267
2268static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx, bool check_text_sec)
2269{
2270 struct bpf_elf_sec_data data;
2271 int i, ret = -1;
2272
2273 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
2274 ret = bpf_fill_section_data(ctx, i, &data);
2275 if (ret < 0)
2276 continue;
2277
2278 if (data.sec_hdr.sh_type == SHT_PROGBITS &&
2279 !strcmp(data.sec_name, ELF_SECTION_MAPS))
2280 ret = bpf_fetch_maps_begin(ctx, i, &data);
2281 else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
2282 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
2283 ret = bpf_fetch_license(ctx, i, &data);
2284 else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
2285 (data.sec_hdr.sh_flags & SHF_EXECINSTR) &&
2286 !strcmp(data.sec_name, ".text") &&
2287 check_text_sec)
2288 ret = bpf_fetch_text(ctx, i, &data);
2289 else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
2290 !strcmp(data.sec_name, ".symtab"))
2291 ret = bpf_fetch_symtab(ctx, i, &data);
2292 else if (data.sec_hdr.sh_type == SHT_STRTAB &&
2293 !strcmp(data.sec_name, ".strtab"))
2294 ret = bpf_fetch_strtab(ctx, i, &data);
2295 else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
2296 !strcmp(data.sec_name, ".BTF"))
2297 ret = bpf_fetch_btf_begin(ctx, i, &data);
2298 if (ret < 0) {
2299 fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
2300 i);
2301 return ret;
2302 }
2303 }
2304
2305 if (bpf_has_btf_data(ctx))
2306 bpf_fetch_btf_end(ctx);
2307 if (bpf_has_map_data(ctx)) {
2308 ret = bpf_fetch_maps_end(ctx);
2309 if (ret < 0) {
2310 fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
2311 return ret;
2312 }
2313
2314 ret = bpf_maps_attach_all(ctx);
2315 if (ret < 0) {
2316 fprintf(stderr, "Error loading maps into kernel!\n");
2317 return ret;
2318 }
2319 }
2320
2321 return ret;
2322}
2323
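/* Load a program section as-is, without applying relocations; used when the
 * section has no relocation entries to process.
 */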
2324static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
2325 bool *sseen)
2326{
2327 struct bpf_elf_sec_data data;
2328 struct bpf_elf_prog prog;
2329 int ret, i, fd = -1;
2330
2331 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
2332 if (ctx->sec_done[i])
2333 continue;
2334
2335 ret = bpf_fill_section_data(ctx, i, &data);
2336 if (ret < 0 ||
2337 !(data.sec_hdr.sh_type == SHT_PROGBITS &&
2338 (data.sec_hdr.sh_flags & SHF_EXECINSTR) &&
2339 !strcmp(data.sec_name, section)))
2340 continue;
2341
2342 *sseen = true;
2343
2344 memset(&prog, 0, sizeof(prog));
2345 prog.type = ctx->type;
2346 prog.license = ctx->license;
2347 prog.size = data.sec_data->d_size;
2348 prog.insns_num = prog.size / sizeof(struct bpf_insn);
2349 prog.insns = data.sec_data->d_buf;
2350
2351 fd = bpf_prog_attach(section, &prog, ctx);
2352 if (fd < 0)
2353 return fd;
2354
2355 ctx->sec_done[i] = true;
2356 break;
2357 }
2358
2359 return fd;
2360}
2361
2362struct bpf_relo_props {
2363 struct bpf_tail_call {
2364 unsigned int total;
2365 unsigned int jited;
2366 } tc;
2367 int main_num;
2368};
2369
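/* Map relocation: the patched instruction must be the first half of a ld64
 * (BPF_LD | BPF_IMM | BPF_DW) load. Its source register is switched to
 * BPF_PSEUDO_MAP_FD and the immediate receives the fd of the referenced map.
 * Tail call (prog array) map usage is recorded for later JIT diagnostics.
 */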
2370static int bpf_apply_relo_map(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog,
2371 GElf_Rel *relo, GElf_Sym *sym,
2372 struct bpf_relo_props *props)
2373{
2374 unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn);
2375 unsigned int map_idx = sym->st_value / ctx->map_len;
2376
2377 if (insn_off >= prog->insns_num)
2378 return -EINVAL;
2379 if (prog->insns[insn_off].code != (BPF_LD | BPF_IMM | BPF_DW)) {
2380 fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
2381 insn_off);
2382 return -EINVAL;
2383 }
2384
2385 if (map_idx >= ARRAY_SIZE(ctx->map_fds))
2386 return -EINVAL;
2387 if (!ctx->map_fds[map_idx])
2388 return -EINVAL;
2389 if (ctx->maps[map_idx].type == BPF_MAP_TYPE_PROG_ARRAY) {
2390 props->tc.total++;
2391 if (ctx->maps_ext[map_idx].owner.jited ||
2392 (ctx->maps_ext[map_idx].owner.type == 0 &&
2393 ctx->cfg.jit_enabled))
2394 props->tc.jited++;
2395 }
2396
2397 prog->insns[insn_off].src_reg = BPF_PSEUDO_MAP_FD;
2398 prog->insns[insn_off].imm = ctx->map_fds[map_idx];
2399 return 0;
2400}
2401
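/* Call relocation into .text: the first time such a relocation is seen, the
 * shared .text instructions are appended once to the end of the program and
 * the start offset is remembered in props->main_num. The call immediate is
 * then adjusted to be relative to that appended copy.
 */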
2402static int bpf_apply_relo_call(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog,
2403 GElf_Rel *relo, GElf_Sym *sym,
2404 struct bpf_relo_props *props)
2405{
2406 unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn);
2407 struct bpf_elf_prog *prog_text = &ctx->prog_text;
2408
2409 if (insn_off >= prog->insns_num)
2410 return -EINVAL;
	if (prog->insns[insn_off].code != (BPF_JMP | BPF_CALL) ||
	    prog->insns[insn_off].src_reg != BPF_PSEUDO_CALL) {
2413 fprintf(stderr, "ELF contains relo data for non call instruction at offset %u! Compiler bug?!\n",
2414 insn_off);
2415 return -EINVAL;
2416 }
2417
2418 if (!props->main_num) {
2419 struct bpf_insn *insns = realloc(prog->insns,
2420 prog->size + prog_text->size);
2421 if (!insns)
2422 return -ENOMEM;
2423
2424 memcpy(insns + prog->insns_num, prog_text->insns,
2425 prog_text->size);
2426 props->main_num = prog->insns_num;
2427 prog->insns = insns;
2428 prog->insns_num += prog_text->insns_num;
2429 prog->size += prog_text->size;
2430 }
2431
2432 prog->insns[insn_off].imm += props->main_num - insn_off;
2433 return 0;
2434}
2435
2436static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
2437 struct bpf_elf_sec_data *data_relo,
2438 struct bpf_elf_prog *prog,
2439 struct bpf_relo_props *props)
2440{
2441 GElf_Shdr *rhdr = &data_relo->sec_hdr;
2442 int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
2443
2444 for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
2445 GElf_Rel relo;
2446 GElf_Sym sym;
2447 int ret = -EIO;
2448
2449 if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
2450 return -EIO;
2451 if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
2452 return -EIO;
2453
2454 if (sym.st_shndx == ctx->sec_maps)
2455 ret = bpf_apply_relo_map(ctx, prog, &relo, &sym, props);
2456 else if (sym.st_shndx == ctx->sec_text)
2457 ret = bpf_apply_relo_call(ctx, prog, &relo, &sym, props);
2458 else
2459 fprintf(stderr, "ELF contains non-{map,call} related relo data in entry %u pointing to section %u! Compiler bug?!\n",
2460 relo_ent, sym.st_shndx);
2461 if (ret < 0)
2462 return ret;
2463 }
2464
2465 return 0;
2466}
2467
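/* Load a program section that comes with an SHT_REL companion section: copy
 * the instructions, apply map and call relocations, and pass the result to
 * the kernel. The .text section is only relocated and kept around for later
 * call relocations; it is never loaded on its own.
 */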
2468static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
2469 bool *lderr, bool *sseen, struct bpf_elf_prog *prog)
2470{
2471 struct bpf_elf_sec_data data_relo, data_insn;
2472 int ret, idx, i, fd = -1;
2473
2474 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
2475 struct bpf_relo_props props = {};
2476
2477 ret = bpf_fill_section_data(ctx, i, &data_relo);
2478 if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
2479 continue;
2480
2481 idx = data_relo.sec_hdr.sh_info;
2482
2483 ret = bpf_fill_section_data(ctx, idx, &data_insn);
2484 if (ret < 0 ||
2485 !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
2486 (data_insn.sec_hdr.sh_flags & SHF_EXECINSTR) &&
2487 !strcmp(data_insn.sec_name, section)))
2488 continue;
2489 if (sseen)
2490 *sseen = true;
2491
2492 memset(prog, 0, sizeof(*prog));
2493 prog->type = ctx->type;
2494 prog->license = ctx->license;
2495 prog->size = data_insn.sec_data->d_size;
2496 prog->insns_num = prog->size / sizeof(struct bpf_insn);
2497 prog->insns = malloc(prog->size);
2498 if (!prog->insns) {
2499 *lderr = true;
2500 return -ENOMEM;
2501 }
2502
2503 memcpy(prog->insns, data_insn.sec_data->d_buf, prog->size);
2504
2505 ret = bpf_apply_relo_data(ctx, &data_relo, prog, &props);
2506 if (ret < 0) {
2507 *lderr = true;
2508 if (ctx->sec_text != idx)
2509 free(prog->insns);
2510 return ret;
2511 }
2512 if (ctx->sec_text == idx) {
2513 fd = 0;
2514 goto out;
2515 }
2516
2517 fd = bpf_prog_attach(section, prog, ctx);
2518 free(prog->insns);
2519 if (fd < 0) {
2520 *lderr = true;
2521 if (props.tc.total) {
2522 if (ctx->cfg.jit_enabled &&
2523 props.tc.total != props.tc.jited)
2524 fprintf(stderr, "JIT enabled, but only %u/%u tail call maps in the program have JITed owner!\n",
2525 props.tc.jited, props.tc.total);
2526 if (!ctx->cfg.jit_enabled &&
2527 props.tc.jited)
2528 fprintf(stderr, "JIT disabled, but %u/%u tail call maps in the program have JITed owner!\n",
2529 props.tc.jited, props.tc.total);
2530 }
2531 return fd;
2532 }
2533out:
2534 ctx->sec_done[i] = true;
2535 ctx->sec_done[idx] = true;
2536 break;
2537 }
2538
2539 return fd;
2540}
2541
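/* Top level loader for one section name: relocate .text first if bpf-to-bpf
 * calls are present, then try the relocation-aware path and fall back to the
 * plain loader when no relocations apply to the section.
 */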
2542static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
2543{
2544 bool lderr = false, sseen = false;
2545 struct bpf_elf_prog prog;
2546 int ret = -1;
2547
2548 if (bpf_has_call_data(ctx)) {
2549 ret = bpf_fetch_prog_relo(ctx, ".text", &lderr, NULL,
2550 &ctx->prog_text);
2551 if (ret < 0)
2552 return ret;
2553 }
2554
2555 if (bpf_has_map_data(ctx) || bpf_has_call_data(ctx))
2556 ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen, &prog);
2557 if (ret < 0 && !lderr)
2558 ret = bpf_fetch_prog(ctx, section, &sseen);
2559 if (ret < 0 && !sseen)
2560 fprintf(stderr, "Program section \'%s\' not found in ELF file!\n",
2561 section);
2562 return ret;
2563}
2564
2565static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
2566{
2567 int i;
2568
2569 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++)
2570 if (ctx->map_fds[i] && ctx->maps[i].id == id &&
2571 ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
2572 return i;
2573 return -1;
2574}
2575
2576struct bpf_jited_aux {
2577 int prog_fd;
2578 int map_fd;
2579 struct bpf_prog_data prog;
2580 struct bpf_map_ext map;
2581};
2582
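/* Recover a program's type and JIT state from /proc/<pid>/fdinfo/<fd>, which
 * the kernel exposes for BPF program fds.
 */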
2583static int bpf_derive_prog_from_fdinfo(int fd, struct bpf_prog_data *prog)
2584{
2585 char file[PATH_MAX], buff[4096];
2586 unsigned int val;
2587 FILE *fp;
2588
2589 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
2590 memset(prog, 0, sizeof(*prog));
2591
2592 fp = fopen(file, "r");
2593 if (!fp) {
2594 fprintf(stderr, "No procfs support?!\n");
2595 return -EIO;
2596 }
2597
2598 while (fgets(buff, sizeof(buff), fp)) {
2599 if (sscanf(buff, "prog_type:\t%u", &val) == 1)
2600 prog->type = val;
2601 else if (sscanf(buff, "prog_jited:\t%u", &val) == 1)
2602 prog->jited = val;
2603 }
2604
2605 fclose(fp);
2606 return 0;
2607}
2608
2609static int bpf_tail_call_get_aux(struct bpf_jited_aux *aux)
2610{
2611 struct bpf_elf_map tmp;
2612 int ret;
2613
2614 ret = bpf_derive_elf_map_from_fdinfo(aux->map_fd, &tmp, &aux->map);
2615 if (!ret)
2616 ret = bpf_derive_prog_from_fdinfo(aux->prog_fd, &aux->prog);
2617
2618 return ret;
2619}
2620
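/* Populate tail call maps: sections named "<map id>/<key>" hold programs that
 * are loaded and then inserted at <key> into the prog array map carrying the
 * matching id. On update failures, fdinfo of map and program is compared to
 * point out the usual owner type or JIT mismatches.
 */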
2621static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
2622{
2623 struct bpf_elf_sec_data data;
2624 uint32_t map_id, key_id;
2625 int fd, i, ret, idx;
2626
2627 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
2628 if (ctx->sec_done[i])
2629 continue;
2630
2631 ret = bpf_fill_section_data(ctx, i, &data);
2632 if (ret < 0)
2633 continue;
2634
2635 ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id);
2636 if (ret != 2)
2637 continue;
2638
2639 idx = bpf_find_map_by_id(ctx, map_id);
2640 if (idx < 0)
2641 continue;
2642
2643 fd = bpf_fetch_prog_sec(ctx, data.sec_name);
2644 if (fd < 0)
2645 return -EIO;
2646
2647 ret = bpf_map_update(ctx->map_fds[idx], &key_id,
2648 &fd, BPF_ANY);
2649 if (ret < 0) {
2650 struct bpf_jited_aux aux = {};
2651
2652 ret = -errno;
2653 if (errno == E2BIG) {
2654 fprintf(stderr, "Tail call key %u for map %u out of bounds?\n",
2655 key_id, map_id);
2656 return ret;
2657 }
2658
2659 aux.map_fd = ctx->map_fds[idx];
2660 aux.prog_fd = fd;
2661
2662 if (bpf_tail_call_get_aux(&aux))
2663 return ret;
2664 if (!aux.map.owner.type)
2665 return ret;
2666
2667 if (aux.prog.type != aux.map.owner.type)
2668 fprintf(stderr, "Tail call map owned by prog type %u, but prog type is %u!\n",
2669 aux.map.owner.type, aux.prog.type);
2670 if (aux.prog.jited != aux.map.owner.jited)
2671 fprintf(stderr, "Tail call map %s jited, but prog %s!\n",
2672 aux.map.owner.jited ? "is" : "not",
2673 aux.prog.jited ? "is" : "not");
2674 return ret;
2675 }
2676
2677 ctx->sec_done[i] = true;
2678 }
2679
2680 return 0;
2681}
2682
2683static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
2684{
2685 struct stat st;
2686 int ret;
2687
2688 memset(&ctx->stat, 0, sizeof(ctx->stat));
2689
2690 ret = fstat(ctx->obj_fd, &st);
2691 if (ret < 0) {
2692 fprintf(stderr, "Stat of elf file failed: %s\n",
2693 strerror(errno));
2694 return;
2695 }
2696
2697 ctx->stat.st_dev = st.st_dev;
2698 ctx->stat.st_ino = st.st_ino;
2699}
2700
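/* Parse one entry of the pinning database. Valid lines have the form
 * "<id> <subpath>"; blank lines and '#' comments are skipped. Returns 1 for a
 * parsed entry, 0 on EOF and -1 for a malformed line, in which case the
 * offending text is copied back into path for the error message.
 */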
2701static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
2702{
2703 char buff[PATH_MAX];
2704
2705 while (fgets(buff, sizeof(buff), fp)) {
2706 char *ptr = buff;
2707
2708 while (*ptr == ' ' || *ptr == '\t')
2709 ptr++;
2710
2711 if (*ptr == '#' || *ptr == '\n' || *ptr == 0)
2712 continue;
2713
2714 if (sscanf(ptr, "%i %s\n", id, path) != 2 &&
2715 sscanf(ptr, "%i %s #", id, path) != 2) {
2716 strcpy(path, ptr);
2717 return -1;
2718 }
2719
2720 return 1;
2721 }
2722
2723 return 0;
2724}
2725
2726static bool bpf_pinning_reserved(uint32_t pinning)
2727{
2728 switch (pinning) {
2729 case PIN_NONE:
2730 case PIN_OBJECT_NS:
2731 case PIN_GLOBAL_NS:
2732 return true;
2733 default:
2734 return false;
2735 }
2736}
2737
2738static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
2739{
2740 struct bpf_hash_entry *entry;
2741 char subpath[PATH_MAX] = {};
2742 uint32_t pinning;
2743 FILE *fp;
2744 int ret;
2745
2746 fp = fopen(db_file, "r");
2747 if (!fp)
2748 return;
2749
2750 while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) {
2751 if (ret == -1) {
2752 fprintf(stderr, "Database %s is corrupted at: %s\n",
2753 db_file, subpath);
2754 fclose(fp);
2755 return;
2756 }
2757
2758 if (bpf_pinning_reserved(pinning)) {
2759 fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n",
2760 db_file, pinning);
2761 continue;
2762 }
2763
2764 entry = malloc(sizeof(*entry));
2765 if (!entry) {
2766 fprintf(stderr, "No memory left for db entry!\n");
2767 continue;
2768 }
2769
2770 entry->pinning = pinning;
2771 entry->subpath = strdup(subpath);
2772 if (!entry->subpath) {
2773 fprintf(stderr, "No memory left for db entry!\n");
2774 free(entry);
2775 continue;
2776 }
2777
2778 entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
2779 ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry;
2780 }
2781
2782 fclose(fp);
2783}
2784
2785static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
2786{
2787 struct bpf_hash_entry *entry;
2788 int i;
2789
2790 for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) {
2791 while ((entry = ctx->ht[i]) != NULL) {
2792 ctx->ht[i] = entry->next;
2793 free((char *)entry->subpath);
2794 free(entry);
2795 }
2796 }
2797}
2798
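/* Basic ELF header checks: the object must be a relocatable eBPF file
 * (EM_NONE is tolerated next to EM_BPF) and its byte order must match the
 * host's, otherwise the instruction stream cannot be interpreted.
 */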
2799static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
2800{
2801 if (ctx->elf_hdr.e_type != ET_REL ||
2802 (ctx->elf_hdr.e_machine != EM_NONE &&
2803 ctx->elf_hdr.e_machine != EM_BPF) ||
2804 ctx->elf_hdr.e_version != EV_CURRENT) {
2805 fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
2806 return -EINVAL;
2807 }
2808
2809 switch (ctx->elf_hdr.e_ident[EI_DATA]) {
2810 default:
2811 fprintf(stderr, "ELF format error, wrong endianness info?\n");
2812 return -EINVAL;
2813 case ELFDATA2LSB:
2814 if (htons(1) == 1) {
2815 fprintf(stderr,
2816 "We are big endian, eBPF object is little endian!\n");
2817 return -EIO;
2818 }
2819 break;
2820 case ELFDATA2MSB:
2821 if (htons(1) != 1) {
2822 fprintf(stderr,
2823 "We are little endian, eBPF object is big endian!\n");
2824 return -EIO;
2825 }
2826 break;
2827 }
2828
2829 return 0;
2830}
2831
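/* Cache whether the kernel JIT is enabled by reading the
 * net.core.bpf_jit_enable sysctl; used for tail call related diagnostics.
 */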
2832static void bpf_get_cfg(struct bpf_elf_ctx *ctx)
2833{
2834 static const char *path_jit = "/proc/sys/net/core/bpf_jit_enable";
2835 int fd;
2836
2837 fd = open(path_jit, O_RDONLY);
2838 if (fd >= 0) {
2839 char tmp[16] = {};
2840
		if (read(fd, tmp, sizeof(tmp) - 1) > 0)
2842 ctx->cfg.jit_enabled = atoi(tmp);
2843 close(fd);
2844 }
2845}
2846
2847static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
2848 enum bpf_prog_type type, __u32 ifindex,
2849 bool verbose)
2850{
2851 uint8_t tmp[20];
2852 int ret;
2853
2854 if (elf_version(EV_CURRENT) == EV_NONE)
2855 return -EINVAL;
2856
2857 bpf_init_env();
2858
2859 memset(ctx, 0, sizeof(*ctx));
2860 bpf_get_cfg(ctx);
2861
2862 ret = bpf_obj_hash(pathname, tmp, sizeof(tmp));
2863 if (ret)
2864 ctx->noafalg = true;
2865 else
2866 hexstring_n2a(tmp, sizeof(tmp), ctx->obj_uid,
2867 sizeof(ctx->obj_uid));
2868
2869 ctx->verbose = verbose;
2870 ctx->type = type;
2871 ctx->ifindex = ifindex;
2872
2873 ctx->obj_fd = open(pathname, O_RDONLY);
2874 if (ctx->obj_fd < 0)
2875 return ctx->obj_fd;
2876
2877 ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
2878 if (!ctx->elf_fd) {
2879 ret = -EINVAL;
2880 goto out_fd;
2881 }
2882
2883 if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
2884 ret = -EINVAL;
		goto out_elf;
2886 }
2887
2888 if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
2889 &ctx->elf_hdr) {
2890 ret = -EIO;
2891 goto out_elf;
2892 }
2893
2894 ret = bpf_elf_check_ehdr(ctx);
2895 if (ret < 0)
2896 goto out_elf;
2897
2898 ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
2899 sizeof(*(ctx->sec_done)));
2900 if (!ctx->sec_done) {
2901 ret = -ENOMEM;
2902 goto out_elf;
2903 }
2904
2905 if (ctx->verbose && bpf_log_realloc(ctx)) {
2906 ret = -ENOMEM;
2907 goto out_free;
2908 }
2909
2910 bpf_save_finfo(ctx);
2911 bpf_hash_init(ctx, CONFDIR "/bpf_pinning");
2912
2913 return 0;
2914out_free:
2915 free(ctx->sec_done);
2916out_elf:
2917 elf_end(ctx->elf_fd);
2918out_fd:
2919 close(ctx->obj_fd);
2920 return ret;
2921}
2922
2923static int bpf_maps_count(struct bpf_elf_ctx *ctx)
2924{
2925 int i, count = 0;
2926
2927 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
2928 if (!ctx->map_fds[i])
2929 break;
2930 count++;
2931 }
2932
2933 return count;
2934}
2935
2936static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
2937{
2938 int i;
2939
2940 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
2941 if (ctx->map_fds[i])
2942 close(ctx->map_fds[i]);
2943 }
2944
2945 if (ctx->btf_fd)
2946 close(ctx->btf_fd);
2947 free(ctx->btf.types);
2948}
2949
2950static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
2951{
2952 if (failure)
2953 bpf_maps_teardown(ctx);
2954
2955 bpf_hash_destroy(ctx);
2956
2957 free(ctx->prog_text.insns);
2958 free(ctx->sec_done);
2959 free(ctx->log);
2960
2961 elf_end(ctx->elf_fd);
2962 close(ctx->obj_fd);
2963}
2964
2965static struct bpf_elf_ctx __ctx;
2966
2967static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
2968 const char *section, __u32 ifindex, bool verbose)
2969{
2970 struct bpf_elf_ctx *ctx = &__ctx;
2971 int fd = 0, ret;
2972
2973 ret = bpf_elf_ctx_init(ctx, pathname, type, ifindex, verbose);
2974 if (ret < 0) {
2975 fprintf(stderr, "Cannot initialize ELF context!\n");
2976 return ret;
2977 }
2978
2979 ret = bpf_fetch_ancillary(ctx, strcmp(section, ".text"));
2980 if (ret < 0) {
2981 fprintf(stderr, "Error fetching ELF ancillary data!\n");
2982 goto out;
2983 }
2984
2985 fd = bpf_fetch_prog_sec(ctx, section);
2986 if (fd < 0) {
2987 fprintf(stderr, "Error fetching program/map!\n");
2988 ret = fd;
2989 goto out;
2990 }
2991
2992 ret = bpf_fill_prog_arrays(ctx);
2993 if (ret < 0)
2994 fprintf(stderr, "Error filling program arrays!\n");
2995out:
2996 bpf_elf_ctx_destroy(ctx, ret < 0);
2997 if (ret < 0) {
		if (fd > 0)
2999 close(fd);
3000 return ret;
3001 }
3002
3003 return fd;
3004}
3005
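/* Transfer the object's map fds over a unix socket via SCM_RIGHTS, at most
 * BPF_SCM_MAX_FDS per message, together with the corresponding map specs and
 * object metadata for the receiving process.
 */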
3006static int
3007bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
3008 const struct bpf_map_data *aux, unsigned int entries)
3009{
3010 struct bpf_map_set_msg msg = {
3011 .aux.uds_ver = BPF_SCM_AUX_VER,
3012 .aux.num_ent = entries,
3013 };
3014 int *cmsg_buf, min_fd;
3015 char *amsg_buf;
3016 int i;
3017
3018 strlcpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
3019 memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
3020
3021 cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
3022 amsg_buf = (char *)msg.aux.ent;
3023
3024 for (i = 0; i < entries; i += min_fd) {
3025 int ret;
3026
3027 min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
3028 bpf_map_set_init_single(&msg, min_fd);
3029
3030 memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
3031 memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
3032
3033 ret = sendmsg(fd, &msg.hdr, 0);
3034 if (ret <= 0)
3035 return ret ? : -1;
3036 }
3037
3038 return 0;
3039}
3040
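/* Receive map fds and their descriptions from the unix socket. The aux data
 * of the first message announces how many entries the sender transfers in
 * total.
 */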
3041static int
3042bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
3043 unsigned int entries)
3044{
3045 struct bpf_map_set_msg msg;
3046 int *cmsg_buf, min_fd;
3047 char *amsg_buf, *mmsg_buf;
3048 unsigned int needed = 1;
3049 int i;
3050
3051 cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
3052 amsg_buf = (char *)msg.aux.ent;
3053 mmsg_buf = (char *)&msg.aux;
3054
3055 for (i = 0; i < min(entries, needed); i += min_fd) {
3056 struct cmsghdr *cmsg;
3057 int ret;
3058
3059 min_fd = min(entries, entries - i);
3060 bpf_map_set_init_single(&msg, min_fd);
3061
3062 ret = recvmsg(fd, &msg.hdr, 0);
3063 if (ret <= 0)
3064 return ret ? : -1;
3065
3066 cmsg = CMSG_FIRSTHDR(&msg.hdr);
3067 if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
3068 return -EINVAL;
3069 if (msg.hdr.msg_flags & MSG_CTRUNC)
3070 return -EIO;
3071 if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
3072 return -ENOSYS;
3073
3074 min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
3075 if (min_fd > entries || min_fd <= 0)
3076 return -EINVAL;
3077
3078 memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
3079 memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
3080 memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
3081
3082 needed = aux->num_ent;
3083 }
3084
3085 return 0;
3086}
3087
3088int bpf_send_map_fds(const char *path, const char *obj)
3089{
3090 struct bpf_elf_ctx *ctx = &__ctx;
3091 struct sockaddr_un addr = { .sun_family = AF_UNIX };
3092 struct bpf_map_data bpf_aux = {
3093 .fds = ctx->map_fds,
3094 .ent = ctx->maps,
3095 .st = &ctx->stat,
3096 .obj = obj,
3097 };
3098 int fd, ret = -1;
3099
3100 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
3101 if (fd < 0) {
3102 fprintf(stderr, "Cannot open socket: %s\n",
3103 strerror(errno));
3104 goto out;
3105 }
3106
3107 strlcpy(addr.sun_path, path, sizeof(addr.sun_path));
3108
3109 ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
3110 if (ret < 0) {
3111 fprintf(stderr, "Cannot connect to %s: %s\n",
3112 path, strerror(errno));
3113 goto out;
3114 }
3115
3116 ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
3117 bpf_maps_count(ctx));
3118 if (ret < 0)
3119 fprintf(stderr, "Cannot send fds to %s: %s\n",
3120 path, strerror(errno));
3121
3122 bpf_maps_teardown(ctx);
3123out:
3124 if (fd >= 0)
3125 close(fd);
3126 return ret;
3127}
3128
3129int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
3130 unsigned int entries)
3131{
3132 struct sockaddr_un addr = { .sun_family = AF_UNIX };
3133 int fd, ret = -1;
3134
3135 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
3136 if (fd < 0) {
3137 fprintf(stderr, "Cannot open socket: %s\n",
3138 strerror(errno));
3139 goto out;
3140 }
3141
3142 strlcpy(addr.sun_path, path, sizeof(addr.sun_path));
3143
3144 ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
3145 if (ret < 0) {
3146 fprintf(stderr, "Cannot bind to socket: %s\n",
3147 strerror(errno));
3148 goto out;
3149 }
3150
3151 ret = bpf_map_set_recv(fd, fds, aux, entries);
3152 if (ret < 0)
3153 fprintf(stderr, "Cannot recv fds from %s: %s\n",
3154 path, strerror(errno));
3155
3156 unlink(addr.sun_path);
3157
3158out:
3159 if (fd >= 0)
3160 close(fd);
3161 return ret;
3162}
3163
3164#ifdef HAVE_LIBBPF
3168
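/* Wrappers used by the libbpf based loader so that it can reuse the legacy
 * iproute2 ELF context for the iproute2-specific bits: the legacy maps
 * section, the pinning database and tail call (prog array) handling.
 */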
3169int iproute2_bpf_elf_ctx_init(struct bpf_cfg_in *cfg)
3170{
3171 struct bpf_elf_ctx *ctx = &__ctx;
3172
3173 return bpf_elf_ctx_init(ctx, cfg->object, cfg->type, cfg->ifindex, cfg->verbose);
3174}
3175
3176int iproute2_bpf_fetch_ancillary(void)
3177{
3178 struct bpf_elf_ctx *ctx = &__ctx;
3179 struct bpf_elf_sec_data data;
3180 int i, ret = 0;
3181
3182 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
3183 ret = bpf_fill_section_data(ctx, i, &data);
3184 if (ret < 0)
3185 continue;
3186
3187 if (data.sec_hdr.sh_type == SHT_PROGBITS &&
3188 !strcmp(data.sec_name, ELF_SECTION_MAPS))
3189 ret = bpf_fetch_maps_begin(ctx, i, &data);
3190 else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
3191 !strcmp(data.sec_name, ".symtab"))
3192 ret = bpf_fetch_symtab(ctx, i, &data);
3193 else if (data.sec_hdr.sh_type == SHT_STRTAB &&
3194 !strcmp(data.sec_name, ".strtab"))
3195 ret = bpf_fetch_strtab(ctx, i, &data);
3196 if (ret < 0) {
3197 fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
3198 i);
3199 return ret;
3200 }
3201 }
3202
3203 if (bpf_has_map_data(ctx)) {
3204 ret = bpf_fetch_maps_end(ctx);
3205 if (ret < 0) {
3206 fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
3207 return ret;
3208 }
3209 }
3210
3211 return ret;
3212}
3213
3214int iproute2_get_root_path(char *root_path, size_t len)
3215{
3216 struct bpf_elf_ctx *ctx = &__ctx;
3217 int ret = 0;
3218
3219 snprintf(root_path, len, "%s/%s",
3220 bpf_get_work_dir(ctx->type), BPF_DIR_GLOBALS);
3221
3222 ret = mkdir(root_path, S_IRWXU);
3223 if (ret && errno != EEXIST) {
3224 fprintf(stderr, "mkdir %s failed: %s\n", root_path, strerror(errno));
3225 return ret;
3226 }
3227
3228 return 0;
3229}
3230
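/* Decide for a libbpf loaded map whether the legacy map spec of the same name
 * requests pinning; if so, the pin path is written into pathname (assumed to
 * hold at least PATH_MAX bytes, as passed to bpf_make_pathname()).
 */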
3231bool iproute2_is_pin_map(const char *libbpf_map_name, char *pathname)
3232{
3233 struct bpf_elf_ctx *ctx = &__ctx;
3234 const char *map_name, *tmp;
3235 unsigned int pinning;
3236 int i, ret = 0;
3237
3238 for (i = 0; i < ctx->map_num; i++) {
3239 if (ctx->maps[i].pinning == PIN_OBJECT_NS &&
3240 ctx->noafalg) {
3241 fprintf(stderr, "Missing kernel AF_ALG support for PIN_OBJECT_NS!\n");
3242 return false;
3243 }
3244
3245 map_name = bpf_map_fetch_name(ctx, i);
		if (!map_name)
			return false;
3249
3250 if (strcmp(libbpf_map_name, map_name))
3251 continue;
3252
3253 pinning = ctx->maps[i].pinning;
3254
3255 if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type))
3256 return false;
3257
3258 if (pinning == PIN_OBJECT_NS)
3259 ret = bpf_make_obj_path(ctx);
3260 else if ((tmp = bpf_custom_pinning(ctx, pinning)))
3261 ret = bpf_make_custom_path(ctx, tmp);
3262 if (ret < 0)
3263 return false;
3264
3265 bpf_make_pathname(pathname, PATH_MAX, map_name, ctx, pinning);
3266
3267 return true;
3268 }
3269
3270 return false;
3271}
3272
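/* Map-in-map lookup for the libbpf loader: if the named map is the inner map
 * of a map-in-map construct, copy the inner and outer legacy specs and the
 * outer map's name back to the caller.
 */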
3273bool iproute2_is_map_in_map(const char *libbpf_map_name, struct bpf_elf_map *imap,
3274 struct bpf_elf_map *omap, char *omap_name)
3275{
3276 struct bpf_elf_ctx *ctx = &__ctx;
3277 const char *inner_map_name, *outer_map_name;
3278 int i, j;
3279
3280 for (i = 0; i < ctx->map_num; i++) {
3281 inner_map_name = bpf_map_fetch_name(ctx, i);
		if (!inner_map_name)
			return false;
3285
3286 if (strcmp(libbpf_map_name, inner_map_name))
3287 continue;
3288
3289 if (!ctx->maps[i].id ||
3290 ctx->maps[i].inner_id)
3291 continue;
3292
3293 *imap = ctx->maps[i];
3294
3295 for (j = 0; j < ctx->map_num; j++) {
3296 if (!bpf_is_map_in_map_type(&ctx->maps[j]))
3297 continue;
3298 if (ctx->maps[j].inner_id != ctx->maps[i].id)
3299 continue;
3300
3301 *omap = ctx->maps[j];
3302 outer_map_name = bpf_map_fetch_name(ctx, j);
3303 if (!outer_map_name)
3304 return false;
3305
3306 memcpy(omap_name, outer_map_name, strlen(outer_map_name) + 1);
3307
3308 return true;
3309 }
3310 }
3311
3312 return false;
3313}
3314
3315int iproute2_find_map_name_by_id(unsigned int map_id, char *name)
3316{
3317 struct bpf_elf_ctx *ctx = &__ctx;
3318 const char *map_name;
3319 int i, idx = -1;
3320
3321 for (i = 0; i < ctx->map_num; i++) {
3322 if (ctx->maps[i].id == map_id &&
3323 ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) {
3324 idx = i;
3325 break;
3326 }
3327 }
3328
3329 if (idx < 0)
3330 return -1;
3331
3332 map_name = bpf_map_fetch_name(ctx, idx);
3333 if (!map_name)
3334 return -1;
3335
3336 memcpy(name, map_name, strlen(map_name) + 1);
3337 return 0;
3338}
3339#endif
3340#endif
3341