// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * AF_XDP user-space access library.
 *
 * Copyright(c) 2018 - 2019 Intel Corporation.
 *
 * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
 */

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <asm/barrier.h>
#include <linux/compiler.h>
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>

#include "bpf.h"
#include "libbpf.h"
#include "libbpf_util.h"
#include "xsk.h"

#ifndef SOL_XDP
 #define SOL_XDP 283
#endif

#ifndef AF_XDP
 #define AF_XDP 44
#endif

#ifndef PF_XDP
 #define PF_XDP AF_XDP
#endif

struct xsk_umem {
	struct xsk_ring_prod *fill;
	struct xsk_ring_cons *comp;
	char *umem_area;
	struct xsk_umem_config config;
	int fd;
	int refcount;
};

struct xsk_socket {
	struct xsk_ring_cons *rx;
	struct xsk_ring_prod *tx;
	__u64 outstanding_tx;
	struct xsk_umem *umem;
	struct xsk_socket_config config;
	int fd;
	int xsks_map;
	int ifindex;
	int prog_fd;
	int qidconf_map_fd;
	int xsks_map_fd;
	__u32 queue_id;
	char ifname[IFNAMSIZ];
};

struct xsk_nl_info {
	bool xdp_prog_attached;
	int ifindex;
	int fd;
};

/* For 32-bit systems, we need to use mmap2 as the offsets are 64-bit.
 * Unfortunately, it is not part of glibc.
 */
static inline void *xsk_mmap(void *addr, size_t length, int prot, int flags,
			     int fd, __u64 offset)
{
#ifdef __NR_mmap2
	unsigned int page_shift = __builtin_ffs(getpagesize()) - 1;
	long ret = syscall(__NR_mmap2, addr, length, prot, flags, fd,
			   (off_t)(offset >> page_shift));

	return (void *)ret;
#else
	return mmap(addr, length, prot, flags, fd, offset);
#endif
}

int xsk_umem__fd(const struct xsk_umem *umem)
{
	return umem ? umem->fd : -EINVAL;
}

int xsk_socket__fd(const struct xsk_socket *xsk)
{
	return xsk ? xsk->fd : -EINVAL;
}

static bool xsk_page_aligned(void *buffer)
{
	unsigned long addr = (unsigned long)buffer;

	return !(addr & (getpagesize() - 1));
}

static void xsk_set_umem_config(struct xsk_umem_config *cfg,
				const struct xsk_umem_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
		cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
		return;
	}

	cfg->fill_size = usr_cfg->fill_size;
	cfg->comp_size = usr_cfg->comp_size;
	cfg->frame_size = usr_cfg->frame_size;
	cfg->frame_headroom = usr_cfg->frame_headroom;
}
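
/* Usage sketch for xsk_set_umem_config(), illustrative only: passing NULL
 * to xsk_umem__create() below selects the defaults; a caller wanting
 * different ring sizes fills the struct in explicitly. Ring sizes must be
 * powers of two, since the mask arithmetic in this file relies on that.
 *
 *	struct xsk_umem_config cfg = {
 *		.fill_size = 2048,
 *		.comp_size = 2048,
 *		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
 *		.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
 *	};
 */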

static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
				     const struct xsk_socket_config *usr_cfg)
{
	if (!usr_cfg) {
		cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
		cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
		cfg->libbpf_flags = 0;
		cfg->xdp_flags = 0;
		cfg->bind_flags = 0;
		return 0;
	}

	if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
		return -EINVAL;

	cfg->rx_size = usr_cfg->rx_size;
	cfg->tx_size = usr_cfg->tx_size;
	cfg->libbpf_flags = usr_cfg->libbpf_flags;
	cfg->xdp_flags = usr_cfg->xdp_flags;
	cfg->bind_flags = usr_cfg->bind_flags;

	return 0;
}
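
/* Usage sketch for xsk_set_xdp_socket_config(), illustrative only: a caller
 * that attaches its own XDP program can ask libbpf not to load the built-in
 * one by setting XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD in libbpf_flags:
 *
 *	struct xsk_socket_config cfg = {
 *		.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
 *		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
 *		.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD,
 *	};
 */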

int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
		     struct xsk_ring_prod *fill, struct xsk_ring_cons *comp,
		     const struct xsk_umem_config *usr_config)
{
	struct xdp_mmap_offsets off;
	struct xdp_umem_reg mr;
	struct xsk_umem *umem;
	socklen_t optlen;
	void *map;
	int err;

	if (!umem_area || !umem_ptr || !fill || !comp)
		return -EFAULT;
	if (!size && !xsk_page_aligned(umem_area))
		return -EINVAL;

	umem = calloc(1, sizeof(*umem));
	if (!umem)
		return -ENOMEM;

	umem->fd = socket(AF_XDP, SOCK_RAW, 0);
	if (umem->fd < 0) {
		err = -errno;
		goto out_umem_alloc;
	}

	umem->umem_area = umem_area;
	xsk_set_umem_config(&umem->config, usr_config);

	mr.addr = (uintptr_t)umem_area;
	mr.len = size;
	mr.chunk_size = umem->config.frame_size;
	mr.headroom = umem->config.frame_headroom;

	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
	if (err) {
		err = -errno;
		goto out_socket;
	}
	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING,
			 &umem->config.fill_size,
			 sizeof(umem->config.fill_size));
	if (err) {
		err = -errno;
		goto out_socket;
	}
	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
			 &umem->config.comp_size,
			 sizeof(umem->config.comp_size));
	if (err) {
		err = -errno;
		goto out_socket;
	}

	optlen = sizeof(off);
	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
	if (err) {
		err = -errno;
		goto out_socket;
	}

	map = xsk_mmap(NULL, off.fr.desc +
		       umem->config.fill_size * sizeof(__u64),
		       PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		       umem->fd, XDP_UMEM_PGOFF_FILL_RING);
	if (map == MAP_FAILED) {
		err = -errno;
		goto out_socket;
	}

	umem->fill = fill;
	fill->mask = umem->config.fill_size - 1;
	fill->size = umem->config.fill_size;
	fill->producer = map + off.fr.producer;
	fill->consumer = map + off.fr.consumer;
	fill->ring = map + off.fr.desc;
	fill->cached_cons = umem->config.fill_size;

	map = xsk_mmap(NULL,
		       off.cr.desc + umem->config.comp_size * sizeof(__u64),
		       PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		       umem->fd, XDP_UMEM_PGOFF_COMPLETION_RING);
	if (map == MAP_FAILED) {
		err = -errno;
		goto out_mmap;
	}

	umem->comp = comp;
	comp->mask = umem->config.comp_size - 1;
	comp->size = umem->config.comp_size;
	comp->producer = map + off.cr.producer;
	comp->consumer = map + off.cr.consumer;
	comp->ring = map + off.cr.desc;

	*umem_ptr = umem;
	return 0;

out_mmap:
	/* The fill mapping starts off.fr.desc bytes before the ring itself,
	 * so unmap from the mapping base, not from the ring pointer.
	 */
	munmap(fill->ring - off.fr.desc,
	       off.fr.desc + umem->config.fill_size * sizeof(__u64));
out_socket:
	close(umem->fd);
out_umem_alloc:
	free(umem);
	return err;
}
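
/* Usage sketch for xsk_umem__create(), illustrative only: the umem area
 * must be page aligned, so callers typically grab it with posix_memalign()
 * or an anonymous mmap(). NUM_FRAMES is a hypothetical application-chosen
 * constant.
 *
 *	struct xsk_ring_prod fill;
 *	struct xsk_ring_cons comp;
 *	struct xsk_umem *umem;
 *	__u64 size = NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
 *	void *bufs;
 *
 *	if (posix_memalign(&bufs, getpagesize(), size))
 *		exit(EXIT_FAILURE);
 *	if (xsk_umem__create(&umem, bufs, size, &fill, &comp, NULL))
 *		exit(EXIT_FAILURE);
 */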

static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
	char bpf_log_buf[BPF_LOG_BUF_SIZE];
	int err, prog_fd;

	/* This is the C-program:
	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
	 * {
	 *     int *qidconf, index = ctx->rx_queue_index;
	 *
	 *     // A set entry here means that the corresponding queue_id
	 *     // has an active AF_XDP socket bound to it.
	 *     qidconf = bpf_map_lookup_elem(&qidconf_map, &index);
	 *     if (!qidconf)
	 *         return XDP_ABORTED;
	 *
	 *     if (*qidconf)
	 *         return bpf_redirect_map(&xsks_map, index, 0);
	 *
	 *     return XDP_PASS;
	 * }
	 */
	struct bpf_insn prog[] = {
		/* r1 = *(u32 *)(r1 + 16), i.e. load ctx->rx_queue_index */
		BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 16),
		/* *(u32 *)(r10 - 4) = r1, spill the index to the stack */
		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4),
		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
		BPF_LD_MAP_FD(BPF_REG_1, xsk->qidconf_map_fd),
		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
		BPF_MOV32_IMM(BPF_REG_0, 0),
		/* if r1 == 0 goto +8, returning XDP_ABORTED (0) */
		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),
		BPF_MOV32_IMM(BPF_REG_0, 2),
		/* r1 = *(u32 *)(r1 + 0), i.e. *qidconf */
		BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
		/* if r1 == 0 goto +5, returning XDP_PASS (2) */
		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
		/* r2 = *(u32 *)(r10 - 4) */
		BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
		BPF_MOV32_IMM(BPF_REG_3, 0),
		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
		/* The jumps are to this instruction */
		BPF_EXIT_INSN(),
	};
	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);

	prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt,
				   "LGPL-2.1 or BSD-2-Clause", 0, bpf_log_buf,
				   BPF_LOG_BUF_SIZE);
	if (prog_fd < 0) {
		pr_warning("BPF log buffer:\n%s", bpf_log_buf);
		return prog_fd;
	}

	err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags);
	if (err) {
		close(prog_fd);
		return err;
	}

	xsk->prog_fd = prog_fd;
	return 0;
}

static int xsk_get_max_queues(struct xsk_socket *xsk)
{
	struct ethtool_channels channels;
	struct ifreq ifr;
	int fd, err, ret;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return -errno;

	/* Zero the struct so max_combined is defined even when the
	 * ioctl is unsupported and leaves it untouched.
	 */
	memset(&channels, 0, sizeof(channels));
	channels.cmd = ETHTOOL_GCHANNELS;
	ifr.ifr_data = (void *)&channels;
	strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
	err = ioctl(fd, SIOCETHTOOL, &ifr);
	if (err && errno != EOPNOTSUPP) {
		ret = -errno;
		goto out;
	}

	if (channels.max_combined == 0 || errno == EOPNOTSUPP)
		/* If the device says it has no channels, then all traffic
		 * is sent to a single stream, so max queues = 1.
		 */
		ret = 1;
	else
		ret = channels.max_combined;

out:
	close(fd);
	return ret;
}

static int xsk_create_bpf_maps(struct xsk_socket *xsk)
{
	int max_queues;
	int fd;

	max_queues = xsk_get_max_queues(xsk);
	if (max_queues < 0)
		return max_queues;

	fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "qidconf_map",
				 sizeof(int), sizeof(int), max_queues, 0);
	if (fd < 0)
		return fd;
	xsk->qidconf_map_fd = fd;

	fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
				 sizeof(int), sizeof(int), max_queues, 0);
	if (fd < 0) {
		close(xsk->qidconf_map_fd);
		return fd;
	}
	xsk->xsks_map_fd = fd;

	return 0;
}

static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
{
	close(xsk->qidconf_map_fd);
	close(xsk->xsks_map_fd);
}

static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value,
			       int xsks_value)
{
	bool qidconf_map_updated = false, xsks_map_updated = false;
	struct bpf_prog_info prog_info = {};
	__u32 prog_len = sizeof(prog_info);
	struct bpf_map_info map_info;
	__u32 map_len = sizeof(map_info);
	__u32 *map_ids;
	int reset_value = 0;
	__u32 num_maps;
	unsigned int i;
	int err;

	err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
	if (err)
		return err;

	num_maps = prog_info.nr_map_ids;

	map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
	if (!map_ids)
		return -ENOMEM;

	memset(&prog_info, 0, prog_len);
	prog_info.nr_map_ids = num_maps;
	prog_info.map_ids = (__u64)(unsigned long)map_ids;

	err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
	if (err)
		goto out_map_ids;

	for (i = 0; i < prog_info.nr_map_ids; i++) {
		int fd;

		fd = bpf_map_get_fd_by_id(map_ids[i]);
		if (fd < 0) {
			err = -errno;
			goto out_maps;
		}

		err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
		if (err)
			goto out_maps;

		if (!strcmp(map_info.name, "qidconf_map")) {
			err = bpf_map_update_elem(fd, &xsk->queue_id,
						  &qidconf_value, 0);
			if (err)
				goto out_maps;
			qidconf_map_updated = true;
			xsk->qidconf_map_fd = fd;
		} else if (!strcmp(map_info.name, "xsks_map")) {
			err = bpf_map_update_elem(fd, &xsk->queue_id,
						  &xsks_value, 0);
			if (err)
				goto out_maps;
			xsks_map_updated = true;
			xsk->xsks_map_fd = fd;
		}

		if (qidconf_map_updated && xsks_map_updated)
			break;
	}

	if (!(qidconf_map_updated && xsks_map_updated)) {
		err = -ENOENT;
		goto out_maps;
	}

	err = 0;
	goto out_success;

out_maps:
	if (qidconf_map_updated)
		(void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id,
					  &reset_value, 0);
	if (xsks_map_updated)
		(void)bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
					  &reset_value, 0);
out_success:
	if (qidconf_map_updated)
		close(xsk->qidconf_map_fd);
	if (xsks_map_updated)
		close(xsk->xsks_map_fd);
out_map_ids:
	free(map_ids);
	return err;
}

static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
{
	bool prog_attached = false;
	__u32 prog_id = 0;
	int err;

	err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
				  xsk->config.xdp_flags);
	if (err)
		return err;

	if (!prog_id) {
		prog_attached = true;
		err = xsk_create_bpf_maps(xsk);
		if (err)
			return err;

		err = xsk_load_xdp_prog(xsk);
		if (err)
			goto out_maps;
	} else {
		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
	}

	err = xsk_update_bpf_maps(xsk, true, xsk->fd);
	if (err)
		goto out_load;

	return 0;

out_load:
	if (prog_attached)
		close(xsk->prog_fd);
out_maps:
	if (prog_attached)
		xsk_delete_bpf_maps(xsk);
	return err;
}

int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
		       __u32 queue_id, struct xsk_umem *umem,
		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
		       const struct xsk_socket_config *usr_config)
{
	struct sockaddr_xdp sxdp = {};
	struct xdp_mmap_offsets off;
	struct xsk_socket *xsk;
	socklen_t optlen;
	void *map;
	int err;

	if (!umem || !xsk_ptr || !rx || !tx)
		return -EFAULT;

	if (umem->refcount) {
		pr_warning("Error: shared umems not supported by libbpf.\n");
		return -EBUSY;
	}

	xsk = calloc(1, sizeof(*xsk));
	if (!xsk)
		return -ENOMEM;

	if (umem->refcount++ > 0) {
		xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
		if (xsk->fd < 0) {
			err = -errno;
			goto out_xsk_alloc;
		}
	} else {
		xsk->fd = umem->fd;
	}

	xsk->outstanding_tx = 0;
	xsk->queue_id = queue_id;
	xsk->umem = umem;
	xsk->ifindex = if_nametoindex(ifname);
	if (!xsk->ifindex) {
		err = -errno;
		goto out_socket;
	}
	strncpy(xsk->ifname, ifname, IFNAMSIZ - 1);
	xsk->ifname[IFNAMSIZ - 1] = '\0';

	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
	if (err)
		goto out_socket;

	if (rx) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
				 &xsk->config.rx_size,
				 sizeof(xsk->config.rx_size));
		if (err) {
			err = -errno;
			goto out_socket;
		}
	}
	if (tx) {
		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
				 &xsk->config.tx_size,
				 sizeof(xsk->config.tx_size));
		if (err) {
			err = -errno;
			goto out_socket;
		}
	}

	optlen = sizeof(off);
	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
	if (err) {
		err = -errno;
		goto out_socket;
	}

	if (rx) {
		map = xsk_mmap(NULL, off.rx.desc +
			       xsk->config.rx_size * sizeof(struct xdp_desc),
			       PROT_READ | PROT_WRITE,
			       MAP_SHARED | MAP_POPULATE,
			       xsk->fd, XDP_PGOFF_RX_RING);
		if (map == MAP_FAILED) {
			err = -errno;
			goto out_socket;
		}

		rx->mask = xsk->config.rx_size - 1;
		rx->size = xsk->config.rx_size;
		rx->producer = map + off.rx.producer;
		rx->consumer = map + off.rx.consumer;
		rx->ring = map + off.rx.desc;
	}
	xsk->rx = rx;

	if (tx) {
		map = xsk_mmap(NULL, off.tx.desc +
			       xsk->config.tx_size * sizeof(struct xdp_desc),
			       PROT_READ | PROT_WRITE,
			       MAP_SHARED | MAP_POPULATE,
			       xsk->fd, XDP_PGOFF_TX_RING);
		if (map == MAP_FAILED) {
			err = -errno;
			goto out_mmap_rx;
		}

		tx->mask = xsk->config.tx_size - 1;
		tx->size = xsk->config.tx_size;
		tx->producer = map + off.tx.producer;
		tx->consumer = map + off.tx.consumer;
		tx->ring = map + off.tx.desc;
		tx->cached_cons = xsk->config.tx_size;
	}
	xsk->tx = tx;

	sxdp.sxdp_family = PF_XDP;
	sxdp.sxdp_ifindex = xsk->ifindex;
	sxdp.sxdp_queue_id = xsk->queue_id;
	sxdp.sxdp_flags = xsk->config.bind_flags;

	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
	if (err) {
		err = -errno;
		goto out_mmap_tx;
	}

	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
		err = xsk_setup_xdp_prog(xsk);
		if (err)
			goto out_mmap_tx;
	}

	*xsk_ptr = xsk;
	return 0;

out_mmap_tx:
	/* Unmap from the start of each mapping, not from the user's ring
	 * struct: the mapping base sits off.*.desc bytes before the ring.
	 */
	if (tx)
		munmap(tx->ring - off.tx.desc,
		       off.tx.desc +
		       xsk->config.tx_size * sizeof(struct xdp_desc));
out_mmap_rx:
	if (rx)
		munmap(rx->ring - off.rx.desc,
		       off.rx.desc +
		       xsk->config.rx_size * sizeof(struct xdp_desc));
out_socket:
	if (--umem->refcount)
		close(xsk->fd);
out_xsk_alloc:
	free(xsk);
	return err;
}
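
/* Usage sketch for xsk_socket__create(), illustrative only: one socket on
 * queue 0 of a hypothetical interface "eth0", on top of the umem from the
 * sketch above, with the default config. Teardown runs in reverse order:
 * xsk_socket__delete() first, then xsk_umem__delete().
 *
 *	struct xsk_ring_cons rx;
 *	struct xsk_ring_prod tx;
 *	struct xsk_socket *xsk;
 *
 *	if (xsk_socket__create(&xsk, "eth0", 0, umem, &rx, &tx, NULL))
 *		exit(EXIT_FAILURE);
 */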

int xsk_umem__delete(struct xsk_umem *umem)
{
	struct xdp_mmap_offsets off;
	socklen_t optlen;
	int err;

	if (!umem)
		return 0;

	if (umem->refcount)
		return -EBUSY;

	optlen = sizeof(off);
	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
	if (!err) {
		/* Each mapping starts off.*.desc bytes before its ring. */
		munmap(umem->fill->ring - off.fr.desc,
		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
		munmap(umem->comp->ring - off.cr.desc,
		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
	}

	close(umem->fd);
	free(umem);

	return 0;
}

void xsk_socket__delete(struct xsk_socket *xsk)
{
	struct xdp_mmap_offsets off;
	socklen_t optlen;
	int err;

	if (!xsk)
		return;

	(void)xsk_update_bpf_maps(xsk, 0, 0);

	optlen = sizeof(off);
	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
	if (!err) {
		if (xsk->rx)
			munmap(xsk->rx->ring - off.rx.desc,
			       off.rx.desc +
			       xsk->config.rx_size * sizeof(struct xdp_desc));
		if (xsk->tx)
			munmap(xsk->tx->ring - off.tx.desc,
			       off.tx.desc +
			       xsk->config.tx_size * sizeof(struct xdp_desc));
	}

	xsk->umem->refcount--;
	/* Do not close an fd that also has an associated umem connected
	 * to it.
	 */
	if (xsk->fd != xsk->umem->fd)
		close(xsk->fd);
	free(xsk);
}