#include <unistd.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <poll.h>
#include <netinet/in.h>
#include <net/if.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/if_ether.h>
#include <linux/if_xdp.h>
#include <linux/if_link.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include "af_xdp_deps.h"
#include <bpf/xsk.h>

#include <rte_ethdev.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_string_fns.h>
#include <rte_branch_prediction.h>
#include <rte_common.h>
#include <rte_dev.h>
#include <rte_eal.h>
#include <rte_ether.h>
#include <rte_lcore.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ring.h>
#include <rte_spinlock.h>

#include "compat.h"

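/* Fallback definitions for toolchains whose kernel headers predate AF_XDP. */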
#ifndef SOL_XDP
#define SOL_XDP 283
#endif

#ifndef AF_XDP
#define AF_XDP 44
#endif

#ifndef PF_XDP
#define PF_XDP AF_XDP
#endif

RTE_LOG_REGISTER(af_xdp_logtype, pmd.net.af_xdp, NOTICE);

#define AF_XDP_LOG(level, fmt, args...)			\
	rte_log(RTE_LOG_ ## level, af_xdp_logtype,	\
		"%s(): " fmt, __func__, ##args)

#define ETH_AF_XDP_FRAME_SIZE		2048
#define ETH_AF_XDP_NUM_BUFFERS		4096
#define ETH_AF_XDP_DFLT_NUM_DESCS	XSK_RING_CONS__DEFAULT_NUM_DESCS
#define ETH_AF_XDP_DFLT_START_QUEUE_IDX	0
#define ETH_AF_XDP_DFLT_QUEUE_COUNT	1

#define ETH_AF_XDP_RX_BATCH_SIZE	32
#define ETH_AF_XDP_TX_BATCH_SIZE	32

struct xsk_umem_info {
	struct xsk_umem *umem;
	struct rte_ring *buf_ring;
	const struct rte_memzone *mz;
	struct rte_mempool *mb_pool;
	void *buffer;
	uint8_t refcnt;
	uint32_t max_xsks;
};

struct rx_stats {
	uint64_t rx_pkts;
	uint64_t rx_bytes;
	uint64_t rx_dropped;
};

struct pkt_rx_queue {
	struct xsk_ring_cons rx;
	struct xsk_umem_info *umem;
	struct xsk_socket *xsk;
	struct rte_mempool *mb_pool;

	struct rx_stats stats;

	struct xsk_ring_prod fq;
	struct xsk_ring_cons cq;

	struct pkt_tx_queue *pair;
	struct pollfd fds[1];
	int xsk_queue_idx;
};

struct tx_stats {
	uint64_t tx_pkts;
	uint64_t tx_bytes;
	uint64_t tx_dropped;
};

struct pkt_tx_queue {
	struct xsk_ring_prod tx;
	struct xsk_umem_info *umem;

	struct tx_stats stats;

	struct pkt_rx_queue *pair;
	int xsk_queue_idx;
};

struct pmd_internals {
	int if_index;
	char if_name[IFNAMSIZ];
	int start_queue_idx;
	int queue_cnt;
	int max_queue_cnt;
	int combined_queue_cnt;
	bool shared_umem;
	char prog_path[PATH_MAX];
	bool custom_prog_configured;

	struct rte_ether_addr eth_addr;

	struct pkt_rx_queue *rx_queues;
	struct pkt_tx_queue *tx_queues;
};

#define ETH_AF_XDP_IFACE_ARG		"iface"
#define ETH_AF_XDP_START_QUEUE_ARG	"start_queue"
#define ETH_AF_XDP_QUEUE_COUNT_ARG	"queue_count"
#define ETH_AF_XDP_SHARED_UMEM_ARG	"shared_umem"
#define ETH_AF_XDP_PROG_ARG		"xdp_prog"

static const char * const valid_arguments[] = {
	ETH_AF_XDP_IFACE_ARG,
	ETH_AF_XDP_START_QUEUE_ARG,
	ETH_AF_XDP_QUEUE_COUNT_ARG,
	ETH_AF_XDP_SHARED_UMEM_ARG,
	ETH_AF_XDP_PROG_ARG,
	NULL
};

static const struct rte_eth_link pmd_link = {
	.link_speed = ETH_SPEED_NUM_10G,
	.link_duplex = ETH_LINK_FULL_DUPLEX,
	.link_status = ETH_LINK_DOWN,
	.link_autoneg = ETH_LINK_AUTONEG
};

struct internal_list {
	TAILQ_ENTRY(internal_list) next;
	struct rte_eth_dev *eth_dev;
};

TAILQ_HEAD(internal_list_head, internal_list);
static struct internal_list_head internal_list =
	TAILQ_HEAD_INITIALIZER(internal_list);

static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;

#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
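/*
 * Zero-copy fill queue path: mbufs are carved out of the UMEM itself, so a
 * fill descriptor is simply the mbuf's offset within the UMEM (minus the
 * mempool object header).
 */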
static inline int
reserve_fill_queue_zc(struct xsk_umem_info *umem, uint16_t reserve_size,
		      struct rte_mbuf **bufs, struct xsk_ring_prod *fq)
{
	uint32_t idx;
	uint16_t i;

	if (unlikely(!xsk_ring_prod__reserve(fq, reserve_size, &idx))) {
		for (i = 0; i < reserve_size; i++)
			rte_pktmbuf_free(bufs[i]);
		AF_XDP_LOG(DEBUG, "Failed to reserve enough fq descs.\n");
		return -1;
	}

	for (i = 0; i < reserve_size; i++) {
		__u64 *fq_addr;
		uint64_t addr;

		fq_addr = xsk_ring_prod__fill_addr(fq, idx++);
		addr = (uint64_t)bufs[i] - (uint64_t)umem->buffer -
				umem->mb_pool->header_size;
		*fq_addr = addr;
	}

	xsk_ring_prod__submit(fq, reserve_size);

	return 0;
}
#else
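/*
 * Copy-mode fill queue path: free frame addresses are tracked in a rte_ring
 * and handed back to the kernel via the fill ring.
 */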
static inline int
reserve_fill_queue_cp(struct xsk_umem_info *umem, uint16_t reserve_size,
		      struct rte_mbuf **bufs __rte_unused,
		      struct xsk_ring_prod *fq)
{
	void *addrs[reserve_size];
	uint32_t idx;
	uint16_t i;

	if (rte_ring_dequeue_bulk(umem->buf_ring, addrs, reserve_size, NULL)
		    != reserve_size) {
		AF_XDP_LOG(DEBUG, "Failed to get enough buffers for fq.\n");
		return -1;
	}

	if (unlikely(!xsk_ring_prod__reserve(fq, reserve_size, &idx))) {
		AF_XDP_LOG(DEBUG, "Failed to reserve enough fq descs.\n");
		rte_ring_enqueue_bulk(umem->buf_ring, addrs,
				reserve_size, NULL);
		return -1;
	}

	for (i = 0; i < reserve_size; i++) {
		__u64 *fq_addr;

		fq_addr = xsk_ring_prod__fill_addr(fq, idx++);
		*fq_addr = (uint64_t)addrs[i];
	}

	xsk_ring_prod__submit(fq, reserve_size);

	return 0;
}
#endif

static inline int
reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size,
		   struct rte_mbuf **bufs, struct xsk_ring_prod *fq)
{
#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
	return reserve_fill_queue_zc(umem, reserve_size, bufs, fq);
#else
	return reserve_fill_queue_cp(umem, reserve_size, bufs, fq);
#endif
}

#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
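/*
 * Zero-copy RX burst: received descriptors are translated back into the
 * mbufs that overlay the UMEM, so no packet data is copied.
 */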
static uint16_t
af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct pkt_rx_queue *rxq = queue;
	struct xsk_ring_cons *rx = &rxq->rx;
	struct xsk_ring_prod *fq = &rxq->fq;
	struct xsk_umem_info *umem = rxq->umem;
	uint32_t idx_rx = 0;
	unsigned long rx_bytes = 0;
	int rcvd, i;
	struct rte_mbuf *fq_bufs[ETH_AF_XDP_RX_BATCH_SIZE];

	if (rte_pktmbuf_alloc_bulk(umem->mb_pool, fq_bufs, nb_pkts)) {
		AF_XDP_LOG(DEBUG,
			"Failed to get enough buffers for fq.\n");
		return 0;
	}

	rcvd = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);

	if (rcvd == 0) {
#if defined(XDP_USE_NEED_WAKEUP)
		if (xsk_ring_prod__needs_wakeup(fq))
			(void)poll(rxq->fds, 1, 1000);
#endif

		goto out;
	}

	for (i = 0; i < rcvd; i++) {
		const struct xdp_desc *desc;
		uint64_t addr;
		uint32_t len;
		uint64_t offset;

		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
		addr = desc->addr;
		len = desc->len;

		offset = xsk_umem__extract_offset(addr);
		addr = xsk_umem__extract_addr(addr);

		bufs[i] = (struct rte_mbuf *)
				xsk_umem__get_data(umem->buffer, addr +
					umem->mb_pool->header_size);
		bufs[i]->data_off = offset - sizeof(struct rte_mbuf) -
			rte_pktmbuf_priv_size(umem->mb_pool) -
			umem->mb_pool->header_size;

		rte_pktmbuf_pkt_len(bufs[i]) = len;
		rte_pktmbuf_data_len(bufs[i]) = len;
		rx_bytes += len;
	}

	xsk_ring_cons__release(rx, rcvd);

	(void)reserve_fill_queue(umem, rcvd, fq_bufs, fq);

	rxq->stats.rx_pkts += rcvd;
	rxq->stats.rx_bytes += rx_bytes;

out:
	if (rcvd != nb_pkts)
		rte_mempool_put_bulk(umem->mb_pool, (void **)&fq_bufs[rcvd],
				     nb_pkts - rcvd);

	return rcvd;
}
#else
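/*
 * Copy-mode RX burst: packet data is copied from the UMEM frame into a
 * freshly allocated mbuf, and the frame address is recycled to buf_ring.
 */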
static uint16_t
af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct pkt_rx_queue *rxq = queue;
	struct xsk_ring_cons *rx = &rxq->rx;
	struct xsk_umem_info *umem = rxq->umem;
	struct xsk_ring_prod *fq = &rxq->fq;
	uint32_t idx_rx = 0;
	unsigned long rx_bytes = 0;
	int rcvd, i;
	uint32_t free_thresh = fq->size >> 1;
	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];

	if (xsk_prod_nb_free(fq, free_thresh) >= free_thresh)
		(void)reserve_fill_queue(umem, ETH_AF_XDP_RX_BATCH_SIZE,
					 NULL, fq);

	if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, nb_pkts) != 0))
		return 0;

	rcvd = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);
	if (rcvd == 0) {
#if defined(XDP_USE_NEED_WAKEUP)
		if (xsk_ring_prod__needs_wakeup(fq))
			(void)poll(rxq->fds, 1, 1000);
#endif

		goto out;
	}

	for (i = 0; i < rcvd; i++) {
		const struct xdp_desc *desc;
		uint64_t addr;
		uint32_t len;
		void *pkt;

		desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
		addr = desc->addr;
		len = desc->len;
		pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr);

		rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
		rte_ring_enqueue(umem->buf_ring, (void *)addr);
		rte_pktmbuf_pkt_len(mbufs[i]) = len;
		rte_pktmbuf_data_len(mbufs[i]) = len;
		rx_bytes += len;
		bufs[i] = mbufs[i];
	}

	xsk_ring_cons__release(rx, rcvd);

	rxq->stats.rx_pkts += rcvd;
	rxq->stats.rx_bytes += rx_bytes;

out:
	if (rcvd != nb_pkts)
		rte_mempool_put_bulk(rxq->mb_pool, (void **)&mbufs[rcvd],
				     nb_pkts - rcvd);

	return rcvd;
}
#endif

static uint16_t
eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE);

#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
	return af_xdp_rx_zc(queue, bufs, nb_pkts);
#else
	return af_xdp_rx_cp(queue, bufs, nb_pkts);
#endif
}

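/*
 * Drain up to "size" entries from the completion ring and return the
 * transmitted buffers to their pool (zero-copy) or to buf_ring (copy mode).
 */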
static void
pull_umem_cq(struct xsk_umem_info *umem, int size, struct xsk_ring_cons *cq)
{
	size_t i, n;
	uint32_t idx_cq = 0;

	n = xsk_ring_cons__peek(cq, size, &idx_cq);

	for (i = 0; i < n; i++) {
		uint64_t addr;
		addr = *xsk_ring_cons__comp_addr(cq, idx_cq++);
#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
		addr = xsk_umem__extract_addr(addr);
		rte_pktmbuf_free((struct rte_mbuf *)
					xsk_umem__get_data(umem->buffer,
					addr + umem->mb_pool->header_size));
#else
		rte_ring_enqueue(umem->buf_ring, (void *)addr);
#endif
	}

	xsk_ring_cons__release(cq, n);
}

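/*
 * Kick the kernel to transmit queued descriptors; EBUSY/EINTR are retried,
 * and EAGAIN additionally drains the completion ring to make space.
 */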
static void
kick_tx(struct pkt_tx_queue *txq, struct xsk_ring_cons *cq)
{
	struct xsk_umem_info *umem = txq->umem;

	pull_umem_cq(umem, XSK_RING_CONS__DEFAULT_NUM_DESCS, cq);

#if defined(XDP_USE_NEED_WAKEUP)
	if (xsk_ring_prod__needs_wakeup(&txq->tx))
#endif
		while (send(xsk_socket__fd(txq->pair->xsk), NULL,
			    0, MSG_DONTWAIT) < 0) {
			if (errno != EBUSY && errno != EAGAIN && errno != EINTR)
				break;

			if (errno == EAGAIN)
				pull_umem_cq(umem,
					     XSK_RING_CONS__DEFAULT_NUM_DESCS,
					     cq);
		}
}

#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
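/*
 * Zero-copy TX burst: mbufs that already live in this UMEM are queued by
 * reference; foreign mbufs are copied into a local UMEM mbuf first.
 */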
static uint16_t
af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct pkt_tx_queue *txq = queue;
	struct xsk_umem_info *umem = txq->umem;
	struct rte_mbuf *mbuf;
	unsigned long tx_bytes = 0;
	int i;
	uint32_t idx_tx;
	uint16_t count = 0;
	struct xdp_desc *desc;
	uint64_t addr, offset;
	struct xsk_ring_cons *cq = &txq->pair->cq;
	uint32_t free_thresh = cq->size >> 1;

	if (xsk_cons_nb_avail(cq, free_thresh) >= free_thresh)
		pull_umem_cq(umem, XSK_RING_CONS__DEFAULT_NUM_DESCS, cq);

	for (i = 0; i < nb_pkts; i++) {
		mbuf = bufs[i];

		if (mbuf->pool == umem->mb_pool) {
			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
				kick_tx(txq, cq);
				if (!xsk_ring_prod__reserve(&txq->tx, 1,
							    &idx_tx))
					goto out;
			}
			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
			desc->len = mbuf->pkt_len;
			addr = (uint64_t)mbuf - (uint64_t)umem->buffer -
					umem->mb_pool->header_size;
			offset = rte_pktmbuf_mtod(mbuf, uint64_t) -
					(uint64_t)mbuf +
					umem->mb_pool->header_size;
			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
			desc->addr = addr | offset;
			tx_bytes += mbuf->pkt_len;
			count++;
		} else {
			struct rte_mbuf *local_mbuf =
					rte_pktmbuf_alloc(umem->mb_pool);
			void *pkt;

			if (local_mbuf == NULL)
				goto out;

			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
				rte_pktmbuf_free(local_mbuf);
				kick_tx(txq, cq);
				goto out;
			}

			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
			desc->len = mbuf->pkt_len;

			addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer -
					umem->mb_pool->header_size;
			offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) -
					(uint64_t)local_mbuf +
					umem->mb_pool->header_size;
			pkt = xsk_umem__get_data(umem->buffer, addr + offset);
			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
			desc->addr = addr | offset;
			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
					desc->len);
			/* Account the bytes before freeing: reading
			 * mbuf->pkt_len after rte_pktmbuf_free() would be a
			 * use after free.
			 */
			tx_bytes += mbuf->pkt_len;
			rte_pktmbuf_free(mbuf);
			count++;
		}
	}

	kick_tx(txq, cq);

out:
	xsk_ring_prod__submit(&txq->tx, count);

	txq->stats.tx_pkts += count;
	txq->stats.tx_bytes += tx_bytes;
	txq->stats.tx_dropped += nb_pkts - count;

	return count;
}
#else
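/*
 * Copy-mode TX burst: each packet is copied into a free UMEM frame taken
 * from buf_ring before being posted to the TX ring.
 */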
static uint16_t
af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct pkt_tx_queue *txq = queue;
	struct xsk_umem_info *umem = txq->umem;
	struct rte_mbuf *mbuf;
	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
	unsigned long tx_bytes = 0;
	int i;
	uint32_t idx_tx;
	struct xsk_ring_cons *cq = &txq->pair->cq;

	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);

	pull_umem_cq(umem, nb_pkts, cq);

	nb_pkts = rte_ring_dequeue_bulk(umem->buf_ring, addrs,
					nb_pkts, NULL);
	if (nb_pkts == 0)
		return 0;

	if (xsk_ring_prod__reserve(&txq->tx, nb_pkts, &idx_tx) != nb_pkts) {
		kick_tx(txq, cq);
		rte_ring_enqueue_bulk(umem->buf_ring, addrs, nb_pkts, NULL);
		return 0;
	}

	for (i = 0; i < nb_pkts; i++) {
		struct xdp_desc *desc;
		void *pkt;

		desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
		mbuf = bufs[i];
		desc->len = mbuf->pkt_len;

		desc->addr = (uint64_t)addrs[i];
		pkt = xsk_umem__get_data(umem->mz->addr,
					 desc->addr);
		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);
		tx_bytes += mbuf->pkt_len;
		rte_pktmbuf_free(mbuf);
	}

	xsk_ring_prod__submit(&txq->tx, nb_pkts);

	kick_tx(txq, cq);

	txq->stats.tx_pkts += nb_pkts;
	txq->stats.tx_bytes += tx_bytes;

	return nb_pkts;
}
#endif

static uint16_t
eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
	return af_xdp_tx_zc(queue, bufs, nb_pkts);
#else
	return af_xdp_tx_cp(queue, bufs, nb_pkts);
#endif
}

static int
eth_dev_start(struct rte_eth_dev *dev)
{
	dev->data->dev_link.link_status = ETH_LINK_UP;

	return 0;
}

static int
eth_dev_stop(struct rte_eth_dev *dev)
{
	dev->data->dev_link.link_status = ETH_LINK_DOWN;
	return 0;
}

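/* Find the internal_list entry that tracks the given port's private data. */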
static inline struct internal_list *
find_internal_resource(struct pmd_internals *port_int)
{
	int found = 0;
	struct internal_list *list = NULL;

	if (port_int == NULL)
		return NULL;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		struct pmd_internals *list_int =
				list->eth_dev->data->dev_private;
		if (list_int == port_int) {
			found = 1;
			break;
		}
	}

	pthread_mutex_unlock(&internal_list_lock);

	if (!found)
		return NULL;

	return list;
}

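/* Check whether a socket already exists for this (netdev, queue) context. */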
static inline bool
ctx_exists(struct pkt_rx_queue *rxq, const char *ifname,
	   struct pkt_rx_queue *list_rxq, const char *list_ifname)
{
	bool exists = false;

	if (rxq->xsk_queue_idx == list_rxq->xsk_queue_idx &&
	    !strncmp(ifname, list_ifname, IFNAMSIZ)) {
		AF_XDP_LOG(ERR, "ctx %s,%i already exists, cannot share umem\n",
					ifname, rxq->xsk_queue_idx);
		exists = true;
	}

	return exists;
}

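/*
 * Look for a UMEM that is already built on the same mempool so it can be
 * shared with this queue; returns -1 if sharing is not possible.
 */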
static inline int
get_shared_umem(struct pkt_rx_queue *rxq, const char *ifname,
		struct xsk_umem_info **umem)
{
	struct internal_list *list;
	struct pmd_internals *internals;
	int i = 0, ret = 0;
	struct rte_mempool *mb_pool = rxq->mb_pool;

	if (mb_pool == NULL)
		return ret;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		internals = list->eth_dev->data->dev_private;
		for (i = 0; i < internals->queue_cnt; i++) {
			struct pkt_rx_queue *list_rxq =
						&internals->rx_queues[i];
			if (rxq == list_rxq)
				continue;
			if (mb_pool == internals->rx_queues[i].mb_pool) {
				if (ctx_exists(rxq, ifname, list_rxq,
						internals->if_name)) {
					ret = -1;
					goto out;
				}
				if (__atomic_load_n(
					&internals->rx_queues[i].umem->refcnt,
					__ATOMIC_ACQUIRE)) {
					*umem = internals->rx_queues[i].umem;
					goto out;
				}
			}
		}
	}

out:
	pthread_mutex_unlock(&internal_list_lock);

	return ret;
}

static int
eth_dev_configure(struct rte_eth_dev *dev)
{
	struct pmd_internals *internal = dev->data->dev_private;

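	/* RX and TX queues are paired in this PMD, so their counts must match. */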
	if (dev->data->nb_rx_queues != dev->data->nb_tx_queues)
		return -EINVAL;

	if (internal->shared_umem) {
		struct internal_list *list = NULL;
		const char *name = dev->device->name;

		list = find_internal_resource(internal);
		if (list)
			return 0;

		list = rte_zmalloc_socket(name, sizeof(*list), 0,
					  dev->device->numa_node);
		if (list == NULL)
			return -1;

		list->eth_dev = dev;
		pthread_mutex_lock(&internal_list_lock);
		TAILQ_INSERT_TAIL(&internal_list, list, next);
		pthread_mutex_unlock(&internal_list_lock);
	}

	return 0;
}

static int
eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct pmd_internals *internals = dev->data->dev_private;

	dev_info->if_index = internals->if_index;
	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = ETH_FRAME_LEN;
	dev_info->max_rx_queues = internals->queue_cnt;
	dev_info->max_tx_queues = internals->queue_cnt;

	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
	dev_info->max_mtu = getpagesize() -
				sizeof(struct rte_mempool_objhdr) -
				sizeof(struct rte_mbuf) -
				RTE_PKTMBUF_HEADROOM - XDP_PACKET_HEADROOM;
#else
	dev_info->max_mtu = ETH_AF_XDP_FRAME_SIZE - XDP_PACKET_HEADROOM;
#endif

	dev_info->default_rxportconf.nb_queues = 1;
	dev_info->default_txportconf.nb_queues = 1;
	dev_info->default_rxportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;
	dev_info->default_txportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;

	return 0;
}

static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct xdp_statistics xdp_stats;
	struct pkt_rx_queue *rxq;
	struct pkt_tx_queue *txq;
	socklen_t optlen;
	int i, ret;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		optlen = sizeof(struct xdp_statistics);
		rxq = &internals->rx_queues[i];
		txq = rxq->pair;
		stats->q_ipackets[i] = rxq->stats.rx_pkts;
		stats->q_ibytes[i] = rxq->stats.rx_bytes;

		stats->q_opackets[i] = txq->stats.tx_pkts;
		stats->q_obytes[i] = txq->stats.tx_bytes;

		stats->ipackets += stats->q_ipackets[i];
		stats->ibytes += stats->q_ibytes[i];
		stats->imissed += rxq->stats.rx_dropped;
		stats->oerrors += txq->stats.tx_dropped;
		ret = getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP,
				XDP_STATISTICS, &xdp_stats, &optlen);
		if (ret != 0) {
			AF_XDP_LOG(ERR, "getsockopt() failed for XDP_STATISTICS.\n");
			return -1;
		}
		stats->imissed += xdp_stats.rx_dropped;

		stats->opackets += stats->q_opackets[i];
		stats->obytes += stats->q_obytes[i];
	}

	return 0;
}

static int
eth_stats_reset(struct rte_eth_dev *dev)
{
	struct pmd_internals *internals = dev->data->dev_private;
	int i;

	for (i = 0; i < internals->queue_cnt; i++) {
		memset(&internals->rx_queues[i].stats, 0,
					sizeof(struct rx_stats));
		memset(&internals->tx_queues[i].stats, 0,
					sizeof(struct tx_stats));
	}

	return 0;
}

static void
remove_xdp_program(struct pmd_internals *internals)
{
	uint32_t curr_prog_id = 0;

	if (bpf_get_link_xdp_id(internals->if_index, &curr_prog_id,
				XDP_FLAGS_UPDATE_IF_NOEXIST)) {
		AF_XDP_LOG(ERR, "bpf_get_link_xdp_id failed\n");
		return;
	}
	bpf_set_link_xdp_fd(internals->if_index, -1,
			XDP_FLAGS_UPDATE_IF_NOEXIST);
}

static void
xdp_umem_destroy(struct xsk_umem_info *umem)
{
#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
	umem->mb_pool = NULL;
#else
	rte_memzone_free(umem->mz);
	umem->mz = NULL;

	rte_ring_free(umem->buf_ring);
	umem->buf_ring = NULL;
#endif

	rte_free(umem);
	umem = NULL;
}

static int
eth_dev_close(struct rte_eth_dev *dev)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct pkt_rx_queue *rxq;
	int i;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	AF_XDP_LOG(INFO, "Closing AF_XDP ethdev on numa socket %u\n",
		rte_socket_id());

	for (i = 0; i < internals->queue_cnt; i++) {
		rxq = &internals->rx_queues[i];
		if (rxq->umem == NULL)
			break;
		xsk_socket__delete(rxq->xsk);

		if (__atomic_sub_fetch(&rxq->umem->refcnt, 1, __ATOMIC_ACQUIRE)
				== 0) {
			(void)xsk_umem__delete(rxq->umem->umem);
			xdp_umem_destroy(rxq->umem);
		}

		/* rxq and rxq->pair point into the rx_queues/tx_queues
		 * arrays owned by internals; freeing them per element here
		 * would pass interior pointers to rte_free().
		 */
	}

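	/*
	 * MAC is not allocated dynamically; setting it to NULL prevents
	 * rte_eth_dev_release_port() from trying to free it.
	 */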
	dev->data->mac_addrs = NULL;

	remove_xdp_program(internals);

	if (internals->shared_umem) {
		struct internal_list *list;

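		/* Remove this ethdev's entry from the shared-UMEM tracking list. */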
		list = find_internal_resource(internals);
		if (list) {
			pthread_mutex_lock(&internal_list_lock);
			TAILQ_REMOVE(&internal_list, list, next);
			pthread_mutex_unlock(&internal_list_lock);
			rte_free(list);
		}
	}

	return 0;
}

static void
eth_queue_release(void *q __rte_unused)
{
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
		int wait_to_complete __rte_unused)
{
	return 0;
}

#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
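/* Align the mempool's base address down to a page boundary for the UMEM. */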
static inline uintptr_t get_base_addr(struct rte_mempool *mp, uint64_t *align)
{
	struct rte_mempool_memhdr *memhdr;
	uintptr_t memhdr_addr, aligned_addr;

	memhdr = STAILQ_FIRST(&mp->mem_list);
	memhdr_addr = (uintptr_t)memhdr->addr;
	aligned_addr = memhdr_addr & ~(getpagesize() - 1);
	*align = memhdr_addr - aligned_addr;

	return aligned_addr;
}

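/*
 * Zero-copy UMEM setup: register the mbuf mempool's memory as the UMEM, or
 * reuse an existing UMEM built on the same mempool when sharing is enabled.
 */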
static struct
xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
				  struct pkt_rx_queue *rxq)
{
	struct xsk_umem_info *umem = NULL;
	int ret;
	struct xsk_umem_config usr_config = {
		.fill_size = ETH_AF_XDP_DFLT_NUM_DESCS * 2,
		.comp_size = ETH_AF_XDP_DFLT_NUM_DESCS,
		.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG};
	void *base_addr = NULL;
	struct rte_mempool *mb_pool = rxq->mb_pool;
	uint64_t umem_size, align = 0;

	if (internals->shared_umem) {
		if (get_shared_umem(rxq, internals->if_name, &umem) < 0)
			return NULL;

		if (umem != NULL &&
			__atomic_load_n(&umem->refcnt, __ATOMIC_ACQUIRE) <
					umem->max_xsks) {
			AF_XDP_LOG(INFO, "%s,qid%i sharing UMEM\n",
					internals->if_name, rxq->xsk_queue_idx);
			__atomic_fetch_add(&umem->refcnt, 1, __ATOMIC_ACQUIRE);
		}
	}

	if (umem == NULL) {
		usr_config.frame_size =
			rte_mempool_calc_obj_size(mb_pool->elt_size,
						  mb_pool->flags, NULL);
		usr_config.frame_headroom = mb_pool->header_size +
						sizeof(struct rte_mbuf) +
						rte_pktmbuf_priv_size(mb_pool) +
						RTE_PKTMBUF_HEADROOM;

		umem = rte_zmalloc_socket("umem", sizeof(*umem), 0,
					  rte_socket_id());
		if (umem == NULL) {
			AF_XDP_LOG(ERR, "Failed to allocate umem info");
			return NULL;
		}

		umem->mb_pool = mb_pool;
		base_addr = (void *)get_base_addr(mb_pool, &align);
		umem_size = (uint64_t)mb_pool->populated_size *
				(uint64_t)usr_config.frame_size +
				align;

		ret = xsk_umem__create(&umem->umem, base_addr, umem_size,
				&rxq->fq, &rxq->cq, &usr_config);
		if (ret) {
			AF_XDP_LOG(ERR, "Failed to create umem");
			goto err;
		}
		umem->buffer = base_addr;

		if (internals->shared_umem) {
			umem->max_xsks = mb_pool->populated_size /
						ETH_AF_XDP_NUM_BUFFERS;
			AF_XDP_LOG(INFO, "Max xsks for UMEM %s: %u\n",
						mb_pool->name, umem->max_xsks);
		}

		__atomic_store_n(&umem->refcnt, 1, __ATOMIC_RELEASE);
	}

#else
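/*
 * Copy-mode UMEM setup: reserve a dedicated memzone for the UMEM and track
 * free frame addresses in a rte_ring.
 */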
static struct
xsk_umem_info *xdp_umem_configure(struct pmd_internals *internals,
				  struct pkt_rx_queue *rxq)
{
	struct xsk_umem_info *umem;
	const struct rte_memzone *mz;
	struct xsk_umem_config usr_config = {
		.fill_size = ETH_AF_XDP_DFLT_NUM_DESCS,
		.comp_size = ETH_AF_XDP_DFLT_NUM_DESCS,
		.frame_size = ETH_AF_XDP_FRAME_SIZE,
		.frame_headroom = 0 };
	char ring_name[RTE_RING_NAMESIZE];
	char mz_name[RTE_MEMZONE_NAMESIZE];
	int ret;
	uint64_t i;

	umem = rte_zmalloc_socket("umem", sizeof(*umem), 0, rte_socket_id());
	if (umem == NULL) {
		AF_XDP_LOG(ERR, "Failed to allocate umem info");
		return NULL;
	}

	snprintf(ring_name, sizeof(ring_name), "af_xdp_ring_%s_%u",
		 internals->if_name, rxq->xsk_queue_idx);
	umem->buf_ring = rte_ring_create(ring_name,
					 ETH_AF_XDP_NUM_BUFFERS,
					 rte_socket_id(),
					 0x0);
	if (umem->buf_ring == NULL) {
		AF_XDP_LOG(ERR, "Failed to create rte_ring\n");
		goto err;
	}

	for (i = 0; i < ETH_AF_XDP_NUM_BUFFERS; i++)
		rte_ring_enqueue(umem->buf_ring,
				 (void *)(i * ETH_AF_XDP_FRAME_SIZE));

	snprintf(mz_name, sizeof(mz_name), "af_xdp_umem_%s_%u",
		 internals->if_name, rxq->xsk_queue_idx);
	mz = rte_memzone_reserve_aligned(mz_name,
			ETH_AF_XDP_NUM_BUFFERS * ETH_AF_XDP_FRAME_SIZE,
			rte_socket_id(), RTE_MEMZONE_IOVA_CONTIG,
			getpagesize());
	if (mz == NULL) {
		AF_XDP_LOG(ERR, "Failed to reserve memzone for af_xdp umem.\n");
		goto err;
	}

	ret = xsk_umem__create(&umem->umem, mz->addr,
			       ETH_AF_XDP_NUM_BUFFERS * ETH_AF_XDP_FRAME_SIZE,
			       &rxq->fq, &rxq->cq,
			       &usr_config);

	if (ret) {
		AF_XDP_LOG(ERR, "Failed to create umem");
		goto err;
	}
	umem->mz = mz;

	/* Initialise the reference count so the teardown paths that do
	 * __atomic_sub_fetch(&refcnt, 1, ...) actually reach zero and
	 * release the UMEM in copy mode too.
	 */
	__atomic_store_n(&umem->refcnt, 1, __ATOMIC_RELEASE);

#endif
	return umem;

err:
	xdp_umem_destroy(umem);
	return NULL;
}

static int
load_custom_xdp_prog(const char *prog_path, int if_index)
{
	int ret, prog_fd = -1;
	struct bpf_object *obj;
	struct bpf_map *map;

	ret = bpf_prog_load(prog_path, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
	if (ret) {
		AF_XDP_LOG(ERR, "Failed to load program %s\n", prog_path);
		return ret;
	}

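	/*
	 * The custom program must provide a map named "xsks_map" so that
	 * redirected traffic can reach the AF_XDP sockets; libbpf inserts
	 * each socket into this map when the socket is created.
	 */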
	map = bpf_object__find_map_by_name(obj, "xsks_map");
	if (!map) {
		AF_XDP_LOG(ERR, "Failed to find xsks_map in %s\n", prog_path);
		return -1;
	}

	ret = bpf_set_link_xdp_fd(if_index, prog_fd,
					XDP_FLAGS_UPDATE_IF_NOEXIST);
	if (ret) {
		AF_XDP_LOG(ERR, "Failed to set prog fd %d on interface\n",
				prog_fd);
		return -1;
	}

	AF_XDP_LOG(INFO, "Successfully loaded XDP program %s with fd %d\n",
				prog_path, prog_fd);

	return 0;
}

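/*
 * Create the UMEM and the AF_XDP socket for one queue pair, optionally
 * loading a custom XDP program first, then pre-populate the fill queue.
 */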
static int
xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
	      int ring_size)
{
	struct xsk_socket_config cfg;
	struct pkt_tx_queue *txq = rxq->pair;
	int ret = 0;
	int reserve_size = ETH_AF_XDP_DFLT_NUM_DESCS;
	struct rte_mbuf *fq_bufs[reserve_size];

	rxq->umem = xdp_umem_configure(internals, rxq);
	if (rxq->umem == NULL)
		return -ENOMEM;
	txq->umem = rxq->umem;

	cfg.rx_size = ring_size;
	cfg.tx_size = ring_size;
	cfg.libbpf_flags = 0;
	cfg.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
	cfg.bind_flags = 0;

#if defined(XDP_USE_NEED_WAKEUP)
	cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
#endif

	if (strnlen(internals->prog_path, PATH_MAX) &&
				!internals->custom_prog_configured) {
		ret = load_custom_xdp_prog(internals->prog_path,
					   internals->if_index);
		if (ret) {
			AF_XDP_LOG(ERR, "Failed to load custom XDP program %s\n",
					internals->prog_path);
			goto err;
		}
		internals->custom_prog_configured = 1;
	}

	if (internals->shared_umem)
		ret = create_shared_socket(&rxq->xsk, internals->if_name,
				rxq->xsk_queue_idx, rxq->umem->umem, &rxq->rx,
				&txq->tx, &rxq->fq, &rxq->cq, &cfg);
	else
		ret = xsk_socket__create(&rxq->xsk, internals->if_name,
				rxq->xsk_queue_idx, rxq->umem->umem, &rxq->rx,
				&txq->tx, &cfg);

	if (ret) {
		AF_XDP_LOG(ERR, "Failed to create xsk socket.\n");
		goto err;
	}

#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
	if (rte_pktmbuf_alloc_bulk(rxq->umem->mb_pool, fq_bufs, reserve_size)) {
		AF_XDP_LOG(DEBUG, "Failed to get enough buffers for fq.\n");
		goto err;
	}
#endif
	ret = reserve_fill_queue(rxq->umem, reserve_size, fq_bufs, &rxq->fq);
	if (ret) {
		xsk_socket__delete(rxq->xsk);
		AF_XDP_LOG(ERR, "Failed to reserve fill queue.\n");
		goto err;
	}

	return 0;

err:
	if (__atomic_sub_fetch(&rxq->umem->refcnt, 1, __ATOMIC_ACQUIRE) == 0)
		xdp_umem_destroy(rxq->umem);

	return ret;
}

static int
eth_rx_queue_setup(struct rte_eth_dev *dev,
		   uint16_t rx_queue_id,
		   uint16_t nb_rx_desc,
		   unsigned int socket_id __rte_unused,
		   const struct rte_eth_rxconf *rx_conf __rte_unused,
		   struct rte_mempool *mb_pool)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct pkt_rx_queue *rxq;
	int ret;

	rxq = &internals->rx_queues[rx_queue_id];

	AF_XDP_LOG(INFO, "Set up rx queue, rx queue id: %d, xsk queue id: %d\n",
		   rx_queue_id, rxq->xsk_queue_idx);

#ifndef XDP_UMEM_UNALIGNED_CHUNK_FLAG
	uint32_t buf_size, data_size;

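	/* Check that a full UMEM frame fits in the mbuf data room. */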
	buf_size = rte_pktmbuf_data_room_size(mb_pool) -
		RTE_PKTMBUF_HEADROOM;
	data_size = ETH_AF_XDP_FRAME_SIZE;

	if (data_size > buf_size) {
		AF_XDP_LOG(ERR, "%s: %d bytes will not fit in mbuf (%d bytes)\n",
			dev->device->name, data_size, buf_size);
		ret = -ENOMEM;
		goto err;
	}
#endif

	rxq->mb_pool = mb_pool;

	if (xsk_configure(internals, rxq, nb_rx_desc)) {
		AF_XDP_LOG(ERR, "Failed to configure xdp socket\n");
		ret = -EINVAL;
		goto err;
	}

	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
	rxq->fds[0].events = POLLIN;

	dev->data->rx_queues[rx_queue_id] = rxq;
	return 0;

err:
	return ret;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev,
		   uint16_t tx_queue_id,
		   uint16_t nb_tx_desc __rte_unused,
		   unsigned int socket_id __rte_unused,
		   const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct pkt_tx_queue *txq;

	txq = &internals->tx_queues[tx_queue_id];

	dev->data->tx_queues[tx_queue_id] = txq;
	return 0;
}

static int
eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct pmd_internals *internals = dev->data->dev_private;
	struct ifreq ifr = { .ifr_mtu = mtu };
	int ret;
	int s;

	s = socket(PF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return -EINVAL;

	strlcpy(ifr.ifr_name, internals->if_name, IFNAMSIZ);
	ret = ioctl(s, SIOCSIFMTU, &ifr);
	close(s);

	return (ret < 0) ? -errno : 0;
}

static int
eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask)
{
	struct ifreq ifr;
	int ret = 0;
	int s;

	s = socket(PF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return -errno;

	strlcpy(ifr.ifr_name, if_name, IFNAMSIZ);
	if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0) {
		ret = -errno;
		goto out;
	}
	ifr.ifr_flags &= mask;
	ifr.ifr_flags |= flags;
	if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) {
		ret = -errno;
		goto out;
	}
out:
	close(s);
	return ret;
}

static int
eth_dev_promiscuous_enable(struct rte_eth_dev *dev)
{
	struct pmd_internals *internals = dev->data->dev_private;

	return eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0);
}

static int
eth_dev_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct pmd_internals *internals = dev->data->dev_private;

	return eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC);
}

static const struct eth_dev_ops ops = {
	.dev_start = eth_dev_start,
	.dev_stop = eth_dev_stop,
	.dev_close = eth_dev_close,
	.dev_configure = eth_dev_configure,
	.dev_infos_get = eth_dev_info,
	.mtu_set = eth_dev_mtu_set,
	.promiscuous_enable = eth_dev_promiscuous_enable,
	.promiscuous_disable = eth_dev_promiscuous_disable,
	.rx_queue_setup = eth_rx_queue_setup,
	.tx_queue_setup = eth_tx_queue_setup,
	.rx_queue_release = eth_queue_release,
	.tx_queue_release = eth_queue_release,
	.link_update = eth_link_update,
	.stats_get = eth_stats_get,
	.stats_reset = eth_stats_reset,
};

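/* Helpers for parsing the devargs listed in valid_arguments[]. */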
static int
parse_integer_arg(const char *key __rte_unused,
		  const char *value, void *extra_args)
{
	int *i = (int *)extra_args;
	char *end;

	*i = strtol(value, &end, 10);
	if (*i < 0) {
		AF_XDP_LOG(ERR, "Argument has to be positive.\n");
		return -EINVAL;
	}

	return 0;
}

static int
parse_name_arg(const char *key __rte_unused,
	       const char *value, void *extra_args)
{
	char *name = extra_args;

	if (strnlen(value, IFNAMSIZ) > IFNAMSIZ - 1) {
		AF_XDP_LOG(ERR, "Invalid name %s, should be less than %u bytes.\n",
			   value, IFNAMSIZ);
		return -EINVAL;
	}

	strlcpy(name, value, IFNAMSIZ);

	return 0;
}

static int
parse_prog_arg(const char *key __rte_unused,
	       const char *value, void *extra_args)
{
	char *path = extra_args;

	if (strnlen(value, PATH_MAX) == PATH_MAX) {
		AF_XDP_LOG(ERR, "Invalid path %s, should be less than %u bytes.\n",
			   value, PATH_MAX);
		return -EINVAL;
	}

	if (access(value, F_OK) != 0) {
		AF_XDP_LOG(ERR, "Error accessing %s: %s\n",
			   value, strerror(errno));
		return -EINVAL;
	}

	strlcpy(path, value, PATH_MAX);

	return 0;
}

static int
xdp_get_channels_info(const char *if_name, int *max_queues,
				int *combined_queues)
{
	/* Zero-initialise so max_combined is not read uninitialised when the
	 * driver does not support ETHTOOL_GCHANNELS.
	 */
	struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
	struct ifreq ifr;
	int fd, ret;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return -1;

	ifr.ifr_data = (void *)&channels;
	strlcpy(ifr.ifr_name, if_name, IFNAMSIZ);
	ret = ioctl(fd, SIOCETHTOOL, &ifr);
	if (ret) {
		if (errno == EOPNOTSUPP) {
			ret = 0;
		} else {
			ret = -errno;
			goto out;
		}
	}

	if (channels.max_combined == 0 || errno == EOPNOTSUPP) {
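		/* If the driver reports no channels, all traffic uses a
		 * single stream, so expose a single queue.
		 */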
		*max_queues = 1;
		*combined_queues = 1;
	} else {
		*max_queues = channels.max_combined;
		*combined_queues = channels.combined_count;
	}

out:
	close(fd);
	return ret;
}

static int
parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
		 int *queue_cnt, int *shared_umem, char *prog_path)
{
	int ret;

	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_IFACE_ARG,
				 &parse_name_arg, if_name);
	if (ret < 0)
		goto free_kvlist;

	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_START_QUEUE_ARG,
				 &parse_integer_arg, start_queue);
	if (ret < 0)
		goto free_kvlist;

	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_COUNT_ARG,
				 &parse_integer_arg, queue_cnt);
	if (ret < 0 || *queue_cnt <= 0) {
		ret = -EINVAL;
		goto free_kvlist;
	}

	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_SHARED_UMEM_ARG,
				 &parse_integer_arg, shared_umem);
	if (ret < 0)
		goto free_kvlist;

	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PROG_ARG,
				 &parse_prog_arg, prog_path);
	if (ret < 0)
		goto free_kvlist;

free_kvlist:
	rte_kvargs_free(kvlist);
	return ret;
}

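/* Look up the interface index and MAC address of if_name via ioctl. */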
static int
get_iface_info(const char *if_name,
	       struct rte_ether_addr *eth_addr,
	       int *if_index)
{
	struct ifreq ifr;
	int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);

	if (sock < 0)
		return -1;

	strlcpy(ifr.ifr_name, if_name, IFNAMSIZ);
	if (ioctl(sock, SIOCGIFINDEX, &ifr))
		goto error;

	*if_index = ifr.ifr_ifindex;

	if (ioctl(sock, SIOCGIFHWADDR, &ifr))
		goto error;

	rte_memcpy(eth_addr, ifr.ifr_hwaddr.sa_data, RTE_ETHER_ADDR_LEN);

	close(sock);
	return 0;

error:
	close(sock);
	return -1;
}

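/* Allocate the ethdev and its private data and wire up the queue pairs. */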
static struct rte_eth_dev *
init_internals(struct rte_vdev_device *dev, const char *if_name,
	       int start_queue_idx, int queue_cnt, int shared_umem,
	       const char *prog_path)
{
	const char *name = rte_vdev_device_name(dev);
	const unsigned int numa_node = dev->device.numa_node;
	struct pmd_internals *internals;
	struct rte_eth_dev *eth_dev;
	int ret;
	int i;

	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node);
	if (internals == NULL)
		return NULL;

	internals->start_queue_idx = start_queue_idx;
	internals->queue_cnt = queue_cnt;
	strlcpy(internals->if_name, if_name, IFNAMSIZ);
	strlcpy(internals->prog_path, prog_path, PATH_MAX);
	internals->custom_prog_configured = 0;

#ifndef ETH_AF_XDP_SHARED_UMEM
	if (shared_umem) {
		AF_XDP_LOG(ERR, "Shared UMEM feature not available. "
				"Check kernel and libbpf version\n");
		goto err_free_internals;
	}
#endif
	internals->shared_umem = shared_umem;

	if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
				  &internals->combined_queue_cnt)) {
		AF_XDP_LOG(ERR, "Failed to get channel info of interface: %s\n",
				if_name);
		goto err_free_internals;
	}

	if (queue_cnt > internals->combined_queue_cnt) {
		AF_XDP_LOG(ERR, "Specified queue count %d is larger than combined queue count %d.\n",
				queue_cnt, internals->combined_queue_cnt);
		goto err_free_internals;
	}

	internals->rx_queues = rte_zmalloc_socket(NULL,
					sizeof(struct pkt_rx_queue) * queue_cnt,
					0, numa_node);
	if (internals->rx_queues == NULL) {
		AF_XDP_LOG(ERR, "Failed to allocate memory for rx queues.\n");
		goto err_free_internals;
	}

	internals->tx_queues = rte_zmalloc_socket(NULL,
					sizeof(struct pkt_tx_queue) * queue_cnt,
					0, numa_node);
	if (internals->tx_queues == NULL) {
		AF_XDP_LOG(ERR, "Failed to allocate memory for tx queues.\n");
		goto err_free_rx;
	}
	for (i = 0; i < queue_cnt; i++) {
		internals->tx_queues[i].pair = &internals->rx_queues[i];
		internals->rx_queues[i].pair = &internals->tx_queues[i];
		internals->rx_queues[i].xsk_queue_idx = start_queue_idx + i;
		internals->tx_queues[i].xsk_queue_idx = start_queue_idx + i;
	}

	ret = get_iface_info(if_name, &internals->eth_addr,
			     &internals->if_index);
	if (ret)
		goto err_free_tx;

	eth_dev = rte_eth_vdev_allocate(dev, 0);
	if (eth_dev == NULL)
		goto err_free_tx;

	eth_dev->data->dev_private = internals;
	eth_dev->data->dev_link = pmd_link;
	eth_dev->data->mac_addrs = &internals->eth_addr;
	eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
	eth_dev->dev_ops = &ops;
	eth_dev->rx_pkt_burst = eth_af_xdp_rx;
	eth_dev->tx_pkt_burst = eth_af_xdp_tx;

#if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
	AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
#endif

	return eth_dev;

err_free_tx:
	rte_free(internals->tx_queues);
err_free_rx:
	rte_free(internals->rx_queues);
err_free_internals:
	rte_free(internals);
	return NULL;
}

static int
rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
{
	struct rte_kvargs *kvlist;
	char if_name[IFNAMSIZ] = {'\0'};
	int xsk_start_queue_idx = ETH_AF_XDP_DFLT_START_QUEUE_IDX;
	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
	int shared_umem = 0;
	char prog_path[PATH_MAX] = {'\0'};
	struct rte_eth_dev *eth_dev = NULL;
	const char *name;

	AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
		rte_vdev_device_name(dev));

	name = rte_vdev_device_name(dev);
	if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
		strlen(rte_vdev_device_args(dev)) == 0) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (eth_dev == NULL) {
			AF_XDP_LOG(ERR, "Failed to probe %s\n", name);
			return -EINVAL;
		}
		eth_dev->dev_ops = &ops;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
	if (kvlist == NULL) {
		AF_XDP_LOG(ERR, "Invalid kvargs key\n");
		return -EINVAL;
	}

	if (dev->device.numa_node == SOCKET_ID_ANY)
		dev->device.numa_node = rte_socket_id();

	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
			     &xsk_queue_cnt, &shared_umem, prog_path) < 0) {
		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
		return -EINVAL;
	}

	if (strlen(if_name) == 0) {
		AF_XDP_LOG(ERR, "Network interface must be specified\n");
		return -EINVAL;
	}

	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
				 xsk_queue_cnt, shared_umem, prog_path);
	if (eth_dev == NULL) {
		AF_XDP_LOG(ERR, "Failed to init internals\n");
		return -1;
	}

	rte_eth_dev_probing_finish(eth_dev);

	return 0;
}

static int
rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
{
	struct rte_eth_dev *eth_dev = NULL;

	AF_XDP_LOG(INFO, "Removing AF_XDP ethdev on numa socket %u\n",
		rte_socket_id());

	if (dev == NULL)
		return -1;

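	/* Find the ethdev entry created at probe time. */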
	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
	if (eth_dev == NULL)
		return 0;

	eth_dev_close(eth_dev);
	rte_eth_dev_release_port(eth_dev);

	return 0;
}

static struct rte_vdev_driver pmd_af_xdp_drv = {
	.probe = rte_pmd_af_xdp_probe,
	.remove = rte_pmd_af_xdp_remove,
};

RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv);
RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
			      "iface=<string> "
			      "start_queue=<int> "
			      "queue_count=<int> "
			      "shared_umem=<int> "
			      "xdp_prog=<string> ");
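
/*
 * Usage sketch (illustrative; the interface name and values below are
 * examples, not taken from this file): the PMD is attached via an EAL
 * --vdev argument, e.g.
 *
 *   --vdev net_af_xdp,iface=eth0,start_queue=0,queue_count=1,shared_umem=1
 */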