linux/tools/lib/bpf/xsk.c
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * AF_XDP user-space access library.
 *
 * Copyright(c) 2018 - 2019 Intel Corporation.
 *
 * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
 */

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <asm/barrier.h>
#include <linux/compiler.h>
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_xdp.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>

#include "bpf.h"
#include "libbpf.h"
#include "libbpf_internal.h"
#include "xsk.h"

/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64

#ifndef SOL_XDP
 #define SOL_XDP 283
#endif

#ifndef AF_XDP
 #define AF_XDP 44
#endif

#ifndef PF_XDP
 #define PF_XDP AF_XDP
#endif

struct xsk_umem {
        struct xsk_ring_prod *fill;
        struct xsk_ring_cons *comp;
        char *umem_area;
        struct xsk_umem_config config;
        int fd;
        int refcount;
};

struct xsk_socket {
        struct xsk_ring_cons *rx;
        struct xsk_ring_prod *tx;
        __u64 outstanding_tx;
        struct xsk_umem *umem;
        struct xsk_socket_config config;
        int fd;
        int ifindex;
        int prog_fd;
        int xsks_map_fd;
        __u32 queue_id;
        char ifname[IFNAMSIZ];
};

struct xsk_nl_info {
        bool xdp_prog_attached;
        int ifindex;
        int fd;
};

/* Up until and including Linux 5.3 */
struct xdp_ring_offset_v1 {
        __u64 producer;
        __u64 consumer;
        __u64 desc;
};

/* Up until and including Linux 5.3 */
struct xdp_mmap_offsets_v1 {
        struct xdp_ring_offset_v1 rx;
        struct xdp_ring_offset_v1 tx;
        struct xdp_ring_offset_v1 fr;
        struct xdp_ring_offset_v1 cr;
};

int xsk_umem__fd(const struct xsk_umem *umem)
{
        return umem ? umem->fd : -EINVAL;
}

int xsk_socket__fd(const struct xsk_socket *xsk)
{
        return xsk ? xsk->fd : -EINVAL;
}

static bool xsk_page_aligned(void *buffer)
{
        unsigned long addr = (unsigned long)buffer;

        return !(addr & (getpagesize() - 1));
}
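
/* Worked example of the mask test above (illustrative only): with
 * 4 KiB pages, 0x20000 is aligned (low 12 bits clear) while 0x20100 is
 * not. getpagesize() is always a power of two, so the test is
 * equivalent to addr % getpagesize() == 0 without a division.
 */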

static void xsk_set_umem_config(struct xsk_umem_config *cfg,
                                const struct xsk_umem_config *usr_cfg)
{
        if (!usr_cfg) {
                cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
                cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
                cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
                cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
                cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
                return;
        }

        cfg->fill_size = usr_cfg->fill_size;
        cfg->comp_size = usr_cfg->comp_size;
        cfg->frame_size = usr_cfg->frame_size;
        cfg->frame_headroom = usr_cfg->frame_headroom;
        cfg->flags = usr_cfg->flags;
}
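
/* Example (sketch, not part of this file): a caller that only wants a
 * different frame size passes a fully populated config; the other
 * fields mirror the defaults above. umem, bufs, size, fq and cq are
 * caller-defined:
 *
 *   struct xsk_umem_config cfg = {
 *           .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
 *           .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
 *           .frame_size = 2048,
 *           .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
 *           .flags = XSK_UMEM__DEFAULT_FLAGS,
 *   };
 *   xsk_umem__create(&umem, bufs, size, &fq, &cq, &cfg);
 */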

static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
                                     const struct xsk_socket_config *usr_cfg)
{
        if (!usr_cfg) {
                cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
                cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
                cfg->libbpf_flags = 0;
                cfg->xdp_flags = 0;
                cfg->bind_flags = 0;
                return 0;
        }

        if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
                return -EINVAL;

        cfg->rx_size = usr_cfg->rx_size;
        cfg->tx_size = usr_cfg->tx_size;
        cfg->libbpf_flags = usr_cfg->libbpf_flags;
        cfg->xdp_flags = usr_cfg->xdp_flags;
        cfg->bind_flags = usr_cfg->bind_flags;

        return 0;
}

static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
{
        struct xdp_mmap_offsets_v1 off_v1;

        /* A getsockopt on a kernel <= 5.3 returns no flags fields.
         * Copy over the offsets to the correct places in the >= 5.4 format
         * and put the flags where they would have been on that kernel.
         */
        memcpy(&off_v1, off, sizeof(off_v1));

        off->rx.producer = off_v1.rx.producer;
        off->rx.consumer = off_v1.rx.consumer;
        off->rx.desc = off_v1.rx.desc;
        off->rx.flags = off_v1.rx.consumer + sizeof(__u32);

        off->tx.producer = off_v1.tx.producer;
        off->tx.consumer = off_v1.tx.consumer;
        off->tx.desc = off_v1.tx.desc;
        off->tx.flags = off_v1.tx.consumer + sizeof(__u32);

        off->fr.producer = off_v1.fr.producer;
        off->fr.consumer = off_v1.fr.consumer;
        off->fr.desc = off_v1.fr.desc;
        off->fr.flags = off_v1.fr.consumer + sizeof(__u32);

        off->cr.producer = off_v1.cr.producer;
        off->cr.consumer = off_v1.cr.consumer;
        off->cr.desc = off_v1.cr.desc;
        off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
}
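
/* Sketch of why "consumer + sizeof(__u32)" works above: on >= 5.4 the
 * mmapped ring header carries a flags word directly after the consumer
 * index. Kernels <= 5.3 have no flags word at all, so that slot falls
 * in zero-initialized padding there, and helpers such as
 * xsk_ring_prod__needs_wakeup() read 0 and safely become no-ops:
 *
 *   ring header, >= 5.4:             ring header, <= 5.3:
 *     producer (cacheline aligned)     producer (cacheline aligned)
 *     consumer (cacheline aligned)     consumer (cacheline aligned)
 *     flags                            [padding, reads as zero]
 */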

static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
{
        socklen_t optlen;
        int err;

        optlen = sizeof(*off);
        err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
        if (err)
                return err;

        if (optlen == sizeof(*off))
                return 0;

        if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
                xsk_mmap_offsets_v1(off);
                return 0;
        }

        return -EINVAL;
}

int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
                            __u64 size, struct xsk_ring_prod *fill,
                            struct xsk_ring_cons *comp,
                            const struct xsk_umem_config *usr_config)
{
        struct xdp_mmap_offsets off;
        struct xdp_umem_reg mr;
        struct xsk_umem *umem;
        void *map;
        int err;

        if (!umem_area || !umem_ptr || !fill || !comp)
                return -EFAULT;
        if (!size && !xsk_page_aligned(umem_area))
                return -EINVAL;

        umem = calloc(1, sizeof(*umem));
        if (!umem)
                return -ENOMEM;

        umem->fd = socket(AF_XDP, SOCK_RAW, 0);
        if (umem->fd < 0) {
                err = -errno;
                goto out_umem_alloc;
        }

        umem->umem_area = umem_area;
        xsk_set_umem_config(&umem->config, usr_config);

        memset(&mr, 0, sizeof(mr));
        mr.addr = (uintptr_t)umem_area;
        mr.len = size;
        mr.chunk_size = umem->config.frame_size;
        mr.headroom = umem->config.frame_headroom;
        mr.flags = umem->config.flags;

        err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
        if (err) {
                err = -errno;
                goto out_socket;
        }
        err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING,
                         &umem->config.fill_size,
                         sizeof(umem->config.fill_size));
        if (err) {
                err = -errno;
                goto out_socket;
        }
        err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
                         &umem->config.comp_size,
                         sizeof(umem->config.comp_size));
        if (err) {
                err = -errno;
                goto out_socket;
        }

        err = xsk_get_mmap_offsets(umem->fd, &off);
        if (err) {
                err = -errno;
                goto out_socket;
        }

        map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
                   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
                   XDP_UMEM_PGOFF_FILL_RING);
        if (map == MAP_FAILED) {
                err = -errno;
                goto out_socket;
        }

        umem->fill = fill;
        fill->mask = umem->config.fill_size - 1;
        fill->size = umem->config.fill_size;
        fill->producer = map + off.fr.producer;
        fill->consumer = map + off.fr.consumer;
        fill->flags = map + off.fr.flags;
        fill->ring = map + off.fr.desc;
        fill->cached_prod = *fill->producer;
        /* cached_cons is "size" bigger than the real consumer pointer;
         * see xsk_prod_nb_free().
         */
        fill->cached_cons = *fill->consumer + umem->config.fill_size;
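        /* For reference, a sketch of the xsk.h helper that relies on
         * this bias (not part of this file):
         *
         *   free_entries = r->cached_cons - r->cached_prod;
         *   if (free_entries >= nb)
         *           return free_entries;
         *   // refresh from the shared consumer index
         *   r->cached_cons = *r->consumer + r->size;
         *   return r->cached_cons - r->cached_prod;
         *
         * Pre-adding "size" here lets the common path above avoid one
         * addition per call.
         */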

        map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
                   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
                   XDP_UMEM_PGOFF_COMPLETION_RING);
        if (map == MAP_FAILED) {
                err = -errno;
                goto out_mmap;
        }

        umem->comp = comp;
        comp->mask = umem->config.comp_size - 1;
        comp->size = umem->config.comp_size;
        comp->producer = map + off.cr.producer;
        comp->consumer = map + off.cr.consumer;
        comp->flags = map + off.cr.flags;
        comp->ring = map + off.cr.desc;
        comp->cached_prod = *comp->producer;
        comp->cached_cons = *comp->consumer;

        *umem_ptr = umem;
        return 0;

out_mmap:
        munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
out_socket:
        close(umem->fd);
out_umem_alloc:
        free(umem);
        return err;
}
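
/* Example (sketch, not part of this file): a typical caller sets up a
 * page-aligned buffer and creates the umem with the default config.
 * NUM_FRAMES, bufs, fq and cq are caller-defined:
 *
 *   __u64 size = NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
 *   struct xsk_ring_prod fq;
 *   struct xsk_ring_cons cq;
 *   struct xsk_umem *umem;
 *   void *bufs;
 *
 *   if (posix_memalign(&bufs, getpagesize(), size))
 *           exit(EXIT_FAILURE);
 *   if (xsk_umem__create(&umem, bufs, size, &fq, &cq, NULL))
 *           exit(EXIT_FAILURE);
 */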

struct xsk_umem_config_v1 {
        __u32 fill_size;
        __u32 comp_size;
        __u32 frame_size;
        __u32 frame_headroom;
};

int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
                            __u64 size, struct xsk_ring_prod *fill,
                            struct xsk_ring_cons *comp,
                            const struct xsk_umem_config *usr_config)
{
        struct xsk_umem_config config;

        /* A NULL usr_config selects the defaults; don't memcpy from it. */
        if (!usr_config)
                return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size,
                                               fill, comp, NULL);

        memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1));
        config.flags = 0;

        return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
                                       &config);
}
COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
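
/* For context (sketch): COMPAT_VERSION and DEFAULT_VERSION come from
 * libbpf_internal.h and expand to roughly the following, binding each
 * implementation to a versioned ELF symbol ("@@" marks the default
 * picked at link time):
 *
 *   asm(".symver xsk_umem__create_v0_0_2,xsk_umem__create@LIBBPF_0.0.2");
 *   asm(".symver xsk_umem__create_v0_0_4,xsk_umem__create@@LIBBPF_0.0.4");
 */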

static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
        static const int log_buf_size = 16 * 1024;
        char log_buf[log_buf_size];
        int err, prog_fd;

        /* This is the C-program:
         * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
         * {
         *     int ret, index = ctx->rx_queue_index;
         *
         *     // A set entry here means that the corresponding queue_id
         *     // has an active AF_XDP socket bound to it.
         *     ret = bpf_redirect_map(&xsks_map, index, XDP_PASS);
         *     if (ret > 0)
         *         return ret;
         *
         *     // Fallback for pre-5.3 kernels, not supporting default
         *     // action in the flags parameter.
         *     if (bpf_map_lookup_elem(&xsks_map, &index))
         *         return bpf_redirect_map(&xsks_map, index, 0);
         *     return XDP_PASS;
         * }
         */
        struct bpf_insn prog[] = {
                /* r2 = *(u32 *)(r1 + 16) */
                BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
                /* *(u32 *)(r10 - 4) = r2 */
                BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
                /* r1 = xskmap[] */
                BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
                /* r3 = XDP_PASS */
                BPF_MOV64_IMM(BPF_REG_3, 2),
                /* call bpf_redirect_map */
                BPF_EMIT_CALL(BPF_FUNC_redirect_map),
                /* if w0 s> 0 goto pc+13 */
                BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13),
                /* r2 = r10 */
                BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
                /* r2 += -4 */
                BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
                /* r1 = xskmap[] */
                BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
                /* call bpf_map_lookup_elem */
                BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
                /* r1 = r0 */
                BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
                /* r0 = XDP_PASS */
                BPF_MOV64_IMM(BPF_REG_0, 2),
                /* if r1 == 0 goto pc+5 */
                BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
                /* r2 = *(u32 *)(r10 - 4) */
                BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
                /* r1 = xskmap[] */
                BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
                /* r3 = 0 */
                BPF_MOV64_IMM(BPF_REG_3, 0),
                /* call bpf_redirect_map */
                BPF_EMIT_CALL(BPF_FUNC_redirect_map),
                /* The jumps are to this instruction */
                BPF_EXIT_INSN(),
        };
        size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);

        prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt,
                                   "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
                                   log_buf_size);
        if (prog_fd < 0) {
                pr_warn("BPF log buffer:\n%s", log_buf);
                return prog_fd;
        }

        err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags);
        if (err) {
                close(prog_fd);
                return err;
        }

        xsk->prog_fd = prog_fd;
        return 0;
}

static int xsk_get_max_queues(struct xsk_socket *xsk)
{
        struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
        struct ifreq ifr = {};
        int fd, err, ret;

        fd = socket(AF_INET, SOCK_DGRAM, 0);
        if (fd < 0)
                return -errno;

        ifr.ifr_data = (void *)&channels;
        memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
        ifr.ifr_name[IFNAMSIZ - 1] = '\0';
        err = ioctl(fd, SIOCETHTOOL, &ifr);
        if (err && errno != EOPNOTSUPP) {
                ret = -errno;
                goto out;
        }

        if (err) {
                /* If the device says it has no channels, then all traffic
                 * is sent to a single stream, so max queues = 1.
                 */
                ret = 1;
        } else {
                /* Take the max of rx, tx, combined. Drivers return
                 * the number of channels in different ways.
                 */
                ret = max(channels.max_rx, channels.max_tx);
                ret = max(ret, (int)channels.max_combined);
        }

out:
        close(fd);
        return ret;
}
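
/* For intuition (sketch): this queries the same data as "ethtool -l".
 * A NIC reporting e.g. max_rx = 0, max_tx = 0, max_combined = 32
 * yields 32, which sizes the xsks_map below so that every queue id is
 * a valid key.
 */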

static int xsk_create_bpf_maps(struct xsk_socket *xsk)
{
        int max_queues;
        int fd;

        max_queues = xsk_get_max_queues(xsk);
        if (max_queues < 0)
                return max_queues;

        fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
                                 sizeof(int), sizeof(int), max_queues, 0);
        if (fd < 0)
                return fd;

        xsk->xsks_map_fd = fd;

        return 0;
}

static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
{
        bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
        close(xsk->xsks_map_fd);
}

static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
{
        __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
        __u32 map_len = sizeof(struct bpf_map_info);
        struct bpf_prog_info prog_info = {};
        struct bpf_map_info map_info;
        int fd, err;

        err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
        if (err)
                return err;

        num_maps = prog_info.nr_map_ids;

        map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
        if (!map_ids)
                return -ENOMEM;

        memset(&prog_info, 0, prog_len);
        prog_info.nr_map_ids = num_maps;
        prog_info.map_ids = (__u64)(unsigned long)map_ids;

        err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
        if (err)
                goto out_map_ids;

        xsk->xsks_map_fd = -1;

        for (i = 0; i < prog_info.nr_map_ids; i++) {
                fd = bpf_map_get_fd_by_id(map_ids[i]);
                if (fd < 0)
                        continue;

                err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
                if (err) {
                        close(fd);
                        continue;
                }

                if (!strcmp(map_info.name, "xsks_map")) {
                        xsk->xsks_map_fd = fd;
                        continue;
                }

                close(fd);
        }

        err = 0;
        if (xsk->xsks_map_fd == -1)
                err = -ENOENT;

out_map_ids:
        free(map_ids);
        return err;
}

static int xsk_set_bpf_maps(struct xsk_socket *xsk)
{
        return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
                                   &xsk->fd, 0);
}
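
/* Conceptually, the update above makes xsks_map[queue_id] refer to
 * this socket, so the XDP program's
 * bpf_redirect_map(&xsks_map, ctx->rx_queue_index, ...) steers packets
 * from that hardware queue into this socket's RX ring.
 */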

static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
{
        __u32 prog_id = 0;
        int err;

        err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
                                  xsk->config.xdp_flags);
        if (err)
                return err;

        if (!prog_id) {
                err = xsk_create_bpf_maps(xsk);
                if (err)
                        return err;

                err = xsk_load_xdp_prog(xsk);
                if (err) {
                        xsk_delete_bpf_maps(xsk);
                        return err;
                }
        } else {
                xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
                if (xsk->prog_fd < 0)
                        return -errno;
                err = xsk_lookup_bpf_maps(xsk);
                if (err) {
                        close(xsk->prog_fd);
                        return err;
                }
        }

        if (xsk->rx) {
                err = xsk_set_bpf_maps(xsk);
                if (err) {
                        xsk_delete_bpf_maps(xsk);
                        close(xsk->prog_fd);
                        return err;
                }
        }

        return 0;
}

int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
                       __u32 queue_id, struct xsk_umem *umem,
                       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
                       const struct xsk_socket_config *usr_config)
{
        void *rx_map = NULL, *tx_map = NULL;
        struct sockaddr_xdp sxdp = {};
        struct xdp_mmap_offsets off;
        struct xsk_socket *xsk;
        int err;

        if (!umem || !xsk_ptr || !(rx || tx))
                return -EFAULT;

        xsk = calloc(1, sizeof(*xsk));
        if (!xsk)
                return -ENOMEM;

        err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
        if (err)
                goto out_xsk_alloc;

        if (umem->refcount &&
            !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
                pr_warn("Error: shared umems are not supported by the libbpf-supplied XDP program.\n");
                err = -EBUSY;
                goto out_xsk_alloc;
        }

        if (umem->refcount++ > 0) {
                xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
                if (xsk->fd < 0) {
                        err = -errno;
                        goto out_xsk_alloc;
                }
        } else {
                xsk->fd = umem->fd;
        }

        xsk->outstanding_tx = 0;
        xsk->queue_id = queue_id;
        xsk->umem = umem;
        xsk->ifindex = if_nametoindex(ifname);
        if (!xsk->ifindex) {
                err = -errno;
                goto out_socket;
        }
        memcpy(xsk->ifname, ifname, IFNAMSIZ - 1);
        xsk->ifname[IFNAMSIZ - 1] = '\0';

        if (rx) {
                err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
                                 &xsk->config.rx_size,
                                 sizeof(xsk->config.rx_size));
                if (err) {
                        err = -errno;
                        goto out_socket;
                }
        }
        if (tx) {
                err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
                                 &xsk->config.tx_size,
                                 sizeof(xsk->config.tx_size));
                if (err) {
                        err = -errno;
                        goto out_socket;
                }
        }

        err = xsk_get_mmap_offsets(xsk->fd, &off);
        if (err) {
                err = -errno;
                goto out_socket;
        }

        if (rx) {
                rx_map = mmap(NULL, off.rx.desc +
                              xsk->config.rx_size * sizeof(struct xdp_desc),
                              PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
                              xsk->fd, XDP_PGOFF_RX_RING);
                if (rx_map == MAP_FAILED) {
                        err = -errno;
                        goto out_socket;
                }

                rx->mask = xsk->config.rx_size - 1;
                rx->size = xsk->config.rx_size;
                rx->producer = rx_map + off.rx.producer;
                rx->consumer = rx_map + off.rx.consumer;
                rx->flags = rx_map + off.rx.flags;
                rx->ring = rx_map + off.rx.desc;
                rx->cached_prod = *rx->producer;
                rx->cached_cons = *rx->consumer;
        }
        xsk->rx = rx;

        if (tx) {
                tx_map = mmap(NULL, off.tx.desc +
                              xsk->config.tx_size * sizeof(struct xdp_desc),
                              PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
                              xsk->fd, XDP_PGOFF_TX_RING);
                if (tx_map == MAP_FAILED) {
                        err = -errno;
                        goto out_mmap_rx;
                }

                tx->mask = xsk->config.tx_size - 1;
                tx->size = xsk->config.tx_size;
                tx->producer = tx_map + off.tx.producer;
                tx->consumer = tx_map + off.tx.consumer;
                tx->flags = tx_map + off.tx.flags;
                tx->ring = tx_map + off.tx.desc;
                tx->cached_prod = *tx->producer;
                /* cached_cons is r->size bigger than the real consumer
                 * pointer; see xsk_prod_nb_free().
                 */
                tx->cached_cons = *tx->consumer + xsk->config.tx_size;
        }
        xsk->tx = tx;

        sxdp.sxdp_family = PF_XDP;
        sxdp.sxdp_ifindex = xsk->ifindex;
        sxdp.sxdp_queue_id = xsk->queue_id;
        if (umem->refcount > 1) {
                sxdp.sxdp_flags = XDP_SHARED_UMEM;
                sxdp.sxdp_shared_umem_fd = umem->fd;
        } else {
                sxdp.sxdp_flags = xsk->config.bind_flags;
        }

        err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
        if (err) {
                err = -errno;
                goto out_mmap_tx;
        }

        xsk->prog_fd = -1;

        if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
                err = xsk_setup_xdp_prog(xsk);
                if (err)
                        goto out_mmap_tx;
        }

        *xsk_ptr = xsk;
        return 0;

out_mmap_tx:
        if (tx)
                munmap(tx_map, off.tx.desc +
                       xsk->config.tx_size * sizeof(struct xdp_desc));
out_mmap_rx:
        if (rx)
                munmap(rx_map, off.rx.desc +
                       xsk->config.rx_size * sizeof(struct xdp_desc));
out_socket:
        if (--umem->refcount)
                close(xsk->fd);
out_xsk_alloc:
        free(xsk);
        return err;
}
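
/* Example (sketch, not part of this file): bind to queue 0 of "eth0"
 * with the default config and drain one RX batch. umem is assumed to
 * have been created as in the earlier example, with its fill ring
 * populated by the caller:
 *
 *   struct xsk_ring_cons rx;
 *   struct xsk_ring_prod tx;
 *   struct xsk_socket *xsk;
 *   __u32 idx = 0;
 *   size_t rcvd;
 *
 *   if (xsk_socket__create(&xsk, "eth0", 0, umem, &rx, &tx, NULL))
 *           exit(EXIT_FAILURE);
 *   rcvd = xsk_ring_cons__peek(&rx, 64, &idx);
 *   // ... process rcvd descriptors starting at idx ...
 *   xsk_ring_cons__release(&rx, rcvd);
 */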

int xsk_umem__delete(struct xsk_umem *umem)
{
        struct xdp_mmap_offsets off;
        int err;

        if (!umem)
                return 0;

        if (umem->refcount)
                return -EBUSY;

        err = xsk_get_mmap_offsets(umem->fd, &off);
        if (!err) {
                munmap(umem->fill->ring - off.fr.desc,
                       off.fr.desc + umem->config.fill_size * sizeof(__u64));
                munmap(umem->comp->ring - off.cr.desc,
                       off.cr.desc + umem->config.comp_size * sizeof(__u64));
        }

        close(umem->fd);
        free(umem);

        return 0;
}

void xsk_socket__delete(struct xsk_socket *xsk)
{
        size_t desc_sz = sizeof(struct xdp_desc);
        struct xdp_mmap_offsets off;
        int err;

        if (!xsk)
                return;

        if (xsk->prog_fd != -1) {
                xsk_delete_bpf_maps(xsk);
                close(xsk->prog_fd);
        }

        err = xsk_get_mmap_offsets(xsk->fd, &off);
        if (!err) {
                if (xsk->rx) {
                        munmap(xsk->rx->ring - off.rx.desc,
                               off.rx.desc + xsk->config.rx_size * desc_sz);
                }
                if (xsk->tx) {
                        munmap(xsk->tx->ring - off.tx.desc,
                               off.tx.desc + xsk->config.tx_size * desc_sz);
                }
        }

        xsk->umem->refcount--;
        /* Do not close an fd that also has an associated umem connected
         * to it.
         */
        if (xsk->fd != xsk->umem->fd)
                close(xsk->fd);
        free(xsk);
}
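
/* Example (sketch, not part of this file): teardown order matters.
 * Delete all sockets sharing the umem first so its refcount reaches
 * zero; otherwise xsk_umem__delete() returns -EBUSY:
 *
 *   xsk_socket__delete(xsk);
 *   xsk_umem__delete(umem);
 *   free(bufs);
 */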