/* net/core/xdp.c
 *
 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
 * Released under terms in GPL version 2.  See COPYING.
 */
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <net/page_pool.h>

#include <net/xdp.h>

#define REG_STATE_NEW           0x0
#define REG_STATE_REGISTERED    0x1
#define REG_STATE_UNREGISTERED  0x2
#define REG_STATE_UNUSED        0x3

static DEFINE_IDA(mem_id_pool);
static DEFINE_MUTEX(mem_id_lock);
#define MEM_ID_MAX 0xFFFE
#define MEM_ID_MIN 1
static int mem_id_next = MEM_ID_MIN;

static bool mem_id_init; /* false */
static struct rhashtable *mem_id_ht;

struct xdp_mem_allocator {
        struct xdp_mem_info mem;
        union {
                void *allocator;
                struct page_pool *page_pool;
                struct zero_copy_allocator *zc_alloc;
        };
        struct rhash_head node;
        struct rcu_head rcu;
};

static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
{
        const u32 *k = data;
        const u32 key = *k;

        BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
                     != sizeof(u32));

        /* Use cyclic increasing ID as direct hash key */
        return key;
}

static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
                          const void *ptr)
{
        const struct xdp_mem_allocator *xa = ptr;
        u32 mem_id = *(u32 *)arg->key;

        return xa->mem.id != mem_id;
}

static const struct rhashtable_params mem_id_rht_params = {
        .nelem_hint = 64,
        .head_offset = offsetof(struct xdp_mem_allocator, node),
        .key_offset  = offsetof(struct xdp_mem_allocator, mem.id),
        .key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
        .max_size = MEM_ID_MAX,
        .min_size = 8,
        .automatic_shrinking = true,
        .hashfn    = xdp_mem_id_hashfn,
        .obj_cmpfn = xdp_mem_id_cmp,
};

static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
{
        struct xdp_mem_allocator *xa;

        xa = container_of(rcu, struct xdp_mem_allocator, rcu);

        /* Allow this ID to be reused */
        ida_simple_remove(&mem_id_pool, xa->mem.id);

        /* Notice, driver is expected to free the *allocator,
         * e.g. page_pool, and MUST also use RCU free.
         */

        /* Poison memory */
        xa->mem.id = 0xFFFF;
        xa->mem.type = 0xF0F0;
        xa->allocator = (void *)0xDEAD9001;

        kfree(xa);
}

void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
        struct xdp_mem_allocator *xa;
        int id = xdp_rxq->mem.id;

        if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
                WARN(1, "Missing register, driver bug");
                return;
        }

        if (xdp_rxq->mem.type != MEM_TYPE_PAGE_POOL &&
            xdp_rxq->mem.type != MEM_TYPE_ZERO_COPY) {
                return;
        }

        if (id == 0)
                return;

        mutex_lock(&mem_id_lock);

        xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
        if (xa && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
                call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);

        mutex_unlock(&mem_id_lock);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
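
/* Hedged teardown sketch (not part of this file): the driver owns the
 * allocator it registered and must keep it alive until concurrent RCU
 * readers in __xdp_return() are done with it.  A page_pool backed RX
 * ring would typically be torn down roughly as follows, where "rq" and
 * the exact page_pool release call are illustrative assumptions:
 *
 *        xdp_rxq_info_unreg_mem_model(&rq->xdp_rxq);
 *        ... quiesce the RX ring ...
 *        page_pool_destroy(rq->page_pool);
 *
 * Per the comment in __xdp_mem_allocator_rcu_free() above, the driver
 * side release of the allocator MUST itself be RCU deferred.
 */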

void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
        /* Simplify driver cleanup code paths, allow unreg "unused" */
        if (xdp_rxq->reg_state == REG_STATE_UNUSED)
                return;

        WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");

        xdp_rxq_info_unreg_mem_model(xdp_rxq);

        xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
        xdp_rxq->dev = NULL;

        /* Reset mem info to defaults */
        xdp_rxq->mem.id = 0;
        xdp_rxq->mem.type = 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);

static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
{
        memset(xdp_rxq, 0, sizeof(*xdp_rxq));
}

/* Returns 0 on success, negative on failure */
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
                     struct net_device *dev, u32 queue_index)
{
        if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
                WARN(1, "Driver promised not to register this");
                return -EINVAL;
        }

        if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
                WARN(1, "Missing unregister, handled but fix driver");
                xdp_rxq_info_unreg(xdp_rxq);
        }

        if (!dev) {
                WARN(1, "Missing net_device from driver");
                return -ENODEV;
        }

        /* State either UNREGISTERED or NEW */
        xdp_rxq_info_init(xdp_rxq);
        xdp_rxq->dev = dev;
        xdp_rxq->queue_index = queue_index;

        xdp_rxq->reg_state = REG_STATE_REGISTERED;
        return 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
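
/* Hedged usage sketch (not part of this file): a driver registers one
 * xdp_rxq_info per RX ring before packets can flow, and unregisters it
 * in the matching teardown path.  Names such as "rq" and "netdev" are
 * illustrative assumptions:
 *
 *        err = xdp_rxq_info_reg(&rq->xdp_rxq, netdev, rq->queue_index);
 *        if (err)
 *                goto err_free_ring;
 *
 * Rings that will never carry XDP traffic can instead be marked with
 * xdp_rxq_info_unused(), which makes a later xdp_rxq_info_unreg() on
 * that ring a no-op rather than a WARN.
 */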

void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
{
        xdp_rxq->reg_state = REG_STATE_UNUSED;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);

bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
{
        return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);

static int __mem_id_init_hash_table(void)
{
        struct rhashtable *rht;
        int ret;

        if (unlikely(mem_id_init))
                return 0;

        rht = kzalloc(sizeof(*rht), GFP_KERNEL);
        if (!rht)
                return -ENOMEM;

        ret = rhashtable_init(rht, &mem_id_rht_params);
        if (ret < 0) {
                kfree(rht);
                return ret;
        }
        mem_id_ht = rht;
        smp_mb(); /* mutex lock should provide enough pairing */
        mem_id_init = true;

        return 0;
}

/* Allocate a cyclic ID that maps to allocator pointer.
 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
 *
 * Caller must lock mem_id_lock.
 */
static int __mem_id_cyclic_get(gfp_t gfp)
{
        int retries = 1;
        int id;

again:
        id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
        if (id < 0) {
                if (id == -ENOSPC) {
                        /* Cyclic allocator, reset next id */
                        if (retries--) {
                                mem_id_next = MEM_ID_MIN;
                                goto again;
                        }
                }
                return id; /* errno */
        }
        mem_id_next = id + 1;

        return id;
}
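
/* Worked example of the wrap-around: with MEM_ID_MAX == 0xFFFE, once ID
 * 0xFFFD has been handed out mem_id_next becomes 0xFFFE, so the next
 * ida_simple_get() sees the exhausted range [0xFFFE, 0xFFFE) and returns
 * -ENOSPC; the single retry resets mem_id_next to MEM_ID_MIN, and IDs
 * already released via ida_simple_remove() become available again.
 */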

static bool __is_supported_mem_type(enum xdp_mem_type type)
{
        if (type == MEM_TYPE_PAGE_POOL)
                return is_page_pool_compiled_in();

        if (type >= MEM_TYPE_MAX)
                return false;

        return true;
}

int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
                               enum xdp_mem_type type, void *allocator)
{
        struct xdp_mem_allocator *xdp_alloc;
        gfp_t gfp = GFP_KERNEL;
        int id, errno, ret;
        void *ptr;

        if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
                WARN(1, "Missing register, driver bug");
                return -EFAULT;
        }

        if (!__is_supported_mem_type(type))
                return -EOPNOTSUPP;

        xdp_rxq->mem.type = type;

        if (!allocator) {
                if (type == MEM_TYPE_PAGE_POOL || type == MEM_TYPE_ZERO_COPY)
                        return -EINVAL; /* Setup time check page_pool req */
                return 0;
        }

        /* Delay init of rhashtable to save memory if feature isn't used */
        if (!mem_id_init) {
                mutex_lock(&mem_id_lock);
                ret = __mem_id_init_hash_table();
                mutex_unlock(&mem_id_lock);
                if (ret < 0) {
                        WARN_ON(1);
                        return ret;
                }
        }

        xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
        if (!xdp_alloc)
                return -ENOMEM;

        mutex_lock(&mem_id_lock);
        id = __mem_id_cyclic_get(gfp);
        if (id < 0) {
                errno = id;
                goto err;
        }
        xdp_rxq->mem.id = id;
        xdp_alloc->mem  = xdp_rxq->mem;
        xdp_alloc->allocator = allocator;

        /* Insert allocator into ID lookup table */
        ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
        if (IS_ERR(ptr)) {
                errno = PTR_ERR(ptr);
                goto err;
        }

        mutex_unlock(&mem_id_lock);

        return 0;
err:
        mutex_unlock(&mem_id_lock);
        kfree(xdp_alloc);
        return errno;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
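
/* Hedged usage sketch (not part of this file): a driver that recycles
 * RX pages through page_pool registers the pool as the memory model for
 * the ring right after xdp_rxq_info_reg().  "rq", "priv" and the exact
 * page_pool_params fields shown are illustrative assumptions:
 *
 *        struct page_pool_params pp_params = {
 *                .order     = 0,
 *                .pool_size = ring_size,
 *                .nid       = numa_node,
 *                .dev       = priv->dev,
 *                .dma_dir   = DMA_FROM_DEVICE,
 *        };
 *
 *        rq->page_pool = page_pool_create(&pp_params);
 *        if (IS_ERR(rq->page_pool))
 *                return PTR_ERR(rq->page_pool);
 *
 *        err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
 *                                         MEM_TYPE_PAGE_POOL, rq->page_pool);
 *        if (err)
 *                goto err_destroy_pool;
 *
 * Drivers handing out page fragments register MEM_TYPE_PAGE_SHARED (or
 * MEM_TYPE_PAGE_ORDER0) with a NULL allocator; only the page_pool and
 * zero-copy types require an allocator and thus an ID in mem_id_ht.
 */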

/* XDP RX runs under NAPI protection, and in different delivery error
 * scenarios (e.g. queue full), it is possible to return the xdp_frame
 * while still leveraging this protection.  The @napi_direct boolean
 * is used for those call sites, allowing faster recycling of
 * xdp_frames/pages in those cases.
 */
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
                         unsigned long handle)
{
        struct xdp_mem_allocator *xa;
        struct page *page;

        switch (mem->type) {
        case MEM_TYPE_PAGE_POOL:
                rcu_read_lock();
                /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
                xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
                page = virt_to_head_page(data);
                if (xa) {
                        napi_direct &= !xdp_return_frame_no_direct();
                        page_pool_put_page(xa->page_pool, page, napi_direct);
                } else {
                        put_page(page);
                }
                rcu_read_unlock();
                break;
        case MEM_TYPE_PAGE_SHARED:
                page_frag_free(data);
                break;
        case MEM_TYPE_PAGE_ORDER0:
                page = virt_to_page(data); /* Assumes order0 page */
                put_page(page);
                break;
        case MEM_TYPE_ZERO_COPY:
                /* NB! Only valid from an xdp_buff! */
                rcu_read_lock();
                /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
                xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
                xa->zc_alloc->free(xa->zc_alloc, handle);
                rcu_read_unlock();
        default:
                /* Not possible, checked in xdp_rxq_info_reg_mem_model() */
                break;
        }
}

void xdp_return_frame(struct xdp_frame *xdpf)
{
        __xdp_return(xdpf->data, &xdpf->mem, false, 0);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);

void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
        __xdp_return(xdpf->data, &xdpf->mem, true, 0);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);

void xdp_return_buff(struct xdp_buff *xdp)
{
        __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
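
/* Hedged usage sketch (not part of this file): completion paths that run
 * under NAPI protection, e.g. a driver recycling frames it could not
 * transmit from its own XDP_TX / ndo_xdp_xmit queue inside the poll
 * loop, may use the _rx_napi variant for the direct-recycle fast path
 * ("frames" and "n" are illustrative assumptions):
 *
 *        for (i = 0; i < n; i++)
 *                xdp_return_frame_rx_napi(frames[i]);
 *
 * Any context without NAPI protection must use plain xdp_return_frame(),
 * and xdp_return_buff() is only meaningful for an xdp_buff still tied to
 * its RX queue (it passes xdp->handle through for MEM_TYPE_ZERO_COPY).
 */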

int xdp_attachment_query(struct xdp_attachment_info *info,
                         struct netdev_bpf *bpf)
{
        bpf->prog_id = info->prog ? info->prog->aux->id : 0;
        bpf->prog_flags = info->prog ? info->flags : 0;
        return 0;
}
EXPORT_SYMBOL_GPL(xdp_attachment_query);

bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
                             struct netdev_bpf *bpf)
{
        if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) {
                NL_SET_ERR_MSG(bpf->extack,
                               "program loaded with different flags");
                return false;
        }
        return true;
}
EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok);

void xdp_attachment_setup(struct xdp_attachment_info *info,
                          struct netdev_bpf *bpf)
{
        if (info->prog)
                bpf_prog_put(info->prog);
        info->prog = bpf->prog;
        info->flags = bpf->flags;
}
EXPORT_SYMBOL_GPL(xdp_attachment_setup);
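
/* Hedged usage sketch (not part of this file): a driver's ndo_bpf()
 * handler built on these helpers looks roughly like the following,
 * where "priv->xdp" is an illustrative struct xdp_attachment_info:
 *
 *        switch (bpf->command) {
 *        case XDP_SETUP_PROG:
 *                if (!xdp_attachment_flags_ok(&priv->xdp, bpf))
 *                        return -EBUSY;
 *                ... install or remove bpf->prog in the datapath ...
 *                xdp_attachment_setup(&priv->xdp, bpf);
 *                return 0;
 *        case XDP_QUERY_PROG:
 *                return xdp_attachment_query(&priv->xdp, bpf);
 *        default:
 *                return -EINVAL;
 *        }
 */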

struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
{
        unsigned int metasize, totsize;
        void *addr, *data_to_copy;
        struct xdp_frame *xdpf;
        struct page *page;

        /* Clone into a MEM_TYPE_PAGE_ORDER0 xdp_frame. */
        metasize = xdp_data_meta_unsupported(xdp) ? 0 :
                   xdp->data - xdp->data_meta;
        totsize = xdp->data_end - xdp->data + metasize;

        if (sizeof(*xdpf) + totsize > PAGE_SIZE)
                return NULL;

        page = dev_alloc_page();
        if (!page)
                return NULL;

        addr = page_to_virt(page);
        xdpf = addr;
        memset(xdpf, 0, sizeof(*xdpf));

        addr += sizeof(*xdpf);
        data_to_copy = metasize ? xdp->data_meta : xdp->data;
        memcpy(addr, data_to_copy, totsize);

        xdpf->data = addr + metasize;
        xdpf->len = totsize - metasize;
        xdpf->headroom = 0;
        xdpf->metasize = metasize;
        xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;

        xdp_return_buff(xdp);
        return xdpf;
}
EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);