linux/kernel/bpf/xskmap.c
// SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/bpf.h>
#include <linux/capability.h>
#include <net/xdp_sock.h>
#include <linux/slab.h>
#include <linux/sched.h>

struct xsk_map {
	struct bpf_map map;
	struct xdp_sock **xsk_map;
	struct list_head __percpu *flush_list;
	spinlock_t lock; /* Synchronize map updates */
};

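/* Each socket stored in the map pins the map with a reference, taken via
 * xsk_map_inc() and dropped via xsk_map_put() once the entry goes away.
 */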
int xsk_map_inc(struct xsk_map *map)
{
	struct bpf_map *m = &map->map;

	m = bpf_map_inc(m, false);
	return PTR_ERR_OR_ZERO(m);
}

void xsk_map_put(struct xsk_map *map)
{
	bpf_map_put(&map->map);
}

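/* An xsk_map_node ties a socket to the map entry that points at it and
 * holds a map reference for as long as the socket sits in that slot.
 */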
static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
					       struct xdp_sock **map_entry)
{
	struct xsk_map_node *node;
	int err;

	node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
	if (!node)
		return ERR_PTR(-ENOMEM);

	err = xsk_map_inc(map);
	if (err) {
		kfree(node);
		return ERR_PTR(err);
	}

	node->map = map;
	node->map_entry = map_entry;
	return node;
}

static void xsk_map_node_free(struct xsk_map_node *node)
{
	xsk_map_put(node->map);
	kfree(node);
}

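/* xs->map_list records every map entry that currently points at the socket;
 * additions and deletions run under the socket's map_list_lock.
 */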
static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
{
	spin_lock_bh(&xs->map_list_lock);
	list_add_tail(&node->node, &xs->map_list);
	spin_unlock_bh(&xs->map_list_lock);
}

static void xsk_map_sock_delete(struct xdp_sock *xs,
				struct xdp_sock **map_entry)
{
	struct xsk_map_node *n, *tmp;

	spin_lock_bh(&xs->map_list_lock);
	list_for_each_entry_safe(n, tmp, &xs->map_list, node) {
		if (map_entry == n->map_entry) {
			list_del(&n->node);
			xsk_map_node_free(n);
		}
	}
	spin_unlock_bh(&xs->map_list_lock);
}

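/* Map creation: validate attributes, charge the map memory against the
 * memlock limit, then allocate the per-CPU flush lists and the array of
 * socket pointers.
 */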
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
	struct xsk_map *m;
	int cpu, err;
	u64 cost;

	if (!capable(CAP_NET_ADMIN))
		return ERR_PTR(-EPERM);

	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size != 4 ||
	    attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
		return ERR_PTR(-EINVAL);

	m = kzalloc(sizeof(*m), GFP_USER);
	if (!m)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&m->map, attr);
	spin_lock_init(&m->lock);

	cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
	cost += sizeof(struct list_head) * num_possible_cpus();

	/* Notice: returns -EPERM if map size is larger than memlock limit */
	err = bpf_map_charge_init(&m->map.memory, cost);
	if (err)
		goto free_m;

	err = -ENOMEM;

	m->flush_list = alloc_percpu(struct list_head);
	if (!m->flush_list)
		goto free_charge;

	for_each_possible_cpu(cpu)
		INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));

	m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
					sizeof(struct xdp_sock *),
					m->map.numa_node);
	if (!m->xsk_map)
		goto free_percpu;
	return &m->map;

free_percpu:
	free_percpu(m->flush_list);
free_charge:
	bpf_map_charge_finish(&m->map.memory);
free_m:
	kfree(m);
	return ERR_PTR(err);
}

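/* Map teardown: bpf_clear_redirect_map() and synchronize_net() make sure no
 * CPU is still redirecting into this map before its memory is freed.
 */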
static void xsk_map_free(struct bpf_map *map)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);

	bpf_clear_redirect_map(map);
	synchronize_net();
	free_percpu(m->flush_list);
	bpf_map_area_free(m->xsk_map);
	kfree(m);
}

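/* Key iteration for the BPF_MAP_GET_NEXT_KEY syscall: a NULL or out-of-range
 * key restarts at index 0, and the last index returns -ENOENT.
 */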
static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = next_key;

	if (index >= m->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == m->map.max_entries - 1)
		return -ENOENT;
	*next = index + 1;
	return 0;
}

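/* Fast-path lookup used from the XDP redirect path; runs under RCU, so a
 * plain READ_ONCE() of the entry suffices.
 */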
struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	struct xdp_sock *xs;

	if (key >= map->max_entries)
		return NULL;

	xs = READ_ONCE(m->xsk_map[key]);
	return xs;
}

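/* Receive one frame into the socket and, if the socket is not already
 * pending (flush_node.prev is NULL), queue it on this CPU's flush list so it
 * is flushed once at the end of the batch.
 */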
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
		       struct xdp_sock *xs)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
	int err;

	err = xsk_rcv(xs, xdp);
	if (err)
		return err;

	if (!xs->flush_node.prev)
		list_add(&xs->flush_node, flush_list);

	return 0;
}

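/* Flush every socket queued on this CPU's flush list; __list_del_clearprev()
 * resets flush_node.prev, which is what __xsk_map_redirect() checks to avoid
 * double-queueing.
 */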
void __xsk_map_flush(struct bpf_map *map)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
	struct xdp_sock *xs, *tmp;

	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
		xsk_flush(xs);
		__list_del_clearprev(&xs->flush_node);
	}
}

static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return __xsk_map_lookup_elem(map, *(u32 *)key);
}

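/* Lookups from the syscall side are not supported: the stored values are
 * kernel socket pointers and must not be handed to user space.
 */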
static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

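/* Insert an XSK socket, identified by the file descriptor in @value, at
 * index @key. The entry is published with WRITE_ONCE() under m->lock, and
 * the socket's map_list is updated so the binding can be torn down from
 * either the map side or the socket side.
 */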
static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
			       u64 map_flags)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	struct xdp_sock *xs, *old_xs, **map_entry;
	u32 i = *(u32 *)key, fd = *(u32 *)value;
	struct xsk_map_node *node;
	struct socket *sock;
	int err;

	if (unlikely(map_flags > BPF_EXIST))
		return -EINVAL;
	if (unlikely(i >= m->map.max_entries))
		return -E2BIG;

	sock = sockfd_lookup(fd, &err);
	if (!sock)
		return err;

	if (sock->sk->sk_family != PF_XDP) {
		sockfd_put(sock);
		return -EOPNOTSUPP;
	}

	xs = (struct xdp_sock *)sock->sk;

	if (!xsk_is_setup_for_bpf_map(xs)) {
		sockfd_put(sock);
		return -EOPNOTSUPP;
	}

	map_entry = &m->xsk_map[i];
	node = xsk_map_node_alloc(m, map_entry);
	if (IS_ERR(node)) {
		sockfd_put(sock);
		return PTR_ERR(node);
	}

	spin_lock_bh(&m->lock);
	old_xs = READ_ONCE(*map_entry);
	if (old_xs == xs) {
		err = 0;
		goto out;
	} else if (old_xs && map_flags == BPF_NOEXIST) {
		err = -EEXIST;
		goto out;
	} else if (!old_xs && map_flags == BPF_EXIST) {
		err = -ENOENT;
		goto out;
	}
	xsk_map_sock_add(xs, node);
	WRITE_ONCE(*map_entry, xs);
	if (old_xs)
		xsk_map_sock_delete(old_xs, map_entry);
	spin_unlock_bh(&m->lock);
	sockfd_put(sock);
	return 0;

out:
	spin_unlock_bh(&m->lock);
	sockfd_put(sock);
	xsk_map_node_free(node);
	return err;
}

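/* Clear index @key. xchg() removes the entry atomically; the old socket, if
 * any, drops its node (and with it the map reference) via
 * xsk_map_sock_delete().
 */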
static int xsk_map_delete_elem(struct bpf_map *map, void *key)
{
	struct xsk_map *m = container_of(map, struct xsk_map, map);
	struct xdp_sock *old_xs, **map_entry;
	int k = *(u32 *)key;

	if (k >= map->max_entries)
		return -EINVAL;

	spin_lock_bh(&m->lock);
	map_entry = &m->xsk_map[k];
	old_xs = xchg(map_entry, NULL);
	if (old_xs)
		xsk_map_sock_delete(old_xs, map_entry);
	spin_unlock_bh(&m->lock);

	return 0;
}

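/* Used when a socket leaves the map from the socket side: only clear the
 * slot if it still points at this socket, since the entry may have been
 * replaced in the meantime.
 */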
void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
			     struct xdp_sock **map_entry)
{
	spin_lock_bh(&map->lock);
	if (READ_ONCE(*map_entry) == xs) {
		WRITE_ONCE(*map_entry, NULL);
		xsk_map_sock_delete(xs, map_entry);
	}
	spin_unlock_bh(&map->lock);
}

const struct bpf_map_ops xsk_map_ops = {
	.map_alloc = xsk_map_alloc,
	.map_free = xsk_map_free,
	.map_get_next_key = xsk_map_get_next_key,
	.map_lookup_elem = xsk_map_lookup_elem,
	.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
	.map_update_elem = xsk_map_update_elem,
	.map_delete_elem = xsk_map_delete_elem,
	.map_check_btf = map_check_no_btf,
};