linux/net/core/bpf_sk_storage.c
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook  */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>

DEFINE_BPF_STORAGE_CACHE(sk_cache);

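/* Look up the value of @map in @sk's local storage.  With @cacheit_lockit
 * set, a hit is also promoted into the owner's lookup cache (this map's
 * sk_cache slot) under the storage lock.  Caller must hold the RCU read
 * lock.
 */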
static struct bpf_local_storage_data *
bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
{
        struct bpf_local_storage *sk_storage;
        struct bpf_local_storage_map *smap;

        sk_storage = rcu_dereference(sk->sk_bpf_storage);
        if (!sk_storage)
                return NULL;

        smap = (struct bpf_local_storage_map *)map;
        return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
}

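/* Delete @map's element from @sk's local storage.  Returns -ENOENT if the
 * socket currently has no value for this map.
 */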
static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
{
        struct bpf_local_storage_data *sdata;

        sdata = bpf_sk_storage_lookup(sk, map, false);
        if (!sdata)
                return -ENOENT;

        bpf_selem_unlink(SELEM(sdata));

        return 0;
}

/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
        struct bpf_local_storage_elem *selem;
        struct bpf_local_storage *sk_storage;
        bool free_sk_storage = false;
        struct hlist_node *n;

        rcu_read_lock();
        sk_storage = rcu_dereference(sk->sk_bpf_storage);
        if (!sk_storage) {
                rcu_read_unlock();
                return;
        }

        /* Neither the bpf_prog nor the bpf-map's syscall
         * could be modifying the sk_storage->list now.
         * Thus, no elem can be added-to or deleted-from the
         * sk_storage->list by the bpf_prog or by the bpf-map's syscall.
         *
         * It is racing with bpf_local_storage_map_free() alone
         * when unlinking elem from the sk_storage->list and
         * the map's bucket->list.
         */
        raw_spin_lock_bh(&sk_storage->lock);
        hlist_for_each_entry_safe(selem, n, &sk_storage->list, snode) {
                /* Always unlink from map before unlinking from
                 * sk_storage.
                 */
                bpf_selem_unlink_map(selem);
                free_sk_storage = bpf_selem_unlink_storage_nolock(sk_storage,
                                                                  selem, true);
        }
        raw_spin_unlock_bh(&sk_storage->lock);
        rcu_read_unlock();

        if (free_sk_storage)
                kfree_rcu(sk_storage, rcu);
}

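/* Map create/destroy for BPF_MAP_TYPE_SK_STORAGE.  Each map takes a slot in
 * the per-socket lookup cache (sk_cache) at allocation time and releases it
 * again when the map is freed.
 */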
static void bpf_sk_storage_map_free(struct bpf_map *map)
{
        struct bpf_local_storage_map *smap;

        smap = (struct bpf_local_storage_map *)map;
        bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
        bpf_local_storage_map_free(smap, NULL);
}

static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
        struct bpf_local_storage_map *smap;

        smap = bpf_local_storage_map_alloc(attr);
        if (IS_ERR(smap))
                return ERR_CAST(smap);

        smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
        return &smap->map;
}

static int notsupp_get_next_key(struct bpf_map *map, void *key,
                                void *next_key)
{
        return -ENOTSUPP;
}

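/* Element ops for the bpf(2) syscall path.  The key supplied by userspace is
 * a socket fd; it is resolved to the underlying struct sock with
 * sockfd_lookup() before the socket's local storage is operated on.
 */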
static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
{
        struct bpf_local_storage_data *sdata;
        struct socket *sock;
        int fd, err;

        fd = *(int *)key;
        sock = sockfd_lookup(fd, &err);
        if (sock) {
                sdata = bpf_sk_storage_lookup(sock->sk, map, true);
                sockfd_put(sock);
                return sdata ? sdata->data : NULL;
        }

        return ERR_PTR(err);
}

static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
                                         void *value, u64 map_flags)
{
        struct bpf_local_storage_data *sdata;
        struct socket *sock;
        int fd, err;

        fd = *(int *)key;
        sock = sockfd_lookup(fd, &err);
        if (sock) {
                sdata = bpf_local_storage_update(
                        sock->sk, (struct bpf_local_storage_map *)map, value,
                        map_flags);
                sockfd_put(sock);
                return PTR_ERR_OR_ZERO(sdata);
        }

        return err;
}

static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
{
        struct socket *sock;
        int fd, err;

        fd = *(int *)key;
        sock = sockfd_lookup(fd, &err);
        if (sock) {
                err = bpf_sk_storage_del(sock->sk, map);
                sockfd_put(sock);
                return err;
        }

        return err;
}

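/* Duplicate one storage element for the child socket @newsk during clone.
 * The map value is copied with the bpf_spin_lock-aware helper when the
 * value contains a spin lock.
 */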
static struct bpf_local_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
                          struct bpf_local_storage_map *smap,
                          struct bpf_local_storage_elem *selem)
{
        struct bpf_local_storage_elem *copy_selem;

        copy_selem = bpf_selem_alloc(smap, newsk, NULL, true);
        if (!copy_selem)
                return NULL;

        if (map_value_has_spin_lock(&smap->map))
                copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
                                      SDATA(selem)->data, true);
        else
                copy_map_value(&smap->map, SDATA(copy_selem)->data,
                               SDATA(selem)->data);

        return copy_selem;
}

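/* Called when a socket is cloned.  Copy the parent socket's storage elements
 * whose maps were created with BPF_F_CLONE over to the child socket @newsk.
 */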
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
        struct bpf_local_storage *new_sk_storage = NULL;
        struct bpf_local_storage *sk_storage;
        struct bpf_local_storage_elem *selem;
        int ret = 0;

        RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);

        rcu_read_lock();
        sk_storage = rcu_dereference(sk->sk_bpf_storage);

        if (!sk_storage || hlist_empty(&sk_storage->list))
                goto out;

        hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
                struct bpf_local_storage_elem *copy_selem;
                struct bpf_local_storage_map *smap;
                struct bpf_map *map;

                smap = rcu_dereference(SDATA(selem)->smap);
                if (!(smap->map.map_flags & BPF_F_CLONE))
                        continue;

                /* Note that for lockless listeners adding new element
                 * here can race with cleanup in bpf_local_storage_map_free.
                 * Try to grab map refcnt to make sure that it's still
                 * alive and prevent concurrent removal.
                 */
                map = bpf_map_inc_not_zero(&smap->map);
                if (IS_ERR(map))
                        continue;

                copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
                if (!copy_selem) {
                        ret = -ENOMEM;
                        bpf_map_put(map);
                        goto out;
                }

                if (new_sk_storage) {
                        bpf_selem_link_map(smap, copy_selem);
                        bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
                } else {
                        ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
                        if (ret) {
                                kfree(copy_selem);
                                atomic_sub(smap->elem_size,
                                           &newsk->sk_omem_alloc);
                                bpf_map_put(map);
                                goto out;
                        }

                        new_sk_storage =
                                rcu_dereference(copy_selem->local_storage);
                }
                bpf_map_put(map);
        }

out:
        rcu_read_unlock();

        /* In case of an error, don't free anything explicitly here, the
         * caller is responsible to call bpf_sk_storage_free.
         */

        return ret;
}

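/* bpf_sk_storage_get() helper: return a pointer to @sk's value for @map.
 * With BPF_SK_STORAGE_GET_F_CREATE, a missing element is created (optionally
 * initialized from @value), but only if the socket is not already going away
 * (i.e. its sk_refcnt can still be taken).
 */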
BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
           void *, value, u64, flags)
{
        struct bpf_local_storage_data *sdata;

        if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
                return (unsigned long)NULL;

        sdata = bpf_sk_storage_lookup(sk, map, true);
        if (sdata)
                return (unsigned long)sdata->data;

        if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
            /* Cannot add new elem to a going away sk.
             * Otherwise, the new elem may become a leak
             * (and also other memory issues during map
             *  destruction).
             */
            refcount_inc_not_zero(&sk->sk_refcnt)) {
                sdata = bpf_local_storage_update(
                        sk, (struct bpf_local_storage_map *)map, value,
                        BPF_NOEXIST);
                /* sk must be a fullsock (guaranteed by verifier),
                 * so sock_gen_put() is unnecessary.
                 */
                sock_put(sk);
                return IS_ERR(sdata) ?
                        (unsigned long)NULL : (unsigned long)sdata->data;
        }

        return (unsigned long)NULL;
}

BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
        if (!sk || !sk_fullsock(sk))
                return -EINVAL;

        if (refcount_inc_not_zero(&sk->sk_refcnt)) {
                int err;

                err = bpf_sk_storage_del(sk, map);
                sock_put(sk);
                return err;
        }

        return -ENOENT;
}

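/* Memory accounting callbacks: storage memory is charged to the owning
 * socket's optmem (sk_omem_alloc) and bounded by sysctl_optmem_max,
 * mirroring the check done in sock_kmalloc().
 */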
static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
                                 void *owner, u32 size)
{
        struct sock *sk = (struct sock *)owner;

        /* same check as in sock_kmalloc() */
        if (size <= sysctl_optmem_max &&
            atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
                atomic_add(size, &sk->sk_omem_alloc);
                return 0;
        }

        return -ENOMEM;
}

static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap,
                                    void *owner, u32 size)
{
        struct sock *sk = owner;

        atomic_sub(size, &sk->sk_omem_alloc);
}

static struct bpf_local_storage __rcu **
bpf_sk_storage_ptr(void *owner)
{
        struct sock *sk = owner;

        return &sk->sk_bpf_storage;
}

static int sk_storage_map_btf_id;
const struct bpf_map_ops sk_storage_map_ops = {
        .map_meta_equal = bpf_map_meta_equal,
        .map_alloc_check = bpf_local_storage_map_alloc_check,
        .map_alloc = bpf_sk_storage_map_alloc,
        .map_free = bpf_sk_storage_map_free,
        .map_get_next_key = notsupp_get_next_key,
        .map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
        .map_update_elem = bpf_fd_sk_storage_update_elem,
        .map_delete_elem = bpf_fd_sk_storage_delete_elem,
        .map_check_btf = bpf_local_storage_map_check_btf,
        .map_btf_name = "bpf_local_storage_map",
        .map_btf_id = &sk_storage_map_btf_id,
        .map_local_storage_charge = bpf_sk_storage_charge,
        .map_local_storage_uncharge = bpf_sk_storage_uncharge,
        .map_owner_storage_ptr = bpf_sk_storage_ptr,
};

const struct bpf_func_proto bpf_sk_storage_get_proto = {
        .func           = bpf_sk_storage_get,
        .gpl_only       = false,
        .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
        .arg1_type      = ARG_CONST_MAP_PTR,
        .arg2_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
        .arg3_type      = ARG_PTR_TO_MAP_VALUE_OR_NULL,
        .arg4_type      = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
        .func           = bpf_sk_storage_get,
        .gpl_only       = false,
        .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
        .arg1_type      = ARG_CONST_MAP_PTR,
        .arg2_type      = ARG_PTR_TO_CTX, /* context is 'struct sock' */
        .arg3_type      = ARG_PTR_TO_MAP_VALUE_OR_NULL,
        .arg4_type      = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_delete_proto = {
        .func           = bpf_sk_storage_delete,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_CONST_MAP_PTR,
        .arg2_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};
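
/* Example (sketch, not part of this file): how a BPF program typically uses
 * these helpers, assuming a BPF_MAP_TYPE_SK_STORAGE map "sk_stg" whose value
 * is a single __u64 counter:
 *
 *      __u64 *cnt;
 *
 *      cnt = bpf_sk_storage_get(&sk_stg, sk, NULL,
 *                               BPF_SK_STORAGE_GET_F_CREATE);
 *      if (cnt)
 *              __sync_fetch_and_add(cnt, 1);
 *      ...
 *      bpf_sk_storage_delete(&sk_stg, sk);
 */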

static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
{
        const struct btf *btf_vmlinux;
        const struct btf_type *t;
        const char *tname;
        u32 btf_id;

        if (prog->aux->dst_prog)
                return false;

        /* Ensure the tracing program is not tracing
         * any bpf_sk_storage*() function while also
         * using the bpf_sk_storage_(get|delete) helpers.
         */
        switch (prog->expected_attach_type) {
        case BPF_TRACE_ITER:
        case BPF_TRACE_RAW_TP:
                /* bpf_sk_storage has no trace point */
                return true;
        case BPF_TRACE_FENTRY:
        case BPF_TRACE_FEXIT:
                btf_vmlinux = bpf_get_btf_vmlinux();
                btf_id = prog->aux->attach_btf_id;
                t = btf_type_by_id(btf_vmlinux, btf_id);
                tname = btf_name_by_offset(btf_vmlinux, t->name_off);
                return !!strncmp(tname, "bpf_sk_storage",
                                 strlen("bpf_sk_storage"));
        default:
                return false;
        }

        return false;
}

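/* Tracing variants of the get/delete helpers.  Local storage takes
 * bh-disabling spinlocks internally, so these refuse to run from hardirq or
 * NMI context where those locks must not be taken.
 */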
BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
           void *, value, u64, flags)
{
        if (in_hardirq() || in_nmi())
                return (unsigned long)NULL;

        return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
}

BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
           struct sock *, sk)
{
        if (in_hardirq() || in_nmi())
                return -EPERM;

        return ____bpf_sk_storage_delete(map, sk);
}

const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
        .func           = bpf_sk_storage_get_tracing,
        .gpl_only       = false,
        .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
        .arg1_type      = ARG_CONST_MAP_PTR,
        .arg2_type      = ARG_PTR_TO_BTF_ID,
        .arg2_btf_id    = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
        .arg3_type      = ARG_PTR_TO_MAP_VALUE_OR_NULL,
        .arg4_type      = ARG_ANYTHING,
        .allowed        = bpf_sk_storage_tracing_allowed,
};

const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
        .func           = bpf_sk_storage_delete_tracing,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_CONST_MAP_PTR,
        .arg2_type      = ARG_PTR_TO_BTF_ID,
        .arg2_btf_id    = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
        .allowed        = bpf_sk_storage_tracing_allowed,
};

struct bpf_sk_storage_diag {
        u32 nr_maps;
        struct bpf_map *maps[];
};

/* The reply will be like:
 * INET_DIAG_BPF_SK_STORAGES (nla_nest)
 *      SK_DIAG_BPF_STORAGE (nla_nest)
 *              SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *              SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *      SK_DIAG_BPF_STORAGE (nla_nest)
 *              SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *              SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *      ....
 */
static int nla_value_size(u32 value_size)
{
        /* SK_DIAG_BPF_STORAGE (nla_nest)
         *      SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
         *      SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
         */
        return nla_total_size(0) + nla_total_size(sizeof(u32)) +
                nla_total_size_64bit(value_size);
}

void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
{
        u32 i;

        if (!diag)
                return;

        for (i = 0; i < diag->nr_maps; i++)
                bpf_map_put(diag->maps[i]);

        kfree(diag);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);

static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
                           const struct bpf_map *map)
{
        u32 i;

        for (i = 0; i < diag->nr_maps; i++) {
                if (diag->maps[i] == map)
                        return true;
        }

        return false;
}

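/* Build a diag handle from the SK_DIAG_BPF_STORAGE_REQ_MAP_FD attributes of
 * a sock_diag request: each referenced sk-storage map has its refcount taken
 * and is remembered in diag->maps[] for later dumping.
 */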
struct bpf_sk_storage_diag *
bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
{
        struct bpf_sk_storage_diag *diag;
        struct nlattr *nla;
        u32 nr_maps = 0;
        int rem, err;

        /* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as
         * the map_alloc_check() side also does.
         */
        if (!bpf_capable())
                return ERR_PTR(-EPERM);

        nla_for_each_nested(nla, nla_stgs, rem) {
                if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
                        nr_maps++;
        }

        diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
        if (!diag)
                return ERR_PTR(-ENOMEM);

        nla_for_each_nested(nla, nla_stgs, rem) {
                struct bpf_map *map;
                int map_fd;

                if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
                        continue;

                map_fd = nla_get_u32(nla);
                map = bpf_map_get(map_fd);
                if (IS_ERR(map)) {
                        err = PTR_ERR(map);
                        goto err_free;
                }
                if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
                        bpf_map_put(map);
                        err = -EINVAL;
                        goto err_free;
                }
                if (diag_check_dup(diag, map)) {
                        bpf_map_put(map);
                        err = -EEXIST;
                        goto err_free;
                }
                diag->maps[diag->nr_maps++] = map;
        }

        return diag;

err_free:
        bpf_sk_storage_diag_free(diag);
        return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);

static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
{
        struct nlattr *nla_stg, *nla_value;
        struct bpf_local_storage_map *smap;

        /* It cannot exceed max nlattr's payload */
        BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);

        nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
        if (!nla_stg)
                return -EMSGSIZE;

        smap = rcu_dereference(sdata->smap);
        if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
                goto errout;

        nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
                                      smap->map.value_size,
                                      SK_DIAG_BPF_STORAGE_PAD);
        if (!nla_value)
                goto errout;

        if (map_value_has_spin_lock(&smap->map))
                copy_map_value_locked(&smap->map, nla_data(nla_value),
                                      sdata->data, true);
        else
                copy_map_value(&smap->map, nla_data(nla_value), sdata->data);

        nla_nest_end(skb, nla_stg);
        return 0;

errout:
        nla_nest_cancel(skb, nla_stg);
        return -EMSGSIZE;
}

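/* Dump every storage element of @sk into @skb.  Even after the skb runs out
 * of room (-EMSGSIZE), keep walking the list so that the total size needed
 * is still reported back through *res_diag_size.
 */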
static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
                                       int stg_array_type,
                                       unsigned int *res_diag_size)
{
        /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
        unsigned int diag_size = nla_total_size(0);
        struct bpf_local_storage *sk_storage;
        struct bpf_local_storage_elem *selem;
        struct bpf_local_storage_map *smap;
        struct nlattr *nla_stgs;
        unsigned int saved_len;
        int err = 0;

        rcu_read_lock();

        sk_storage = rcu_dereference(sk->sk_bpf_storage);
        if (!sk_storage || hlist_empty(&sk_storage->list)) {
                rcu_read_unlock();
                return 0;
        }

        nla_stgs = nla_nest_start(skb, stg_array_type);
        if (!nla_stgs)
                /* Continue to learn diag_size */
                err = -EMSGSIZE;

        saved_len = skb->len;
        hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
                smap = rcu_dereference(SDATA(selem)->smap);
                diag_size += nla_value_size(smap->map.value_size);

                if (nla_stgs && diag_get(SDATA(selem), skb))
                        /* Continue to learn diag_size */
                        err = -EMSGSIZE;
        }

        rcu_read_unlock();

        if (nla_stgs) {
                if (saved_len == skb->len)
                        nla_nest_cancel(skb, nla_stgs);
                else
                        nla_nest_end(skb, nla_stgs);
        }

        if (diag_size == nla_total_size(0)) {
                *res_diag_size = 0;
                return 0;
        }

        *res_diag_size = diag_size;
        return err;
}

int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
                            struct sock *sk, struct sk_buff *skb,
                            int stg_array_type,
                            unsigned int *res_diag_size)
{
        /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
        unsigned int diag_size = nla_total_size(0);
        struct bpf_local_storage *sk_storage;
        struct bpf_local_storage_data *sdata;
        struct nlattr *nla_stgs;
        unsigned int saved_len;
        int err = 0;
        u32 i;

        *res_diag_size = 0;

        /* No map has been specified.  Dump all. */
        if (!diag->nr_maps)
                return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
                                                   res_diag_size);

        rcu_read_lock();
        sk_storage = rcu_dereference(sk->sk_bpf_storage);
        if (!sk_storage || hlist_empty(&sk_storage->list)) {
                rcu_read_unlock();
                return 0;
        }

        nla_stgs = nla_nest_start(skb, stg_array_type);
        if (!nla_stgs)
                /* Continue to learn diag_size */
                err = -EMSGSIZE;

        saved_len = skb->len;
        for (i = 0; i < diag->nr_maps; i++) {
                sdata = bpf_local_storage_lookup(sk_storage,
                                (struct bpf_local_storage_map *)diag->maps[i],
                                false);

                if (!sdata)
                        continue;

                diag_size += nla_value_size(diag->maps[i]->value_size);

                if (nla_stgs && diag_get(sdata, skb))
                        /* Continue to learn diag_size */
                        err = -EMSGSIZE;
        }
        rcu_read_unlock();

        if (nla_stgs) {
                if (saved_len == skb->len)
                        nla_nest_cancel(skb, nla_stgs);
                else
                        nla_nest_end(skb, nla_stgs);
        }

        if (diag_size == nla_total_size(0)) {
                *res_diag_size = 0;
                return 0;
        }

        *res_diag_size = diag_size;
        return err;
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);

struct bpf_iter_seq_sk_storage_map_info {
        struct bpf_map *map;
        unsigned int bucket_id;
        unsigned skip_elems;
};

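/* bpf_iter seq_file walker.  The iteration position is (bucket_id,
 * skip_elems); each call resumes in the current bucket after the previously
 * returned element and then moves on to the following buckets.  Returns with
 * the RCU read lock held whenever an element is found.
 */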
static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
                                 struct bpf_local_storage_elem *prev_selem)
        __acquires(RCU) __releases(RCU)
{
        struct bpf_local_storage *sk_storage;
        struct bpf_local_storage_elem *selem;
        u32 skip_elems = info->skip_elems;
        struct bpf_local_storage_map *smap;
        u32 bucket_id = info->bucket_id;
        u32 i, count, n_buckets;
        struct bpf_local_storage_map_bucket *b;

        smap = (struct bpf_local_storage_map *)info->map;
        n_buckets = 1U << smap->bucket_log;
        if (bucket_id >= n_buckets)
                return NULL;

        /* try to find next selem in the same bucket */
        selem = prev_selem;
        count = 0;
        while (selem) {
                selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
                                         struct bpf_local_storage_elem, map_node);
                if (!selem) {
                        /* not found, unlock and go to the next bucket */
                        b = &smap->buckets[bucket_id++];
                        rcu_read_unlock();
                        skip_elems = 0;
                        break;
                }
                sk_storage = rcu_dereference(selem->local_storage);
                if (sk_storage) {
                        info->skip_elems = skip_elems + count;
                        return selem;
                }
                count++;
        }

        for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
                b = &smap->buckets[i];
                rcu_read_lock();
                count = 0;
                hlist_for_each_entry_rcu(selem, &b->list, map_node) {
                        sk_storage = rcu_dereference(selem->local_storage);
                        if (sk_storage && count >= skip_elems) {
                                info->bucket_id = i;
                                info->skip_elems = count;
                                return selem;
                        }
                        count++;
                }
                rcu_read_unlock();
                skip_elems = 0;
        }

        info->bucket_id = i;
        info->skip_elems = 0;
        return NULL;
}

static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct bpf_local_storage_elem *selem;

        selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
        if (!selem)
                return NULL;

        if (*pos == 0)
                ++*pos;
        return selem;
}

static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
                                         loff_t *pos)
{
        struct bpf_iter_seq_sk_storage_map_info *info = seq->private;

        ++*pos;
        ++info->skip_elems;
        return bpf_sk_storage_map_seq_find_next(seq->private, v);
}

struct bpf_iter__bpf_sk_storage_map {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct bpf_map *, map);
        __bpf_md_ptr(struct sock *, sk);
        __bpf_md_ptr(void *, value);
};

DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
                     struct bpf_map *map, struct sock *sk,
                     void *value)

static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
                                         struct bpf_local_storage_elem *selem)
{
        struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
        struct bpf_iter__bpf_sk_storage_map ctx = {};
        struct bpf_local_storage *sk_storage;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
        int ret = 0;

        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, selem == NULL);
        if (prog) {
                ctx.meta = &meta;
                ctx.map = info->map;
                if (selem) {
                        sk_storage = rcu_dereference(selem->local_storage);
                        ctx.sk = sk_storage->owner;
                        ctx.value = SDATA(selem)->data;
                }
                ret = bpf_iter_run_prog(prog, &ctx);
        }

        return ret;
}

static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
{
        return __bpf_sk_storage_map_seq_show(seq, v);
}

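/* On stop, if the iteration ended naturally (v == NULL), invoke the show
 * handler one final time with a NULL element so the bpf_iter program can
 * observe the end of the iteration; otherwise just drop the RCU read lock
 * taken by the walker.
 */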
static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
        __releases(RCU)
{
        if (!v)
                (void)__bpf_sk_storage_map_seq_show(seq, v);
        else
                rcu_read_unlock();
}

static int bpf_iter_init_sk_storage_map(void *priv_data,
                                        struct bpf_iter_aux_info *aux)
{
        struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

        seq_info->map = aux->map;
        return 0;
}

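/* Attach a bpf_iter link to one sk-storage map.  The map fd from the link
 * info is resolved and held with a uref, and the iterator program's declared
 * access to the value must fit within the map's value_size.
 */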
static int bpf_iter_attach_map(struct bpf_prog *prog,
                               union bpf_iter_link_info *linfo,
                               struct bpf_iter_aux_info *aux)
{
        struct bpf_map *map;
        int err = -EINVAL;

        if (!linfo->map.map_fd)
                return -EBADF;

        map = bpf_map_get_with_uref(linfo->map.map_fd);
        if (IS_ERR(map))
                return PTR_ERR(map);

        if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
                goto put_map;

        if (prog->aux->max_rdonly_access > map->value_size) {
                err = -EACCES;
                goto put_map;
        }

        aux->map = map;
        return 0;

put_map:
        bpf_map_put_with_uref(map);
        return err;
}

static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
{
        bpf_map_put_with_uref(aux->map);
}

static const struct seq_operations bpf_sk_storage_map_seq_ops = {
        .start  = bpf_sk_storage_map_seq_start,
        .next   = bpf_sk_storage_map_seq_next,
        .stop   = bpf_sk_storage_map_seq_stop,
        .show   = bpf_sk_storage_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
        .seq_ops                = &bpf_sk_storage_map_seq_ops,
        .init_seq_private       = bpf_iter_init_sk_storage_map,
        .fini_seq_private       = NULL,
        .seq_priv_size          = sizeof(struct bpf_iter_seq_sk_storage_map_info),
};

static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
        .target                 = "bpf_sk_storage_map",
        .attach_target          = bpf_iter_attach_map,
        .detach_target          = bpf_iter_detach_map,
        .show_fdinfo            = bpf_iter_map_show_fdinfo,
        .fill_link_info         = bpf_iter_map_fill_link_info,
        .ctx_arg_info_size      = 2,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
                  PTR_TO_BTF_ID_OR_NULL },
                { offsetof(struct bpf_iter__bpf_sk_storage_map, value),
                  PTR_TO_RDWR_BUF_OR_NULL },
        },
        .seq_info               = &iter_seq_info,
};

static int __init bpf_sk_storage_map_iter_init(void)
{
        bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
                btf_sock_ids[BTF_SOCK_TYPE_SOCK];
        return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info);
}
late_initcall(bpf_sk_storage_map_iter_init);