linux/kernel/bpf/bpf_struct_ops.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */

#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/numa.h>
#include <linux/seq_file.h>
#include <linux/refcount.h>
#include <linux/mutex.h>

enum bpf_struct_ops_state {
        BPF_STRUCT_OPS_STATE_INIT,
        BPF_STRUCT_OPS_STATE_INUSE,
        BPF_STRUCT_OPS_STATE_TOBEFREE,
};

#define BPF_STRUCT_OPS_COMMON_VALUE                     \
        refcount_t refcnt;                              \
        enum bpf_struct_ops_state state

struct bpf_struct_ops_value {
        BPF_STRUCT_OPS_COMMON_VALUE;
        char data[] ____cacheline_aligned_in_smp;
};

struct bpf_struct_ops_map {
        struct bpf_map map;
        struct rcu_head rcu;
        const struct bpf_struct_ops *st_ops;
        /* protect map_update */
        struct mutex lock;
        /* progs holds the bpf_progs that are populated
         * into the func ptrs of the kernel's struct
         * (in kvalue.data).
         */
        struct bpf_prog **progs;
        /* image is a page holding the trampolines
         * that store the func args before calling the bpf_prog.
         * A PAGE_SIZE "image" is enough to store all the trampolines
         * for "progs[]".
         */
        void *image;
        /* uvalue->data stores the kernel struct
         * (e.g. tcp_congestion_ops) that is more useful
         * to userspace than the kvalue.  For example,
         * the bpf_prog's id is stored instead of the kernel
         * address of a func ptr.
         */
        struct bpf_struct_ops_value *uvalue;
        /* kvalue.data stores the actual kernel's struct
         * (e.g. tcp_congestion_ops) that will be
         * registered to the kernel subsystem.
         */
        struct bpf_struct_ops_value kvalue;
};

#define VALUE_PREFIX "bpf_struct_ops_"
#define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1)

/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is
 * the map's value exposed to userspace and its btf-type-id is
 * stored in the map->btf_vmlinux_value_type_id.
 */
#define BPF_STRUCT_OPS_TYPE(_name)                              \
extern struct bpf_struct_ops bpf_##_name;                       \
                                                                \
struct bpf_struct_ops_##_name {                                 \
        BPF_STRUCT_OPS_COMMON_VALUE;                            \
        struct _name data ____cacheline_aligned_in_smp;         \
};
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE
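
/* For illustration, with tcp_congestion_ops listed in
 * bpf_struct_ops_types.h, the block above roughly expands to:
 *
 *      extern struct bpf_struct_ops bpf_tcp_congestion_ops;
 *
 *      struct bpf_struct_ops_tcp_congestion_ops {
 *              refcount_t refcnt;
 *              enum bpf_struct_ops_state state;
 *              struct tcp_congestion_ops data ____cacheline_aligned_in_smp;
 *      };
 */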

enum {
#define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name,
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE
        __NR_BPF_STRUCT_OPS_TYPE,
};

static struct bpf_struct_ops * const bpf_struct_ops[] = {
#define BPF_STRUCT_OPS_TYPE(_name)                              \
        [BPF_STRUCT_OPS_TYPE_##_name] = &bpf_##_name,
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE
};

const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = {
};

const struct bpf_prog_ops bpf_struct_ops_prog_ops = {
#ifdef CONFIG_NET
        .test_run = bpf_struct_ops_test_run,
#endif
};

static const struct btf_type *module_type;

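/* Called while parsing btf_vmlinux.  For each struct_ops registered in
 * bpf_struct_ops[], look up the kernel struct and its
 * "bpf_struct_ops_<name>" value struct in btf_vmlinux, reject structs
 * with anonymous or bitfield members or with more than
 * BPF_STRUCT_OPS_MAX_NR_MEMBERS members, distill each func ptr member
 * into a btf_func_model (used later to generate trampolines), and
 * finally call the subsystem's ->init().  A struct_ops that fails any
 * of these checks is logged and skipped.
 */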
void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log)
{
        s32 type_id, value_id, module_id;
        const struct btf_member *member;
        struct bpf_struct_ops *st_ops;
        const struct btf_type *t;
        char value_name[128];
        const char *mname;
        u32 i, j;

        /* Ensure BTF type is emitted for "struct bpf_struct_ops_##_name" */
#define BPF_STRUCT_OPS_TYPE(_name) BTF_TYPE_EMIT(struct bpf_struct_ops_##_name);
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE

        module_id = btf_find_by_name_kind(btf, "module", BTF_KIND_STRUCT);
        if (module_id < 0) {
                pr_warn("Cannot find struct module in btf_vmlinux\n");
                return;
        }
        module_type = btf_type_by_id(btf, module_id);

        for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
                st_ops = bpf_struct_ops[i];

                if (strlen(st_ops->name) + VALUE_PREFIX_LEN >=
                    sizeof(value_name)) {
                        pr_warn("struct_ops name %s is too long\n",
                                st_ops->name);
                        continue;
                }
                sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name);

                value_id = btf_find_by_name_kind(btf, value_name,
                                                 BTF_KIND_STRUCT);
                if (value_id < 0) {
                        pr_warn("Cannot find struct %s in btf_vmlinux\n",
                                value_name);
                        continue;
                }

                type_id = btf_find_by_name_kind(btf, st_ops->name,
                                                BTF_KIND_STRUCT);
                if (type_id < 0) {
                        pr_warn("Cannot find struct %s in btf_vmlinux\n",
                                st_ops->name);
                        continue;
                }
                t = btf_type_by_id(btf, type_id);
                if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) {
                        pr_warn("Cannot support #%u members in struct %s\n",
                                btf_type_vlen(t), st_ops->name);
                        continue;
                }

                for_each_member(j, t, member) {
                        const struct btf_type *func_proto;

                        mname = btf_name_by_offset(btf, member->name_off);
                        if (!*mname) {
                                pr_warn("anon member in struct %s is not supported\n",
                                        st_ops->name);
                                break;
                        }

                        if (__btf_member_bitfield_size(t, member)) {
                                pr_warn("bit field member %s in struct %s is not supported\n",
                                        mname, st_ops->name);
                                break;
                        }

                        func_proto = btf_type_resolve_func_ptr(btf,
                                                               member->type,
                                                               NULL);
                        if (func_proto &&
                            btf_distill_func_proto(log, btf,
                                                   func_proto, mname,
                                                   &st_ops->func_models[j])) {
                                pr_warn("Error in parsing func ptr %s in struct %s\n",
                                        mname, st_ops->name);
                                break;
                        }
                }

                if (j == btf_type_vlen(t)) {
                        if (st_ops->init(btf)) {
                                pr_warn("Error in init bpf_struct_ops %s\n",
                                        st_ops->name);
                        } else {
                                st_ops->type_id = type_id;
                                st_ops->type = t;
                                st_ops->value_id = value_id;
                                st_ops->value_type = btf_type_by_id(btf,
                                                                    value_id);
                        }
                }
        }
}

extern struct btf *btf_vmlinux;

static const struct bpf_struct_ops *
bpf_struct_ops_find_value(u32 value_id)
{
        unsigned int i;

        if (!value_id || !btf_vmlinux)
                return NULL;

        for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
                if (bpf_struct_ops[i]->value_id == value_id)
                        return bpf_struct_ops[i];
        }

        return NULL;
}

const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
{
        unsigned int i;

        if (!type_id || !btf_vmlinux)
                return NULL;

        for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
                if (bpf_struct_ops[i]->type_id == type_id)
                        return bpf_struct_ops[i];
        }

        return NULL;
}

static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key,
                                           void *next_key)
{
        if (key && *(u32 *)key == 0)
                return -ENOENT;

        *(u32 *)next_key = 0;
        return 0;
}

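/* Syscall-side lookup (BPF_MAP_LOOKUP_ELEM).  The map has a single
 * element at key 0.  It returns the userspace-friendly uvalue (which
 * stores prog ids instead of kernel addresses of func ptrs) together
 * with a snapshot of the current state and refcnt.  A map that has not
 * been updated yet (INIT state) reads back as all zeroes.
 */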
int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
                                       void *value)
{
        struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
        struct bpf_struct_ops_value *uvalue, *kvalue;
        enum bpf_struct_ops_state state;

        if (unlikely(*(u32 *)key != 0))
                return -ENOENT;

        kvalue = &st_map->kvalue;
        /* Pair with smp_store_release() during map_update */
        state = smp_load_acquire(&kvalue->state);
        if (state == BPF_STRUCT_OPS_STATE_INIT) {
                memset(value, 0, map->value_size);
                return 0;
        }

        /* No lock is needed.  state and refcnt do not need
         * to be updated together under atomic context.
         */
        uvalue = (struct bpf_struct_ops_value *)value;
        memcpy(uvalue, st_map->uvalue, map->value_size);
        uvalue->state = state;
        refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt));

        return 0;
}

static void *bpf_struct_ops_map_lookup_elem(struct bpf_map *map, void *key)
{
        return ERR_PTR(-EINVAL);
}

static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
{
        const struct btf_type *t = st_map->st_ops->type;
        u32 i;

        for (i = 0; i < btf_type_vlen(t); i++) {
                if (st_map->progs[i]) {
                        bpf_prog_put(st_map->progs[i]);
                        st_map->progs[i] = NULL;
                }
        }
}

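/* Ensure every byte of @data that is not covered by a member of @t
 * (i.e. alignment holes between members and any trailing padding) is
 * zero, so userspace cannot pass in bytes the kernel does not know
 * about.
 */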
static int check_zero_holes(const struct btf_type *t, void *data)
{
        const struct btf_member *member;
        u32 i, moff, msize, prev_mend = 0;
        const struct btf_type *mtype;

        for_each_member(i, t, member) {
                moff = __btf_member_bit_offset(t, member) / 8;
                if (moff > prev_mend &&
                    memchr_inv(data + prev_mend, 0, moff - prev_mend))
                        return -EINVAL;

                mtype = btf_type_by_id(btf_vmlinux, member->type);
                mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
                if (IS_ERR(mtype))
                        return PTR_ERR(mtype);
                prev_mend = moff + msize;
        }

        if (t->size > prev_mend &&
            memchr_inv(data + prev_mend, 0, t->size - prev_mend))
                return -EINVAL;

        return 0;
}

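/* Generate the trampoline for one struct_ops member: the native
 * caller's arguments are saved according to @model and @prog runs as a
 * single fentry prog.  When the member has a return value,
 * BPF_TRAMP_F_RET_FENTRY_RET makes the prog's return value become the
 * trampoline's return value.  Returns the trampoline size in bytes on
 * success or a negative errno.
 */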
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs,
                                      struct bpf_prog *prog,
                                      const struct btf_func_model *model,
                                      void *image, void *image_end)
{
        u32 flags;

        tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
        tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
        flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
        return arch_prepare_bpf_trampoline(NULL, image, image_end,
                                           model, flags, tprogs, NULL);
}

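/* The only way to populate a struct_ops map: key must be 0 and the
 * value is a "struct bpf_struct_ops_<name>" filled in by userspace.
 * Non-func-ptr members are copied (after checking that holes and
 * unknown bytes are zero), while func ptr members are given as prog
 * fds and are replaced by trampolines generated into st_map->image.
 * On success, the kernel struct in kvalue.data is registered with the
 * subsystem via st_ops->reg() and the map moves to the INUSE state.
 */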
static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
                                          void *value, u64 flags)
{
        struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
        const struct bpf_struct_ops *st_ops = st_map->st_ops;
        struct bpf_struct_ops_value *uvalue, *kvalue;
        const struct btf_member *member;
        const struct btf_type *t = st_ops->type;
        struct bpf_tramp_progs *tprogs = NULL;
        void *udata, *kdata;
        int prog_fd, err = 0;
        void *image, *image_end;
        u32 i;

        if (flags)
                return -EINVAL;

        if (*(u32 *)key != 0)
                return -E2BIG;

        err = check_zero_holes(st_ops->value_type, value);
        if (err)
                return err;

        uvalue = (struct bpf_struct_ops_value *)value;
        err = check_zero_holes(t, uvalue->data);
        if (err)
                return err;

        if (uvalue->state || refcount_read(&uvalue->refcnt))
                return -EINVAL;

        tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
        if (!tprogs)
                return -ENOMEM;

        uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
        kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;

        mutex_lock(&st_map->lock);

        if (kvalue->state != BPF_STRUCT_OPS_STATE_INIT) {
                err = -EBUSY;
                goto unlock;
        }

        memcpy(uvalue, value, map->value_size);

        udata = &uvalue->data;
        kdata = &kvalue->data;
        image = st_map->image;
        image_end = st_map->image + PAGE_SIZE;

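        /* Walk every member of the kernel struct: the "struct module *"
         * owner member is set to the BPF-specific owner marker,
         * subsystem-specific members are handled by ->init_member(),
         * any remaining non-func-ptr member must be zero, and each func
         * ptr member carries a prog fd that is turned into a trampoline
         * in "image" (the prog id is written back to udata for
         * userspace).
         */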
        for_each_member(i, t, member) {
                const struct btf_type *mtype, *ptype;
                struct bpf_prog *prog;
                u32 moff;

                moff = __btf_member_bit_offset(t, member) / 8;
                ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
                if (ptype == module_type) {
                        if (*(void **)(udata + moff))
                                goto reset_unlock;
                        *(void **)(kdata + moff) = BPF_MODULE_OWNER;
                        continue;
                }

                err = st_ops->init_member(t, member, kdata, udata);
                if (err < 0)
                        goto reset_unlock;

                /* The ->init_member() has handled this member */
                if (err > 0)
                        continue;

                /* If st_ops->init_member does not handle it,
                 * only func ptrs and zeroed members are handled
                 * here.  Reject everything else.
                 */

                /* All non-func-ptr members must be 0 */
                if (!ptype || !btf_type_is_func_proto(ptype)) {
                        u32 msize;

                        mtype = btf_type_by_id(btf_vmlinux, member->type);
                        mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
                        if (IS_ERR(mtype)) {
                                err = PTR_ERR(mtype);
                                goto reset_unlock;
                        }

                        if (memchr_inv(udata + moff, 0, msize)) {
                                err = -EINVAL;
                                goto reset_unlock;
                        }

                        continue;
                }

                prog_fd = (int)(*(unsigned long *)(udata + moff));
                /* Similar check as the attr->attach_prog_fd */
                if (!prog_fd)
                        continue;

                prog = bpf_prog_get(prog_fd);
                if (IS_ERR(prog)) {
                        err = PTR_ERR(prog);
                        goto reset_unlock;
                }
                st_map->progs[i] = prog;

                if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
                    prog->aux->attach_btf_id != st_ops->type_id ||
                    prog->expected_attach_type != i) {
                        err = -EINVAL;
                        goto reset_unlock;
                }

                err = bpf_struct_ops_prepare_trampoline(tprogs, prog,
                                                        &st_ops->func_models[i],
                                                        image, image_end);
                if (err < 0)
                        goto reset_unlock;

                *(void **)(kdata + moff) = image;
                image += err;

                /* put prog_id to udata */
                *(unsigned long *)(udata + moff) = prog->aux->id;
        }

        refcount_set(&kvalue->refcnt, 1);
        bpf_map_inc(map);

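        /* Seal the trampoline page (read-only + executable) before
         * publishing the ops to the kernel subsystem through
         * st_ops->reg().
         */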
        set_memory_ro((long)st_map->image, 1);
        set_memory_x((long)st_map->image, 1);
        err = st_ops->reg(kdata);
        if (likely(!err)) {
                /* Pair with smp_load_acquire() during lookup_elem().
                 * It ensures the above udata updates (e.g. prog->aux->id)
                 * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
                 */
                smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_INUSE);
                goto unlock;
        }

        /* Error during st_ops->reg().  It is very unlikely since
         * the above init_member() should have caught it earlier
         * before reg().  The only possibility is a race in
         * registering the struct_ops (under the same name) to
         * a sub-system through different struct_ops maps.
         */
        set_memory_nx((long)st_map->image, 1);
        set_memory_rw((long)st_map->image, 1);
        bpf_map_put(map);

reset_unlock:
        bpf_struct_ops_map_put_progs(st_map);
        memset(uvalue, 0, map->value_size);
        memset(kvalue, 0, map->value_size);
unlock:
        kfree(tprogs);
        mutex_unlock(&st_map->lock);
        return err;
}

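/* Deleting the single element tears the ops down: atomically move the
 * state from INUSE to TOBEFREE, unregister from the subsystem, and drop
 * the refcnt taken at update time (releasing the map reference once the
 * refcnt reaches zero).
 */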
static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
{
        enum bpf_struct_ops_state prev_state;
        struct bpf_struct_ops_map *st_map;

        st_map = (struct bpf_struct_ops_map *)map;
        prev_state = cmpxchg(&st_map->kvalue.state,
                             BPF_STRUCT_OPS_STATE_INUSE,
                             BPF_STRUCT_OPS_STATE_TOBEFREE);
        switch (prev_state) {
        case BPF_STRUCT_OPS_STATE_INUSE:
                st_map->st_ops->unreg(&st_map->kvalue.data);
                if (refcount_dec_and_test(&st_map->kvalue.refcnt))
                        bpf_map_put(map);
                return 0;
        case BPF_STRUCT_OPS_STATE_TOBEFREE:
                return -EINPROGRESS;
        case BPF_STRUCT_OPS_STATE_INIT:
                return -ENOENT;
        default:
                WARN_ON_ONCE(1);
                /* Should never happen.  Treat it as not found. */
                return -ENOENT;
        }
}

static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
                                             struct seq_file *m)
{
        void *value;
        int err;

        value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                return;

        err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
        if (!err) {
                btf_type_seq_show(btf_vmlinux, map->btf_vmlinux_value_type_id,
                                  value, m);
                seq_puts(m, "\n");
        }

        kfree(value);
}

static void bpf_struct_ops_map_free(struct bpf_map *map)
{
        struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;

        if (st_map->progs)
                bpf_struct_ops_map_put_progs(st_map);
        bpf_map_area_free(st_map->progs);
        bpf_jit_free_exec(st_map->image);
        bpf_map_area_free(st_map->uvalue);
        bpf_map_area_free(st_map);
}

static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
{
        if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
            attr->map_flags || !attr->btf_vmlinux_value_type_id)
                return -EINVAL;
        return 0;
}

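/* A struct_ops map has one element whose value embeds the whole
 * "struct bpf_struct_ops_<name>".  kvalue sits at the end of st_map, so
 * only the bytes of the value type beyond the common header need to be
 * added to sizeof(*st_map).
 */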
static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
{
        const struct bpf_struct_ops *st_ops;
        size_t st_map_size;
        struct bpf_struct_ops_map *st_map;
        const struct btf_type *t, *vt;
        struct bpf_map *map;

        if (!bpf_capable())
                return ERR_PTR(-EPERM);

        st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
        if (!st_ops)
                return ERR_PTR(-ENOTSUPP);

        vt = st_ops->value_type;
        if (attr->value_size != vt->size)
                return ERR_PTR(-EINVAL);

        t = st_ops->type;

        st_map_size = sizeof(*st_map) +
                /* kvalue stores the
                 * struct bpf_struct_ops_tcp_congestion_ops
                 */
                (vt->size - sizeof(struct bpf_struct_ops_value));

        st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
        if (!st_map)
                return ERR_PTR(-ENOMEM);

        st_map->st_ops = st_ops;
        map = &st_map->map;

        st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
        st_map->progs =
                bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_prog *),
                                   NUMA_NO_NODE);
        st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
        if (!st_map->uvalue || !st_map->progs || !st_map->image) {
                bpf_struct_ops_map_free(map);
                return ERR_PTR(-ENOMEM);
        }

        mutex_init(&st_map->lock);
        set_vm_flush_reset_perms(st_map->image);
        bpf_map_init_from_attr(map, attr);

        return map;
}

static int bpf_struct_ops_map_btf_id;
const struct bpf_map_ops bpf_struct_ops_map_ops = {
        .map_alloc_check = bpf_struct_ops_map_alloc_check,
        .map_alloc = bpf_struct_ops_map_alloc,
        .map_free = bpf_struct_ops_map_free,
        .map_get_next_key = bpf_struct_ops_map_get_next_key,
        .map_lookup_elem = bpf_struct_ops_map_lookup_elem,
        .map_delete_elem = bpf_struct_ops_map_delete_elem,
        .map_update_elem = bpf_struct_ops_map_update_elem,
        .map_seq_show_elem = bpf_struct_ops_map_seq_show_elem,
        .map_btf_name = "bpf_struct_ops_map",
        .map_btf_id = &bpf_struct_ops_map_btf_id,
};

/* "const void *" because some subsystems pass a
 * const pointer (e.g. const struct tcp_congestion_ops *)
 */
bool bpf_struct_ops_get(const void *kdata)
{
        struct bpf_struct_ops_value *kvalue;

        kvalue = container_of(kdata, struct bpf_struct_ops_value, data);

        return refcount_inc_not_zero(&kvalue->refcnt);
}

static void bpf_struct_ops_put_rcu(struct rcu_head *head)
{
        struct bpf_struct_ops_map *st_map;

        st_map = container_of(head, struct bpf_struct_ops_map, rcu);
        bpf_map_put(&st_map->map);
}

void bpf_struct_ops_put(const void *kdata)
{
        struct bpf_struct_ops_value *kvalue;

        kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
        if (refcount_dec_and_test(&kvalue->refcnt)) {
                struct bpf_struct_ops_map *st_map;

                st_map = container_of(kvalue, struct bpf_struct_ops_map,
                                      kvalue);
                /* A struct_ops function may switch to another struct_ops.
                 *
                 * For example, bpf_tcp_cc_x->init() may switch to
                 * another tcp_cc_y by calling
                 * setsockopt(TCP_CONGESTION, "tcp_cc_y").
                 * During the switch, bpf_struct_ops_put(tcp_cc_x) is called
                 * and its map->refcnt may reach 0, which would then free its
                 * trampoline image while tcp_cc_x is still running.
                 *
                 * Thus, an RCU grace period is needed here.
                 */
                call_rcu(&st_map->rcu, bpf_struct_ops_put_rcu);
        }
}