linux/kernel/bpf/bpf_struct_ops.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */

#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/numa.h>
#include <linux/seq_file.h>
#include <linux/refcount.h>
#include <linux/mutex.h>
#include <linux/btf_ids.h>

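/* A struct_ops map's value moves through these states:
 * INIT (map created), INUSE (map_update_elem() succeeded and the
 * kernel struct has been registered with its subsystem), and
 * TOBEFREE (map_delete_elem() has requested unregistration).
 */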
enum bpf_struct_ops_state {
        BPF_STRUCT_OPS_STATE_INIT,
        BPF_STRUCT_OPS_STATE_INUSE,
        BPF_STRUCT_OPS_STATE_TOBEFREE,
};

#define BPF_STRUCT_OPS_COMMON_VALUE                     \
        refcount_t refcnt;                              \
        enum bpf_struct_ops_state state

struct bpf_struct_ops_value {
        BPF_STRUCT_OPS_COMMON_VALUE;
        char data[] ____cacheline_aligned_in_smp;
};

struct bpf_struct_ops_map {
        struct bpf_map map;
        struct rcu_head rcu;
        const struct bpf_struct_ops *st_ops;
        /* protect map_update */
        struct mutex lock;
        /* links holds the bpf_links that are populated
         * to the func ptrs of the kernel's struct
         * (in kvalue.data).
         */
        struct bpf_link **links;
        /* image is a page that holds all the trampolines
         * that store the func args before calling the bpf_prog.
         * A PAGE_SIZE "image" is enough to store all the
         * trampolines for "links[]".
         */
        void *image;
        /* uvalue->data stores the kernel struct
         * (e.g. tcp_congestion_ops) that is more useful
         * to userspace than the kvalue.  For example,
         * the bpf_prog's id is stored instead of the kernel
         * address of a func ptr.
         */
        struct bpf_struct_ops_value *uvalue;
        /* kvalue.data stores the actual kernel struct
         * (e.g. tcp_congestion_ops) that will be
         * registered to the kernel subsystem.
         */
        struct bpf_struct_ops_value kvalue;
};

#define VALUE_PREFIX "bpf_struct_ops_"
#define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1)

/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is
 * the map's value exposed to userspace, and its btf-type-id is
 * stored in map->btf_vmlinux_value_type_id.
 */
#define BPF_STRUCT_OPS_TYPE(_name)                              \
extern struct bpf_struct_ops bpf_##_name;                       \
                                                                \
struct bpf_struct_ops_##_name {                                 \
        BPF_STRUCT_OPS_COMMON_VALUE;                            \
        struct _name data ____cacheline_aligned_in_smp;         \
};
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE

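/* "bpf_struct_ops_types.h" is an X-macro header: it is included three
 * times with different definitions of BPF_STRUCT_OPS_TYPE to stamp out
 * the bpf_struct_ops_##_name value structs (above), the type-id enum
 * (below), and the bpf_struct_ops[] array.
 */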
enum {
#define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name,
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE
        __NR_BPF_STRUCT_OPS_TYPE,
};

static struct bpf_struct_ops * const bpf_struct_ops[] = {
#define BPF_STRUCT_OPS_TYPE(_name)                              \
        [BPF_STRUCT_OPS_TYPE_##_name] = &bpf_##_name,
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE
};

const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = {
};

const struct bpf_prog_ops bpf_struct_ops_prog_ops = {
#ifdef CONFIG_NET
        .test_run = bpf_struct_ops_test_run,
#endif
};

static const struct btf_type *module_type;

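/* Called while btf_vmlinux is being parsed.  For each supported
 * struct_ops type, validate its BTF description (named, non-bitfield
 * members and at most BPF_STRUCT_OPS_MAX_NR_MEMBERS of them), distill
 * a func model for every func ptr member, and cache the btf type ids
 * used later by bpf_struct_ops_find() and bpf_struct_ops_find_value().
 */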
void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log)
{
        s32 type_id, value_id, module_id;
        const struct btf_member *member;
        struct bpf_struct_ops *st_ops;
        const struct btf_type *t;
        char value_name[128];
        const char *mname;
        u32 i, j;

        /* Ensure BTF type is emitted for "struct bpf_struct_ops_##_name" */
#define BPF_STRUCT_OPS_TYPE(_name) BTF_TYPE_EMIT(struct bpf_struct_ops_##_name);
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE

        module_id = btf_find_by_name_kind(btf, "module", BTF_KIND_STRUCT);
        if (module_id < 0) {
                pr_warn("Cannot find struct module in btf_vmlinux\n");
                return;
        }
        module_type = btf_type_by_id(btf, module_id);

        for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
                st_ops = bpf_struct_ops[i];

                if (strlen(st_ops->name) + VALUE_PREFIX_LEN >=
                    sizeof(value_name)) {
                        pr_warn("struct_ops name %s is too long\n",
                                st_ops->name);
                        continue;
                }
                sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name);

                value_id = btf_find_by_name_kind(btf, value_name,
                                                 BTF_KIND_STRUCT);
                if (value_id < 0) {
                        pr_warn("Cannot find struct %s in btf_vmlinux\n",
                                value_name);
                        continue;
                }

                type_id = btf_find_by_name_kind(btf, st_ops->name,
                                                BTF_KIND_STRUCT);
                if (type_id < 0) {
                        pr_warn("Cannot find struct %s in btf_vmlinux\n",
                                st_ops->name);
                        continue;
                }
                t = btf_type_by_id(btf, type_id);
                if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) {
                        pr_warn("Cannot support #%u members in struct %s\n",
                                btf_type_vlen(t), st_ops->name);
                        continue;
                }

                for_each_member(j, t, member) {
                        const struct btf_type *func_proto;

                        mname = btf_name_by_offset(btf, member->name_off);
                        if (!*mname) {
                                pr_warn("anon member in struct %s is not supported\n",
                                        st_ops->name);
                                break;
                        }

                        if (__btf_member_bitfield_size(t, member)) {
                                pr_warn("bit field member %s in struct %s is not supported\n",
                                        mname, st_ops->name);
                                break;
                        }

                        func_proto = btf_type_resolve_func_ptr(btf,
                                                               member->type,
                                                               NULL);
                        if (func_proto &&
                            btf_distill_func_proto(log, btf,
                                                   func_proto, mname,
                                                   &st_ops->func_models[j])) {
                                pr_warn("Error in parsing func ptr %s in struct %s\n",
                                        mname, st_ops->name);
                                break;
                        }
                }

                if (j == btf_type_vlen(t)) {
                        if (st_ops->init(btf)) {
                                pr_warn("Error in init bpf_struct_ops %s\n",
                                        st_ops->name);
                        } else {
                                st_ops->type_id = type_id;
                                st_ops->type = t;
                                st_ops->value_id = value_id;
                                st_ops->value_type = btf_type_by_id(btf,
                                                                    value_id);
                        }
                }
        }
}

extern struct btf *btf_vmlinux;

static const struct bpf_struct_ops *
bpf_struct_ops_find_value(u32 value_id)
{
        unsigned int i;

        if (!value_id || !btf_vmlinux)
                return NULL;

        for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
                if (bpf_struct_ops[i]->value_id == value_id)
                        return bpf_struct_ops[i];
        }

        return NULL;
}

const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
{
        unsigned int i;

        if (!type_id || !btf_vmlinux)
                return NULL;

        for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
                if (bpf_struct_ops[i]->type_id == type_id)
                        return bpf_struct_ops[i];
        }

        return NULL;
}

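/* A struct_ops map holds exactly one element at key 0, so the only
 * possible "next" key (when no key or an invalid key is given) is 0.
 */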
static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key,
                                           void *next_key)
{
        if (key && *(u32 *)key == 0)
                return -ENOENT;

        *(u32 *)next_key = 0;
        return 0;
}

int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
                                       void *value)
{
        struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
        struct bpf_struct_ops_value *uvalue, *kvalue;
        enum bpf_struct_ops_state state;

        if (unlikely(*(u32 *)key != 0))
                return -ENOENT;

        kvalue = &st_map->kvalue;
        /* Pair with smp_store_release() during map_update */
        state = smp_load_acquire(&kvalue->state);
        if (state == BPF_STRUCT_OPS_STATE_INIT) {
                memset(value, 0, map->value_size);
                return 0;
        }

        /* No lock is needed.  state and refcnt do not need
         * to be read together atomically; a consistent snapshot
         * of the two is not required here.
         */
        uvalue = value;
        memcpy(uvalue, st_map->uvalue, map->value_size);
        uvalue->state = state;
        refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt));

        return 0;
}

static void *bpf_struct_ops_map_lookup_elem(struct bpf_map *map, void *key)
{
        return ERR_PTR(-EINVAL);
}

static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
{
        const struct btf_type *t = st_map->st_ops->type;
        u32 i;

        for (i = 0; i < btf_type_vlen(t); i++) {
                if (st_map->links[i]) {
                        bpf_link_put(st_map->links[i]);
                        st_map->links[i] = NULL;
                }
        }
}

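/* Verify that the paddings between the members of "t" (and any tail
 * padding) are all zero in "data", so that no unchecked bytes can
 * sneak into the kernel struct.
 */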
static int check_zero_holes(const struct btf_type *t, void *data)
{
        const struct btf_member *member;
        u32 i, moff, msize, prev_mend = 0;
        const struct btf_type *mtype;

        for_each_member(i, t, member) {
                moff = __btf_member_bit_offset(t, member) / 8;
                if (moff > prev_mend &&
                    memchr_inv(data + prev_mend, 0, moff - prev_mend))
                        return -EINVAL;

                mtype = btf_type_by_id(btf_vmlinux, member->type);
                mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
                if (IS_ERR(mtype))
                        return PTR_ERR(mtype);
                prev_mend = moff + msize;
        }

        if (t->size > prev_mend &&
            memchr_inv(data + prev_mend, 0, t->size - prev_mend))
                return -EINVAL;

        return 0;
}

static void bpf_struct_ops_link_release(struct bpf_link *link)
{
}

static void bpf_struct_ops_link_dealloc(struct bpf_link *link)
{
        struct bpf_tramp_link *tlink = container_of(link, struct bpf_tramp_link, link);

        kfree(tlink);
}

const struct bpf_link_ops bpf_struct_ops_link_lops = {
        .release = bpf_struct_ops_link_release,
        .dealloc = bpf_struct_ops_link_dealloc,
};

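/* Prepare the trampoline for one func ptr member: the bpf_prog behind
 * "link" is attached as the lone FENTRY prog.  If the func model has a
 * non-void return, BPF_TRAMP_F_RET_FENTRY_RET makes the trampoline
 * return the prog's return value to the kernel caller.
 */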
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
                                      struct bpf_tramp_link *link,
                                      const struct btf_func_model *model,
                                      void *image, void *image_end)
{
        u32 flags;

        tlinks[BPF_TRAMP_FENTRY].links[0] = link;
        tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
        flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
        return arch_prepare_bpf_trampoline(NULL, image, image_end,
                                           model, flags, tlinks, NULL);
}

static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
                                          void *value, u64 flags)
{
        struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
        const struct bpf_struct_ops *st_ops = st_map->st_ops;
        struct bpf_struct_ops_value *uvalue, *kvalue;
        const struct btf_member *member;
        const struct btf_type *t = st_ops->type;
        struct bpf_tramp_links *tlinks = NULL;
        void *udata, *kdata;
        int prog_fd, err = 0;
        void *image, *image_end;
        u32 i;

        if (flags)
                return -EINVAL;

        if (*(u32 *)key != 0)
                return -E2BIG;

        err = check_zero_holes(st_ops->value_type, value);
        if (err)
                return err;

        uvalue = value;
        err = check_zero_holes(t, uvalue->data);
        if (err)
                return err;

        if (uvalue->state || refcount_read(&uvalue->refcnt))
                return -EINVAL;

        tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
        if (!tlinks)
                return -ENOMEM;

        uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
        kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;

        mutex_lock(&st_map->lock);

        if (kvalue->state != BPF_STRUCT_OPS_STATE_INIT) {
                err = -EBUSY;
                goto unlock;
        }

        memcpy(uvalue, value, map->value_size);

        udata = &uvalue->data;
        kdata = &kvalue->data;
        image = st_map->image;
        image_end = st_map->image + PAGE_SIZE;

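        /* For each member of the kernel struct: fill in the module
         * owner, give the subsystem's init_member() a first pass,
         * require all remaining non-func-ptr members to be zero, and
         * generate a trampoline for every member with a bpf_prog fd.
         */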
        for_each_member(i, t, member) {
                const struct btf_type *mtype, *ptype;
                struct bpf_prog *prog;
                struct bpf_tramp_link *link;
                u32 moff;

                moff = __btf_member_bit_offset(t, member) / 8;
                ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
                if (ptype == module_type) {
                        if (*(void **)(udata + moff))
                                goto reset_unlock;
                        *(void **)(kdata + moff) = BPF_MODULE_OWNER;
                        continue;
                }

                err = st_ops->init_member(t, member, kdata, udata);
                if (err < 0)
                        goto reset_unlock;

                /* The ->init_member() has handled this member */
                if (err > 0)
                        continue;

                /* If st_ops->init_member does not handle it,
                 * we will only handle func ptrs and zeroed members
                 * here.  Reject everything else.
                 */

                /* All non-func-ptr members must be 0 */
                if (!ptype || !btf_type_is_func_proto(ptype)) {
                        u32 msize;

                        mtype = btf_type_by_id(btf_vmlinux, member->type);
                        mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
                        if (IS_ERR(mtype)) {
                                err = PTR_ERR(mtype);
                                goto reset_unlock;
                        }

                        if (memchr_inv(udata + moff, 0, msize)) {
                                err = -EINVAL;
                                goto reset_unlock;
                        }

                        continue;
                }

                prog_fd = (int)(*(unsigned long *)(udata + moff));
                /* Similar check as the attr->attach_prog_fd */
                if (!prog_fd)
                        continue;

                prog = bpf_prog_get(prog_fd);
                if (IS_ERR(prog)) {
                        err = PTR_ERR(prog);
                        goto reset_unlock;
                }

                if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
                    prog->aux->attach_btf_id != st_ops->type_id ||
                    prog->expected_attach_type != i) {
                        bpf_prog_put(prog);
                        err = -EINVAL;
                        goto reset_unlock;
                }

                link = kzalloc(sizeof(*link), GFP_USER);
                if (!link) {
                        bpf_prog_put(prog);
                        err = -ENOMEM;
                        goto reset_unlock;
                }
                bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS,
                              &bpf_struct_ops_link_lops, prog);
                st_map->links[i] = &link->link;

                err = bpf_struct_ops_prepare_trampoline(tlinks, link,
                                                        &st_ops->func_models[i],
                                                        image, image_end);
                if (err < 0)
                        goto reset_unlock;

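                /* On success, err is the size of the trampoline just
                 * written.  Point the kernel's func ptr at it and
                 * advance "image" for the next member's trampoline.
                 */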
                *(void **)(kdata + moff) = image;
                image += err;

                /* store the prog's id in udata */
                *(unsigned long *)(udata + moff) = prog->aux->id;
        }

        refcount_set(&kvalue->refcnt, 1);
        bpf_map_inc(map);

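        /* Seal the trampoline page (read-only and executable) before
         * handing the kernel struct to the subsystem via reg().
         */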
        set_memory_ro((long)st_map->image, 1);
        set_memory_x((long)st_map->image, 1);
        err = st_ops->reg(kdata);
        if (likely(!err)) {
                /* Pair with smp_load_acquire() during lookup_elem().
                 * It ensures the above udata updates (e.g. prog->aux->id)
                 * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
                 */
                smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_INUSE);
                goto unlock;
        }

        /* Error during st_ops->reg().  This is very unlikely since
         * the above init_member() should have caught it earlier
         * before reg().  The only possibility is a race in registering
         * a struct_ops (under the same name) to a subsystem through
         * different struct_ops maps.
         */
        set_memory_nx((long)st_map->image, 1);
        set_memory_rw((long)st_map->image, 1);
        bpf_map_put(map);

reset_unlock:
        bpf_struct_ops_map_put_progs(st_map);
        memset(uvalue, 0, map->value_size);
        memset(kvalue, 0, map->value_size);
unlock:
        kfree(tlinks);
        mutex_unlock(&st_map->lock);
        return err;
}

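/* Deleting the lone element unregisters the kernel struct from its
 * subsystem.  The cmpxchg() lets the INUSE -> TOBEFREE transition
 * happen exactly once, so concurrent deletions cannot unreg() twice.
 */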
static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
{
        enum bpf_struct_ops_state prev_state;
        struct bpf_struct_ops_map *st_map;

        st_map = (struct bpf_struct_ops_map *)map;
        prev_state = cmpxchg(&st_map->kvalue.state,
                             BPF_STRUCT_OPS_STATE_INUSE,
                             BPF_STRUCT_OPS_STATE_TOBEFREE);
        switch (prev_state) {
        case BPF_STRUCT_OPS_STATE_INUSE:
                st_map->st_ops->unreg(&st_map->kvalue.data);
                if (refcount_dec_and_test(&st_map->kvalue.refcnt))
                        bpf_map_put(map);
                return 0;
        case BPF_STRUCT_OPS_STATE_TOBEFREE:
                return -EINPROGRESS;
        case BPF_STRUCT_OPS_STATE_INIT:
                return -ENOENT;
        default:
                WARN_ON_ONCE(1);
                /* Should never happen.  Treat it as not found. */
                return -ENOENT;
        }
}

static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
                                             struct seq_file *m)
{
        void *value;
        int err;

        value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                return;

        err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
        if (!err) {
                btf_type_seq_show(btf_vmlinux, map->btf_vmlinux_value_type_id,
                                  value, m);
                seq_puts(m, "\n");
        }

        kfree(value);
}

static void bpf_struct_ops_map_free(struct bpf_map *map)
{
        struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;

        if (st_map->links)
                bpf_struct_ops_map_put_progs(st_map);
        bpf_map_area_free(st_map->links);
        bpf_jit_free_exec(st_map->image);
        bpf_map_area_free(st_map->uvalue);
        bpf_map_area_free(st_map);
}

static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
{
        if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
            attr->map_flags || !attr->btf_vmlinux_value_type_id)
                return -EINVAL;
        return 0;
}

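/* st_map is allocated with kvalue.data grown in place to hold the full
 * bpf_struct_ops_##_name value.  uvalue (the userspace-visible copy),
 * the links[] array and the trampoline page are allocated separately.
 */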
static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
{
        const struct bpf_struct_ops *st_ops;
        size_t st_map_size;
        struct bpf_struct_ops_map *st_map;
        const struct btf_type *t, *vt;
        struct bpf_map *map;

        if (!bpf_capable())
                return ERR_PTR(-EPERM);

        st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
        if (!st_ops)
                return ERR_PTR(-ENOTSUPP);

        vt = st_ops->value_type;
        if (attr->value_size != vt->size)
                return ERR_PTR(-EINVAL);

        t = st_ops->type;

        st_map_size = sizeof(*st_map) +
                /* kvalue stores the
                 * struct bpf_struct_ops_tcp_congestion_ops
                 */
                (vt->size - sizeof(struct bpf_struct_ops_value));

        st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
        if (!st_map)
                return ERR_PTR(-ENOMEM);

        st_map->st_ops = st_ops;
        map = &st_map->map;

        st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
        st_map->links =
                bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_link *),
                                   NUMA_NO_NODE);
        st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
        if (!st_map->uvalue || !st_map->links || !st_map->image) {
                bpf_struct_ops_map_free(map);
                return ERR_PTR(-ENOMEM);
        }

        mutex_init(&st_map->lock);
        set_vm_flush_reset_perms(st_map->image);
        bpf_map_init_from_attr(map, attr);

        return map;
}

BTF_ID_LIST_SINGLE(bpf_struct_ops_map_btf_ids, struct, bpf_struct_ops_map)
const struct bpf_map_ops bpf_struct_ops_map_ops = {
        .map_alloc_check = bpf_struct_ops_map_alloc_check,
        .map_alloc = bpf_struct_ops_map_alloc,
        .map_free = bpf_struct_ops_map_free,
        .map_get_next_key = bpf_struct_ops_map_get_next_key,
        .map_lookup_elem = bpf_struct_ops_map_lookup_elem,
        .map_delete_elem = bpf_struct_ops_map_delete_elem,
        .map_update_elem = bpf_struct_ops_map_update_elem,
        .map_seq_show_elem = bpf_struct_ops_map_seq_show_elem,
        .map_btf_id = &bpf_struct_ops_map_btf_ids[0],
};
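
/* A rough sketch of the expected userspace flow (libbpf normally does
 * this under the hood; the tcp_congestion_ops names are illustrative):
 *
 *      opts.btf_vmlinux_value_type_id =
 *              <btf id of struct bpf_struct_ops_tcp_congestion_ops>;
 *      fd = bpf_map_create(BPF_MAP_TYPE_STRUCT_OPS, "dctcp",
 *                          4, value_size, 1, &opts);
 *      bpf_map_update_elem(fd, &zero, &value, 0);   reg()s the ops
 *      bpf_map_delete_elem(fd, &zero);              unreg()s them
 */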

/* "const void *" because some subsystems pass in a const pointer
 * (e.g. const struct tcp_congestion_ops *).
 */
bool bpf_struct_ops_get(const void *kdata)
{
        struct bpf_struct_ops_value *kvalue;

        kvalue = container_of(kdata, struct bpf_struct_ops_value, data);

        return refcount_inc_not_zero(&kvalue->refcnt);
}

static void bpf_struct_ops_put_rcu(struct rcu_head *head)
{
        struct bpf_struct_ops_map *st_map;

        st_map = container_of(head, struct bpf_struct_ops_map, rcu);
        bpf_map_put(&st_map->map);
}

void bpf_struct_ops_put(const void *kdata)
{
        struct bpf_struct_ops_value *kvalue;

        kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
        if (refcount_dec_and_test(&kvalue->refcnt)) {
                struct bpf_struct_ops_map *st_map;

                st_map = container_of(kvalue, struct bpf_struct_ops_map,
                                      kvalue);
                /* A struct_ops function may switch to another struct_ops.
                 *
                 * For example, bpf_tcp_cc_x->init() may switch to
                 * another tcp_cc_y by calling
                 * setsockopt(TCP_CONGESTION, "tcp_cc_y").
                 * During the switch, bpf_struct_ops_put(tcp_cc_x) is
                 * called and its map->refcnt may reach 0, which would
                 * then free the trampoline image while tcp_cc_x is
                 * still running.
                 *
                 * Thus, an RCU grace period is needed here.
                 */
                call_rcu(&st_map->rcu, bpf_struct_ops_put_rcu);
        }
}