linux/kernel/bpf/trampoline.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>
#include <linux/perf_event.h>
#include <linux/btf.h>

/* dummy _ops. The verifier will operate on the target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};
const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. A 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);

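/* Allocate one writable+executable page for a trampoline image.
 * set_vm_flush_reset_perms() ensures the page's permissions (and its
 * direct map alias) are reset and TLBs flushed when the page is freed.
 */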
void *bpf_jit_alloc_exec_page(void)
{
        void *image;

        image = bpf_jit_alloc_exec(PAGE_SIZE);
        if (!image)
                return NULL;

        set_vm_flush_reset_perms(image);
        /* Keep image as writable. The alternative is to keep flipping ro/rw
         * every time a new program is attached or detached.
         */
        set_memory_x((long)image, 1);
        return image;
}

void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
{
        ksym->start = (unsigned long) data;
        ksym->end = ksym->start + PAGE_SIZE;
        bpf_ksym_add(ksym);
        perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
                           PAGE_SIZE, false, ksym->name);
}

void bpf_image_ksym_del(struct bpf_ksym *ksym)
{
        bpf_ksym_del(ksym);
        perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
                           PAGE_SIZE, true, ksym->name);
}

static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
{
        struct bpf_ksym *ksym = &tr->ksym;

        snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key);
        bpf_image_ksym_add(tr->image, ksym);
}

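/* Look up the trampoline for @key (derived from the attach target) or
 * allocate a new one.  The trampoline is returned with an extra
 * reference held; callers release it with bpf_trampoline_put().
 */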
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
        struct bpf_trampoline *tr;
        struct hlist_head *head;
        void *image;
        int i;

        mutex_lock(&trampoline_mutex);
        head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
        hlist_for_each_entry(tr, head, hlist) {
                if (tr->key == key) {
                        refcount_inc(&tr->refcnt);
                        goto out;
                }
        }
        tr = kzalloc(sizeof(*tr), GFP_KERNEL);
        if (!tr)
                goto out;

        /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
        image = bpf_jit_alloc_exec_page();
        if (!image) {
                kfree(tr);
                tr = NULL;
                goto out;
        }

        tr->key = key;
        INIT_HLIST_NODE(&tr->hlist);
        hlist_add_head(&tr->hlist, head);
        refcount_set(&tr->refcnt, 1);
        mutex_init(&tr->mutex);
        for (i = 0; i < BPF_TRAMP_MAX; i++)
                INIT_HLIST_HEAD(&tr->progs_hlist[i]);
        tr->image = image;
        INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
        bpf_trampoline_ksym_add(tr);
out:
        mutex_unlock(&trampoline_mutex);
        return tr;
}

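/* Returns 1 if @ip is an ftrace patch site (the update can then go
 * through the ftrace direct-call API), 0 if it is not managed by
 * ftrace, or -EFAULT if the patch site exists but is not exactly at
 * @ip.
 */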
static int is_ftrace_location(void *ip)
{
        long addr;

        addr = ftrace_location((long)ip);
        if (!addr)
                return 0;
        if (WARN_ON_ONCE(addr != (long)ip))
                return -EFAULT;
        return 1;
}

static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
        void *ip = tr->func.addr;
        int ret;

        if (tr->func.ftrace_managed)
                ret = unregister_ftrace_direct((long)ip, (long)old_addr);
        else
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
        return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
{
        void *ip = tr->func.addr;
        int ret;

        if (tr->func.ftrace_managed)
                ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
        else
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
        return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
        void *ip = tr->func.addr;
        int ret;

        ret = is_ftrace_location(ip);
        if (ret < 0)
                return ret;
        tr->func.ftrace_managed = ret;

        if (tr->func.ftrace_managed)
                ret = register_ftrace_direct((long)ip, (long)new_addr);
        else
                ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
        return ret;
}

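/* Snapshot the progs attached to @tr into a kcalloc'ed array with one
 * bpf_tramp_progs slot per BPF_TRAMP_* kind and report the overall
 * count in *total.  The caller owns the array and must kfree() it.
 */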
static struct bpf_tramp_progs *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
{
        const struct bpf_prog_aux *aux;
        struct bpf_tramp_progs *tprogs;
        struct bpf_prog **progs;
        int kind;

        *total = 0;
        tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
        if (!tprogs)
                return ERR_PTR(-ENOMEM);

        for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
                tprogs[kind].nr_progs = tr->progs_cnt[kind];
                *total += tr->progs_cnt[kind];
                progs = tprogs[kind].progs;

                hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
                        *progs++ = aux->prog;
        }
        return tprogs;
}

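/* (Re)generate the trampoline for @tr and point the patched function at
 * it.  The image page is split into two halves: tr->selector picks the
 * half that is currently live, the new trampoline is built in the idle
 * half, and the call site is then atomically switched over so tasks
 * never execute a half-written trampoline.
 */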
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
        void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
        void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
        struct bpf_tramp_progs *tprogs;
        u32 flags = BPF_TRAMP_F_RESTORE_REGS;
        int err, total;

        tprogs = bpf_trampoline_get_progs(tr, &total);
        if (IS_ERR(tprogs))
                return PTR_ERR(tprogs);

        if (total == 0) {
                err = unregister_fentry(tr, old_image);
                tr->selector = 0;
                goto out;
        }

        if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
            tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
                flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;

        /* Though the second half of the trampoline page is unused, a task
         * could be preempted in the middle of the first half of the
         * trampoline, and two updates to the trampoline would change the
         * code from underneath the preempted task. Hence wait for tasks to
         * voluntarily schedule or go to userspace.
         */
        synchronize_rcu_tasks();

        err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
                                          &tr->func.model, flags, tprogs,
                                          tr->func.addr);
        if (err < 0)
                goto out;

        if (tr->selector)
                /* progs already running at this address */
                err = modify_fentry(tr, old_image, new_image);
        else
                /* first time registering */
                err = register_fentry(tr, new_image);
        if (err)
                goto out;
        tr->selector++;
out:
        kfree(tprogs);
        return err;
}

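/* Map a prog's expected_attach_type to the trampoline slot it occupies.
 * Attach types with no dedicated slot (extension progs) fall through to
 * BPF_TRAMP_REPLACE.
 */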
static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
{
        switch (prog->expected_attach_type) {
        case BPF_TRACE_FENTRY:
                return BPF_TRAMP_FENTRY;
        case BPF_MODIFY_RETURN:
                return BPF_TRAMP_MODIFY_RETURN;
        case BPF_TRACE_FEXIT:
                return BPF_TRAMP_FEXIT;
        case BPF_LSM_MAC:
                if (!prog->aux->attach_func_proto->type)
                        /* The function returns void; we cannot modify its
                         * return value.
                         */
                        return BPF_TRAMP_FEXIT;
                else
                        return BPF_TRAMP_MODIFY_RETURN;
        default:
                return BPF_TRAMP_REPLACE;
        }
}

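/* Attach @prog to its trampoline and regenerate the trampoline code.
 * Extension progs are mutually exclusive with fentry/fexit progs on the
 * same target: whichever kind is attached first blocks the other with
 * -EBUSY.
 */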
int bpf_trampoline_link_prog(struct bpf_prog *prog)
{
        enum bpf_tramp_prog_type kind;
        struct bpf_trampoline *tr;
        int err = 0;
        int cnt;

        tr = prog->aux->trampoline;
        kind = bpf_attach_type_to_tramp(prog);
        mutex_lock(&tr->mutex);
        if (tr->extension_prog) {
                /* Cannot attach fentry/fexit if an extension prog is
                 * attached. Cannot overwrite the extension prog either.
                 */
                err = -EBUSY;
                goto out;
        }
        cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT];
        if (kind == BPF_TRAMP_REPLACE) {
                /* Cannot attach extension if fentry/fexit are in use. */
                if (cnt) {
                        err = -EBUSY;
                        goto out;
                }
                tr->extension_prog = prog;
                err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
                                         prog->bpf_func);
                goto out;
        }
        if (cnt >= BPF_MAX_TRAMP_PROGS) {
                err = -E2BIG;
                goto out;
        }
        if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
                /* prog already linked */
                err = -EBUSY;
                goto out;
        }
        hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
        tr->progs_cnt[kind]++;
        err = bpf_trampoline_update(prog->aux->trampoline);
        if (err) {
                hlist_del(&prog->aux->tramp_hlist);
                tr->progs_cnt[kind]--;
        }
out:
        mutex_unlock(&tr->mutex);
        return err;
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
{
        enum bpf_tramp_prog_type kind;
        struct bpf_trampoline *tr;
        int err;

        tr = prog->aux->trampoline;
        kind = bpf_attach_type_to_tramp(prog);
        mutex_lock(&tr->mutex);
        if (kind == BPF_TRAMP_REPLACE) {
                WARN_ON_ONCE(!tr->extension_prog);
                err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
                                         tr->extension_prog->bpf_func, NULL);
                tr->extension_prog = NULL;
                goto out;
        }
        hlist_del(&prog->aux->tramp_hlist);
        tr->progs_cnt[kind]--;
        err = bpf_trampoline_update(prog->aux->trampoline);
out:
        mutex_unlock(&tr->mutex);
        return err;
}

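/* Drop a reference on @tr.  The last put expects no progs to still be
 * attached; the image is freed only after synchronize_rcu_tasks()
 * guarantees no task is still executing in the trampoline.
 */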
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
        if (!tr)
                return;
        mutex_lock(&trampoline_mutex);
        if (!refcount_dec_and_test(&tr->refcnt))
                goto out;
        WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
        if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
                goto out;
        if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
                goto out;
        bpf_image_ksym_del(&tr->ksym);
        /* wait for tasks to get out of trampoline before freeing it */
        synchronize_rcu_tasks();
        bpf_jit_free_exec(tr->image);
        hlist_del(&tr->hlist);
        kfree(tr);
out:
        mutex_unlock(&trampoline_mutex);
}

/* The logic is similar to BPF_PROG_RUN, but with an explicit
 * rcu_read_lock() and migrate_disable() which are required
 * for the trampoline. The macro is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 */
u64 notrace __bpf_prog_enter(void)
        __acquires(RCU)
{
        u64 start = 0;

        rcu_read_lock();
        migrate_disable();
        if (static_branch_unlikely(&bpf_stats_enabled_key))
                start = sched_clock();
        return start;
}

void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
        __releases(RCU)
{
        struct bpf_prog_stats *stats;

        if (static_branch_unlikely(&bpf_stats_enabled_key) &&
            /* static_key could be enabled in __bpf_prog_enter
             * and disabled in __bpf_prog_exit.
             * And vice versa.
             * Hence check that 'start' is not zero.
             */
            start) {
                stats = this_cpu_ptr(prog->aux->stats);
                u64_stats_update_begin(&stats->syncp);
                stats->cnt++;
                stats->nsecs += sched_clock() - start;
                u64_stats_update_end(&stats->syncp);
        }
        migrate_enable();
        rcu_read_unlock();
}

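/* Weak default for architectures without trampoline support.  Arch
 * code (e.g. x86) overrides this to emit the actual trampoline
 * instructions into the [image, image_end) range.
 */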
int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
                            const struct btf_func_model *m, u32 flags,
                            struct bpf_tramp_progs *tprogs,
                            void *orig_call)
{
        return -ENOTSUPP;
}

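/* One-time boot initialization of the trampoline hash table buckets. */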
static int __init init_trampolines(void)
{
        int i;

        for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
                INIT_HLIST_HEAD(&trampoline_table[i]);
        return 0;
}
late_initcall(init_trampolines);