linux/kernel/bpf/inode.c
<<
>>
Prefs
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Minimal file system backend for holding eBPF maps and programs,
 * used by bpf(2) object pinning.
 *
 * Authors:
 *
 *      Daniel Borkmann <daniel@iogearbox.net>
 */
  10
  11#include <linux/init.h>
  12#include <linux/magic.h>
  13#include <linux/major.h>
  14#include <linux/mount.h>
  15#include <linux/namei.h>
  16#include <linux/fs.h>
  17#include <linux/kdev_t.h>
  18#include <linux/parser.h>
  19#include <linux/filter.h>
  20#include <linux/bpf.h>
  21#include <linux/bpf_trace.h>
  22
  /* Kind of BPF object an inode of this filesystem refers to. */
  enum bpf_type {
          BPF_TYPE_UNSPEC = 0,    /* neither a program nor a map */
          BPF_TYPE_PROG   = 1,    /* i_private is used as a struct bpf_prog */
          BPF_TYPE_MAP    = 2,    /* i_private is used as a struct bpf_map */
  };
  28
  29static void *bpf_any_get(void *raw, enum bpf_type type)
  30{
  31        switch (type) {
  32        case BPF_TYPE_PROG:
  33                raw = bpf_prog_inc(raw);
  34                break;
  35        case BPF_TYPE_MAP:
  36                raw = bpf_map_inc(raw, true);
  37                break;
  38        default:
  39                WARN_ON_ONCE(1);
  40                break;
  41        }
  42
  43        return raw;
  44}
  45
  46static void bpf_any_put(void *raw, enum bpf_type type)
  47{
  48        switch (type) {
  49        case BPF_TYPE_PROG:
  50                bpf_prog_put(raw);
  51                break;
  52        case BPF_TYPE_MAP:
  53                bpf_map_put_with_uref(raw);
  54                break;
  55        default:
  56                WARN_ON_ONCE(1);
  57                break;
  58        }
  59}
  60
  61static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
  62{
  63        void *raw;
  64
  65        *type = BPF_TYPE_MAP;
  66        raw = bpf_map_get_with_uref(ufd);
  67        if (IS_ERR(raw)) {
  68                *type = BPF_TYPE_PROG;
  69                raw = bpf_prog_get(ufd);
  70        }
  71
  72        return raw;
  73}
  74
  75static const struct inode_operations bpf_dir_iops;
  76
  77static const struct inode_operations bpf_prog_iops = { };
  78static const struct inode_operations bpf_map_iops  = { };
  79
  80static struct inode *bpf_get_inode(struct super_block *sb,
  81                                   const struct inode *dir,
  82                                   umode_t mode)
  83{
  84        struct inode *inode;
  85
  86        switch (mode & S_IFMT) {
  87        case S_IFDIR:
  88        case S_IFREG:
  89        case S_IFLNK:
  90                break;
  91        default:
  92                return ERR_PTR(-EINVAL);
  93        }
  94
  95        inode = new_inode(sb);
  96        if (!inode)
  97                return ERR_PTR(-ENOSPC);
  98
  99        inode->i_ino = get_next_ino();
 100        inode->i_atime = current_time(inode);
 101        inode->i_mtime = inode->i_atime;
 102        inode->i_ctime = inode->i_atime;
 103
 104        inode_init_owner(inode, dir, mode);
 105
 106        return inode;
 107}
 108
 109static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
 110{
 111        *type = BPF_TYPE_UNSPEC;
 112        if (inode->i_op == &bpf_prog_iops)
 113                *type = BPF_TYPE_PROG;
 114        else if (inode->i_op == &bpf_map_iops)
 115                *type = BPF_TYPE_MAP;
 116        else
 117                return -EACCES;
 118
 119        return 0;
 120}
 121
 122static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode,
 123                                struct inode *dir)
 124{
 125        d_instantiate(dentry, inode);
 126        dget(dentry);
 127
 128        dir->i_mtime = current_time(dir);
 129        dir->i_ctime = dir->i_mtime;
 130}
 131
 132static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 133{
 134        struct inode *inode;
 135
 136        inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR);
 137        if (IS_ERR(inode))
 138                return PTR_ERR(inode);
 139
 140        inode->i_op = &bpf_dir_iops;
 141        inode->i_fop = &simple_dir_operations;
 142
 143        inc_nlink(inode);
 144        inc_nlink(dir);
 145
 146        bpf_dentry_finalize(dentry, inode, dir);
 147        return 0;
 148}
 149
 150struct map_iter {
 151        void *key;
 152        bool done;
 153};
 154
 155static struct map_iter *map_iter(struct seq_file *m)
 156{
 157        return m->private;
 158}
 159
 160static struct bpf_map *seq_file_to_map(struct seq_file *m)
 161{
 162        return file_inode(m->file)->i_private;
 163}
 164
 165static void map_iter_free(struct map_iter *iter)
 166{
 167        if (iter) {
 168                kfree(iter->key);
 169                kfree(iter);
 170        }
 171}
 172
 173static struct map_iter *map_iter_alloc(struct bpf_map *map)
 174{
 175        struct map_iter *iter;
 176
 177        iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN);
 178        if (!iter)
 179                goto error;
 180
 181        iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN);
 182        if (!iter->key)
 183                goto error;
 184
 185        return iter;
 186
 187error:
 188        map_iter_free(iter);
 189        return NULL;
 190}
 191
 192static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
 193{
 194        struct bpf_map *map = seq_file_to_map(m);
 195        void *key = map_iter(m)->key;
 196        void *prev_key;
 197
 198        if (map_iter(m)->done)
 199                return NULL;
 200
 201        if (unlikely(v == SEQ_START_TOKEN))
 202                prev_key = NULL;
 203        else
 204                prev_key = key;
 205
 206        if (map->ops->map_get_next_key(map, prev_key, key)) {
 207                map_iter(m)->done = true;
 208                return NULL;
 209        }
 210
 211        ++(*pos);
 212        return key;
 213}
 214
 215static void *map_seq_start(struct seq_file *m, loff_t *pos)
 216{
 217        if (map_iter(m)->done)
 218                return NULL;
 219
 220        return *pos ? map_iter(m)->key : SEQ_START_TOKEN;
 221}
 222
 /* seq_file ->stop() handler; intentionally empty. */
 static void map_seq_stop(struct seq_file *m, void *v)
 {
         /* map_seq_start() acquires nothing, so there is nothing to undo */
 }
 226
 227static int map_seq_show(struct seq_file *m, void *v)
 228{
 229        struct bpf_map *map = seq_file_to_map(m);
 230        void *key = map_iter(m)->key;
 231
 232        if (unlikely(v == SEQ_START_TOKEN)) {
 233                seq_puts(m, "# WARNING!! The output is for debug purpose only\n");
 234                seq_puts(m, "# WARNING!! The output format will change\n");
 235        } else {
 236                map->ops->map_seq_show_elem(map, key, m);
 237        }
 238
 239        return 0;
 240}
 241
 242static const struct seq_operations bpffs_map_seq_ops = {
 243        .start  = map_seq_start,
 244        .next   = map_seq_next,
 245        .show   = map_seq_show,
 246        .stop   = map_seq_stop,
 247};
 248
 249static int bpffs_map_open(struct inode *inode, struct file *file)
 250{
 251        struct bpf_map *map = inode->i_private;
 252        struct map_iter *iter;
 253        struct seq_file *m;
 254        int err;
 255
 256        iter = map_iter_alloc(map);
 257        if (!iter)
 258                return -ENOMEM;
 259
 260        err = seq_open(file, &bpffs_map_seq_ops);
 261        if (err) {
 262                map_iter_free(iter);
 263                return err;
 264        }
 265
 266        m = file->private_data;
 267        m->private = iter;
 268
 269        return 0;
 270}
 271
 272static int bpffs_map_release(struct inode *inode, struct file *file)
 273{
 274        struct seq_file *m = file->private_data;
 275
 276        map_iter_free(map_iter(m));
 277
 278        return seq_release(inode, file);
 279}
 280
 281/* bpffs_map_fops should only implement the basic
 282 * read operation for a BPF map.  The purpose is to
 283 * provide a simple user intuitive way to do
 284 * "cat bpffs/pathto/a-pinned-map".
 285 *
 286 * Other operations (e.g. write, lookup...) should be realized by
 287 * the userspace tools (e.g. bpftool) through the
 288 * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update
 289 * interface.
 290 */
 291static const struct file_operations bpffs_map_fops = {
 292        .open           = bpffs_map_open,
 293        .read           = seq_read,
 294        .release        = bpffs_map_release,
 295};
 296
 297static int bpffs_obj_open(struct inode *inode, struct file *file)
 298{
 299        return -EIO;
 300}
 301
 302static const struct file_operations bpffs_obj_fops = {
 303        .open           = bpffs_obj_open,
 304};
 305
 306static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
 307                         const struct inode_operations *iops,
 308                         const struct file_operations *fops)
 309{
 310        struct inode *dir = dentry->d_parent->d_inode;
 311        struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode);
 312        if (IS_ERR(inode))
 313                return PTR_ERR(inode);
 314
 315        inode->i_op = iops;
 316        inode->i_fop = fops;
 317        inode->i_private = raw;
 318
 319        bpf_dentry_finalize(dentry, inode, dir);
 320        return 0;
 321}
 322
 323static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg)
 324{
 325        return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops,
 326                             &bpffs_obj_fops);
 327}
 328
 329static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
 330{
 331        struct bpf_map *map = arg;
 332
 333        return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops,
 334                             bpf_map_support_seq_show(map) ?
 335                             &bpffs_map_fops : &bpffs_obj_fops);
 336}
 337
 338static struct dentry *
 339bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
 340{
 341        /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
 342         * extensions.
 343         */
 344        if (strchr(dentry->d_name.name, '.'))
 345                return ERR_PTR(-EPERM);
 346
 347        return simple_lookup(dir, dentry, flags);
 348}
 349
 350static int bpf_symlink(struct inode *dir, struct dentry *dentry,
 351                       const char *target)
 352{
 353        char *link = kstrdup(target, GFP_USER | __GFP_NOWARN);
 354        struct inode *inode;
 355
 356        if (!link)
 357                return -ENOMEM;
 358
 359        inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK);
 360        if (IS_ERR(inode)) {
 361                kfree(link);
 362                return PTR_ERR(inode);
 363        }
 364
 365        inode->i_op = &simple_symlink_inode_operations;
 366        inode->i_link = link;
 367
 368        bpf_dentry_finalize(dentry, inode, dir);
 369        return 0;
 370}
 371
 372static const struct inode_operations bpf_dir_iops = {
 373        .lookup         = bpf_lookup,
 374        .mkdir          = bpf_mkdir,
 375        .symlink        = bpf_symlink,
 376        .rmdir          = simple_rmdir,
 377        .rename         = simple_rename,
 378        .link           = simple_link,
 379        .unlink         = simple_unlink,
 380};
 381
 382static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
 383                          enum bpf_type type)
 384{
 385        struct dentry *dentry;
 386        struct inode *dir;
 387        struct path path;
 388        umode_t mode;
 389        int ret;
 390
 391        dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0);
 392        if (IS_ERR(dentry))
 393                return PTR_ERR(dentry);
 394
 395        mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
 396
 397        ret = security_path_mknod(&path, dentry, mode, 0);
 398        if (ret)
 399                goto out;
 400
 401        dir = d_inode(path.dentry);
 402        if (dir->i_op != &bpf_dir_iops) {
 403                ret = -EPERM;
 404                goto out;
 405        }
 406
 407        switch (type) {
 408        case BPF_TYPE_PROG:
 409                ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw);
 410                break;
 411        case BPF_TYPE_MAP:
 412                ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw);
 413                break;
 414        default:
 415                ret = -EPERM;
 416        }
 417out:
 418        done_path_create(&path, dentry);
 419        return ret;
 420}
 421
 422int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
 423{
 424        struct filename *pname;
 425        enum bpf_type type;
 426        void *raw;
 427        int ret;
 428
 429        pname = getname(pathname);
 430        if (IS_ERR(pname))
 431                return PTR_ERR(pname);
 432
 433        raw = bpf_fd_probe_obj(ufd, &type);
 434        if (IS_ERR(raw)) {
 435                ret = PTR_ERR(raw);
 436                goto out;
 437        }
 438
 439        ret = bpf_obj_do_pin(pname, raw, type);
 440        if (ret != 0)
 441                bpf_any_put(raw, type);
 442out:
 443        putname(pname);
 444        return ret;
 445}
 446
 447static void *bpf_obj_do_get(const struct filename *pathname,
 448                            enum bpf_type *type, int flags)
 449{
 450        struct inode *inode;
 451        struct path path;
 452        void *raw;
 453        int ret;
 454
 455        ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path);
 456        if (ret)
 457                return ERR_PTR(ret);
 458
 459        inode = d_backing_inode(path.dentry);
 460        ret = inode_permission(inode, ACC_MODE(flags));
 461        if (ret)
 462                goto out;
 463
 464        ret = bpf_inode_type(inode, type);
 465        if (ret)
 466                goto out;
 467
 468        raw = bpf_any_get(inode->i_private, *type);
 469        if (!IS_ERR(raw))
 470                touch_atime(&path);
 471
 472        path_put(&path);
 473        return raw;
 474out:
 475        path_put(&path);
 476        return ERR_PTR(ret);
 477}
 478
 479int bpf_obj_get_user(const char __user *pathname, int flags)
 480{
 481        enum bpf_type type = BPF_TYPE_UNSPEC;
 482        struct filename *pname;
 483        int ret = -ENOENT;
 484        int f_flags;
 485        void *raw;
 486
 487        f_flags = bpf_get_file_flag(flags);
 488        if (f_flags < 0)
 489                return f_flags;
 490
 491        pname = getname(pathname);
 492        if (IS_ERR(pname))
 493                return PTR_ERR(pname);
 494
 495        raw = bpf_obj_do_get(pname, &type, f_flags);
 496        if (IS_ERR(raw)) {
 497                ret = PTR_ERR(raw);
 498                goto out;
 499        }
 500
 501        if (type == BPF_TYPE_PROG)
 502                ret = bpf_prog_new_fd(raw);
 503        else if (type == BPF_TYPE_MAP)
 504                ret = bpf_map_new_fd(raw, f_flags);
 505        else
 506                goto out;
 507
 508        if (ret < 0)
 509                bpf_any_put(raw, type);
 510out:
 511        putname(pname);
 512        return ret;
 513}
 514
 515static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
 516{
 517        struct bpf_prog *prog;
 518        int ret = inode_permission(inode, MAY_READ);
 519        if (ret)
 520                return ERR_PTR(ret);
 521
 522        if (inode->i_op == &bpf_map_iops)
 523                return ERR_PTR(-EINVAL);
 524        if (inode->i_op != &bpf_prog_iops)
 525                return ERR_PTR(-EACCES);
 526
 527        prog = inode->i_private;
 528
 529        ret = security_bpf_prog(prog);
 530        if (ret < 0)
 531                return ERR_PTR(ret);
 532
 533        if (!bpf_prog_get_ok(prog, &type, false))
 534                return ERR_PTR(-EINVAL);
 535
 536        return bpf_prog_inc(prog);
 537}
 538
 539struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
 540{
 541        struct bpf_prog *prog;
 542        struct path path;
 543        int ret = kern_path(name, LOOKUP_FOLLOW, &path);
 544        if (ret)
 545                return ERR_PTR(ret);
 546        prog = __get_prog_inode(d_backing_inode(path.dentry), type);
 547        if (!IS_ERR(prog))
 548                touch_atime(&path);
 549        path_put(&path);
 550        return prog;
 551}
 552EXPORT_SYMBOL(bpf_prog_get_type_path);
 553
 554/*
 555 * Display the mount options in /proc/mounts.
 556 */
 557static int bpf_show_options(struct seq_file *m, struct dentry *root)
 558{
 559        umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX;
 560
 561        if (mode != S_IRWXUGO)
 562                seq_printf(m, ",mode=%o", mode);
 563        return 0;
 564}
 565
 566static void bpf_free_inode(struct inode *inode)
 567{
 568        enum bpf_type type;
 569
 570        if (S_ISLNK(inode->i_mode))
 571                kfree(inode->i_link);
 572        if (!bpf_inode_type(inode, &type))
 573                bpf_any_put(inode->i_private, type);
 574        free_inode_nonrcu(inode);
 575}
 576
 577static const struct super_operations bpf_super_ops = {
 578        .statfs         = simple_statfs,
 579        .drop_inode     = generic_delete_inode,
 580        .show_options   = bpf_show_options,
 581        .free_inode     = bpf_free_inode,
 582};
 583
 /* Token ids used in bpf_mount_tokens. */
 enum {
         OPT_MODE = 0,   /* "mode=<octal>" */
         OPT_ERR  = 1,   /* id of the terminating table entry */
 };
 588
 589static const match_table_t bpf_mount_tokens = {
 590        { OPT_MODE, "mode=%o" },
 591        { OPT_ERR, NULL },
 592};
 593
 594struct bpf_mount_opts {
 595        umode_t mode;
 596};
 597
 598static int bpf_parse_options(char *data, struct bpf_mount_opts *opts)
 599{
 600        substring_t args[MAX_OPT_ARGS];
 601        int option, token;
 602        char *ptr;
 603
 604        opts->mode = S_IRWXUGO;
 605
 606        while ((ptr = strsep(&data, ",")) != NULL) {
 607                if (!*ptr)
 608                        continue;
 609
 610                token = match_token(ptr, bpf_mount_tokens, args);
 611                switch (token) {
 612                case OPT_MODE:
 613                        if (match_octal(&args[0], &option))
 614                                return -EINVAL;
 615                        opts->mode = option & S_IALLUGO;
 616                        break;
 617                /* We might like to report bad mount options here, but
 618                 * traditionally we've ignored all mount options, so we'd
 619                 * better continue to ignore non-existing options for bpf.
 620                 */
 621                }
 622        }
 623
 624        return 0;
 625}
 626
 627static int bpf_fill_super(struct super_block *sb, void *data, int silent)
 628{
 629        static const struct tree_descr bpf_rfiles[] = { { "" } };
 630        struct bpf_mount_opts opts;
 631        struct inode *inode;
 632        int ret;
 633
 634        ret = bpf_parse_options(data, &opts);
 635        if (ret)
 636                return ret;
 637
 638        ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
 639        if (ret)
 640                return ret;
 641
 642        sb->s_op = &bpf_super_ops;
 643
 644        inode = sb->s_root->d_inode;
 645        inode->i_op = &bpf_dir_iops;
 646        inode->i_mode &= ~S_IALLUGO;
 647        inode->i_mode |= S_ISVTX | opts.mode;
 648
 649        return 0;
 650}
 651
 652static struct dentry *bpf_mount(struct file_system_type *type, int flags,
 653                                const char *dev_name, void *data)
 654{
 655        return mount_nodev(type, flags, data, bpf_fill_super);
 656}
 657
 658static struct file_system_type bpf_fs_type = {
 659        .owner          = THIS_MODULE,
 660        .name           = "bpf",
 661        .mount          = bpf_mount,
 662        .kill_sb        = kill_litter_super,
 663};
 664
 665static int __init bpf_init(void)
 666{
 667        int ret;
 668
 669        ret = sysfs_create_mount_point(fs_kobj, "bpf");
 670        if (ret)
 671                return ret;
 672
 673        ret = register_filesystem(&bpf_fs_type);
 674        if (ret)
 675                sysfs_remove_mount_point(fs_kobj, "bpf");
 676
 677        return ret;
 678}
 679fs_initcall(bpf_init);
 680