/* linux/kernel/bpf/inode.c */
   1/*
   2 * Minimal file system backend for holding eBPF maps and programs,
   3 * used by bpf(2) object pinning.
   4 *
   5 * Authors:
   6 *
   7 *      Daniel Borkmann <daniel@iogearbox.net>
   8 *
   9 * This program is free software; you can redistribute it and/or
  10 * modify it under the terms of the GNU General Public License
  11 * version 2 as published by the Free Software Foundation.
  12 */
  13
  14#include <linux/init.h>
  15#include <linux/magic.h>
  16#include <linux/major.h>
  17#include <linux/mount.h>
  18#include <linux/namei.h>
  19#include <linux/fs.h>
  20#include <linux/kdev_t.h>
  21#include <linux/parser.h>
  22#include <linux/filter.h>
  23#include <linux/bpf.h>
  24#include <linux/bpf_trace.h>
  25
/* Kind of object pinned in a bpffs inode. There is no explicit type
 * field on the inode; the type is encoded by which i_op table the
 * inode uses (see bpf_inode_type()).
 */
enum bpf_type {
	BPF_TYPE_UNSPEC	= 0,
	BPF_TYPE_PROG,
	BPF_TYPE_MAP,
};
  31
  32static void *bpf_any_get(void *raw, enum bpf_type type)
  33{
  34        switch (type) {
  35        case BPF_TYPE_PROG:
  36                raw = bpf_prog_inc(raw);
  37                break;
  38        case BPF_TYPE_MAP:
  39                raw = bpf_map_inc(raw, true);
  40                break;
  41        default:
  42                WARN_ON_ONCE(1);
  43                break;
  44        }
  45
  46        return raw;
  47}
  48
  49static void bpf_any_put(void *raw, enum bpf_type type)
  50{
  51        switch (type) {
  52        case BPF_TYPE_PROG:
  53                bpf_prog_put(raw);
  54                break;
  55        case BPF_TYPE_MAP:
  56                bpf_map_put_with_uref(raw);
  57                break;
  58        default:
  59                WARN_ON_ONCE(1);
  60                break;
  61        }
  62}
  63
  64static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
  65{
  66        void *raw;
  67
  68        *type = BPF_TYPE_MAP;
  69        raw = bpf_map_get_with_uref(ufd);
  70        if (IS_ERR(raw)) {
  71                *type = BPF_TYPE_PROG;
  72                raw = bpf_prog_get(ufd);
  73        }
  74
  75        return raw;
  76}
  77
/* Forward declaration: the directory i_op table is defined below,
 * after the create/lookup helpers it references.
 */
static const struct inode_operations bpf_dir_iops;

/* Deliberately empty i_op tables: their addresses act as type tags so
 * bpf_inode_type() can distinguish prog inodes from map inodes.
 */
static const struct inode_operations bpf_prog_iops = { };
static const struct inode_operations bpf_map_iops  = { };
  82
/* Allocate a fresh inode on @sb for a bpffs object.
 *
 * Only directory, regular-file and symlink modes are accepted; any
 * other file type yields -EINVAL. Timestamps are set to "now", and
 * owner/group/mode come from inode_init_owner() based on the current
 * credentials and the parent @dir.
 *
 * Note: allocation failure is reported as -ENOSPC, not -ENOMEM.
 */
static struct inode *bpf_get_inode(struct super_block *sb,
				   const struct inode *dir,
				   umode_t mode)
{
	struct inode *inode;

	switch (mode & S_IFMT) {
	case S_IFDIR:
	case S_IFREG:
	case S_IFLNK:
		break;
	default:
		return ERR_PTR(-EINVAL);
	}

	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOSPC);

	inode->i_ino = get_next_ino();
	inode->i_atime = current_time(inode);
	inode->i_mtime = inode->i_atime;
	inode->i_ctime = inode->i_atime;

	inode_init_owner(inode, dir, mode);

	return inode;
}
 111
 112static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
 113{
 114        *type = BPF_TYPE_UNSPEC;
 115        if (inode->i_op == &bpf_prog_iops)
 116                *type = BPF_TYPE_PROG;
 117        else if (inode->i_op == &bpf_map_iops)
 118                *type = BPF_TYPE_MAP;
 119        else
 120                return -EACCES;
 121
 122        return 0;
 123}
 124
/* Bind @inode to @dentry and take an extra reference on the dentry,
 * then bump the parent directory's m/ctime to reflect the creation.
 */
static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode,
				struct inode *dir)
{
	d_instantiate(dentry, inode);
	dget(dentry);

	dir->i_mtime = current_time(dir);
	dir->i_ctime = dir->i_mtime;
}
 134
/* ->mkdir for bpffs: create a subdirectory using the generic libfs
 * directory file operations and our own inode operations.
 */
static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct inode *inode;

	inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	inode->i_op = &bpf_dir_iops;
	inode->i_fop = &simple_dir_operations;

	/* Directory link counts: "." in the new dir, ".." in the parent. */
	inc_nlink(inode);
	inc_nlink(dir);

	bpf_dentry_finalize(dentry, inode, dir);
	return 0;
}
 152
/* Common creation path for prog/map pins: allocate an inode of @mode
 * in @dentry's parent, tag it with @iops (which identifies the object
 * type, see bpf_inode_type()) and stash the object pointer in
 * i_private. The inode owns the reference on @raw from here on; it is
 * dropped in bpf_evict_inode().
 */
static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
			 const struct inode_operations *iops)
{
	struct inode *dir = dentry->d_parent->d_inode;
	struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	inode->i_op = iops;
	inode->i_private = raw;

	bpf_dentry_finalize(dentry, inode, dir);
	return 0;
}
 167
/* vfs_mkobj() callback for pinning a program; @arg is the bpf_prog. */
static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg)
{
	return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops);
}
 172
/* vfs_mkobj() callback for pinning a map; @arg is the bpf_map. */
static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
{
	return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops);
}
 177
/* ->lookup for bpffs directories: same as simple_lookup(), except that
 * names containing a dot are rejected up front.
 */
static struct dentry *
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
{
	/* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
	 * extensions.
	 */
	if (strchr(dentry->d_name.name, '.'))
		return ERR_PTR(-EPERM);

	return simple_lookup(dir, dentry, flags);
}
 189
/* ->symlink for bpffs: store a kstrdup()'d copy of @target in i_link.
 * The copy is freed in bpf_evict_inode(), or right here when inode
 * allocation fails.
 */
static int bpf_symlink(struct inode *dir, struct dentry *dentry,
		       const char *target)
{
	char *link = kstrdup(target, GFP_USER | __GFP_NOWARN);
	struct inode *inode;

	if (!link)
		return -ENOMEM;

	inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK);
	if (IS_ERR(inode)) {
		kfree(link);
		return PTR_ERR(inode);
	}

	inode->i_op = &simple_symlink_inode_operations;
	inode->i_link = link;

	bpf_dentry_finalize(dentry, inode, dir);
	return 0;
}
 211
/* Directory operations. Object pins themselves are created through
 * vfs_mkobj() in bpf_obj_do_pin(), not through this table; removal and
 * renaming use the generic libfs helpers.
 */
static const struct inode_operations bpf_dir_iops = {
	.lookup		= bpf_lookup,
	.mkdir		= bpf_mkdir,
	.symlink	= bpf_symlink,
	.rmdir		= simple_rmdir,
	.rename		= simple_rename,
	.link		= simple_link,
	.unlink		= simple_unlink,
};
 221
/* Create the pinning inode for @raw at @pathname.
 *
 * The final path component must not exist yet (kern_path_create()),
 * and the parent directory must live on bpffs — enforced by comparing
 * the parent's i_op against bpf_dir_iops, so pinning onto a foreign
 * filesystem fails with -EPERM. On success, ownership of the caller's
 * reference on @raw transfers to the new inode (released again in
 * bpf_evict_inode()).
 */
static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
			  enum bpf_type type)
{
	struct dentry *dentry;
	struct inode *dir;
	struct path path;
	umode_t mode;
	int ret;

	dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	/* New pins are 0600, further restricted by the caller's umask. */
	mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());

	ret = security_path_mknod(&path, dentry, mode, 0);
	if (ret)
		goto out;

	dir = d_inode(path.dentry);
	if (dir->i_op != &bpf_dir_iops) {
		ret = -EPERM;
		goto out;
	}

	switch (type) {
	case BPF_TYPE_PROG:
		ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw);
		break;
	case BPF_TYPE_MAP:
		ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw);
		break;
	default:
		ret = -EPERM;
	}
out:
	/* Releases the path and dentry taken by kern_path_create(). */
	done_path_create(&path, dentry);
	return ret;
}
 261
/* bpf(BPF_OBJ_PIN) entry point: resolve @ufd to a map or program
 * (taking a new reference) and pin it at the user-supplied @pathname.
 * On failure the reference is dropped again; on success it is owned by
 * the new inode.
 */
int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
{
	struct filename *pname;
	enum bpf_type type;
	void *raw;
	int ret;

	pname = getname(pathname);
	if (IS_ERR(pname))
		return PTR_ERR(pname);

	raw = bpf_fd_probe_obj(ufd, &type);
	if (IS_ERR(raw)) {
		ret = PTR_ERR(raw);
		goto out;
	}

	ret = bpf_obj_do_pin(pname, raw, type);
	if (ret != 0)
		bpf_any_put(raw, type);
	/* Tracepoints fire only when pinning succeeded (!ret). */
	if ((trace_bpf_obj_pin_prog_enabled() ||
	     trace_bpf_obj_pin_map_enabled()) && !ret) {
		if (type == BPF_TYPE_PROG)
			trace_bpf_obj_pin_prog(raw, ufd, pname);
		if (type == BPF_TYPE_MAP)
			trace_bpf_obj_pin_map(raw, ufd, pname);
	}
out:
	putname(pname);
	return ret;
}
 293
/* Resolve @pathname to a pinned object.
 *
 * Checks inode permissions derived from the requested open @flags,
 * determines whether the inode holds a program or a map (stored in
 * *@type), and returns a new reference on the object (including a
 * uref for maps). Returns an ERR_PTR on any failure.
 */
static void *bpf_obj_do_get(const struct filename *pathname,
			    enum bpf_type *type, int flags)
{
	struct inode *inode;
	struct path path;
	void *raw;
	int ret;

	ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path);
	if (ret)
		return ERR_PTR(ret);

	inode = d_backing_inode(path.dentry);
	ret = inode_permission(inode, ACC_MODE(flags));
	if (ret)
		goto out;

	/* Fails with -EACCES for anything but prog/map inodes. */
	ret = bpf_inode_type(inode, type);
	if (ret)
		goto out;

	raw = bpf_any_get(inode->i_private, *type);
	if (!IS_ERR(raw))
		touch_atime(&path);

	path_put(&path);
	return raw;
out:
	path_put(&path);
	return ERR_PTR(ret);
}
 325
/* bpf(BPF_OBJ_GET) entry point: open the object pinned at @pathname
 * and install a new fd for it. @flags is translated into file access
 * flags by bpf_get_file_flag() and checked against inode permissions
 * in bpf_obj_do_get().
 */
int bpf_obj_get_user(const char __user *pathname, int flags)
{
	enum bpf_type type = BPF_TYPE_UNSPEC;
	struct filename *pname;
	int ret = -ENOENT;
	int f_flags;
	void *raw;

	f_flags = bpf_get_file_flag(flags);
	if (f_flags < 0)
		return f_flags;

	pname = getname(pathname);
	if (IS_ERR(pname))
		return PTR_ERR(pname);

	raw = bpf_obj_do_get(pname, &type, f_flags);
	if (IS_ERR(raw)) {
		ret = PTR_ERR(raw);
		goto out;
	}

	if (type == BPF_TYPE_PROG)
		ret = bpf_prog_new_fd(raw);
	else if (type == BPF_TYPE_MAP)
		ret = bpf_map_new_fd(raw, f_flags);
	else
		/* Should be unreachable: bpf_obj_do_get() only succeeds for
		 * prog/map inodes. NOTE(review): were it ever reached, the
		 * reference on raw would leak (ret stays -ENOENT).
		 */
		goto out;

	if (ret < 0) {
		/* Installing the fd failed; drop the reference again. */
		bpf_any_put(raw, type);
	} else if (trace_bpf_obj_get_prog_enabled() ||
		   trace_bpf_obj_get_map_enabled()) {
		if (type == BPF_TYPE_PROG)
			trace_bpf_obj_get_prog(raw, ret, pname);
		if (type == BPF_TYPE_MAP)
			trace_bpf_obj_get_map(raw, ret, pname);
	}
out:
	putname(pname);
	return ret;
}
 368
/* Fetch the bpf_prog pinned at @inode for an in-kernel consumer.
 *
 * Requires both read and write permission on the inode, rejects map
 * inodes with -EINVAL and inodes that are not bpffs objects with
 * -EACCES, runs the security_bpf_prog() LSM hook, and validates the
 * expected program @type via bpf_prog_get_ok(). Returns a new prog
 * reference on success, an ERR_PTR otherwise.
 */
static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
{
	struct bpf_prog *prog;
	int ret = inode_permission(inode, MAY_READ | MAY_WRITE);
	if (ret)
		return ERR_PTR(ret);

	if (inode->i_op == &bpf_map_iops)
		return ERR_PTR(-EINVAL);
	if (inode->i_op != &bpf_prog_iops)
		return ERR_PTR(-EACCES);

	prog = inode->i_private;

	ret = security_bpf_prog(prog);
	if (ret < 0)
		return ERR_PTR(ret);

	if (!bpf_prog_get_ok(prog, &type, false))
		return ERR_PTR(-EINVAL);

	return bpf_prog_inc(prog);
}
 392
/* In-kernel counterpart of bpf_obj_get_user(): look up the program
 * pinned at path @name, verify it matches @type and return a new
 * reference on it, or an ERR_PTR on failure.
 */
struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
{
	struct bpf_prog *prog;
	struct path path;
	int ret = kern_path(name, LOOKUP_FOLLOW, &path);
	if (ret)
		return ERR_PTR(ret);
	prog = __get_prog_inode(d_backing_inode(path.dentry), type);
	if (!IS_ERR(prog))
		touch_atime(&path);
	path_put(&path);
	return prog;
}
EXPORT_SYMBOL(bpf_prog_get_type_path);
 407
/* ->evict_inode: release whatever the inode owns — the kstrdup()'d
 * target for symlinks, or the prog/map reference for pinned objects.
 */
static void bpf_evict_inode(struct inode *inode)
{
	enum bpf_type type;

	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);

	if (S_ISLNK(inode->i_mode))
		kfree(inode->i_link);
	/* Directories/symlinks fail bpf_inode_type(); nothing to drop. */
	if (!bpf_inode_type(inode, &type))
		bpf_any_put(inode->i_private, type);
}
 420
/*
 * Display the mount options in /proc/mounts.
 */
static int bpf_show_options(struct seq_file *m, struct dentry *root)
{
	/* Mask out the sticky bit: it is unconditionally set by
	 * bpf_fill_super() and is not controlled by the mode= option.
	 */
	umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX;

	/* Only print mode= when it differs from the default (0777). */
	if (mode != S_IRWXUGO)
		seq_printf(m, ",mode=%o", mode);
	return 0;
}
 432
/* Superblock operations; generic_delete_inode means inodes are not
 * cached but evicted on the final iput().
 */
static const struct super_operations bpf_super_ops = {
	.statfs		= simple_statfs,
	.drop_inode	= generic_delete_inode,
	.show_options	= bpf_show_options,
	.evict_inode	= bpf_evict_inode,
};
 439
/* Mount option tokens: only "mode=%o" is recognized. */
enum {
	OPT_MODE,
	OPT_ERR,
};

static const match_table_t bpf_mount_tokens = {
	{ OPT_MODE, "mode=%o" },
	{ OPT_ERR, NULL },
};

/* Parsed mount options: permission bits for the root directory. */
struct bpf_mount_opts {
	umode_t mode;
};
 453
 454static int bpf_parse_options(char *data, struct bpf_mount_opts *opts)
 455{
 456        substring_t args[MAX_OPT_ARGS];
 457        int option, token;
 458        char *ptr;
 459
 460        opts->mode = S_IRWXUGO;
 461
 462        while ((ptr = strsep(&data, ",")) != NULL) {
 463                if (!*ptr)
 464                        continue;
 465
 466                token = match_token(ptr, bpf_mount_tokens, args);
 467                switch (token) {
 468                case OPT_MODE:
 469                        if (match_octal(&args[0], &option))
 470                                return -EINVAL;
 471                        opts->mode = option & S_IALLUGO;
 472                        break;
 473                /* We might like to report bad mount options here, but
 474                 * traditionally we've ignored all mount options, so we'd
 475                 * better continue to ignore non-existing options for bpf.
 476                 */
 477                }
 478        }
 479
 480        return 0;
 481}
 482
/* Fill a freshly-allocated bpffs superblock: libfs skeleton via
 * simple_fill_super(), then our super and directory operations. The
 * root directory gets the sticky bit plus the parsed mode= permission
 * bits (default 0777).
 */
static int bpf_fill_super(struct super_block *sb, void *data, int silent)
{
	/* No prepopulated files; only the root directory is created. */
	static const struct tree_descr bpf_rfiles[] = { { "" } };
	struct bpf_mount_opts opts;
	struct inode *inode;
	int ret;

	ret = bpf_parse_options(data, &opts);
	if (ret)
		return ret;

	ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
	if (ret)
		return ret;

	sb->s_op = &bpf_super_ops;

	inode = sb->s_root->d_inode;
	inode->i_op = &bpf_dir_iops;
	inode->i_mode &= ~S_IALLUGO;
	inode->i_mode |= S_ISVTX | opts.mode;

	return 0;
}
 507
/* ->mount: bpffs is not backed by a block device. */
static struct dentry *bpf_mount(struct file_system_type *type, int flags,
				const char *dev_name, void *data)
{
	return mount_nodev(type, flags, data, bpf_fill_super);
}
 513
/* The "bpf" filesystem type. kill_litter_super() releases the extra
 * dentry references taken in bpf_dentry_finalize() on unmount.
 */
static struct file_system_type bpf_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "bpf",
	.mount		= bpf_mount,
	.kill_sb	= kill_litter_super,
};
 520
/* Create the /sys/fs/bpf mount point and register the "bpf"
 * filesystem; the mount point is removed again if registration fails.
 */
static int __init bpf_init(void)
{
	int ret;

	ret = sysfs_create_mount_point(fs_kobj, "bpf");
	if (ret)
		return ret;

	ret = register_filesystem(&bpf_fs_type);
	if (ret)
		sysfs_remove_mount_point(fs_kobj, "bpf");

	return ret;
}
fs_initcall(bpf_init);
 536