linux/fs/fuse/inode.c
<<
>>
Prefs
   1/*
   2  FUSE: Filesystem in Userspace
   3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
   4
   5  This program can be distributed under the terms of the GNU GPL.
   6  See the file COPYING.
   7*/
   8
   9#include "fuse_i.h"
  10
  11#include <linux/pagemap.h>
  12#include <linux/slab.h>
  13#include <linux/file.h>
  14#include <linux/seq_file.h>
  15#include <linux/init.h>
  16#include <linux/module.h>
  17#include <linux/moduleparam.h>
  18#include <linux/fs_context.h>
  19#include <linux/fs_parser.h>
  20#include <linux/statfs.h>
  21#include <linux/random.h>
  22#include <linux/sched.h>
  23#include <linux/exportfs.h>
  24#include <linux/posix_acl.h>
  25#include <linux/pid_namespace.h>
  26
  27MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
  28MODULE_DESCRIPTION("Filesystem in Userspace");
  29MODULE_LICENSE("GPL");
  30
  31static struct kmem_cache *fuse_inode_cachep;
  32struct list_head fuse_conn_list;
  33DEFINE_MUTEX(fuse_mutex);
  34
  35static int set_global_limit(const char *val, const struct kernel_param *kp);
  36
  37unsigned max_user_bgreq;
  38module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
  39                  &max_user_bgreq, 0644);
  40__MODULE_PARM_TYPE(max_user_bgreq, "uint");
  41MODULE_PARM_DESC(max_user_bgreq,
  42 "Global limit for the maximum number of backgrounded requests an "
  43 "unprivileged user can set");
  44
  45unsigned max_user_congthresh;
  46module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
  47                  &max_user_congthresh, 0644);
  48__MODULE_PARM_TYPE(max_user_congthresh, "uint");
  49MODULE_PARM_DESC(max_user_congthresh,
  50 "Global limit for the maximum congestion threshold an "
  51 "unprivileged user can set");
  52
  53#define FUSE_SUPER_MAGIC 0x65735546
  54
  55#define FUSE_DEFAULT_BLKSIZE 512
  56
  57/** Maximum number of outstanding background requests */
  58#define FUSE_DEFAULT_MAX_BACKGROUND 12
  59
  60/** Congestion starts at 75% of maximum */
  61#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)
  62
  63#ifdef CONFIG_BLOCK
  64static struct file_system_type fuseblk_fs_type;
  65#endif
  66
  67struct fuse_forget_link *fuse_alloc_forget(void)
  68{
  69        return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
  70}
  71
  72static struct inode *fuse_alloc_inode(struct super_block *sb)
  73{
  74        struct fuse_inode *fi;
  75
  76        fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
  77        if (!fi)
  78                return NULL;
  79
  80        fi->i_time = 0;
  81        fi->inval_mask = 0;
  82        fi->nodeid = 0;
  83        fi->nlookup = 0;
  84        fi->attr_version = 0;
  85        fi->orig_ino = 0;
  86        fi->state = 0;
  87        mutex_init(&fi->mutex);
  88        init_rwsem(&fi->i_mmap_sem);
  89        spin_lock_init(&fi->lock);
  90        fi->forget = fuse_alloc_forget();
  91        if (!fi->forget)
  92                goto out_free;
  93
  94        if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
  95                goto out_free_forget;
  96
  97        return &fi->inode;
  98
  99out_free_forget:
 100        kfree(fi->forget);
 101out_free:
 102        kmem_cache_free(fuse_inode_cachep, fi);
 103        return NULL;
 104}
 105
 106static void fuse_free_inode(struct inode *inode)
 107{
 108        struct fuse_inode *fi = get_fuse_inode(inode);
 109
 110        mutex_destroy(&fi->mutex);
 111        kfree(fi->forget);
 112#ifdef CONFIG_FUSE_DAX
 113        kfree(fi->dax);
 114#endif
 115        kmem_cache_free(fuse_inode_cachep, fi);
 116}
 117
 118static void fuse_evict_inode(struct inode *inode)
 119{
 120        struct fuse_inode *fi = get_fuse_inode(inode);
 121
 122        truncate_inode_pages_final(&inode->i_data);
 123        clear_inode(inode);
 124        if (inode->i_sb->s_flags & SB_ACTIVE) {
 125                struct fuse_conn *fc = get_fuse_conn(inode);
 126
 127                if (FUSE_IS_DAX(inode))
 128                        fuse_dax_inode_cleanup(inode);
 129                if (fi->nlookup) {
 130                        fuse_queue_forget(fc, fi->forget, fi->nodeid,
 131                                          fi->nlookup);
 132                        fi->forget = NULL;
 133                }
 134        }
 135        if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
 136                WARN_ON(!list_empty(&fi->write_files));
 137                WARN_ON(!list_empty(&fi->queued_writes));
 138        }
 139}
 140
 141static int fuse_reconfigure(struct fs_context *fc)
 142{
 143        struct super_block *sb = fc->root->d_sb;
 144
 145        sync_filesystem(sb);
 146        if (fc->sb_flags & SB_MANDLOCK)
 147                return -EINVAL;
 148
 149        return 0;
 150}
 151
 152/*
 153 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
 154 * so that it will fit.
 155 */
 156static ino_t fuse_squash_ino(u64 ino64)
 157{
 158        ino_t ino = (ino_t) ino64;
 159        if (sizeof(ino_t) < sizeof(u64))
 160                ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
 161        return ino;
 162}
 163
 164void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 165                                   u64 attr_valid)
 166{
 167        struct fuse_conn *fc = get_fuse_conn(inode);
 168        struct fuse_inode *fi = get_fuse_inode(inode);
 169
 170        lockdep_assert_held(&fi->lock);
 171
 172        fi->attr_version = atomic64_inc_return(&fc->attr_version);
 173        fi->i_time = attr_valid;
 174        WRITE_ONCE(fi->inval_mask, 0);
 175
 176        inode->i_ino     = fuse_squash_ino(attr->ino);
 177        inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
 178        set_nlink(inode, attr->nlink);
 179        inode->i_uid     = make_kuid(fc->user_ns, attr->uid);
 180        inode->i_gid     = make_kgid(fc->user_ns, attr->gid);
 181        inode->i_blocks  = attr->blocks;
 182        inode->i_atime.tv_sec   = attr->atime;
 183        inode->i_atime.tv_nsec  = attr->atimensec;
 184        /* mtime from server may be stale due to local buffered write */
 185        if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) {
 186                inode->i_mtime.tv_sec   = attr->mtime;
 187                inode->i_mtime.tv_nsec  = attr->mtimensec;
 188                inode->i_ctime.tv_sec   = attr->ctime;
 189                inode->i_ctime.tv_nsec  = attr->ctimensec;
 190        }
 191
 192        if (attr->blksize != 0)
 193                inode->i_blkbits = ilog2(attr->blksize);
 194        else
 195                inode->i_blkbits = inode->i_sb->s_blocksize_bits;
 196
 197        /*
 198         * Don't set the sticky bit in i_mode, unless we want the VFS
 199         * to check permissions.  This prevents failures due to the
 200         * check in may_delete().
 201         */
 202        fi->orig_i_mode = inode->i_mode;
 203        if (!fc->default_permissions)
 204                inode->i_mode &= ~S_ISVTX;
 205
 206        fi->orig_ino = attr->ino;
 207
 208        /*
 209         * We are refreshing inode data and it is possible that another
 210         * client set suid/sgid or security.capability xattr. So clear
 211         * S_NOSEC. Ideally, we could have cleared it only if suid/sgid
 212         * was set or if security.capability xattr was set. But we don't
 213         * know if security.capability has been set or not. So clear it
 214         * anyway. Its less efficient but should be safe.
 215         */
 216        inode->i_flags &= ~S_NOSEC;
 217}
 218
 219void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
 220                            u64 attr_valid, u64 attr_version)
 221{
 222        struct fuse_conn *fc = get_fuse_conn(inode);
 223        struct fuse_inode *fi = get_fuse_inode(inode);
 224        bool is_wb = fc->writeback_cache;
 225        loff_t oldsize;
 226        struct timespec64 old_mtime;
 227
 228        spin_lock(&fi->lock);
 229        if ((attr_version != 0 && fi->attr_version > attr_version) ||
 230            test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
 231                spin_unlock(&fi->lock);
 232                return;
 233        }
 234
 235        old_mtime = inode->i_mtime;
 236        fuse_change_attributes_common(inode, attr, attr_valid);
 237
 238        oldsize = inode->i_size;
 239        /*
 240         * In case of writeback_cache enabled, the cached writes beyond EOF
 241         * extend local i_size without keeping userspace server in sync. So,
 242         * attr->size coming from server can be stale. We cannot trust it.
 243         */
 244        if (!is_wb || !S_ISREG(inode->i_mode))
 245                i_size_write(inode, attr->size);
 246        spin_unlock(&fi->lock);
 247
 248        if (!is_wb && S_ISREG(inode->i_mode)) {
 249                bool inval = false;
 250
 251                if (oldsize != attr->size) {
 252                        truncate_pagecache(inode, attr->size);
 253                        if (!fc->explicit_inval_data)
 254                                inval = true;
 255                } else if (fc->auto_inval_data) {
 256                        struct timespec64 new_mtime = {
 257                                .tv_sec = attr->mtime,
 258                                .tv_nsec = attr->mtimensec,
 259                        };
 260
 261                        /*
 262                         * Auto inval mode also checks and invalidates if mtime
 263                         * has changed.
 264                         */
 265                        if (!timespec64_equal(&old_mtime, &new_mtime))
 266                                inval = true;
 267                }
 268
 269                if (inval)
 270                        invalidate_inode_pages2(inode->i_mapping);
 271        }
 272}
 273
 274static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
 275{
 276        inode->i_mode = attr->mode & S_IFMT;
 277        inode->i_size = attr->size;
 278        inode->i_mtime.tv_sec  = attr->mtime;
 279        inode->i_mtime.tv_nsec = attr->mtimensec;
 280        inode->i_ctime.tv_sec  = attr->ctime;
 281        inode->i_ctime.tv_nsec = attr->ctimensec;
 282        if (S_ISREG(inode->i_mode)) {
 283                fuse_init_common(inode);
 284                fuse_init_file_inode(inode);
 285        } else if (S_ISDIR(inode->i_mode))
 286                fuse_init_dir(inode);
 287        else if (S_ISLNK(inode->i_mode))
 288                fuse_init_symlink(inode);
 289        else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
 290                 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 291                fuse_init_common(inode);
 292                init_special_inode(inode, inode->i_mode,
 293                                   new_decode_dev(attr->rdev));
 294        } else
 295                BUG();
 296}
 297
 298static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
 299{
 300        u64 nodeid = *(u64 *) _nodeidp;
 301        if (get_node_id(inode) == nodeid)
 302                return 1;
 303        else
 304                return 0;
 305}
 306
 307static int fuse_inode_set(struct inode *inode, void *_nodeidp)
 308{
 309        u64 nodeid = *(u64 *) _nodeidp;
 310        get_fuse_inode(inode)->nodeid = nodeid;
 311        return 0;
 312}
 313
 314struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
 315                        int generation, struct fuse_attr *attr,
 316                        u64 attr_valid, u64 attr_version)
 317{
 318        struct inode *inode;
 319        struct fuse_inode *fi;
 320        struct fuse_conn *fc = get_fuse_conn_super(sb);
 321
 322        /*
 323         * Auto mount points get their node id from the submount root, which is
 324         * not a unique identifier within this filesystem.
 325         *
 326         * To avoid conflicts, do not place submount points into the inode hash
 327         * table.
 328         */
 329        if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
 330            S_ISDIR(attr->mode)) {
 331                inode = new_inode(sb);
 332                if (!inode)
 333                        return NULL;
 334
 335                fuse_init_inode(inode, attr);
 336                get_fuse_inode(inode)->nodeid = nodeid;
 337                inode->i_flags |= S_AUTOMOUNT;
 338                goto done;
 339        }
 340
 341retry:
 342        inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
 343        if (!inode)
 344                return NULL;
 345
 346        if ((inode->i_state & I_NEW)) {
 347                inode->i_flags |= S_NOATIME;
 348                if (!fc->writeback_cache || !S_ISREG(attr->mode))
 349                        inode->i_flags |= S_NOCMTIME;
 350                inode->i_generation = generation;
 351                fuse_init_inode(inode, attr);
 352                unlock_new_inode(inode);
 353        } else if (fuse_stale_inode(inode, generation, attr)) {
 354                /* nodeid was reused, any I/O on the old inode should fail */
 355                fuse_make_bad(inode);
 356                iput(inode);
 357                goto retry;
 358        }
 359done:
 360        fi = get_fuse_inode(inode);
 361        spin_lock(&fi->lock);
 362        fi->nlookup++;
 363        spin_unlock(&fi->lock);
 364        fuse_change_attributes(inode, attr, attr_valid, attr_version);
 365
 366        return inode;
 367}
 368
 369struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
 370                           struct fuse_mount **fm)
 371{
 372        struct fuse_mount *fm_iter;
 373        struct inode *inode;
 374
 375        WARN_ON(!rwsem_is_locked(&fc->killsb));
 376        list_for_each_entry(fm_iter, &fc->mounts, fc_entry) {
 377                if (!fm_iter->sb)
 378                        continue;
 379
 380                inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid);
 381                if (inode) {
 382                        if (fm)
 383                                *fm = fm_iter;
 384                        return inode;
 385                }
 386        }
 387
 388        return NULL;
 389}
 390
 391int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
 392                             loff_t offset, loff_t len)
 393{
 394        struct fuse_inode *fi;
 395        struct inode *inode;
 396        pgoff_t pg_start;
 397        pgoff_t pg_end;
 398
 399        inode = fuse_ilookup(fc, nodeid, NULL);
 400        if (!inode)
 401                return -ENOENT;
 402
 403        fi = get_fuse_inode(inode);
 404        spin_lock(&fi->lock);
 405        fi->attr_version = atomic64_inc_return(&fc->attr_version);
 406        spin_unlock(&fi->lock);
 407
 408        fuse_invalidate_attr(inode);
 409        forget_all_cached_acls(inode);
 410        if (offset >= 0) {
 411                pg_start = offset >> PAGE_SHIFT;
 412                if (len <= 0)
 413                        pg_end = -1;
 414                else
 415                        pg_end = (offset + len - 1) >> PAGE_SHIFT;
 416                invalidate_inode_pages2_range(inode->i_mapping,
 417                                              pg_start, pg_end);
 418        }
 419        iput(inode);
 420        return 0;
 421}
 422
 423bool fuse_lock_inode(struct inode *inode)
 424{
 425        bool locked = false;
 426
 427        if (!get_fuse_conn(inode)->parallel_dirops) {
 428                mutex_lock(&get_fuse_inode(inode)->mutex);
 429                locked = true;
 430        }
 431
 432        return locked;
 433}
 434
 435void fuse_unlock_inode(struct inode *inode, bool locked)
 436{
 437        if (locked)
 438                mutex_unlock(&get_fuse_inode(inode)->mutex);
 439}
 440
 441static void fuse_umount_begin(struct super_block *sb)
 442{
 443        struct fuse_conn *fc = get_fuse_conn_super(sb);
 444
 445        if (!fc->no_force_umount)
 446                fuse_abort_conn(fc);
 447}
 448
 449static void fuse_send_destroy(struct fuse_mount *fm)
 450{
 451        if (fm->fc->conn_init) {
 452                FUSE_ARGS(args);
 453
 454                args.opcode = FUSE_DESTROY;
 455                args.force = true;
 456                args.nocreds = true;
 457                fuse_simple_request(fm, &args);
 458        }
 459}
 460
 461static void fuse_put_super(struct super_block *sb)
 462{
 463        struct fuse_mount *fm = get_fuse_mount_super(sb);
 464
 465        fuse_conn_put(fm->fc);
 466        kfree(fm);
 467}
 468
 469static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
 470{
 471        stbuf->f_type    = FUSE_SUPER_MAGIC;
 472        stbuf->f_bsize   = attr->bsize;
 473        stbuf->f_frsize  = attr->frsize;
 474        stbuf->f_blocks  = attr->blocks;
 475        stbuf->f_bfree   = attr->bfree;
 476        stbuf->f_bavail  = attr->bavail;
 477        stbuf->f_files   = attr->files;
 478        stbuf->f_ffree   = attr->ffree;
 479        stbuf->f_namelen = attr->namelen;
 480        /* fsid is left zero */
 481}
 482
 483static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
 484{
 485        struct super_block *sb = dentry->d_sb;
 486        struct fuse_mount *fm = get_fuse_mount_super(sb);
 487        FUSE_ARGS(args);
 488        struct fuse_statfs_out outarg;
 489        int err;
 490
 491        if (!fuse_allow_current_process(fm->fc)) {
 492                buf->f_type = FUSE_SUPER_MAGIC;
 493                return 0;
 494        }
 495
 496        memset(&outarg, 0, sizeof(outarg));
 497        args.in_numargs = 0;
 498        args.opcode = FUSE_STATFS;
 499        args.nodeid = get_node_id(d_inode(dentry));
 500        args.out_numargs = 1;
 501        args.out_args[0].size = sizeof(outarg);
 502        args.out_args[0].value = &outarg;
 503        err = fuse_simple_request(fm, &args);
 504        if (!err)
 505                convert_fuse_statfs(buf, &outarg.st);
 506        return err;
 507}
 508
 509static int fuse_sync_fs(struct super_block *sb, int wait)
 510{
 511        struct fuse_mount *fm = get_fuse_mount_super(sb);
 512        struct fuse_conn *fc = fm->fc;
 513        struct fuse_syncfs_in inarg;
 514        FUSE_ARGS(args);
 515        int err;
 516
 517        /*
 518         * Userspace cannot handle the wait == 0 case.  Avoid a
 519         * gratuitous roundtrip.
 520         */
 521        if (!wait)
 522                return 0;
 523
 524        /* The filesystem is being unmounted.  Nothing to do. */
 525        if (!sb->s_root)
 526                return 0;
 527
 528        if (!fc->sync_fs)
 529                return 0;
 530
 531        memset(&inarg, 0, sizeof(inarg));
 532        args.in_numargs = 1;
 533        args.in_args[0].size = sizeof(inarg);
 534        args.in_args[0].value = &inarg;
 535        args.opcode = FUSE_SYNCFS;
 536        args.nodeid = get_node_id(sb->s_root->d_inode);
 537        args.out_numargs = 0;
 538
 539        err = fuse_simple_request(fm, &args);
 540        if (err == -ENOSYS) {
 541                fc->sync_fs = 0;
 542                err = 0;
 543        }
 544
 545        return err;
 546}
 547
 548enum {
 549        OPT_SOURCE,
 550        OPT_SUBTYPE,
 551        OPT_FD,
 552        OPT_ROOTMODE,
 553        OPT_USER_ID,
 554        OPT_GROUP_ID,
 555        OPT_DEFAULT_PERMISSIONS,
 556        OPT_ALLOW_OTHER,
 557        OPT_MAX_READ,
 558        OPT_BLKSIZE,
 559        OPT_ERR
 560};
 561
 562static const struct fs_parameter_spec fuse_fs_parameters[] = {
 563        fsparam_string  ("source",              OPT_SOURCE),
 564        fsparam_u32     ("fd",                  OPT_FD),
 565        fsparam_u32oct  ("rootmode",            OPT_ROOTMODE),
 566        fsparam_u32     ("user_id",             OPT_USER_ID),
 567        fsparam_u32     ("group_id",            OPT_GROUP_ID),
 568        fsparam_flag    ("default_permissions", OPT_DEFAULT_PERMISSIONS),
 569        fsparam_flag    ("allow_other",         OPT_ALLOW_OTHER),
 570        fsparam_u32     ("max_read",            OPT_MAX_READ),
 571        fsparam_u32     ("blksize",             OPT_BLKSIZE),
 572        fsparam_string  ("subtype",             OPT_SUBTYPE),
 573        {}
 574};
 575
 576static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
 577{
 578        struct fs_parse_result result;
 579        struct fuse_fs_context *ctx = fc->fs_private;
 580        int opt;
 581
 582        if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
 583                /*
 584                 * Ignore options coming from mount(MS_REMOUNT) for backward
 585                 * compatibility.
 586                 */
 587                if (fc->oldapi)
 588                        return 0;
 589
 590                return invalfc(fc, "No changes allowed in reconfigure");
 591        }
 592
 593        opt = fs_parse(fc, fuse_fs_parameters, param, &result);
 594        if (opt < 0)
 595                return opt;
 596
 597        switch (opt) {
 598        case OPT_SOURCE:
 599                if (fc->source)
 600                        return invalfc(fc, "Multiple sources specified");
 601                fc->source = param->string;
 602                param->string = NULL;
 603                break;
 604
 605        case OPT_SUBTYPE:
 606                if (ctx->subtype)
 607                        return invalfc(fc, "Multiple subtypes specified");
 608                ctx->subtype = param->string;
 609                param->string = NULL;
 610                return 0;
 611
 612        case OPT_FD:
 613                ctx->fd = result.uint_32;
 614                ctx->fd_present = true;
 615                break;
 616
 617        case OPT_ROOTMODE:
 618                if (!fuse_valid_type(result.uint_32))
 619                        return invalfc(fc, "Invalid rootmode");
 620                ctx->rootmode = result.uint_32;
 621                ctx->rootmode_present = true;
 622                break;
 623
 624        case OPT_USER_ID:
 625                ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
 626                if (!uid_valid(ctx->user_id))
 627                        return invalfc(fc, "Invalid user_id");
 628                ctx->user_id_present = true;
 629                break;
 630
 631        case OPT_GROUP_ID:
 632                ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
 633                if (!gid_valid(ctx->group_id))
 634                        return invalfc(fc, "Invalid group_id");
 635                ctx->group_id_present = true;
 636                break;
 637
 638        case OPT_DEFAULT_PERMISSIONS:
 639                ctx->default_permissions = true;
 640                break;
 641
 642        case OPT_ALLOW_OTHER:
 643                ctx->allow_other = true;
 644                break;
 645
 646        case OPT_MAX_READ:
 647                ctx->max_read = result.uint_32;
 648                break;
 649
 650        case OPT_BLKSIZE:
 651                if (!ctx->is_bdev)
 652                        return invalfc(fc, "blksize only supported for fuseblk");
 653                ctx->blksize = result.uint_32;
 654                break;
 655
 656        default:
 657                return -EINVAL;
 658        }
 659
 660        return 0;
 661}
 662
 663static void fuse_free_fc(struct fs_context *fc)
 664{
 665        struct fuse_fs_context *ctx = fc->fs_private;
 666
 667        if (ctx) {
 668                kfree(ctx->subtype);
 669                kfree(ctx);
 670        }
 671}
 672
 673static int fuse_show_options(struct seq_file *m, struct dentry *root)
 674{
 675        struct super_block *sb = root->d_sb;
 676        struct fuse_conn *fc = get_fuse_conn_super(sb);
 677
 678        if (fc->legacy_opts_show) {
 679                seq_printf(m, ",user_id=%u",
 680                           from_kuid_munged(fc->user_ns, fc->user_id));
 681                seq_printf(m, ",group_id=%u",
 682                           from_kgid_munged(fc->user_ns, fc->group_id));
 683                if (fc->default_permissions)
 684                        seq_puts(m, ",default_permissions");
 685                if (fc->allow_other)
 686                        seq_puts(m, ",allow_other");
 687                if (fc->max_read != ~0)
 688                        seq_printf(m, ",max_read=%u", fc->max_read);
 689                if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
 690                        seq_printf(m, ",blksize=%lu", sb->s_blocksize);
 691        }
 692#ifdef CONFIG_FUSE_DAX
 693        if (fc->dax)
 694                seq_puts(m, ",dax");
 695#endif
 696
 697        return 0;
 698}
 699
 700static void fuse_iqueue_init(struct fuse_iqueue *fiq,
 701                             const struct fuse_iqueue_ops *ops,
 702                             void *priv)
 703{
 704        memset(fiq, 0, sizeof(struct fuse_iqueue));
 705        spin_lock_init(&fiq->lock);
 706        init_waitqueue_head(&fiq->waitq);
 707        INIT_LIST_HEAD(&fiq->pending);
 708        INIT_LIST_HEAD(&fiq->interrupts);
 709        fiq->forget_list_tail = &fiq->forget_list_head;
 710        fiq->connected = 1;
 711        fiq->ops = ops;
 712        fiq->priv = priv;
 713}
 714
 715static void fuse_pqueue_init(struct fuse_pqueue *fpq)
 716{
 717        unsigned int i;
 718
 719        spin_lock_init(&fpq->lock);
 720        for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
 721                INIT_LIST_HEAD(&fpq->processing[i]);
 722        INIT_LIST_HEAD(&fpq->io);
 723        fpq->connected = 1;
 724}
 725
 726void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
 727                    struct user_namespace *user_ns,
 728                    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
 729{
 730        memset(fc, 0, sizeof(*fc));
 731        spin_lock_init(&fc->lock);
 732        spin_lock_init(&fc->bg_lock);
 733        init_rwsem(&fc->killsb);
 734        refcount_set(&fc->count, 1);
 735        atomic_set(&fc->dev_count, 1);
 736        init_waitqueue_head(&fc->blocked_waitq);
 737        fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
 738        INIT_LIST_HEAD(&fc->bg_queue);
 739        INIT_LIST_HEAD(&fc->entry);
 740        INIT_LIST_HEAD(&fc->devices);
 741        atomic_set(&fc->num_waiting, 0);
 742        fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
 743        fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
 744        atomic64_set(&fc->khctr, 0);
 745        fc->polled_files = RB_ROOT;
 746        fc->blocked = 0;
 747        fc->initialized = 0;
 748        fc->connected = 1;
 749        atomic64_set(&fc->attr_version, 1);
 750        get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
 751        fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
 752        fc->user_ns = get_user_ns(user_ns);
 753        fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
 754        fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
 755
 756        INIT_LIST_HEAD(&fc->mounts);
 757        list_add(&fm->fc_entry, &fc->mounts);
 758        fm->fc = fc;
 759}
 760EXPORT_SYMBOL_GPL(fuse_conn_init);
 761
 762void fuse_conn_put(struct fuse_conn *fc)
 763{
 764        if (refcount_dec_and_test(&fc->count)) {
 765                struct fuse_iqueue *fiq = &fc->iq;
 766
 767                if (IS_ENABLED(CONFIG_FUSE_DAX))
 768                        fuse_dax_conn_free(fc);
 769                if (fiq->ops->release)
 770                        fiq->ops->release(fiq);
 771                put_pid_ns(fc->pid_ns);
 772                put_user_ns(fc->user_ns);
 773                fc->release(fc);
 774        }
 775}
 776EXPORT_SYMBOL_GPL(fuse_conn_put);
 777
 778struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
 779{
 780        refcount_inc(&fc->count);
 781        return fc;
 782}
 783EXPORT_SYMBOL_GPL(fuse_conn_get);
 784
 785static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
 786{
 787        struct fuse_attr attr;
 788        memset(&attr, 0, sizeof(attr));
 789
 790        attr.mode = mode;
 791        attr.ino = FUSE_ROOT_ID;
 792        attr.nlink = 1;
 793        return fuse_iget(sb, 1, 0, &attr, 0, 0);
 794}
 795
 796struct fuse_inode_handle {
 797        u64 nodeid;
 798        u32 generation;
 799};
 800
 801static struct dentry *fuse_get_dentry(struct super_block *sb,
 802                                      struct fuse_inode_handle *handle)
 803{
 804        struct fuse_conn *fc = get_fuse_conn_super(sb);
 805        struct inode *inode;
 806        struct dentry *entry;
 807        int err = -ESTALE;
 808
 809        if (handle->nodeid == 0)
 810                goto out_err;
 811
 812        inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
 813        if (!inode) {
 814                struct fuse_entry_out outarg;
 815                const struct qstr name = QSTR_INIT(".", 1);
 816
 817                if (!fc->export_support)
 818                        goto out_err;
 819
 820                err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
 821                                       &inode);
 822                if (err && err != -ENOENT)
 823                        goto out_err;
 824                if (err || !inode) {
 825                        err = -ESTALE;
 826                        goto out_err;
 827                }
 828                err = -EIO;
 829                if (get_node_id(inode) != handle->nodeid)
 830                        goto out_iput;
 831        }
 832        err = -ESTALE;
 833        if (inode->i_generation != handle->generation)
 834                goto out_iput;
 835
 836        entry = d_obtain_alias(inode);
 837        if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
 838                fuse_invalidate_entry_cache(entry);
 839
 840        return entry;
 841
 842 out_iput:
 843        iput(inode);
 844 out_err:
 845        return ERR_PTR(err);
 846}
 847
 848static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
 849                           struct inode *parent)
 850{
 851        int len = parent ? 6 : 3;
 852        u64 nodeid;
 853        u32 generation;
 854
 855        if (*max_len < len) {
 856                *max_len = len;
 857                return  FILEID_INVALID;
 858        }
 859
 860        nodeid = get_fuse_inode(inode)->nodeid;
 861        generation = inode->i_generation;
 862
 863        fh[0] = (u32)(nodeid >> 32);
 864        fh[1] = (u32)(nodeid & 0xffffffff);
 865        fh[2] = generation;
 866
 867        if (parent) {
 868                nodeid = get_fuse_inode(parent)->nodeid;
 869                generation = parent->i_generation;
 870
 871                fh[3] = (u32)(nodeid >> 32);
 872                fh[4] = (u32)(nodeid & 0xffffffff);
 873                fh[5] = generation;
 874        }
 875
 876        *max_len = len;
 877        return parent ? 0x82 : 0x81;
 878}
 879
 880static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
 881                struct fid *fid, int fh_len, int fh_type)
 882{
 883        struct fuse_inode_handle handle;
 884
 885        if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
 886                return NULL;
 887
 888        handle.nodeid = (u64) fid->raw[0] << 32;
 889        handle.nodeid |= (u64) fid->raw[1];
 890        handle.generation = fid->raw[2];
 891        return fuse_get_dentry(sb, &handle);
 892}
 893
 894static struct dentry *fuse_fh_to_parent(struct super_block *sb,
 895                struct fid *fid, int fh_len, int fh_type)
 896{
 897        struct fuse_inode_handle parent;
 898
 899        if (fh_type != 0x82 || fh_len < 6)
 900                return NULL;
 901
 902        parent.nodeid = (u64) fid->raw[3] << 32;
 903        parent.nodeid |= (u64) fid->raw[4];
 904        parent.generation = fid->raw[5];
 905        return fuse_get_dentry(sb, &parent);
 906}
 907
 908static struct dentry *fuse_get_parent(struct dentry *child)
 909{
 910        struct inode *child_inode = d_inode(child);
 911        struct fuse_conn *fc = get_fuse_conn(child_inode);
 912        struct inode *inode;
 913        struct dentry *parent;
 914        struct fuse_entry_out outarg;
 915        int err;
 916
 917        if (!fc->export_support)
 918                return ERR_PTR(-ESTALE);
 919
 920        err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
 921                               &dotdot_name, &outarg, &inode);
 922        if (err) {
 923                if (err == -ENOENT)
 924                        return ERR_PTR(-ESTALE);
 925                return ERR_PTR(err);
 926        }
 927
 928        parent = d_obtain_alias(inode);
 929        if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
 930                fuse_invalidate_entry_cache(parent);
 931
 932        return parent;
 933}
 934
 935static const struct export_operations fuse_export_operations = {
 936        .fh_to_dentry   = fuse_fh_to_dentry,
 937        .fh_to_parent   = fuse_fh_to_parent,
 938        .encode_fh      = fuse_encode_fh,
 939        .get_parent     = fuse_get_parent,
 940};
 941
 942static const struct super_operations fuse_super_operations = {
 943        .alloc_inode    = fuse_alloc_inode,
 944        .free_inode     = fuse_free_inode,
 945        .evict_inode    = fuse_evict_inode,
 946        .write_inode    = fuse_write_inode,
 947        .drop_inode     = generic_delete_inode,
 948        .put_super      = fuse_put_super,
 949        .umount_begin   = fuse_umount_begin,
 950        .statfs         = fuse_statfs,
 951        .sync_fs        = fuse_sync_fs,
 952        .show_options   = fuse_show_options,
 953};
 954
 955static void sanitize_global_limit(unsigned *limit)
 956{
 957        /*
 958         * The default maximum number of async requests is calculated to consume
 959         * 1/2^13 of the total memory, assuming 392 bytes per request.
 960         */
 961        if (*limit == 0)
 962                *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;
 963
 964        if (*limit >= 1 << 16)
 965                *limit = (1 << 16) - 1;
 966}
 967
 968static int set_global_limit(const char *val, const struct kernel_param *kp)
 969{
 970        int rv;
 971
 972        rv = param_set_uint(val, kp);
 973        if (rv)
 974                return rv;
 975
 976        sanitize_global_limit((unsigned *)kp->arg);
 977
 978        return 0;
 979}
 980
 981static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
 982{
 983        int cap_sys_admin = capable(CAP_SYS_ADMIN);
 984
 985        if (arg->minor < 13)
 986                return;
 987
 988        sanitize_global_limit(&max_user_bgreq);
 989        sanitize_global_limit(&max_user_congthresh);
 990
 991        spin_lock(&fc->bg_lock);
 992        if (arg->max_background) {
 993                fc->max_background = arg->max_background;
 994
 995                if (!cap_sys_admin && fc->max_background > max_user_bgreq)
 996                        fc->max_background = max_user_bgreq;
 997        }
 998        if (arg->congestion_threshold) {
 999                fc->congestion_threshold = arg->congestion_threshold;
1000
1001                if (!cap_sys_admin &&
1002                    fc->congestion_threshold > max_user_congthresh)
1003                        fc->congestion_threshold = max_user_congthresh;
1004        }
1005        spin_unlock(&fc->bg_lock);
1006}
1007
1008struct fuse_init_args {
1009        struct fuse_args args;
1010        struct fuse_init_in in;
1011        struct fuse_init_out out;
1012};
1013
1014static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
1015                               int error)
1016{
1017        struct fuse_conn *fc = fm->fc;
1018        struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
1019        struct fuse_init_out *arg = &ia->out;
1020        bool ok = true;
1021
1022        if (error || arg->major != FUSE_KERNEL_VERSION)
1023                ok = false;
1024        else {
1025                unsigned long ra_pages;
1026
1027                process_init_limits(fc, arg);
1028
1029                if (arg->minor >= 6) {
1030                        ra_pages = arg->max_readahead / PAGE_SIZE;
1031                        if (arg->flags & FUSE_ASYNC_READ)
1032                                fc->async_read = 1;
1033                        if (!(arg->flags & FUSE_POSIX_LOCKS))
1034                                fc->no_lock = 1;
1035                        if (arg->minor >= 17) {
1036                                if (!(arg->flags & FUSE_FLOCK_LOCKS))
1037                                        fc->no_flock = 1;
1038                        } else {
1039                                if (!(arg->flags & FUSE_POSIX_LOCKS))
1040                                        fc->no_flock = 1;
1041                        }
1042                        if (arg->flags & FUSE_ATOMIC_O_TRUNC)
1043                                fc->atomic_o_trunc = 1;
1044                        if (arg->minor >= 9) {
1045                                /* LOOKUP has dependency on proto version */
1046                                if (arg->flags & FUSE_EXPORT_SUPPORT)
1047                                        fc->export_support = 1;
1048                        }
1049                        if (arg->flags & FUSE_BIG_WRITES)
1050                                fc->big_writes = 1;
1051                        if (arg->flags & FUSE_DONT_MASK)
1052                                fc->dont_mask = 1;
1053                        if (arg->flags & FUSE_AUTO_INVAL_DATA)
1054                                fc->auto_inval_data = 1;
1055                        else if (arg->flags & FUSE_EXPLICIT_INVAL_DATA)
1056                                fc->explicit_inval_data = 1;
1057                        if (arg->flags & FUSE_DO_READDIRPLUS) {
1058                                fc->do_readdirplus = 1;
1059                                if (arg->flags & FUSE_READDIRPLUS_AUTO)
1060                                        fc->readdirplus_auto = 1;
1061                        }
1062                        if (arg->flags & FUSE_ASYNC_DIO)
1063                                fc->async_dio = 1;
1064                        if (arg->flags & FUSE_WRITEBACK_CACHE)
1065                                fc->writeback_cache = 1;
1066                        if (arg->flags & FUSE_PARALLEL_DIROPS)
1067                                fc->parallel_dirops = 1;
1068                        if (arg->flags & FUSE_HANDLE_KILLPRIV)
1069                                fc->handle_killpriv = 1;
1070                        if (arg->time_gran && arg->time_gran <= 1000000000)
1071                                fm->sb->s_time_gran = arg->time_gran;
1072                        if ((arg->flags & FUSE_POSIX_ACL)) {
1073                                fc->default_permissions = 1;
1074                                fc->posix_acl = 1;
1075                                fm->sb->s_xattr = fuse_acl_xattr_handlers;
1076                        }
1077                        if (arg->flags & FUSE_CACHE_SYMLINKS)
1078                                fc->cache_symlinks = 1;
1079                        if (arg->flags & FUSE_ABORT_ERROR)
1080                                fc->abort_err = 1;
1081                        if (arg->flags & FUSE_MAX_PAGES) {
1082                                fc->max_pages =
1083                                        min_t(unsigned int, fc->max_pages_limit,
1084                                        max_t(unsigned int, arg->max_pages, 1));
1085                        }
1086                        if (IS_ENABLED(CONFIG_FUSE_DAX) &&
1087                            arg->flags & FUSE_MAP_ALIGNMENT &&
1088                            !fuse_dax_check_alignment(fc, arg->map_alignment)) {
1089                                ok = false;
1090                        }
1091                        if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) {
1092                                fc->handle_killpriv_v2 = 1;
1093                                fm->sb->s_flags |= SB_NOSEC;
1094                        }
1095                        if (arg->flags & FUSE_SETXATTR_EXT)
1096                                fc->setxattr_ext = 1;
1097                } else {
1098                        ra_pages = fc->max_read / PAGE_SIZE;
1099                        fc->no_lock = 1;
1100                        fc->no_flock = 1;
1101                }
1102
1103                fm->sb->s_bdi->ra_pages =
1104                                min(fm->sb->s_bdi->ra_pages, ra_pages);
1105                fc->minor = arg->minor;
1106                fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
1107                fc->max_write = max_t(unsigned, 4096, fc->max_write);
1108                fc->conn_init = 1;
1109        }
1110        kfree(ia);
1111
1112        if (!ok) {
1113                fc->conn_init = 0;
1114                fc->conn_error = 1;
1115        }
1116
1117        fuse_set_initialized(fc);
1118        wake_up_all(&fc->blocked_waitq);
1119}
1120
1121void fuse_send_init(struct fuse_mount *fm)
1122{
1123        struct fuse_init_args *ia;
1124
1125        ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);
1126
1127        ia->in.major = FUSE_KERNEL_VERSION;
1128        ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
1129        ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE;
1130        ia->in.flags |=
1131                FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
1132                FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
1133                FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
1134                FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
1135                FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
1136                FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
1137                FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
1138                FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
1139                FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
1140                FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT;
1141#ifdef CONFIG_FUSE_DAX
1142        if (fm->fc->dax)
1143                ia->in.flags |= FUSE_MAP_ALIGNMENT;
1144#endif
1145        if (fm->fc->auto_submounts)
1146                ia->in.flags |= FUSE_SUBMOUNTS;
1147
1148        ia->args.opcode = FUSE_INIT;
1149        ia->args.in_numargs = 1;
1150        ia->args.in_args[0].size = sizeof(ia->in);
1151        ia->args.in_args[0].value = &ia->in;
1152        ia->args.out_numargs = 1;
1153        /* Variable length argument used for backward compatibility
1154           with interface version < 7.5.  Rest of init_out is zeroed
1155           by do_get_request(), so a short reply is not a problem */
1156        ia->args.out_argvar = true;
1157        ia->args.out_args[0].size = sizeof(ia->out);
1158        ia->args.out_args[0].value = &ia->out;
1159        ia->args.force = true;
1160        ia->args.nocreds = true;
1161        ia->args.end = process_init_reply;
1162
1163        if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
1164                process_init_reply(fm, &ia->args, -ENOTCONN);
1165}
1166EXPORT_SYMBOL_GPL(fuse_send_init);
1167
1168void fuse_free_conn(struct fuse_conn *fc)
1169{
1170        WARN_ON(!list_empty(&fc->devices));
1171        kfree_rcu(fc, rcu);
1172}
1173EXPORT_SYMBOL_GPL(fuse_free_conn);
1174
1175static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
1176{
1177        int err;
1178        char *suffix = "";
1179
1180        if (sb->s_bdev) {
1181                suffix = "-fuseblk";
1182                /*
1183                 * sb->s_bdi points to blkdev's bdi however we want to redirect
1184                 * it to our private bdi...
1185                 */
1186                bdi_put(sb->s_bdi);
1187                sb->s_bdi = &noop_backing_dev_info;
1188        }
1189        err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
1190                                   MINOR(fc->dev), suffix);
1191        if (err)
1192                return err;
1193
1194        /* fuse does it's own writeback accounting */
1195        sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
1196        sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;
1197
1198        /*
1199         * For a single fuse filesystem use max 1% of dirty +
1200         * writeback threshold.
1201         *
1202         * This gives about 1M of write buffer for memory maps on a
1203         * machine with 1G and 10% dirty_ratio, which should be more
1204         * than enough.
1205         *
1206         * Privileged users can raise it by writing to
1207         *
1208         *    /sys/class/bdi/<bdi>/max_ratio
1209         */
1210        bdi_set_max_ratio(sb->s_bdi, 1);
1211
1212        return 0;
1213}
1214
1215struct fuse_dev *fuse_dev_alloc(void)
1216{
1217        struct fuse_dev *fud;
1218        struct list_head *pq;
1219
1220        fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
1221        if (!fud)
1222                return NULL;
1223
1224        pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
1225        if (!pq) {
1226                kfree(fud);
1227                return NULL;
1228        }
1229
1230        fud->pq.processing = pq;
1231        fuse_pqueue_init(&fud->pq);
1232
1233        return fud;
1234}
1235EXPORT_SYMBOL_GPL(fuse_dev_alloc);
1236
1237void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
1238{
1239        fud->fc = fuse_conn_get(fc);
1240        spin_lock(&fc->lock);
1241        list_add_tail(&fud->entry, &fc->devices);
1242        spin_unlock(&fc->lock);
1243}
1244EXPORT_SYMBOL_GPL(fuse_dev_install);
1245
/*
 * Convenience wrapper: allocate a fuse_dev and attach it to @fc.
 *
 * Returns the new device, or NULL if allocation fails.
 */
struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
{
	struct fuse_dev *fud = fuse_dev_alloc();

	if (fud)
		fuse_dev_install(fud, fc);

	return fud;
}
1257EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);
1258
1259void fuse_dev_free(struct fuse_dev *fud)
1260{
1261        struct fuse_conn *fc = fud->fc;
1262
1263        if (fc) {
1264                spin_lock(&fc->lock);
1265                list_del(&fud->entry);
1266                spin_unlock(&fc->lock);
1267
1268                fuse_conn_put(fc);
1269        }
1270        kfree(fud->pq.processing);
1271        kfree(fud);
1272}
1273EXPORT_SYMBOL_GPL(fuse_dev_free);
1274
1275static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
1276                                      const struct fuse_inode *fi)
1277{
1278        *attr = (struct fuse_attr){
1279                .ino            = fi->inode.i_ino,
1280                .size           = fi->inode.i_size,
1281                .blocks         = fi->inode.i_blocks,
1282                .atime          = fi->inode.i_atime.tv_sec,
1283                .mtime          = fi->inode.i_mtime.tv_sec,
1284                .ctime          = fi->inode.i_ctime.tv_sec,
1285                .atimensec      = fi->inode.i_atime.tv_nsec,
1286                .mtimensec      = fi->inode.i_mtime.tv_nsec,
1287                .ctimensec      = fi->inode.i_ctime.tv_nsec,
1288                .mode           = fi->inode.i_mode,
1289                .nlink          = fi->inode.i_nlink,
1290                .uid            = fi->inode.i_uid.val,
1291                .gid            = fi->inode.i_gid.val,
1292                .rdev           = fi->inode.i_rdev,
1293                .blksize        = 1u << fi->inode.i_blkbits,
1294        };
1295}
1296
1297static void fuse_sb_defaults(struct super_block *sb)
1298{
1299        sb->s_magic = FUSE_SUPER_MAGIC;
1300        sb->s_op = &fuse_super_operations;
1301        sb->s_xattr = fuse_xattr_handlers;
1302        sb->s_maxbytes = MAX_LFS_FILESIZE;
1303        sb->s_time_gran = 1;
1304        sb->s_export_op = &fuse_export_operations;
1305        sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
1306        if (sb->s_user_ns != &init_user_ns)
1307                sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
1308        sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
1309
1310        /*
1311         * If we are not in the initial user namespace posix
1312         * acls must be translated.
1313         */
1314        if (sb->s_user_ns != &init_user_ns)
1315                sb->s_xattr = fuse_no_acl_xattr_handlers;
1316}
1317
1318static int fuse_fill_super_submount(struct super_block *sb,
1319                                    struct fuse_inode *parent_fi)
1320{
1321        struct fuse_mount *fm = get_fuse_mount_super(sb);
1322        struct super_block *parent_sb = parent_fi->inode.i_sb;
1323        struct fuse_attr root_attr;
1324        struct inode *root;
1325
1326        fuse_sb_defaults(sb);
1327        fm->sb = sb;
1328
1329        WARN_ON(sb->s_bdi != &noop_backing_dev_info);
1330        sb->s_bdi = bdi_get(parent_sb->s_bdi);
1331
1332        sb->s_xattr = parent_sb->s_xattr;
1333        sb->s_time_gran = parent_sb->s_time_gran;
1334        sb->s_blocksize = parent_sb->s_blocksize;
1335        sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
1336        sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL);
1337        if (parent_sb->s_subtype && !sb->s_subtype)
1338                return -ENOMEM;
1339
1340        fuse_fill_attr_from_inode(&root_attr, parent_fi);
1341        root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
1342        /*
1343         * This inode is just a duplicate, so it is not looked up and
1344         * its nlookup should not be incremented.  fuse_iget() does
1345         * that, though, so undo it here.
1346         */
1347        get_fuse_inode(root)->nlookup--;
1348        sb->s_d_op = &fuse_dentry_operations;
1349        sb->s_root = d_make_root(root);
1350        if (!sb->s_root)
1351                return -ENOMEM;
1352
1353        return 0;
1354}
1355
1356/* Filesystem context private data holds the FUSE inode of the mount point */
1357static int fuse_get_tree_submount(struct fs_context *fsc)
1358{
1359        struct fuse_mount *fm;
1360        struct fuse_inode *mp_fi = fsc->fs_private;
1361        struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode);
1362        struct super_block *sb;
1363        int err;
1364
1365        fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
1366        if (!fm)
1367                return -ENOMEM;
1368
1369        fsc->s_fs_info = fm;
1370        sb = sget_fc(fsc, NULL, set_anon_super_fc);
1371        if (IS_ERR(sb)) {
1372                kfree(fm);
1373                return PTR_ERR(sb);
1374        }
1375        fm->fc = fuse_conn_get(fc);
1376
1377        /* Initialize superblock, making @mp_fi its root */
1378        err = fuse_fill_super_submount(sb, mp_fi);
1379        if (err) {
1380                fuse_conn_put(fc);
1381                kfree(fm);
1382                sb->s_fs_info = NULL;
1383                deactivate_locked_super(sb);
1384                return err;
1385        }
1386
1387        down_write(&fc->killsb);
1388        list_add_tail(&fm->fc_entry, &fc->mounts);
1389        up_write(&fc->killsb);
1390
1391        sb->s_flags |= SB_ACTIVE;
1392        fsc->root = dget(sb->s_root);
1393
1394        return 0;
1395}
1396
1397static const struct fs_context_operations fuse_context_submount_ops = {
1398        .get_tree       = fuse_get_tree_submount,
1399};
1400
1401int fuse_init_fs_context_submount(struct fs_context *fsc)
1402{
1403        fsc->ops = &fuse_context_submount_ops;
1404        return 0;
1405}
1406EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);
1407
1408int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
1409{
1410        struct fuse_dev *fud = NULL;
1411        struct fuse_mount *fm = get_fuse_mount_super(sb);
1412        struct fuse_conn *fc = fm->fc;
1413        struct inode *root;
1414        struct dentry *root_dentry;
1415        int err;
1416
1417        err = -EINVAL;
1418        if (sb->s_flags & SB_MANDLOCK)
1419                goto err;
1420
1421        fuse_sb_defaults(sb);
1422
1423        if (ctx->is_bdev) {
1424#ifdef CONFIG_BLOCK
1425                err = -EINVAL;
1426                if (!sb_set_blocksize(sb, ctx->blksize))
1427                        goto err;
1428#endif
1429        } else {
1430                sb->s_blocksize = PAGE_SIZE;
1431                sb->s_blocksize_bits = PAGE_SHIFT;
1432        }
1433
1434        sb->s_subtype = ctx->subtype;
1435        ctx->subtype = NULL;
1436        if (IS_ENABLED(CONFIG_FUSE_DAX)) {
1437                err = fuse_dax_conn_alloc(fc, ctx->dax_dev);
1438                if (err)
1439                        goto err;
1440        }
1441
1442        if (ctx->fudptr) {
1443                err = -ENOMEM;
1444                fud = fuse_dev_alloc_install(fc);
1445                if (!fud)
1446                        goto err_free_dax;
1447        }
1448
1449        fc->dev = sb->s_dev;
1450        fm->sb = sb;
1451        err = fuse_bdi_init(fc, sb);
1452        if (err)
1453                goto err_dev_free;
1454
1455        /* Handle umasking inside the fuse code */
1456        if (sb->s_flags & SB_POSIXACL)
1457                fc->dont_mask = 1;
1458        sb->s_flags |= SB_POSIXACL;
1459
1460        fc->default_permissions = ctx->default_permissions;
1461        fc->allow_other = ctx->allow_other;
1462        fc->user_id = ctx->user_id;
1463        fc->group_id = ctx->group_id;
1464        fc->legacy_opts_show = ctx->legacy_opts_show;
1465        fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
1466        fc->destroy = ctx->destroy;
1467        fc->no_control = ctx->no_control;
1468        fc->no_force_umount = ctx->no_force_umount;
1469
1470        err = -ENOMEM;
1471        root = fuse_get_root_inode(sb, ctx->rootmode);
1472        sb->s_d_op = &fuse_root_dentry_operations;
1473        root_dentry = d_make_root(root);
1474        if (!root_dentry)
1475                goto err_dev_free;
1476        /* Root dentry doesn't have .d_revalidate */
1477        sb->s_d_op = &fuse_dentry_operations;
1478
1479        mutex_lock(&fuse_mutex);
1480        err = -EINVAL;
1481        if (ctx->fudptr && *ctx->fudptr)
1482                goto err_unlock;
1483
1484        err = fuse_ctl_add_conn(fc);
1485        if (err)
1486                goto err_unlock;
1487
1488        list_add_tail(&fc->entry, &fuse_conn_list);
1489        sb->s_root = root_dentry;
1490        if (ctx->fudptr)
1491                *ctx->fudptr = fud;
1492        mutex_unlock(&fuse_mutex);
1493        return 0;
1494
1495 err_unlock:
1496        mutex_unlock(&fuse_mutex);
1497        dput(root_dentry);
1498 err_dev_free:
1499        if (fud)
1500                fuse_dev_free(fud);
1501 err_free_dax:
1502        if (IS_ENABLED(CONFIG_FUSE_DAX))
1503                fuse_dax_conn_free(fc);
1504 err:
1505        return err;
1506}
1507EXPORT_SYMBOL_GPL(fuse_fill_super_common);
1508
1509static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
1510{
1511        struct fuse_fs_context *ctx = fsc->fs_private;
1512        struct file *file;
1513        int err;
1514        struct fuse_conn *fc;
1515        struct fuse_mount *fm;
1516
1517        err = -EINVAL;
1518        file = fget(ctx->fd);
1519        if (!file)
1520                goto err;
1521
1522        /*
1523         * Require mount to happen from the same user namespace which
1524         * opened /dev/fuse to prevent potential attacks.
1525         */
1526        if ((file->f_op != &fuse_dev_operations) ||
1527            (file->f_cred->user_ns != sb->s_user_ns))
1528                goto err_fput;
1529        ctx->fudptr = &file->private_data;
1530
1531        fc = kmalloc(sizeof(*fc), GFP_KERNEL);
1532        err = -ENOMEM;
1533        if (!fc)
1534                goto err_fput;
1535
1536        fm = kzalloc(sizeof(*fm), GFP_KERNEL);
1537        if (!fm) {
1538                kfree(fc);
1539                goto err_fput;
1540        }
1541
1542        fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
1543        fc->release = fuse_free_conn;
1544
1545        sb->s_fs_info = fm;
1546
1547        err = fuse_fill_super_common(sb, ctx);
1548        if (err)
1549                goto err_put_conn;
1550        /*
1551         * atomic_dec_and_test() in fput() provides the necessary
1552         * memory barrier for file->private_data to be visible on all
1553         * CPUs after this
1554         */
1555        fput(file);
1556        fuse_send_init(get_fuse_mount_super(sb));
1557        return 0;
1558
1559 err_put_conn:
1560        fuse_conn_put(fc);
1561        kfree(fm);
1562        sb->s_fs_info = NULL;
1563 err_fput:
1564        fput(file);
1565 err:
1566        return err;
1567}
1568
1569static int fuse_get_tree(struct fs_context *fc)
1570{
1571        struct fuse_fs_context *ctx = fc->fs_private;
1572
1573        if (!ctx->fd_present || !ctx->rootmode_present ||
1574            !ctx->user_id_present || !ctx->group_id_present)
1575                return -EINVAL;
1576
1577#ifdef CONFIG_BLOCK
1578        if (ctx->is_bdev)
1579                return get_tree_bdev(fc, fuse_fill_super);
1580#endif
1581
1582        return get_tree_nodev(fc, fuse_fill_super);
1583}
1584
1585static const struct fs_context_operations fuse_context_ops = {
1586        .free           = fuse_free_fc,
1587        .parse_param    = fuse_parse_param,
1588        .reconfigure    = fuse_reconfigure,
1589        .get_tree       = fuse_get_tree,
1590};
1591
1592/*
1593 * Set up the filesystem mount context.
1594 */
1595static int fuse_init_fs_context(struct fs_context *fc)
1596{
1597        struct fuse_fs_context *ctx;
1598
1599        ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
1600        if (!ctx)
1601                return -ENOMEM;
1602
1603        ctx->max_read = ~0;
1604        ctx->blksize = FUSE_DEFAULT_BLKSIZE;
1605        ctx->legacy_opts_show = true;
1606
1607#ifdef CONFIG_BLOCK
1608        if (fc->fs_type == &fuseblk_fs_type) {
1609                ctx->is_bdev = true;
1610                ctx->destroy = true;
1611        }
1612#endif
1613
1614        fc->fs_private = ctx;
1615        fc->ops = &fuse_context_ops;
1616        return 0;
1617}
1618
1619bool fuse_mount_remove(struct fuse_mount *fm)
1620{
1621        struct fuse_conn *fc = fm->fc;
1622        bool last = false;
1623
1624        down_write(&fc->killsb);
1625        list_del_init(&fm->fc_entry);
1626        if (list_empty(&fc->mounts))
1627                last = true;
1628        up_write(&fc->killsb);
1629
1630        return last;
1631}
1632EXPORT_SYMBOL_GPL(fuse_mount_remove);
1633
1634void fuse_conn_destroy(struct fuse_mount *fm)
1635{
1636        struct fuse_conn *fc = fm->fc;
1637
1638        if (fc->destroy)
1639                fuse_send_destroy(fm);
1640
1641        fuse_abort_conn(fc);
1642        fuse_wait_aborted(fc);
1643
1644        if (!list_empty(&fc->entry)) {
1645                mutex_lock(&fuse_mutex);
1646                list_del(&fc->entry);
1647                fuse_ctl_remove_conn(fc);
1648                mutex_unlock(&fuse_mutex);
1649        }
1650}
1651EXPORT_SYMBOL_GPL(fuse_conn_destroy);
1652
/*
 * Detach the superblock's mount from its connection and destroy the
 * connection if this was its last mount.  Does nothing if the superblock
 * has no fuse_mount attached.
 */
static void fuse_sb_destroy(struct super_block *sb)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);

	if (!fm)
		return;

	if (fuse_mount_remove(fm))
		fuse_conn_destroy(fm);
}
1664
/* ->kill_sb() for "fuse": FUSE teardown first, then the generic anon kill */
static void fuse_kill_sb_anon(struct super_block *sb)
{
	fuse_sb_destroy(sb);

	kill_anon_super(sb);
}
1670
1671static struct file_system_type fuse_fs_type = {
1672        .owner          = THIS_MODULE,
1673        .name           = "fuse",
1674        .fs_flags       = FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
1675        .init_fs_context = fuse_init_fs_context,
1676        .parameters     = fuse_fs_parameters,
1677        .kill_sb        = fuse_kill_sb_anon,
1678};
1679MODULE_ALIAS_FS("fuse");
1680
1681#ifdef CONFIG_BLOCK
/* ->kill_sb() for "fuseblk": FUSE teardown first, then the block kill */
static void fuse_kill_sb_blk(struct super_block *sb)
{
	fuse_sb_destroy(sb);

	kill_block_super(sb);
}
1687
1688static struct file_system_type fuseblk_fs_type = {
1689        .owner          = THIS_MODULE,
1690        .name           = "fuseblk",
1691        .init_fs_context = fuse_init_fs_context,
1692        .parameters     = fuse_fs_parameters,
1693        .kill_sb        = fuse_kill_sb_blk,
1694        .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
1695};
1696MODULE_ALIAS_FS("fuseblk");
1697
1698static inline int register_fuseblk(void)
1699{
1700        return register_filesystem(&fuseblk_fs_type);
1701}
1702
1703static inline void unregister_fuseblk(void)
1704{
1705        unregister_filesystem(&fuseblk_fs_type);
1706}
1707#else
/* Without CONFIG_BLOCK there is no "fuseblk" type: report success */
static inline int register_fuseblk(void)
{
	const int success = 0;

	return success;
}
1712
/* Without CONFIG_BLOCK there is no "fuseblk" type: nothing to undo */
static inline void unregister_fuseblk(void)
{
	return;
}
1716#endif
1717
/*
 * Slab constructor for the fuse_inode cache.  @foo is passed straight to
 * inode_init_once() as a struct inode pointer.
 */
static void fuse_inode_init_once(void *foo)
{
	inode_init_once((struct inode *)foo);
}
1724
1725static int __init fuse_fs_init(void)
1726{
1727        int err;
1728
1729        fuse_inode_cachep = kmem_cache_create("fuse_inode",
1730                        sizeof(struct fuse_inode), 0,
1731                        SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
1732                        fuse_inode_init_once);
1733        err = -ENOMEM;
1734        if (!fuse_inode_cachep)
1735                goto out;
1736
1737        err = register_fuseblk();
1738        if (err)
1739                goto out2;
1740
1741        err = register_filesystem(&fuse_fs_type);
1742        if (err)
1743                goto out3;
1744
1745        return 0;
1746
1747 out3:
1748        unregister_fuseblk();
1749 out2:
1750        kmem_cache_destroy(fuse_inode_cachep);
1751 out:
1752        return err;
1753}
1754
1755static void fuse_fs_cleanup(void)
1756{
1757        unregister_filesystem(&fuse_fs_type);
1758        unregister_fuseblk();
1759
1760        /*
1761         * Make sure all delayed rcu free inodes are flushed before we
1762         * destroy cache.
1763         */
1764        rcu_barrier();
1765        kmem_cache_destroy(fuse_inode_cachep);
1766}
1767
1768static struct kobject *fuse_kobj;
1769
1770static int fuse_sysfs_init(void)
1771{
1772        int err;
1773
1774        fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
1775        if (!fuse_kobj) {
1776                err = -ENOMEM;
1777                goto out_err;
1778        }
1779
1780        err = sysfs_create_mount_point(fuse_kobj, "connections");
1781        if (err)
1782                goto out_fuse_unregister;
1783
1784        return 0;
1785
1786 out_fuse_unregister:
1787        kobject_put(fuse_kobj);
1788 out_err:
1789        return err;
1790}
1791
1792static void fuse_sysfs_cleanup(void)
1793{
1794        sysfs_remove_mount_point(fuse_kobj, "connections");
1795        kobject_put(fuse_kobj);
1796}
1797
1798static int __init fuse_init(void)
1799{
1800        int res;
1801
1802        pr_info("init (API version %i.%i)\n",
1803                FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
1804
1805        INIT_LIST_HEAD(&fuse_conn_list);
1806        res = fuse_fs_init();
1807        if (res)
1808                goto err;
1809
1810        res = fuse_dev_init();
1811        if (res)
1812                goto err_fs_cleanup;
1813
1814        res = fuse_sysfs_init();
1815        if (res)
1816                goto err_dev_cleanup;
1817
1818        res = fuse_ctl_init();
1819        if (res)
1820                goto err_sysfs_cleanup;
1821
1822        sanitize_global_limit(&max_user_bgreq);
1823        sanitize_global_limit(&max_user_congthresh);
1824
1825        return 0;
1826
1827 err_sysfs_cleanup:
1828        fuse_sysfs_cleanup();
1829 err_dev_cleanup:
1830        fuse_dev_cleanup();
1831 err_fs_cleanup:
1832        fuse_fs_cleanup();
1833 err:
1834        return res;
1835}
1836
1837static void __exit fuse_exit(void)
1838{
1839        pr_debug("exit\n");
1840
1841        fuse_ctl_cleanup();
1842        fuse_sysfs_cleanup();
1843        fuse_fs_cleanup();
1844        fuse_dev_cleanup();
1845}
1846
1847module_init(fuse_init);
1848module_exit(fuse_exit);
1849