linux/fs/fuse/inode.c
/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/statfs.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/pid_namespace.h>

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");

static struct kmem_cache *fuse_inode_cachep;
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);

static int set_global_limit(const char *val, const struct kernel_param *kp);

unsigned max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
                  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

unsigned max_user_congthresh;
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
                  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");

#define FUSE_SUPER_MAGIC 0x65735546

#define FUSE_DEFAULT_BLKSIZE 512

/** Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

#ifdef CONFIG_BLOCK
static struct file_system_type fuseblk_fs_type;
#endif

struct fuse_forget_link *fuse_alloc_forget(void)
{
        return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
}

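/*
 * Super operation: allocate a fuse_inode from the slab cache and
 * pre-allocate the forget link that is needed when the inode is evicted.
 */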
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
        struct fuse_inode *fi;

        fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
        if (!fi)
                return NULL;

        fi->i_time = 0;
        fi->inval_mask = 0;
        fi->nodeid = 0;
        fi->nlookup = 0;
        fi->attr_version = 0;
        fi->orig_ino = 0;
        fi->state = 0;
        mutex_init(&fi->mutex);
        spin_lock_init(&fi->lock);
        fi->forget = fuse_alloc_forget();
        if (!fi->forget)
                goto out_free;

        if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
                goto out_free_forget;

        return &fi->inode;

out_free_forget:
        kfree(fi->forget);
out_free:
        kmem_cache_free(fuse_inode_cachep, fi);
        return NULL;
}

static void fuse_free_inode(struct inode *inode)
{
        struct fuse_inode *fi = get_fuse_inode(inode);

        mutex_destroy(&fi->mutex);
        kfree(fi->forget);
#ifdef CONFIG_FUSE_DAX
        kfree(fi->dax);
#endif
        kmem_cache_free(fuse_inode_cachep, fi);
}

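/*
 * Super operation: on eviction, truncate the page cache and, while the
 * superblock is still active, queue a FORGET for the remaining nlookup
 * count so the server can drop the node.
 */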
static void fuse_evict_inode(struct inode *inode)
{
        struct fuse_inode *fi = get_fuse_inode(inode);

        truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        if (inode->i_sb->s_flags & SB_ACTIVE) {
                struct fuse_conn *fc = get_fuse_conn(inode);

                if (FUSE_IS_DAX(inode))
                        fuse_dax_inode_cleanup(inode);
                if (fi->nlookup) {
                        fuse_queue_forget(fc, fi->forget, fi->nodeid,
                                          fi->nlookup);
                        fi->forget = NULL;
                }
        }
        if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
                WARN_ON(!list_empty(&fi->write_files));
                WARN_ON(!list_empty(&fi->queued_writes));
        }
}

static int fuse_reconfigure(struct fs_context *fsc)
{
        struct super_block *sb = fsc->root->d_sb;

        sync_filesystem(sb);
        if (fsc->sb_flags & SB_MANDLOCK)
                return -EINVAL;

        return 0;
}

/*
 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
 * so that it will fit.
 */
static ino_t fuse_squash_ino(u64 ino64)
{
        ino_t ino = (ino_t) ino64;
        if (sizeof(ino_t) < sizeof(u64))
                ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
        return ino;
}

void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
                                   u64 attr_valid)
{
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);

        lockdep_assert_held(&fi->lock);

        fi->attr_version = atomic64_inc_return(&fc->attr_version);
        fi->i_time = attr_valid;
        WRITE_ONCE(fi->inval_mask, 0);

        inode->i_ino     = fuse_squash_ino(attr->ino);
        inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
        set_nlink(inode, attr->nlink);
        inode->i_uid     = make_kuid(fc->user_ns, attr->uid);
        inode->i_gid     = make_kgid(fc->user_ns, attr->gid);
        inode->i_blocks  = attr->blocks;
        inode->i_atime.tv_sec   = attr->atime;
        inode->i_atime.tv_nsec  = attr->atimensec;
        /* mtime from server may be stale due to local buffered write */
        if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) {
                inode->i_mtime.tv_sec   = attr->mtime;
                inode->i_mtime.tv_nsec  = attr->mtimensec;
                inode->i_ctime.tv_sec   = attr->ctime;
                inode->i_ctime.tv_nsec  = attr->ctimensec;
        }

        if (attr->blksize != 0)
                inode->i_blkbits = ilog2(attr->blksize);
        else
                inode->i_blkbits = inode->i_sb->s_blocksize_bits;

        /*
         * Don't set the sticky bit in i_mode, unless we want the VFS
         * to check permissions.  This prevents failures due to the
         * check in may_delete().
         */
        fi->orig_i_mode = inode->i_mode;
        if (!fc->default_permissions)
                inode->i_mode &= ~S_ISVTX;

        fi->orig_ino = attr->ino;

        /*
         * We are refreshing inode data and it is possible that another
         * client set suid/sgid or security.capability xattr. So clear
         * S_NOSEC. Ideally, we could have cleared it only if suid/sgid
         * was set or if security.capability xattr was set. But we don't
         * know if security.capability has been set or not. So clear it
         * anyway. It's less efficient but should be safe.
         */
        inode->i_flags &= ~S_NOSEC;
}

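/*
 * Update the inode from a fuse_attr reply unless the reply is older than
 * the current attr_version or a size change is in flight
 * (FUSE_I_SIZE_UNSTABLE).  For regular files the page cache is
 * invalidated when the size or mtime changed behind our back.
 */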
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
                            u64 attr_valid, u64 attr_version)
{
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
        bool is_wb = fc->writeback_cache;
        loff_t oldsize;
        struct timespec64 old_mtime;

        spin_lock(&fi->lock);
        if ((attr_version != 0 && fi->attr_version > attr_version) ||
            test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
                spin_unlock(&fi->lock);
                return;
        }

        old_mtime = inode->i_mtime;
        fuse_change_attributes_common(inode, attr, attr_valid);

        oldsize = inode->i_size;
        /*
         * When writeback_cache is enabled, cached writes beyond EOF extend
         * the local i_size without keeping the userspace server in sync.
         * So attr->size coming from the server can be stale and cannot be
         * trusted.
         */
        if (!is_wb || !S_ISREG(inode->i_mode))
                i_size_write(inode, attr->size);
        spin_unlock(&fi->lock);

        if (!is_wb && S_ISREG(inode->i_mode)) {
                bool inval = false;

                if (oldsize != attr->size) {
                        truncate_pagecache(inode, attr->size);
                        if (!fc->explicit_inval_data)
                                inval = true;
                } else if (fc->auto_inval_data) {
                        struct timespec64 new_mtime = {
                                .tv_sec = attr->mtime,
                                .tv_nsec = attr->mtimensec,
                        };

                        /*
                         * Auto inval mode also checks and invalidates if mtime
                         * has changed.
                         */
                        if (!timespec64_equal(&old_mtime, &new_mtime))
                                inval = true;
                }

                if (inval)
                        invalidate_inode_pages2(inode->i_mapping);
        }
}

static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
        inode->i_mode = attr->mode & S_IFMT;
        inode->i_size = attr->size;
        inode->i_mtime.tv_sec  = attr->mtime;
        inode->i_mtime.tv_nsec = attr->mtimensec;
        inode->i_ctime.tv_sec  = attr->ctime;
        inode->i_ctime.tv_nsec = attr->ctimensec;
        if (S_ISREG(inode->i_mode)) {
                fuse_init_common(inode);
                fuse_init_file_inode(inode);
        } else if (S_ISDIR(inode->i_mode))
                fuse_init_dir(inode);
        else if (S_ISLNK(inode->i_mode))
                fuse_init_symlink(inode);
        else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
                 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
                fuse_init_common(inode);
                init_special_inode(inode, inode->i_mode,
                                   new_decode_dev(attr->rdev));
        } else
                BUG();
}

static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
{
        u64 nodeid = *(u64 *) _nodeidp;
        if (get_node_id(inode) == nodeid)
                return 1;
        else
                return 0;
}

static int fuse_inode_set(struct inode *inode, void *_nodeidp)
{
        u64 nodeid = *(u64 *) _nodeidp;
        get_fuse_inode(inode)->nodeid = nodeid;
        return 0;
}

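/*
 * Look up or create the inode for @nodeid.  Submount roots bypass the
 * inode hash (see the comment below); hashed inodes that turn out to be
 * stale are marked bad and re-created.  Each call accounts one lookup in
 * fi->nlookup.
 */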
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
                        int generation, struct fuse_attr *attr,
                        u64 attr_valid, u64 attr_version)
{
        struct inode *inode;
        struct fuse_inode *fi;
        struct fuse_conn *fc = get_fuse_conn_super(sb);

        /*
         * Auto mount points get their node id from the submount root, which is
         * not a unique identifier within this filesystem.
         *
         * To avoid conflicts, do not place submount points into the inode hash
         * table.
         */
        if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
            S_ISDIR(attr->mode)) {
                inode = new_inode(sb);
                if (!inode)
                        return NULL;

                fuse_init_inode(inode, attr);
                get_fuse_inode(inode)->nodeid = nodeid;
                inode->i_flags |= S_AUTOMOUNT;
                goto done;
        }

retry:
        inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
        if (!inode)
                return NULL;

        if ((inode->i_state & I_NEW)) {
                inode->i_flags |= S_NOATIME;
                if (!fc->writeback_cache || !S_ISREG(attr->mode))
                        inode->i_flags |= S_NOCMTIME;
                inode->i_generation = generation;
                fuse_init_inode(inode, attr);
                unlock_new_inode(inode);
        } else if (fuse_stale_inode(inode, generation, attr)) {
                /* nodeid was reused, any I/O on the old inode should fail */
                fuse_make_bad(inode);
                iput(inode);
                goto retry;
        }
done:
        fi = get_fuse_inode(inode);
        spin_lock(&fi->lock);
        fi->nlookup++;
        spin_unlock(&fi->lock);
        fuse_change_attributes(inode, attr, attr_valid, attr_version);

        return inode;
}

struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
                           struct fuse_mount **fm)
{
        struct fuse_mount *fm_iter;
        struct inode *inode;

        WARN_ON(!rwsem_is_locked(&fc->killsb));
        list_for_each_entry(fm_iter, &fc->mounts, fc_entry) {
                if (!fm_iter->sb)
                        continue;

                inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid);
                if (inode) {
                        if (fm)
                                *fm = fm_iter;
                        return inode;
                }
        }

        return NULL;
}

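/*
 * Handle a FUSE_NOTIFY_INVAL_INODE notification from the server:
 * invalidate cached attributes and, for offset >= 0, the given page cache
 * range (len <= 0 means "to the end of the file").
 */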
int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
                             loff_t offset, loff_t len)
{
        struct fuse_inode *fi;
        struct inode *inode;
        pgoff_t pg_start;
        pgoff_t pg_end;

        inode = fuse_ilookup(fc, nodeid, NULL);
        if (!inode)
                return -ENOENT;

        fi = get_fuse_inode(inode);
        spin_lock(&fi->lock);
        fi->attr_version = atomic64_inc_return(&fc->attr_version);
        spin_unlock(&fi->lock);

        fuse_invalidate_attr(inode);
        forget_all_cached_acls(inode);
        if (offset >= 0) {
                pg_start = offset >> PAGE_SHIFT;
                if (len <= 0)
                        pg_end = -1;
                else
                        pg_end = (offset + len - 1) >> PAGE_SHIFT;
                invalidate_inode_pages2_range(inode->i_mapping,
                                              pg_start, pg_end);
        }
        iput(inode);
        return 0;
}

bool fuse_lock_inode(struct inode *inode)
{
        bool locked = false;

        if (!get_fuse_conn(inode)->parallel_dirops) {
                mutex_lock(&get_fuse_inode(inode)->mutex);
                locked = true;
        }

        return locked;
}

void fuse_unlock_inode(struct inode *inode, bool locked)
{
        if (locked)
                mutex_unlock(&get_fuse_inode(inode)->mutex);
}

static void fuse_umount_begin(struct super_block *sb)
{
        struct fuse_conn *fc = get_fuse_conn_super(sb);

        if (!fc->no_force_umount)
                fuse_abort_conn(fc);
}

static void fuse_send_destroy(struct fuse_mount *fm)
{
        if (fm->fc->conn_init) {
                FUSE_ARGS(args);

                args.opcode = FUSE_DESTROY;
                args.force = true;
                args.nocreds = true;
                fuse_simple_request(fm, &args);
        }
}

static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
{
        stbuf->f_type    = FUSE_SUPER_MAGIC;
        stbuf->f_bsize   = attr->bsize;
        stbuf->f_frsize  = attr->frsize;
        stbuf->f_blocks  = attr->blocks;
        stbuf->f_bfree   = attr->bfree;
        stbuf->f_bavail  = attr->bavail;
        stbuf->f_files   = attr->files;
        stbuf->f_ffree   = attr->ffree;
        stbuf->f_namelen = attr->namelen;
        /* fsid is left zero */
}

static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
{
        struct super_block *sb = dentry->d_sb;
        struct fuse_mount *fm = get_fuse_mount_super(sb);
        FUSE_ARGS(args);
        struct fuse_statfs_out outarg;
        int err;

        if (!fuse_allow_current_process(fm->fc)) {
                buf->f_type = FUSE_SUPER_MAGIC;
                return 0;
        }

        memset(&outarg, 0, sizeof(outarg));
        args.in_numargs = 0;
        args.opcode = FUSE_STATFS;
        args.nodeid = get_node_id(d_inode(dentry));
        args.out_numargs = 1;
        args.out_args[0].size = sizeof(outarg);
        args.out_args[0].value = &outarg;
        err = fuse_simple_request(fm, &args);
        if (!err)
                convert_fuse_statfs(buf, &outarg.st);
        return err;
}

static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
{
        struct fuse_sync_bucket *bucket;

        bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
        if (bucket) {
                init_waitqueue_head(&bucket->waitq);
                /* Initial active count */
                atomic_set(&bucket->count, 1);
        }
        return bucket;
}

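/*
 * Wait for all write requests accounted in the current sync bucket to
 * complete, installing a fresh bucket for writes that start afterwards.
 */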
static void fuse_sync_fs_writes(struct fuse_conn *fc)
{
        struct fuse_sync_bucket *bucket, *new_bucket;
        int count;

        new_bucket = fuse_sync_bucket_alloc();
        spin_lock(&fc->lock);
        bucket = rcu_dereference_protected(fc->curr_bucket, 1);
        count = atomic_read(&bucket->count);
        WARN_ON(count < 1);
        /* No outstanding writes? */
        if (count == 1) {
                spin_unlock(&fc->lock);
                kfree(new_bucket);
                return;
        }

        /*
         * Completion of new bucket depends on completion of this bucket, so add
         * one more count.
         */
        atomic_inc(&new_bucket->count);
        rcu_assign_pointer(fc->curr_bucket, new_bucket);
        spin_unlock(&fc->lock);
        /*
         * Drop initial active count.  At this point if all writes in this and
         * ancestor buckets complete, the count will go to zero and this task
         * will be woken up.
         */
        atomic_dec(&bucket->count);

        wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);

        /* Drop temp count on descendant bucket */
        fuse_sync_bucket_dec(new_bucket);
        kfree_rcu(bucket, rcu);
}

static int fuse_sync_fs(struct super_block *sb, int wait)
{
        struct fuse_mount *fm = get_fuse_mount_super(sb);
        struct fuse_conn *fc = fm->fc;
        struct fuse_syncfs_in inarg;
        FUSE_ARGS(args);
        int err;

        /*
         * Userspace cannot handle the wait == 0 case.  Avoid a
         * gratuitous roundtrip.
         */
        if (!wait)
                return 0;

        /* The filesystem is being unmounted.  Nothing to do. */
        if (!sb->s_root)
                return 0;

        if (!fc->sync_fs)
                return 0;

        fuse_sync_fs_writes(fc);

        memset(&inarg, 0, sizeof(inarg));
        args.in_numargs = 1;
        args.in_args[0].size = sizeof(inarg);
        args.in_args[0].value = &inarg;
        args.opcode = FUSE_SYNCFS;
        args.nodeid = get_node_id(sb->s_root->d_inode);
        args.out_numargs = 0;

        err = fuse_simple_request(fm, &args);
        if (err == -ENOSYS) {
                fc->sync_fs = 0;
                err = 0;
        }

        return err;
}

enum {
        OPT_SOURCE,
        OPT_SUBTYPE,
        OPT_FD,
        OPT_ROOTMODE,
        OPT_USER_ID,
        OPT_GROUP_ID,
        OPT_DEFAULT_PERMISSIONS,
        OPT_ALLOW_OTHER,
        OPT_MAX_READ,
        OPT_BLKSIZE,
        OPT_ERR
};

static const struct fs_parameter_spec fuse_fs_parameters[] = {
        fsparam_string  ("source",              OPT_SOURCE),
        fsparam_u32     ("fd",                  OPT_FD),
        fsparam_u32oct  ("rootmode",            OPT_ROOTMODE),
        fsparam_u32     ("user_id",             OPT_USER_ID),
        fsparam_u32     ("group_id",            OPT_GROUP_ID),
        fsparam_flag    ("default_permissions", OPT_DEFAULT_PERMISSIONS),
        fsparam_flag    ("allow_other",         OPT_ALLOW_OTHER),
        fsparam_u32     ("max_read",            OPT_MAX_READ),
        fsparam_u32     ("blksize",             OPT_BLKSIZE),
        fsparam_string  ("subtype",             OPT_SUBTYPE),
        {}
};

static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
{
        struct fs_parse_result result;
        struct fuse_fs_context *ctx = fsc->fs_private;
        int opt;

        if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
                /*
                 * Ignore options coming from mount(MS_REMOUNT) for backward
                 * compatibility.
                 */
                if (fsc->oldapi)
                        return 0;

                return invalfc(fsc, "No changes allowed in reconfigure");
        }

        opt = fs_parse(fsc, fuse_fs_parameters, param, &result);
        if (opt < 0)
                return opt;

        switch (opt) {
        case OPT_SOURCE:
                if (fsc->source)
                        return invalfc(fsc, "Multiple sources specified");
                fsc->source = param->string;
                param->string = NULL;
                break;

        case OPT_SUBTYPE:
                if (ctx->subtype)
                        return invalfc(fsc, "Multiple subtypes specified");
                ctx->subtype = param->string;
                param->string = NULL;
                return 0;

        case OPT_FD:
                ctx->fd = result.uint_32;
                ctx->fd_present = true;
                break;

        case OPT_ROOTMODE:
                if (!fuse_valid_type(result.uint_32))
                        return invalfc(fsc, "Invalid rootmode");
                ctx->rootmode = result.uint_32;
                ctx->rootmode_present = true;
                break;

        case OPT_USER_ID:
                ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
                if (!uid_valid(ctx->user_id))
                        return invalfc(fsc, "Invalid user_id");
                ctx->user_id_present = true;
                break;

        case OPT_GROUP_ID:
                ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
                if (!gid_valid(ctx->group_id))
                        return invalfc(fsc, "Invalid group_id");
                ctx->group_id_present = true;
                break;

        case OPT_DEFAULT_PERMISSIONS:
                ctx->default_permissions = true;
                break;

        case OPT_ALLOW_OTHER:
                ctx->allow_other = true;
                break;

        case OPT_MAX_READ:
                ctx->max_read = result.uint_32;
                break;

        case OPT_BLKSIZE:
                if (!ctx->is_bdev)
                        return invalfc(fsc, "blksize only supported for fuseblk");
                ctx->blksize = result.uint_32;
                break;

        default:
                return -EINVAL;
        }

        return 0;
}

static void fuse_free_fsc(struct fs_context *fsc)
{
        struct fuse_fs_context *ctx = fsc->fs_private;

        if (ctx) {
                kfree(ctx->subtype);
                kfree(ctx);
        }
}

static int fuse_show_options(struct seq_file *m, struct dentry *root)
{
        struct super_block *sb = root->d_sb;
        struct fuse_conn *fc = get_fuse_conn_super(sb);

        if (fc->legacy_opts_show) {
                seq_printf(m, ",user_id=%u",
                           from_kuid_munged(fc->user_ns, fc->user_id));
                seq_printf(m, ",group_id=%u",
                           from_kgid_munged(fc->user_ns, fc->group_id));
                if (fc->default_permissions)
                        seq_puts(m, ",default_permissions");
                if (fc->allow_other)
                        seq_puts(m, ",allow_other");
                if (fc->max_read != ~0)
                        seq_printf(m, ",max_read=%u", fc->max_read);
                if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
                        seq_printf(m, ",blksize=%lu", sb->s_blocksize);
        }
#ifdef CONFIG_FUSE_DAX
        if (fc->dax)
                seq_puts(m, ",dax");
#endif

        return 0;
}

static void fuse_iqueue_init(struct fuse_iqueue *fiq,
                             const struct fuse_iqueue_ops *ops,
                             void *priv)
{
        memset(fiq, 0, sizeof(struct fuse_iqueue));
        spin_lock_init(&fiq->lock);
        init_waitqueue_head(&fiq->waitq);
        INIT_LIST_HEAD(&fiq->pending);
        INIT_LIST_HEAD(&fiq->interrupts);
        fiq->forget_list_tail = &fiq->forget_list_head;
        fiq->connected = 1;
        fiq->ops = ops;
        fiq->priv = priv;
}

static void fuse_pqueue_init(struct fuse_pqueue *fpq)
{
        unsigned int i;

        spin_lock_init(&fpq->lock);
        for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
                INIT_LIST_HEAD(&fpq->processing[i]);
        INIT_LIST_HEAD(&fpq->io);
        fpq->connected = 1;
}

void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
                    struct user_namespace *user_ns,
                    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
{
        memset(fc, 0, sizeof(*fc));
        spin_lock_init(&fc->lock);
        spin_lock_init(&fc->bg_lock);
        init_rwsem(&fc->killsb);
        refcount_set(&fc->count, 1);
        atomic_set(&fc->dev_count, 1);
        init_waitqueue_head(&fc->blocked_waitq);
        fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
        INIT_LIST_HEAD(&fc->bg_queue);
        INIT_LIST_HEAD(&fc->entry);
        INIT_LIST_HEAD(&fc->devices);
        atomic_set(&fc->num_waiting, 0);
        fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
        fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
        atomic64_set(&fc->khctr, 0);
        fc->polled_files = RB_ROOT;
        fc->blocked = 0;
        fc->initialized = 0;
        fc->connected = 1;
        atomic64_set(&fc->attr_version, 1);
        get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
        fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
        fc->user_ns = get_user_ns(user_ns);
        fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
        fc->max_pages_limit = FUSE_MAX_MAX_PAGES;

        INIT_LIST_HEAD(&fc->mounts);
        list_add(&fm->fc_entry, &fc->mounts);
        fm->fc = fc;
}
EXPORT_SYMBOL_GPL(fuse_conn_init);

void fuse_conn_put(struct fuse_conn *fc)
{
        if (refcount_dec_and_test(&fc->count)) {
                struct fuse_iqueue *fiq = &fc->iq;
                struct fuse_sync_bucket *bucket;

                if (IS_ENABLED(CONFIG_FUSE_DAX))
                        fuse_dax_conn_free(fc);
                if (fiq->ops->release)
                        fiq->ops->release(fiq);
                put_pid_ns(fc->pid_ns);
                put_user_ns(fc->user_ns);
                bucket = rcu_dereference_protected(fc->curr_bucket, 1);
                if (bucket) {
                        WARN_ON(atomic_read(&bucket->count) != 1);
                        kfree(bucket);
                }
                fc->release(fc);
        }
}
EXPORT_SYMBOL_GPL(fuse_conn_put);

struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
        refcount_inc(&fc->count);
        return fc;
}
EXPORT_SYMBOL_GPL(fuse_conn_get);

static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
{
        struct fuse_attr attr;
        memset(&attr, 0, sizeof(attr));

        attr.mode = mode;
        attr.ino = FUSE_ROOT_ID;
        attr.nlink = 1;
        return fuse_iget(sb, 1, 0, &attr, 0, 0);
}

struct fuse_inode_handle {
        u64 nodeid;
        u32 generation;
};

static struct dentry *fuse_get_dentry(struct super_block *sb,
                                      struct fuse_inode_handle *handle)
{
        struct fuse_conn *fc = get_fuse_conn_super(sb);
        struct inode *inode;
        struct dentry *entry;
        int err = -ESTALE;

        if (handle->nodeid == 0)
                goto out_err;

        inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
        if (!inode) {
                struct fuse_entry_out outarg;
                const struct qstr name = QSTR_INIT(".", 1);

                if (!fc->export_support)
                        goto out_err;

                err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
                                       &inode);
                if (err && err != -ENOENT)
                        goto out_err;
                if (err || !inode) {
                        err = -ESTALE;
                        goto out_err;
                }
                err = -EIO;
                if (get_node_id(inode) != handle->nodeid)
                        goto out_iput;
        }
        err = -ESTALE;
        if (inode->i_generation != handle->generation)
                goto out_iput;

        entry = d_obtain_alias(inode);
        if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
                fuse_invalidate_entry_cache(entry);

        return entry;

 out_iput:
        iput(inode);
 out_err:
        return ERR_PTR(err);
}

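/*
 * NFS export support: encode a file handle as { nodeid_hi, nodeid_lo,
 * generation }, optionally followed by the same triple for the parent.
 * The 0x81/0x82 return values are the fileid types decoded by
 * fuse_fh_to_dentry() and fuse_fh_to_parent() below.
 */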
static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
                           struct inode *parent)
{
        int len = parent ? 6 : 3;
        u64 nodeid;
        u32 generation;

        if (*max_len < len) {
                *max_len = len;
                return  FILEID_INVALID;
        }

        nodeid = get_fuse_inode(inode)->nodeid;
        generation = inode->i_generation;

        fh[0] = (u32)(nodeid >> 32);
        fh[1] = (u32)(nodeid & 0xffffffff);
        fh[2] = generation;

        if (parent) {
                nodeid = get_fuse_inode(parent)->nodeid;
                generation = parent->i_generation;

                fh[3] = (u32)(nodeid >> 32);
                fh[4] = (u32)(nodeid & 0xffffffff);
                fh[5] = generation;
        }

        *max_len = len;
        return parent ? 0x82 : 0x81;
}

static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
                struct fid *fid, int fh_len, int fh_type)
{
        struct fuse_inode_handle handle;

        if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
                return NULL;

        handle.nodeid = (u64) fid->raw[0] << 32;
        handle.nodeid |= (u64) fid->raw[1];
        handle.generation = fid->raw[2];
        return fuse_get_dentry(sb, &handle);
}

static struct dentry *fuse_fh_to_parent(struct super_block *sb,
                struct fid *fid, int fh_len, int fh_type)
{
        struct fuse_inode_handle parent;

        if (fh_type != 0x82 || fh_len < 6)
                return NULL;

        parent.nodeid = (u64) fid->raw[3] << 32;
        parent.nodeid |= (u64) fid->raw[4];
        parent.generation = fid->raw[5];
        return fuse_get_dentry(sb, &parent);
}

static struct dentry *fuse_get_parent(struct dentry *child)
{
        struct inode *child_inode = d_inode(child);
        struct fuse_conn *fc = get_fuse_conn(child_inode);
        struct inode *inode;
        struct dentry *parent;
        struct fuse_entry_out outarg;
        int err;

        if (!fc->export_support)
                return ERR_PTR(-ESTALE);

        err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
                               &dotdot_name, &outarg, &inode);
        if (err) {
                if (err == -ENOENT)
                        return ERR_PTR(-ESTALE);
                return ERR_PTR(err);
        }

        parent = d_obtain_alias(inode);
        if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
                fuse_invalidate_entry_cache(parent);

        return parent;
}

static const struct export_operations fuse_export_operations = {
        .fh_to_dentry   = fuse_fh_to_dentry,
        .fh_to_parent   = fuse_fh_to_parent,
        .encode_fh      = fuse_encode_fh,
        .get_parent     = fuse_get_parent,
};

static const struct super_operations fuse_super_operations = {
        .alloc_inode    = fuse_alloc_inode,
        .free_inode     = fuse_free_inode,
        .evict_inode    = fuse_evict_inode,
        .write_inode    = fuse_write_inode,
        .drop_inode     = generic_delete_inode,
        .umount_begin   = fuse_umount_begin,
        .statfs         = fuse_statfs,
        .sync_fs        = fuse_sync_fs,
        .show_options   = fuse_show_options,
};

static void sanitize_global_limit(unsigned *limit)
{
        /*
         * The default maximum number of async requests is calculated to consume
         * 1/2^13 of the total memory, assuming 392 bytes per request.
         */
        if (*limit == 0)
                *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;

        if (*limit >= 1 << 16)
                *limit = (1 << 16) - 1;
}

static int set_global_limit(const char *val, const struct kernel_param *kp)
{
        int rv;

        rv = param_set_uint(val, kp);
        if (rv)
                return rv;

        sanitize_global_limit((unsigned *)kp->arg);

        return 0;
}

static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
{
        int cap_sys_admin = capable(CAP_SYS_ADMIN);

        if (arg->minor < 13)
                return;

        sanitize_global_limit(&max_user_bgreq);
        sanitize_global_limit(&max_user_congthresh);

        spin_lock(&fc->bg_lock);
        if (arg->max_background) {
                fc->max_background = arg->max_background;

                if (!cap_sys_admin && fc->max_background > max_user_bgreq)
                        fc->max_background = max_user_bgreq;
        }
        if (arg->congestion_threshold) {
                fc->congestion_threshold = arg->congestion_threshold;

                if (!cap_sys_admin &&
                    fc->congestion_threshold > max_user_congthresh)
                        fc->congestion_threshold = max_user_congthresh;
        }
        spin_unlock(&fc->bg_lock);
}

struct fuse_init_args {
        struct fuse_args args;
        struct fuse_init_in in;
        struct fuse_init_out out;
};

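/*
 * Completion handler for the FUSE_INIT request: negotiate the protocol
 * minor version and feature flags with the server, then mark the
 * connection initialized (or failed) and wake up waiters.
 */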
static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
                               int error)
{
        struct fuse_conn *fc = fm->fc;
        struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
        struct fuse_init_out *arg = &ia->out;
        bool ok = true;

        if (error || arg->major != FUSE_KERNEL_VERSION)
                ok = false;
        else {
                unsigned long ra_pages;

                process_init_limits(fc, arg);

                if (arg->minor >= 6) {
                        ra_pages = arg->max_readahead / PAGE_SIZE;
                        if (arg->flags & FUSE_ASYNC_READ)
                                fc->async_read = 1;
                        if (!(arg->flags & FUSE_POSIX_LOCKS))
                                fc->no_lock = 1;
                        if (arg->minor >= 17) {
                                if (!(arg->flags & FUSE_FLOCK_LOCKS))
                                        fc->no_flock = 1;
                        } else {
                                if (!(arg->flags & FUSE_POSIX_LOCKS))
                                        fc->no_flock = 1;
                        }
                        if (arg->flags & FUSE_ATOMIC_O_TRUNC)
                                fc->atomic_o_trunc = 1;
                        if (arg->minor >= 9) {
                                /* LOOKUP has dependency on proto version */
                                if (arg->flags & FUSE_EXPORT_SUPPORT)
                                        fc->export_support = 1;
                        }
                        if (arg->flags & FUSE_BIG_WRITES)
                                fc->big_writes = 1;
                        if (arg->flags & FUSE_DONT_MASK)
                                fc->dont_mask = 1;
                        if (arg->flags & FUSE_AUTO_INVAL_DATA)
                                fc->auto_inval_data = 1;
                        else if (arg->flags & FUSE_EXPLICIT_INVAL_DATA)
                                fc->explicit_inval_data = 1;
                        if (arg->flags & FUSE_DO_READDIRPLUS) {
                                fc->do_readdirplus = 1;
                                if (arg->flags & FUSE_READDIRPLUS_AUTO)
                                        fc->readdirplus_auto = 1;
                        }
                        if (arg->flags & FUSE_ASYNC_DIO)
                                fc->async_dio = 1;
                        if (arg->flags & FUSE_WRITEBACK_CACHE)
                                fc->writeback_cache = 1;
                        if (arg->flags & FUSE_PARALLEL_DIROPS)
                                fc->parallel_dirops = 1;
                        if (arg->flags & FUSE_HANDLE_KILLPRIV)
                                fc->handle_killpriv = 1;
                        if (arg->time_gran && arg->time_gran <= 1000000000)
                                fm->sb->s_time_gran = arg->time_gran;
                        if ((arg->flags & FUSE_POSIX_ACL)) {
                                fc->default_permissions = 1;
                                fc->posix_acl = 1;
                                fm->sb->s_xattr = fuse_acl_xattr_handlers;
                        }
                        if (arg->flags & FUSE_CACHE_SYMLINKS)
                                fc->cache_symlinks = 1;
                        if (arg->flags & FUSE_ABORT_ERROR)
                                fc->abort_err = 1;
                        if (arg->flags & FUSE_MAX_PAGES) {
                                fc->max_pages =
                                        min_t(unsigned int, fc->max_pages_limit,
                                        max_t(unsigned int, arg->max_pages, 1));
                        }
                        if (IS_ENABLED(CONFIG_FUSE_DAX) &&
                            arg->flags & FUSE_MAP_ALIGNMENT &&
                            !fuse_dax_check_alignment(fc, arg->map_alignment)) {
                                ok = false;
                        }
                        if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) {
                                fc->handle_killpriv_v2 = 1;
                                fm->sb->s_flags |= SB_NOSEC;
                        }
                        if (arg->flags & FUSE_SETXATTR_EXT)
                                fc->setxattr_ext = 1;
                } else {
                        ra_pages = fc->max_read / PAGE_SIZE;
                        fc->no_lock = 1;
                        fc->no_flock = 1;
                }

                fm->sb->s_bdi->ra_pages =
                                min(fm->sb->s_bdi->ra_pages, ra_pages);
                fc->minor = arg->minor;
                fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
                fc->max_write = max_t(unsigned, 4096, fc->max_write);
                fc->conn_init = 1;
        }
        kfree(ia);

        if (!ok) {
                fc->conn_init = 0;
                fc->conn_error = 1;
        }

        fuse_set_initialized(fc);
        wake_up_all(&fc->blocked_waitq);
}

void fuse_send_init(struct fuse_mount *fm)
{
        struct fuse_init_args *ia;

        ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);

        ia->in.major = FUSE_KERNEL_VERSION;
        ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
        ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE;
        ia->in.flags |=
                FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
                FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
                FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
                FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
                FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
                FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
                FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
                FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
                FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
                FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT;
#ifdef CONFIG_FUSE_DAX
        if (fm->fc->dax)
                ia->in.flags |= FUSE_MAP_ALIGNMENT;
#endif
        if (fm->fc->auto_submounts)
                ia->in.flags |= FUSE_SUBMOUNTS;

        ia->args.opcode = FUSE_INIT;
        ia->args.in_numargs = 1;
        ia->args.in_args[0].size = sizeof(ia->in);
        ia->args.in_args[0].value = &ia->in;
        ia->args.out_numargs = 1;
        /* Variable length argument used for backward compatibility
           with interface version < 7.5.  Rest of init_out is zeroed
           by do_get_request(), so a short reply is not a problem */
        ia->args.out_argvar = true;
        ia->args.out_args[0].size = sizeof(ia->out);
        ia->args.out_args[0].value = &ia->out;
        ia->args.force = true;
        ia->args.nocreds = true;
        ia->args.end = process_init_reply;

        if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
                process_init_reply(fm, &ia->args, -ENOTCONN);
}
EXPORT_SYMBOL_GPL(fuse_send_init);

void fuse_free_conn(struct fuse_conn *fc)
{
        WARN_ON(!list_empty(&fc->devices));
        kfree_rcu(fc, rcu);
}
EXPORT_SYMBOL_GPL(fuse_free_conn);

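/*
 * Set up a private backing_dev_info for this connection; fuse does its
 * own writeback accounting and caps the bdi at 1% of the dirty threshold.
 */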
static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
        int err;
        char *suffix = "";

        if (sb->s_bdev) {
                suffix = "-fuseblk";
                /*
                 * sb->s_bdi points to the blkdev's bdi; however, we want to
                 * redirect it to our private bdi...
                 */
                bdi_put(sb->s_bdi);
                sb->s_bdi = &noop_backing_dev_info;
        }
        err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
                                   MINOR(fc->dev), suffix);
        if (err)
                return err;

        /* fuse does its own writeback accounting */
        sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
        sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;

        /*
         * For a single fuse filesystem use max 1% of dirty +
         * writeback threshold.
         *
         * This gives about 1M of write buffer for memory maps on a
         * machine with 1G and 10% dirty_ratio, which should be more
         * than enough.
         *
         * Privileged users can raise it by writing to
         *
         *    /sys/class/bdi/<bdi>/max_ratio
         */
        bdi_set_max_ratio(sb->s_bdi, 1);

        return 0;
}

struct fuse_dev *fuse_dev_alloc(void)
{
        struct fuse_dev *fud;
        struct list_head *pq;

        fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
        if (!fud)
                return NULL;

        pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
        if (!pq) {
                kfree(fud);
                return NULL;
        }

        fud->pq.processing = pq;
        fuse_pqueue_init(&fud->pq);

        return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc);

void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
{
        fud->fc = fuse_conn_get(fc);
        spin_lock(&fc->lock);
        list_add_tail(&fud->entry, &fc->devices);
        spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_dev_install);

struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
{
        struct fuse_dev *fud;

        fud = fuse_dev_alloc();
        if (!fud)
                return NULL;

        fuse_dev_install(fud, fc);
        return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);

void fuse_dev_free(struct fuse_dev *fud)
{
        struct fuse_conn *fc = fud->fc;

        if (fc) {
                spin_lock(&fc->lock);
                list_del(&fud->entry);
                spin_unlock(&fc->lock);

                fuse_conn_put(fc);
        }
        kfree(fud->pq.processing);
        kfree(fud);
}
EXPORT_SYMBOL_GPL(fuse_dev_free);

static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
                                      const struct fuse_inode *fi)
{
        *attr = (struct fuse_attr){
                .ino            = fi->inode.i_ino,
                .size           = fi->inode.i_size,
                .blocks         = fi->inode.i_blocks,
                .atime          = fi->inode.i_atime.tv_sec,
                .mtime          = fi->inode.i_mtime.tv_sec,
                .ctime          = fi->inode.i_ctime.tv_sec,
                .atimensec      = fi->inode.i_atime.tv_nsec,
                .mtimensec      = fi->inode.i_mtime.tv_nsec,
                .ctimensec      = fi->inode.i_ctime.tv_nsec,
                .mode           = fi->inode.i_mode,
                .nlink          = fi->inode.i_nlink,
                .uid            = fi->inode.i_uid.val,
                .gid            = fi->inode.i_gid.val,
                .rdev           = fi->inode.i_rdev,
                .blksize        = 1u << fi->inode.i_blkbits,
        };
}

static void fuse_sb_defaults(struct super_block *sb)
{
        sb->s_magic = FUSE_SUPER_MAGIC;
        sb->s_op = &fuse_super_operations;
        sb->s_xattr = fuse_xattr_handlers;
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        sb->s_time_gran = 1;
        sb->s_export_op = &fuse_export_operations;
        sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
        if (sb->s_user_ns != &init_user_ns)
                sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
        sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);

        /*
         * If we are not in the initial user namespace, POSIX ACLs must be
         * translated.
         */
        if (sb->s_user_ns != &init_user_ns)
                sb->s_xattr = fuse_no_acl_xattr_handlers;
}

static int fuse_fill_super_submount(struct super_block *sb,
                                    struct fuse_inode *parent_fi)
{
        struct fuse_mount *fm = get_fuse_mount_super(sb);
        struct super_block *parent_sb = parent_fi->inode.i_sb;
        struct fuse_attr root_attr;
        struct inode *root;

        fuse_sb_defaults(sb);
        fm->sb = sb;

        WARN_ON(sb->s_bdi != &noop_backing_dev_info);
        sb->s_bdi = bdi_get(parent_sb->s_bdi);

        sb->s_xattr = parent_sb->s_xattr;
        sb->s_time_gran = parent_sb->s_time_gran;
        sb->s_blocksize = parent_sb->s_blocksize;
        sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
        sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL);
        if (parent_sb->s_subtype && !sb->s_subtype)
                return -ENOMEM;

        fuse_fill_attr_from_inode(&root_attr, parent_fi);
        root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
        /*
         * This inode is just a duplicate, so it is not looked up and
         * its nlookup should not be incremented.  fuse_iget() does
         * that, though, so undo it here.
         */
        get_fuse_inode(root)->nlookup--;
        sb->s_d_op = &fuse_dentry_operations;
        sb->s_root = d_make_root(root);
        if (!sb->s_root)
                return -ENOMEM;

        return 0;
}

/* Filesystem context private data holds the FUSE inode of the mount point */
static int fuse_get_tree_submount(struct fs_context *fsc)
{
        struct fuse_mount *fm;
        struct fuse_inode *mp_fi = fsc->fs_private;
        struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode);
        struct super_block *sb;
        int err;

        fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
        if (!fm)
                return -ENOMEM;

        fm->fc = fuse_conn_get(fc);
        fsc->s_fs_info = fm;
        sb = sget_fc(fsc, NULL, set_anon_super_fc);
        if (fsc->s_fs_info)
                fuse_mount_destroy(fm);
        if (IS_ERR(sb))
                return PTR_ERR(sb);

        /* Initialize superblock, making @mp_fi its root */
        err = fuse_fill_super_submount(sb, mp_fi);
        if (err) {
                deactivate_locked_super(sb);
                return err;
        }

        down_write(&fc->killsb);
        list_add_tail(&fm->fc_entry, &fc->mounts);
        up_write(&fc->killsb);

        sb->s_flags |= SB_ACTIVE;
        fsc->root = dget(sb->s_root);

        return 0;
}

static const struct fs_context_operations fuse_context_submount_ops = {
        .get_tree       = fuse_get_tree_submount,
};

int fuse_init_fs_context_submount(struct fs_context *fsc)
{
        fsc->ops = &fuse_context_submount_ops;
        return 0;
}
EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);

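/*
 * Common superblock setup used by the regular fuse/fuseblk mount path and
 * by other fuse-based transports (e.g. virtiofs): apply the parsed
 * context, allocate and install a fuse_dev when ctx->fudptr is set,
 * initialize the bdi, create the root dentry and register the connection
 * on fuse_conn_list.
 */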
1454int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
1455{
1456        struct fuse_dev *fud = NULL;
1457        struct fuse_mount *fm = get_fuse_mount_super(sb);
1458        struct fuse_conn *fc = fm->fc;
1459        struct inode *root;
1460        struct dentry *root_dentry;
1461        int err;
1462
1463        err = -EINVAL;
1464        if (sb->s_flags & SB_MANDLOCK)
1465                goto err;
1466
1467        rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
1468        fuse_sb_defaults(sb);
1469
1470        if (ctx->is_bdev) {
1471#ifdef CONFIG_BLOCK
1472                err = -EINVAL;
1473                if (!sb_set_blocksize(sb, ctx->blksize))
1474                        goto err;
1475#endif
1476        } else {
1477                sb->s_blocksize = PAGE_SIZE;
1478                sb->s_blocksize_bits = PAGE_SHIFT;
1479        }
1480
1481        sb->s_subtype = ctx->subtype;
1482        ctx->subtype = NULL;
1483        if (IS_ENABLED(CONFIG_FUSE_DAX)) {
1484                err = fuse_dax_conn_alloc(fc, ctx->dax_dev);
1485                if (err)
1486                        goto err;
1487        }
1488
1489        if (ctx->fudptr) {
1490                err = -ENOMEM;
1491                fud = fuse_dev_alloc_install(fc);
1492                if (!fud)
1493                        goto err_free_dax;
1494        }
1495
1496        fc->dev = sb->s_dev;
1497        fm->sb = sb;
1498        err = fuse_bdi_init(fc, sb);
1499        if (err)
1500                goto err_dev_free;
1501
1502        /* Handle umasking inside the fuse code */
1503        if (sb->s_flags & SB_POSIXACL)
1504                fc->dont_mask = 1;
1505        sb->s_flags |= SB_POSIXACL;
1506
1507        fc->default_permissions = ctx->default_permissions;
1508        fc->allow_other = ctx->allow_other;
1509        fc->user_id = ctx->user_id;
1510        fc->group_id = ctx->group_id;
1511        fc->legacy_opts_show = ctx->legacy_opts_show;
1512        fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
1513        fc->destroy = ctx->destroy;
1514        fc->no_control = ctx->no_control;
1515        fc->no_force_umount = ctx->no_force_umount;
1516
1517        err = -ENOMEM;
1518        root = fuse_get_root_inode(sb, ctx->rootmode);
1519        sb->s_d_op = &fuse_root_dentry_operations;
1520        root_dentry = d_make_root(root);
1521        if (!root_dentry)
1522                goto err_dev_free;
1523        /* Root dentry doesn't have .d_revalidate */
1524        sb->s_d_op = &fuse_dentry_operations;
1525
1526        mutex_lock(&fuse_mutex);
1527        err = -EINVAL;
1528        if (ctx->fudptr && *ctx->fudptr)
1529                goto err_unlock;
1530
1531        err = fuse_ctl_add_conn(fc);
1532        if (err)
1533                goto err_unlock;
1534
1535        list_add_tail(&fc->entry, &fuse_conn_list);
1536        sb->s_root = root_dentry;
1537        if (ctx->fudptr)
1538                *ctx->fudptr = fud;
1539        mutex_unlock(&fuse_mutex);
1540        return 0;
1541
1542 err_unlock:
1543        mutex_unlock(&fuse_mutex);
1544        dput(root_dentry);
1545 err_dev_free:
1546        if (fud)
1547                fuse_dev_free(fud);
1548 err_free_dax:
1549        if (IS_ENABLED(CONFIG_FUSE_DAX))
1550                fuse_dax_conn_free(fc);
1551 err:
1552        return err;
1553}
1554EXPORT_SYMBOL_GPL(fuse_fill_super_common);
1555
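/*
 * A regular "fuse" mount must supply fd=, rootmode=, user_id= and
 * group_id= in the mount data, and the fd must have been opened on
 * /dev/fuse from the same user namespace the mount is created in.
 * A userspace daemon typically does something like the following
 * (a rough sketch with made-up names, not code from any particular
 * library):
 *
 *	int fd = open("/dev/fuse", O_RDWR);
 *	char opts[64];
 *
 *	snprintf(opts, sizeof(opts),
 *		 "fd=%d,rootmode=40000,user_id=%d,group_id=%d",
 *		 fd, getuid(), getgid());
 *	mount("myfs", "/mnt/point", "fuse.myfs", 0, opts);
 *
 * after which it serves requests by reading and writing the fd.
 */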
1556static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
1557{
1558        struct fuse_fs_context *ctx = fsc->fs_private;
1559        int err;
1560
1561        if (!ctx->file || !ctx->rootmode_present ||
1562            !ctx->user_id_present || !ctx->group_id_present)
1563                return -EINVAL;
1564
1565        /*
1566         * Require mount to happen from the same user namespace which
1567         * opened /dev/fuse to prevent potential attacks.
1568         */
1569        if ((ctx->file->f_op != &fuse_dev_operations) ||
1570            (ctx->file->f_cred->user_ns != sb->s_user_ns))
1571                return -EINVAL;
1572        ctx->fudptr = &ctx->file->private_data;
1573
1574        err = fuse_fill_super_common(sb, ctx);
1575        if (err)
1576                return err;
1577        /* file->private_data shall be visible on all CPUs after this */
1578        smp_mb();
1579        fuse_send_init(get_fuse_mount_super(sb));
1580        return 0;
1581}
1582
1583/*
1584 * This is the path taken when the user supplied an already initialized fuse
1585 * device.  In this case, never create a new super if the old one is gone.
1586 */
1587static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc)
1588{
1589        return -ENOTCONN;
1590}
1591
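/*
 * Used as the sget_fc() test callback: an existing superblock matches if
 * it belongs to the fuse_conn stored in fsc->sget_key by fuse_get_tree().
 */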
1592static int fuse_test_super(struct super_block *sb, struct fs_context *fsc)
1593{
1595        return fsc->sget_key == get_fuse_conn_super(sb);
1596}
1597
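/*
 * Three ways to get a tree:
 *  - fuseblk mounts go through get_tree_bdev(), which finds the block
 *    device by name, so a dummy device fd is tolerated;
 *  - a device fd whose connection is already initialized attaches to the
 *    existing superblock via sget_fc(), never creating a new one;
 *  - everything else is a fresh nodev mount through fuse_fill_super().
 * On success the fuse_mount in fsc->s_fs_info is consumed by the new
 * superblock; if it is still set afterwards it was not used and is
 * destroyed here.
 */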
1598static int fuse_get_tree(struct fs_context *fsc)
1599{
1600        struct fuse_fs_context *ctx = fsc->fs_private;
1601        struct fuse_dev *fud;
1602        struct fuse_conn *fc;
1603        struct fuse_mount *fm;
1604        struct super_block *sb;
1605        int err;
1606
1607        fc = kmalloc(sizeof(*fc), GFP_KERNEL);
1608        if (!fc)
1609                return -ENOMEM;
1610
1611        fm = kzalloc(sizeof(*fm), GFP_KERNEL);
1612        if (!fm) {
1613                kfree(fc);
1614                return -ENOMEM;
1615        }
1616
1617        fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL);
1618        fc->release = fuse_free_conn;
1619
1620        fsc->s_fs_info = fm;
1621
1622        if (ctx->fd_present)
1623                ctx->file = fget(ctx->fd);
1624
1625        if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
1626                err = get_tree_bdev(fsc, fuse_fill_super);
1627                goto out;
1628        }
1629        /*
1630         * While a block device mount can be initialized with a dummy device fd
1631         * (the device is found by name), normal fuse mounts can't.
1632         */
1633        err = -EINVAL;
1634        if (!ctx->file)
1635                goto out;
1636
1637        /*
1638         * Allow creating a fuse mount with an already initialized fuse
1639         * connection
1640         */
1641        fud = READ_ONCE(ctx->file->private_data);
1642        if (ctx->file->f_op == &fuse_dev_operations && fud) {
1643                fsc->sget_key = fud->fc;
1644                sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
1645                err = PTR_ERR_OR_ZERO(sb);
1646                if (!IS_ERR(sb))
1647                        fsc->root = dget(sb->s_root);
1648        } else {
1649                err = get_tree_nodev(fsc, fuse_fill_super);
1650        }
1651out:
1652        if (fsc->s_fs_info)
1653                fuse_mount_destroy(fm);
1654        if (ctx->file)
1655                fput(ctx->file);
1656        return err;
1657}
1658
1659static const struct fs_context_operations fuse_context_ops = {
1660        .free           = fuse_free_fsc,
1661        .parse_param    = fuse_parse_param,
1662        .reconfigure    = fuse_reconfigure,
1663        .get_tree       = fuse_get_tree,
1664};
1665
1666/*
1667 * Set up the filesystem mount context.
1668 */
1669static int fuse_init_fs_context(struct fs_context *fsc)
1670{
1671        struct fuse_fs_context *ctx;
1672
1673        ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
1674        if (!ctx)
1675                return -ENOMEM;
1676
1677        ctx->max_read = ~0;
1678        ctx->blksize = FUSE_DEFAULT_BLKSIZE;
1679        ctx->legacy_opts_show = true;
1680
1681#ifdef CONFIG_BLOCK
1682        if (fsc->fs_type == &fuseblk_fs_type) {
1683                ctx->is_bdev = true;
1684                ctx->destroy = true;
1685        }
1686#endif
1687
1688        fsc->fs_private = ctx;
1689        fsc->ops = &fuse_context_ops;
1690        return 0;
1691}
1692
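/*
 * Detach one mount from its connection.  fc->killsb is taken for writing
 * so this cannot race with code that holds it for reading while using the
 * connection's mount list, and the return value tells the caller whether
 * this was the last mount, i.e. whether the connection itself should now
 * be destroyed.
 */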
1693bool fuse_mount_remove(struct fuse_mount *fm)
1694{
1695        struct fuse_conn *fc = fm->fc;
1696        bool last = false;
1697
1698        down_write(&fc->killsb);
1699        list_del_init(&fm->fc_entry);
1700        if (list_empty(&fc->mounts))
1701                last = true;
1702        up_write(&fc->killsb);
1703
1704        return last;
1705}
1706EXPORT_SYMBOL_GPL(fuse_mount_remove);
1707
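/*
 * Full teardown of a connection: send FUSE_DESTROY to the server if this
 * mount type requested it (fc->destroy), abort the connection and wait
 * for all outstanding requests to finish, then drop it from the control
 * filesystem and the global connection list.
 */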
1708void fuse_conn_destroy(struct fuse_mount *fm)
1709{
1710        struct fuse_conn *fc = fm->fc;
1711
1712        if (fc->destroy)
1713                fuse_send_destroy(fm);
1714
1715        fuse_abort_conn(fc);
1716        fuse_wait_aborted(fc);
1717
1718        if (!list_empty(&fc->entry)) {
1719                mutex_lock(&fuse_mutex);
1720                list_del(&fc->entry);
1721                fuse_ctl_remove_conn(fc);
1722                mutex_unlock(&fuse_mutex);
1723        }
1724}
1725EXPORT_SYMBOL_GPL(fuse_conn_destroy);
1726
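/*
 * Called from ->kill_sb.  sb->s_root is only set once
 * fuse_fill_super_common() has fully succeeded, so a partially set up
 * superblock skips the mount/connection teardown and only the generic
 * kill plus fuse_mount_destroy() run.
 */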
1727static void fuse_sb_destroy(struct super_block *sb)
1728{
1729        struct fuse_mount *fm = get_fuse_mount_super(sb);
1730        bool last;
1731
1732        if (sb->s_root) {
1733                last = fuse_mount_remove(fm);
1734                if (last)
1735                        fuse_conn_destroy(fm);
1736        }
1737}
1738
1739void fuse_mount_destroy(struct fuse_mount *fm)
1740{
1741        fuse_conn_put(fm->fc);
1742        kfree(fm);
1743}
1744EXPORT_SYMBOL(fuse_mount_destroy);
1745
1746static void fuse_kill_sb_anon(struct super_block *sb)
1747{
1748        fuse_sb_destroy(sb);
1749        kill_anon_super(sb);
1750        fuse_mount_destroy(get_fuse_mount_super(sb));
1751}
1752
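/*
 * FS_HAS_SUBTYPE lets userspace mount with a subtype, e.g.
 * "mount -t fuse.sshfs ...", which then shows up in /proc/mounts;
 * FS_USERNS_MOUNT additionally allows mounting plain fuse from inside
 * unprivileged user namespaces.
 */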
1753static struct file_system_type fuse_fs_type = {
1754        .owner          = THIS_MODULE,
1755        .name           = "fuse",
1756        .fs_flags       = FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
1757        .init_fs_context = fuse_init_fs_context,
1758        .parameters     = fuse_fs_parameters,
1759        .kill_sb        = fuse_kill_sb_anon,
1760};
1761MODULE_ALIAS_FS("fuse");
1762
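/*
 * The block-device flavour: fuseblk requires a real device node
 * (FS_REQUIRES_DEV) and, unlike plain fuse, is not mountable from
 * unprivileged user namespaces.  Servers backed by a block device
 * (ntfs-3g, for example) use this type.  The !CONFIG_BLOCK stubs below
 * keep fuse_fs_init() free of #ifdefs.
 */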
1763#ifdef CONFIG_BLOCK
1764static void fuse_kill_sb_blk(struct super_block *sb)
1765{
1766        fuse_sb_destroy(sb);
1767        kill_block_super(sb);
1768        fuse_mount_destroy(get_fuse_mount_super(sb));
1769}
1770
1771static struct file_system_type fuseblk_fs_type = {
1772        .owner          = THIS_MODULE,
1773        .name           = "fuseblk",
1774        .init_fs_context = fuse_init_fs_context,
1775        .parameters     = fuse_fs_parameters,
1776        .kill_sb        = fuse_kill_sb_blk,
1777        .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
1778};
1779MODULE_ALIAS_FS("fuseblk");
1780
1781static inline int register_fuseblk(void)
1782{
1783        return register_filesystem(&fuseblk_fs_type);
1784}
1785
1786static inline void unregister_fuseblk(void)
1787{
1788        unregister_filesystem(&fuseblk_fs_type);
1789}
1790#else
1791static inline int register_fuseblk(void)
1792{
1793        return 0;
1794}
1795
1796static inline void unregister_fuseblk(void)
1797{
1798}
1799#endif
1800
1801static void fuse_inode_init_once(void *foo)
1802{
1803        struct inode *inode = foo;
1804
1805        inode_init_once(inode);
1806}
1807
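/*
 * The inode cache is charged to the caller's memory cgroup (SLAB_ACCOUNT)
 * and accounted as reclaimable (SLAB_RECLAIM_ACCOUNT).  The constructor
 * runs once per slab object, when the slab page is created, so state that
 * must be reset on every allocation is handled at allocation time, not
 * here.
 */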
1808static int __init fuse_fs_init(void)
1809{
1810        int err;
1811
1812        fuse_inode_cachep = kmem_cache_create("fuse_inode",
1813                        sizeof(struct fuse_inode), 0,
1814                        SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
1815                        fuse_inode_init_once);
1816        err = -ENOMEM;
1817        if (!fuse_inode_cachep)
1818                goto out;
1819
1820        err = register_fuseblk();
1821        if (err)
1822                goto out2;
1823
1824        err = register_filesystem(&fuse_fs_type);
1825        if (err)
1826                goto out3;
1827
1828        return 0;
1829
1830 out3:
1831        unregister_fuseblk();
1832 out2:
1833        kmem_cache_destroy(fuse_inode_cachep);
1834 out:
1835        return err;
1836}
1837
1838static void fuse_fs_cleanup(void)
1839{
1840        unregister_filesystem(&fuse_fs_type);
1841        unregister_fuseblk();
1842
1843        /*
1844         * Make sure all delayed RCU-freed inodes are flushed before we
1845         * destroy the cache.
1846         */
1847        rcu_barrier();
1848        kmem_cache_destroy(fuse_inode_cachep);
1849}
1850
1851static struct kobject *fuse_kobj;
1852
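/*
 * Create /sys/fs/fuse/connections, the canonical mount point for the
 * fuse control filesystem, e.g.:
 *
 *	mount -t fusectl none /sys/fs/fuse/connections
 *
 * Each live connection then appears as a numbered directory containing
 * its waiting/abort/max_background style control files.
 */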
1853static int fuse_sysfs_init(void)
1854{
1855        int err;
1856
1857        fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
1858        if (!fuse_kobj) {
1859                err = -ENOMEM;
1860                goto out_err;
1861        }
1862
1863        err = sysfs_create_mount_point(fuse_kobj, "connections");
1864        if (err)
1865                goto out_fuse_unregister;
1866
1867        return 0;
1868
1869 out_fuse_unregister:
1870        kobject_put(fuse_kobj);
1871 out_err:
1872        return err;
1873}
1874
1875static void fuse_sysfs_cleanup(void)
1876{
1877        sysfs_remove_mount_point(fuse_kobj, "connections");
1878        kobject_put(fuse_kobj);
1879}
1880
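/*
 * Bring-up order: filesystem types, then the /dev/fuse device and request
 * machinery (fuse_dev_init()), then sysfs and the control filesystem;
 * failures unwind whatever has been set up so far, in reverse.  Finally
 * the global background-request limits get their defaults, which
 * sanitize_global_limit() scales to the amount of system RAM.
 * fuse_exit() below undoes all of this.
 */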
1881static int __init fuse_init(void)
1882{
1883        int res;
1884
1885        pr_info("init (API version %i.%i)\n",
1886                FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
1887
1888        INIT_LIST_HEAD(&fuse_conn_list);
1889        res = fuse_fs_init();
1890        if (res)
1891                goto err;
1892
1893        res = fuse_dev_init();
1894        if (res)
1895                goto err_fs_cleanup;
1896
1897        res = fuse_sysfs_init();
1898        if (res)
1899                goto err_dev_cleanup;
1900
1901        res = fuse_ctl_init();
1902        if (res)
1903                goto err_sysfs_cleanup;
1904
1905        sanitize_global_limit(&max_user_bgreq);
1906        sanitize_global_limit(&max_user_congthresh);
1907
1908        return 0;
1909
1910 err_sysfs_cleanup:
1911        fuse_sysfs_cleanup();
1912 err_dev_cleanup:
1913        fuse_dev_cleanup();
1914 err_fs_cleanup:
1915        fuse_fs_cleanup();
1916 err:
1917        return res;
1918}
1919
1920static void __exit fuse_exit(void)
1921{
1922        pr_debug("exit\n");
1923
1924        fuse_ctl_cleanup();
1925        fuse_sysfs_cleanup();
1926        fuse_fs_cleanup();
1927        fuse_dev_cleanup();
1928}
1929
1930module_init(fuse_init);
1931module_exit(fuse_exit);
1932