linux/fs/fuse/dir.c
<<
>>
Prefs
   1/*
   2  FUSE: Filesystem in Userspace
   3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
   4
   5  This program can be distributed under the terms of the GNU GPL.
   6  See the file COPYING.
   7*/
   8
   9#include "fuse_i.h"
  10
  11#include <linux/pagemap.h>
  12#include <linux/file.h>
  13#include <linux/fs_context.h>
  14#include <linux/sched.h>
  15#include <linux/namei.h>
  16#include <linux/slab.h>
  17#include <linux/xattr.h>
  18#include <linux/iversion.h>
  19#include <linux/posix_acl.h>
  20
  21static void fuse_advise_use_readdirplus(struct inode *dir)
  22{
  23        struct fuse_inode *fi = get_fuse_inode(dir);
  24
  25        set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
  26}
  27
  28#if BITS_PER_LONG >= 64
  29static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
  30{
  31        entry->d_fsdata = (void *) time;
  32}
  33
  34static inline u64 fuse_dentry_time(const struct dentry *entry)
  35{
  36        return (u64)entry->d_fsdata;
  37}
  38
  39#else
  40union fuse_dentry {
  41        u64 time;
  42        struct rcu_head rcu;
  43};
  44
  45static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
  46{
  47        ((union fuse_dentry *) dentry->d_fsdata)->time = time;
  48}
  49
  50static inline u64 fuse_dentry_time(const struct dentry *entry)
  51{
  52        return ((union fuse_dentry *) entry->d_fsdata)->time;
  53}
  54#endif
  55
  56static void fuse_dentry_settime(struct dentry *dentry, u64 time)
  57{
  58        struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
  59        bool delete = !time && fc->delete_stale;
  60        /*
  61         * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
  62         * Don't care about races, either way it's just an optimization
  63         */
  64        if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
  65            (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
  66                spin_lock(&dentry->d_lock);
  67                if (!delete)
  68                        dentry->d_flags &= ~DCACHE_OP_DELETE;
  69                else
  70                        dentry->d_flags |= DCACHE_OP_DELETE;
  71                spin_unlock(&dentry->d_lock);
  72        }
  73
  74        __fuse_dentry_settime(dentry, time);
  75}
  76
  77/*
   78 * FUSE caches dentries and attributes with separate timeouts.  The
   79 * time in jiffies until which the dentry/attributes are valid is stored in
   80 * dentry->d_fsdata and fuse_inode->i_time respectively.
  81 */
  82
  83/*
  84 * Calculate the time in jiffies until a dentry/attributes are valid
  85 */
  86static u64 time_to_jiffies(u64 sec, u32 nsec)
  87{
  88        if (sec || nsec) {
  89                struct timespec64 ts = {
  90                        sec,
  91                        min_t(u32, nsec, NSEC_PER_SEC - 1)
  92                };
  93
  94                return get_jiffies_64() + timespec64_to_jiffies(&ts);
  95        } else
  96                return 0;
  97}
  98
  99/*
 100 * Set dentry and possibly attribute timeouts from the lookup/mk*
 101 * replies
 102 */
 103void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
 104{
 105        fuse_dentry_settime(entry,
 106                time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
 107}
 108
 109static u64 attr_timeout(struct fuse_attr_out *o)
 110{
 111        return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
 112}
 113
 114u64 entry_attr_timeout(struct fuse_entry_out *o)
 115{
 116        return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
 117}
 118
 119static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
 120{
 121        set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
 122}
 123
 124/*
 125 * Mark the attributes as stale, so that at the next call to
 126 * ->getattr() they will be fetched from userspace
 127 */
 128void fuse_invalidate_attr(struct inode *inode)
 129{
 130        fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
 131}
 132
 133static void fuse_dir_changed(struct inode *dir)
 134{
 135        fuse_invalidate_attr(dir);
 136        inode_maybe_inc_iversion(dir, false);
 137}
 138
 139/**
 140 * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
 141 * atime is not used.
 142 */
 143void fuse_invalidate_atime(struct inode *inode)
 144{
 145        if (!IS_RDONLY(inode))
 146                fuse_invalidate_attr_mask(inode, STATX_ATIME);
 147}
 148
 149/*
 150 * Just mark the entry as stale, so that a next attempt to look it up
 151 * will result in a new lookup call to userspace
 152 *
 153 * This is called when a dentry is about to become negative and the
 154 * timeout is unknown (unlink, rmdir, rename and in some cases
 155 * lookup)
 156 */
 157void fuse_invalidate_entry_cache(struct dentry *entry)
 158{
 159        fuse_dentry_settime(entry, 0);
 160}
 161
 162/*
 163 * Same as fuse_invalidate_entry_cache(), but also try to remove the
 164 * dentry from the hash
 165 */
 166static void fuse_invalidate_entry(struct dentry *entry)
 167{
 168        d_invalidate(entry);
 169        fuse_invalidate_entry_cache(entry);
 170}
 171
 172static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
 173                             u64 nodeid, const struct qstr *name,
 174                             struct fuse_entry_out *outarg)
 175{
 176        memset(outarg, 0, sizeof(struct fuse_entry_out));
 177        args->opcode = FUSE_LOOKUP;
 178        args->nodeid = nodeid;
 179        args->in_numargs = 1;
 180        args->in_args[0].size = name->len + 1;
 181        args->in_args[0].value = name->name;
 182        args->out_numargs = 1;
 183        args->out_args[0].size = sizeof(struct fuse_entry_out);
 184        args->out_args[0].value = outarg;
 185}
 186
 187/*
 188 * Check whether the dentry is still valid
 189 *
 190 * If the entry validity timeout has expired and the dentry is
 191 * positive, try to redo the lookup.  If the lookup results in a
 192 * different inode, then let the VFS invalidate the dentry and redo
 193 * the lookup once more.  If the lookup results in the same inode,
 194 * then refresh the attributes, timeouts and mark the dentry valid.
 195 */
 196static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 197{
 198        struct inode *inode;
 199        struct dentry *parent;
 200        struct fuse_mount *fm;
 201        struct fuse_inode *fi;
 202        int ret;
 203
 204        inode = d_inode_rcu(entry);
 205        if (inode && fuse_is_bad(inode))
 206                goto invalid;
 207        else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
 208                 (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) {
 209                struct fuse_entry_out outarg;
 210                FUSE_ARGS(args);
 211                struct fuse_forget_link *forget;
 212                u64 attr_version;
 213
 214                /* For negative dentries, always do a fresh lookup */
 215                if (!inode)
 216                        goto invalid;
 217
 218                ret = -ECHILD;
 219                if (flags & LOOKUP_RCU)
 220                        goto out;
 221
 222                fm = get_fuse_mount(inode);
 223
 224                forget = fuse_alloc_forget();
 225                ret = -ENOMEM;
 226                if (!forget)
 227                        goto out;
 228
 229                attr_version = fuse_get_attr_version(fm->fc);
 230
 231                parent = dget_parent(entry);
 232                fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
 233                                 &entry->d_name, &outarg);
 234                ret = fuse_simple_request(fm, &args);
 235                dput(parent);
 236                /* Zero nodeid is same as -ENOENT */
 237                if (!ret && !outarg.nodeid)
 238                        ret = -ENOENT;
 239                if (!ret) {
 240                        fi = get_fuse_inode(inode);
 241                        if (outarg.nodeid != get_node_id(inode) ||
 242                            (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
 243                                fuse_queue_forget(fm->fc, forget,
 244                                                  outarg.nodeid, 1);
 245                                goto invalid;
 246                        }
 247                        spin_lock(&fi->lock);
 248                        fi->nlookup++;
 249                        spin_unlock(&fi->lock);
 250                }
 251                kfree(forget);
 252                if (ret == -ENOMEM)
 253                        goto out;
 254                if (ret || fuse_invalid_attr(&outarg.attr) ||
 255                    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
 256                        goto invalid;
 257
 258                forget_all_cached_acls(inode);
 259                fuse_change_attributes(inode, &outarg.attr,
 260                                       entry_attr_timeout(&outarg),
 261                                       attr_version);
 262                fuse_change_entry_timeout(entry, &outarg);
 263        } else if (inode) {
 264                fi = get_fuse_inode(inode);
 265                if (flags & LOOKUP_RCU) {
 266                        if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
 267                                return -ECHILD;
 268                } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
 269                        parent = dget_parent(entry);
 270                        fuse_advise_use_readdirplus(d_inode(parent));
 271                        dput(parent);
 272                }
 273        }
 274        ret = 1;
 275out:
 276        return ret;
 277
 278invalid:
 279        ret = 0;
 280        goto out;
 281}
 282
 283#if BITS_PER_LONG < 64
 284static int fuse_dentry_init(struct dentry *dentry)
 285{
 286        dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
 287                                   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
 288
 289        return dentry->d_fsdata ? 0 : -ENOMEM;
 290}
 291static void fuse_dentry_release(struct dentry *dentry)
 292{
 293        union fuse_dentry *fd = dentry->d_fsdata;
 294
 295        kfree_rcu(fd, rcu);
 296}
 297#endif
 298
 299static int fuse_dentry_delete(const struct dentry *dentry)
 300{
 301        return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
 302}
 303
 304/*
 305 * Create a fuse_mount object with a new superblock (with path->dentry
 306 * as the root), and return that mount so it can be auto-mounted on
 307 * @path.
 308 */
 309static struct vfsmount *fuse_dentry_automount(struct path *path)
 310{
 311        struct fs_context *fsc;
 312        struct vfsmount *mnt;
 313        struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
 314
 315        fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
 316        if (IS_ERR(fsc))
 317                return ERR_CAST(fsc);
 318
 319        /* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
 320        fsc->fs_private = mp_fi;
 321
 322        /* Create the submount */
 323        mnt = fc_mount(fsc);
 324        if (!IS_ERR(mnt))
 325                mntget(mnt);
 326
 327        put_fs_context(fsc);
 328        return mnt;
 329}
 330
 331const struct dentry_operations fuse_dentry_operations = {
 332        .d_revalidate   = fuse_dentry_revalidate,
 333        .d_delete       = fuse_dentry_delete,
 334#if BITS_PER_LONG < 64
 335        .d_init         = fuse_dentry_init,
 336        .d_release      = fuse_dentry_release,
 337#endif
 338        .d_automount    = fuse_dentry_automount,
 339};
 340
 341const struct dentry_operations fuse_root_dentry_operations = {
 342#if BITS_PER_LONG < 64
 343        .d_init         = fuse_dentry_init,
 344        .d_release      = fuse_dentry_release,
 345#endif
 346};
 347
 348int fuse_valid_type(int m)
 349{
 350        return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
 351                S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
 352}
 353
 354bool fuse_invalid_attr(struct fuse_attr *attr)
 355{
 356        return !fuse_valid_type(attr->mode) ||
 357                attr->size > LLONG_MAX;
 358}
 359
 360int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
 361                     struct fuse_entry_out *outarg, struct inode **inode)
 362{
 363        struct fuse_mount *fm = get_fuse_mount_super(sb);
 364        FUSE_ARGS(args);
 365        struct fuse_forget_link *forget;
 366        u64 attr_version;
 367        int err;
 368
 369        *inode = NULL;
 370        err = -ENAMETOOLONG;
 371        if (name->len > FUSE_NAME_MAX)
 372                goto out;
 373
 374
 375        forget = fuse_alloc_forget();
 376        err = -ENOMEM;
 377        if (!forget)
 378                goto out;
 379
 380        attr_version = fuse_get_attr_version(fm->fc);
 381
 382        fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
 383        err = fuse_simple_request(fm, &args);
 384        /* Zero nodeid is same as -ENOENT, but with valid timeout */
 385        if (err || !outarg->nodeid)
 386                goto out_put_forget;
 387
 388        err = -EIO;
 389        if (!outarg->nodeid)
 390                goto out_put_forget;
 391        if (fuse_invalid_attr(&outarg->attr))
 392                goto out_put_forget;
 393
 394        *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
 395                           &outarg->attr, entry_attr_timeout(outarg),
 396                           attr_version);
 397        err = -ENOMEM;
 398        if (!*inode) {
 399                fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
 400                goto out;
 401        }
 402        err = 0;
 403
 404 out_put_forget:
 405        kfree(forget);
 406 out:
 407        return err;
 408}
 409
 410static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 411                                  unsigned int flags)
 412{
 413        int err;
 414        struct fuse_entry_out outarg;
 415        struct inode *inode;
 416        struct dentry *newent;
 417        bool outarg_valid = true;
 418        bool locked;
 419
 420        if (fuse_is_bad(dir))
 421                return ERR_PTR(-EIO);
 422
 423        locked = fuse_lock_inode(dir);
 424        err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
 425                               &outarg, &inode);
 426        fuse_unlock_inode(dir, locked);
 427        if (err == -ENOENT) {
 428                outarg_valid = false;
 429                err = 0;
 430        }
 431        if (err)
 432                goto out_err;
 433
 434        err = -EIO;
 435        if (inode && get_node_id(inode) == FUSE_ROOT_ID)
 436                goto out_iput;
 437
 438        newent = d_splice_alias(inode, entry);
 439        err = PTR_ERR(newent);
 440        if (IS_ERR(newent))
 441                goto out_err;
 442
 443        entry = newent ? newent : entry;
 444        if (outarg_valid)
 445                fuse_change_entry_timeout(entry, &outarg);
 446        else
 447                fuse_invalidate_entry_cache(entry);
 448
 449        if (inode)
 450                fuse_advise_use_readdirplus(dir);
 451        return newent;
 452
 453 out_iput:
 454        iput(inode);
 455 out_err:
 456        return ERR_PTR(err);
 457}
 458
 459/*
 460 * Atomic create+open operation
 461 *
 462 * If the filesystem doesn't support this, then fall back to separate
 463 * 'mknod' + 'open' requests.
 464 */
 465static int fuse_create_open(struct inode *dir, struct dentry *entry,
 466                            struct file *file, unsigned int flags,
 467                            umode_t mode)
 468{
 469        int err;
 470        struct inode *inode;
 471        struct fuse_mount *fm = get_fuse_mount(dir);
 472        FUSE_ARGS(args);
 473        struct fuse_forget_link *forget;
 474        struct fuse_create_in inarg;
 475        struct fuse_open_out outopen;
 476        struct fuse_entry_out outentry;
 477        struct fuse_inode *fi;
 478        struct fuse_file *ff;
 479
 480        /* Userspace expects S_IFREG in create mode */
 481        BUG_ON((mode & S_IFMT) != S_IFREG);
 482
 483        forget = fuse_alloc_forget();
 484        err = -ENOMEM;
 485        if (!forget)
 486                goto out_err;
 487
 488        err = -ENOMEM;
 489        ff = fuse_file_alloc(fm);
 490        if (!ff)
 491                goto out_put_forget_req;
 492
 493        if (!fm->fc->dont_mask)
 494                mode &= ~current_umask();
 495
 496        flags &= ~O_NOCTTY;
 497        memset(&inarg, 0, sizeof(inarg));
 498        memset(&outentry, 0, sizeof(outentry));
 499        inarg.flags = flags;
 500        inarg.mode = mode;
 501        inarg.umask = current_umask();
 502
 503        if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
 504            !(flags & O_EXCL) && !capable(CAP_FSETID)) {
 505                inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
 506        }
 507
 508        args.opcode = FUSE_CREATE;
 509        args.nodeid = get_node_id(dir);
 510        args.in_numargs = 2;
 511        args.in_args[0].size = sizeof(inarg);
 512        args.in_args[0].value = &inarg;
 513        args.in_args[1].size = entry->d_name.len + 1;
 514        args.in_args[1].value = entry->d_name.name;
 515        args.out_numargs = 2;
 516        args.out_args[0].size = sizeof(outentry);
 517        args.out_args[0].value = &outentry;
 518        args.out_args[1].size = sizeof(outopen);
 519        args.out_args[1].value = &outopen;
 520        err = fuse_simple_request(fm, &args);
 521        if (err)
 522                goto out_free_ff;
 523
 524        err = -EIO;
 525        if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
 526            fuse_invalid_attr(&outentry.attr))
 527                goto out_free_ff;
 528
 529        ff->fh = outopen.fh;
 530        ff->nodeid = outentry.nodeid;
 531        ff->open_flags = outopen.open_flags;
 532        inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
 533                          &outentry.attr, entry_attr_timeout(&outentry), 0);
 534        if (!inode) {
 535                flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
 536                fuse_sync_release(NULL, ff, flags);
 537                fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
 538                err = -ENOMEM;
 539                goto out_err;
 540        }
 541        kfree(forget);
 542        d_instantiate(entry, inode);
 543        fuse_change_entry_timeout(entry, &outentry);
 544        fuse_dir_changed(dir);
 545        err = finish_open(file, entry, generic_file_open);
 546        if (err) {
 547                fi = get_fuse_inode(inode);
 548                fuse_sync_release(fi, ff, flags);
 549        } else {
 550                file->private_data = ff;
 551                fuse_finish_open(inode, file);
 552        }
 553        return err;
 554
 555out_free_ff:
 556        fuse_file_free(ff);
 557out_put_forget_req:
 558        kfree(forget);
 559out_err:
 560        return err;
 561}
 562
 563static int fuse_mknod(struct user_namespace *, struct inode *, struct dentry *,
 564                      umode_t, dev_t);
 565static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
 566                            struct file *file, unsigned flags,
 567                            umode_t mode)
 568{
 569        int err;
 570        struct fuse_conn *fc = get_fuse_conn(dir);
 571        struct dentry *res = NULL;
 572
 573        if (fuse_is_bad(dir))
 574                return -EIO;
 575
 576        if (d_in_lookup(entry)) {
 577                res = fuse_lookup(dir, entry, 0);
 578                if (IS_ERR(res))
 579                        return PTR_ERR(res);
 580
 581                if (res)
 582                        entry = res;
 583        }
 584
 585        if (!(flags & O_CREAT) || d_really_is_positive(entry))
 586                goto no_open;
 587
 588        /* Only creates */
 589        file->f_mode |= FMODE_CREATED;
 590
 591        if (fc->no_create)
 592                goto mknod;
 593
 594        err = fuse_create_open(dir, entry, file, flags, mode);
 595        if (err == -ENOSYS) {
 596                fc->no_create = 1;
 597                goto mknod;
 598        }
 599out_dput:
 600        dput(res);
 601        return err;
 602
 603mknod:
 604        err = fuse_mknod(&init_user_ns, dir, entry, mode, 0);
 605        if (err)
 606                goto out_dput;
 607no_open:
 608        return finish_no_open(file, res);
 609}
 610
 611/*
 612 * Code shared between mknod, mkdir, symlink and link
 613 */
 614static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
 615                            struct inode *dir, struct dentry *entry,
 616                            umode_t mode)
 617{
 618        struct fuse_entry_out outarg;
 619        struct inode *inode;
 620        struct dentry *d;
 621        int err;
 622        struct fuse_forget_link *forget;
 623
 624        if (fuse_is_bad(dir))
 625                return -EIO;
 626
 627        forget = fuse_alloc_forget();
 628        if (!forget)
 629                return -ENOMEM;
 630
 631        memset(&outarg, 0, sizeof(outarg));
 632        args->nodeid = get_node_id(dir);
 633        args->out_numargs = 1;
 634        args->out_args[0].size = sizeof(outarg);
 635        args->out_args[0].value = &outarg;
 636        err = fuse_simple_request(fm, args);
 637        if (err)
 638                goto out_put_forget_req;
 639
 640        err = -EIO;
 641        if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
 642                goto out_put_forget_req;
 643
 644        if ((outarg.attr.mode ^ mode) & S_IFMT)
 645                goto out_put_forget_req;
 646
 647        inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
 648                          &outarg.attr, entry_attr_timeout(&outarg), 0);
 649        if (!inode) {
 650                fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
 651                return -ENOMEM;
 652        }
 653        kfree(forget);
 654
 655        d_drop(entry);
 656        d = d_splice_alias(inode, entry);
 657        if (IS_ERR(d))
 658                return PTR_ERR(d);
 659
 660        if (d) {
 661                fuse_change_entry_timeout(d, &outarg);
 662                dput(d);
 663        } else {
 664                fuse_change_entry_timeout(entry, &outarg);
 665        }
 666        fuse_dir_changed(dir);
 667        return 0;
 668
 669 out_put_forget_req:
 670        kfree(forget);
 671        return err;
 672}
 673
 674static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir,
 675                      struct dentry *entry, umode_t mode, dev_t rdev)
 676{
 677        struct fuse_mknod_in inarg;
 678        struct fuse_mount *fm = get_fuse_mount(dir);
 679        FUSE_ARGS(args);
 680
 681        if (!fm->fc->dont_mask)
 682                mode &= ~current_umask();
 683
 684        memset(&inarg, 0, sizeof(inarg));
 685        inarg.mode = mode;
 686        inarg.rdev = new_encode_dev(rdev);
 687        inarg.umask = current_umask();
 688        args.opcode = FUSE_MKNOD;
 689        args.in_numargs = 2;
 690        args.in_args[0].size = sizeof(inarg);
 691        args.in_args[0].value = &inarg;
 692        args.in_args[1].size = entry->d_name.len + 1;
 693        args.in_args[1].value = entry->d_name.name;
 694        return create_new_entry(fm, &args, dir, entry, mode);
 695}
 696
 697static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir,
 698                       struct dentry *entry, umode_t mode, bool excl)
 699{
 700        return fuse_mknod(&init_user_ns, dir, entry, mode, 0);
 701}
 702
 703static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
 704                      struct dentry *entry, umode_t mode)
 705{
 706        struct fuse_mkdir_in inarg;
 707        struct fuse_mount *fm = get_fuse_mount(dir);
 708        FUSE_ARGS(args);
 709
 710        if (!fm->fc->dont_mask)
 711                mode &= ~current_umask();
 712
 713        memset(&inarg, 0, sizeof(inarg));
 714        inarg.mode = mode;
 715        inarg.umask = current_umask();
 716        args.opcode = FUSE_MKDIR;
 717        args.in_numargs = 2;
 718        args.in_args[0].size = sizeof(inarg);
 719        args.in_args[0].value = &inarg;
 720        args.in_args[1].size = entry->d_name.len + 1;
 721        args.in_args[1].value = entry->d_name.name;
 722        return create_new_entry(fm, &args, dir, entry, S_IFDIR);
 723}
 724
 725static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir,
 726                        struct dentry *entry, const char *link)
 727{
 728        struct fuse_mount *fm = get_fuse_mount(dir);
 729        unsigned len = strlen(link) + 1;
 730        FUSE_ARGS(args);
 731
 732        args.opcode = FUSE_SYMLINK;
 733        args.in_numargs = 2;
 734        args.in_args[0].size = entry->d_name.len + 1;
 735        args.in_args[0].value = entry->d_name.name;
 736        args.in_args[1].size = len;
 737        args.in_args[1].value = link;
 738        return create_new_entry(fm, &args, dir, entry, S_IFLNK);
 739}
 740
 741void fuse_update_ctime(struct inode *inode)
 742{
 743        if (!IS_NOCMTIME(inode)) {
 744                inode->i_ctime = current_time(inode);
 745                mark_inode_dirty_sync(inode);
 746        }
 747}
 748
 749static int fuse_unlink(struct inode *dir, struct dentry *entry)
 750{
 751        int err;
 752        struct fuse_mount *fm = get_fuse_mount(dir);
 753        FUSE_ARGS(args);
 754
 755        if (fuse_is_bad(dir))
 756                return -EIO;
 757
 758        args.opcode = FUSE_UNLINK;
 759        args.nodeid = get_node_id(dir);
 760        args.in_numargs = 1;
 761        args.in_args[0].size = entry->d_name.len + 1;
 762        args.in_args[0].value = entry->d_name.name;
 763        err = fuse_simple_request(fm, &args);
 764        if (!err) {
 765                struct inode *inode = d_inode(entry);
 766                struct fuse_inode *fi = get_fuse_inode(inode);
 767
 768                spin_lock(&fi->lock);
 769                fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
 770                /*
 771                 * If i_nlink == 0 then unlink doesn't make sense, yet this can
 772                 * happen if userspace filesystem is careless.  It would be
 773                 * difficult to enforce correct nlink usage so just ignore this
 774                 * condition here
 775                 */
 776                if (inode->i_nlink > 0)
 777                        drop_nlink(inode);
 778                spin_unlock(&fi->lock);
 779                fuse_invalidate_attr(inode);
 780                fuse_dir_changed(dir);
 781                fuse_invalidate_entry_cache(entry);
 782                fuse_update_ctime(inode);
 783        } else if (err == -EINTR)
 784                fuse_invalidate_entry(entry);
 785        return err;
 786}
 787
 788static int fuse_rmdir(struct inode *dir, struct dentry *entry)
 789{
 790        int err;
 791        struct fuse_mount *fm = get_fuse_mount(dir);
 792        FUSE_ARGS(args);
 793
 794        if (fuse_is_bad(dir))
 795                return -EIO;
 796
 797        args.opcode = FUSE_RMDIR;
 798        args.nodeid = get_node_id(dir);
 799        args.in_numargs = 1;
 800        args.in_args[0].size = entry->d_name.len + 1;
 801        args.in_args[0].value = entry->d_name.name;
 802        err = fuse_simple_request(fm, &args);
 803        if (!err) {
 804                clear_nlink(d_inode(entry));
 805                fuse_dir_changed(dir);
 806                fuse_invalidate_entry_cache(entry);
 807        } else if (err == -EINTR)
 808                fuse_invalidate_entry(entry);
 809        return err;
 810}
 811
 812static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
 813                              struct inode *newdir, struct dentry *newent,
 814                              unsigned int flags, int opcode, size_t argsize)
 815{
 816        int err;
 817        struct fuse_rename2_in inarg;
 818        struct fuse_mount *fm = get_fuse_mount(olddir);
 819        FUSE_ARGS(args);
 820
 821        memset(&inarg, 0, argsize);
 822        inarg.newdir = get_node_id(newdir);
 823        inarg.flags = flags;
 824        args.opcode = opcode;
 825        args.nodeid = get_node_id(olddir);
 826        args.in_numargs = 3;
 827        args.in_args[0].size = argsize;
 828        args.in_args[0].value = &inarg;
 829        args.in_args[1].size = oldent->d_name.len + 1;
 830        args.in_args[1].value = oldent->d_name.name;
 831        args.in_args[2].size = newent->d_name.len + 1;
 832        args.in_args[2].value = newent->d_name.name;
 833        err = fuse_simple_request(fm, &args);
 834        if (!err) {
 835                /* ctime changes */
 836                fuse_invalidate_attr(d_inode(oldent));
 837                fuse_update_ctime(d_inode(oldent));
 838
 839                if (flags & RENAME_EXCHANGE) {
 840                        fuse_invalidate_attr(d_inode(newent));
 841                        fuse_update_ctime(d_inode(newent));
 842                }
 843
 844                fuse_dir_changed(olddir);
 845                if (olddir != newdir)
 846                        fuse_dir_changed(newdir);
 847
 848                /* newent will end up negative */
 849                if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
 850                        fuse_invalidate_attr(d_inode(newent));
 851                        fuse_invalidate_entry_cache(newent);
 852                        fuse_update_ctime(d_inode(newent));
 853                }
 854        } else if (err == -EINTR) {
 855                /* If request was interrupted, DEITY only knows if the
 856                   rename actually took place.  If the invalidation
 857                   fails (e.g. some process has CWD under the renamed
 858                   directory), then there can be inconsistency between
 859                   the dcache and the real filesystem.  Tough luck. */
 860                fuse_invalidate_entry(oldent);
 861                if (d_really_is_positive(newent))
 862                        fuse_invalidate_entry(newent);
 863        }
 864
 865        return err;
 866}
 867
 868static int fuse_rename2(struct user_namespace *mnt_userns, struct inode *olddir,
 869                        struct dentry *oldent, struct inode *newdir,
 870                        struct dentry *newent, unsigned int flags)
 871{
 872        struct fuse_conn *fc = get_fuse_conn(olddir);
 873        int err;
 874
 875        if (fuse_is_bad(olddir))
 876                return -EIO;
 877
 878        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
 879                return -EINVAL;
 880
 881        if (flags) {
 882                if (fc->no_rename2 || fc->minor < 23)
 883                        return -EINVAL;
 884
 885                err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
 886                                         FUSE_RENAME2,
 887                                         sizeof(struct fuse_rename2_in));
 888                if (err == -ENOSYS) {
 889                        fc->no_rename2 = 1;
 890                        err = -EINVAL;
 891                }
 892        } else {
 893                err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
 894                                         FUSE_RENAME,
 895                                         sizeof(struct fuse_rename_in));
 896        }
 897
 898        return err;
 899}
 900
 901static int fuse_link(struct dentry *entry, struct inode *newdir,
 902                     struct dentry *newent)
 903{
 904        int err;
 905        struct fuse_link_in inarg;
 906        struct inode *inode = d_inode(entry);
 907        struct fuse_mount *fm = get_fuse_mount(inode);
 908        FUSE_ARGS(args);
 909
 910        memset(&inarg, 0, sizeof(inarg));
 911        inarg.oldnodeid = get_node_id(inode);
 912        args.opcode = FUSE_LINK;
 913        args.in_numargs = 2;
 914        args.in_args[0].size = sizeof(inarg);
 915        args.in_args[0].value = &inarg;
 916        args.in_args[1].size = newent->d_name.len + 1;
 917        args.in_args[1].value = newent->d_name.name;
 918        err = create_new_entry(fm, &args, newdir, newent, inode->i_mode);
 919        /* Contrary to "normal" filesystems it can happen that link
 920           makes two "logical" inodes point to the same "physical"
 921           inode.  We invalidate the attributes of the old one, so it
 922           will reflect changes in the backing inode (link count,
 923           etc.)
 924        */
 925        if (!err) {
 926                struct fuse_inode *fi = get_fuse_inode(inode);
 927
 928                spin_lock(&fi->lock);
 929                fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
 930                if (likely(inode->i_nlink < UINT_MAX))
 931                        inc_nlink(inode);
 932                spin_unlock(&fi->lock);
 933                fuse_invalidate_attr(inode);
 934                fuse_update_ctime(inode);
 935        } else if (err == -EINTR) {
 936                fuse_invalidate_attr(inode);
 937        }
 938        return err;
 939}
 940
 941static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
 942                          struct kstat *stat)
 943{
 944        unsigned int blkbits;
 945        struct fuse_conn *fc = get_fuse_conn(inode);
 946
 947        /* see the comment in fuse_change_attributes() */
 948        if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
 949                attr->size = i_size_read(inode);
 950                attr->mtime = inode->i_mtime.tv_sec;
 951                attr->mtimensec = inode->i_mtime.tv_nsec;
 952                attr->ctime = inode->i_ctime.tv_sec;
 953                attr->ctimensec = inode->i_ctime.tv_nsec;
 954        }
 955
 956        stat->dev = inode->i_sb->s_dev;
 957        stat->ino = attr->ino;
 958        stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
 959        stat->nlink = attr->nlink;
 960        stat->uid = make_kuid(fc->user_ns, attr->uid);
 961        stat->gid = make_kgid(fc->user_ns, attr->gid);
 962        stat->rdev = inode->i_rdev;
 963        stat->atime.tv_sec = attr->atime;
 964        stat->atime.tv_nsec = attr->atimensec;
 965        stat->mtime.tv_sec = attr->mtime;
 966        stat->mtime.tv_nsec = attr->mtimensec;
 967        stat->ctime.tv_sec = attr->ctime;
 968        stat->ctime.tv_nsec = attr->ctimensec;
 969        stat->size = attr->size;
 970        stat->blocks = attr->blocks;
 971
 972        if (attr->blksize != 0)
 973                blkbits = ilog2(attr->blksize);
 974        else
 975                blkbits = inode->i_sb->s_blocksize_bits;
 976
 977        stat->blksize = 1 << blkbits;
 978}
 979
 980static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
 981                           struct file *file)
 982{
 983        int err;
 984        struct fuse_getattr_in inarg;
 985        struct fuse_attr_out outarg;
 986        struct fuse_mount *fm = get_fuse_mount(inode);
 987        FUSE_ARGS(args);
 988        u64 attr_version;
 989
 990        attr_version = fuse_get_attr_version(fm->fc);
 991
 992        memset(&inarg, 0, sizeof(inarg));
 993        memset(&outarg, 0, sizeof(outarg));
 994        /* Directories have separate file-handle space */
 995        if (file && S_ISREG(inode->i_mode)) {
 996                struct fuse_file *ff = file->private_data;
 997
 998                inarg.getattr_flags |= FUSE_GETATTR_FH;
 999                inarg.fh = ff->fh;
1000        }
1001        args.opcode = FUSE_GETATTR;
1002        args.nodeid = get_node_id(inode);
1003        args.in_numargs = 1;
1004        args.in_args[0].size = sizeof(inarg);
1005        args.in_args[0].value = &inarg;
1006        args.out_numargs = 1;
1007        args.out_args[0].size = sizeof(outarg);
1008        args.out_args[0].value = &outarg;
1009        err = fuse_simple_request(fm, &args);
1010        if (!err) {
1011                if (fuse_invalid_attr(&outarg.attr) ||
1012                    inode_wrong_type(inode, outarg.attr.mode)) {
1013                        fuse_make_bad(inode);
1014                        err = -EIO;
1015                } else {
1016                        fuse_change_attributes(inode, &outarg.attr,
1017                                               attr_timeout(&outarg),
1018                                               attr_version);
1019                        if (stat)
1020                                fuse_fillattr(inode, &outarg.attr, stat);
1021                }
1022        }
1023        return err;
1024}
1025
1026static int fuse_update_get_attr(struct inode *inode, struct file *file,
1027                                struct kstat *stat, u32 request_mask,
1028                                unsigned int flags)
1029{
1030        struct fuse_inode *fi = get_fuse_inode(inode);
1031        int err = 0;
1032        bool sync;
1033
1034        if (flags & AT_STATX_FORCE_SYNC)
1035                sync = true;
1036        else if (flags & AT_STATX_DONT_SYNC)
1037                sync = false;
1038        else if (request_mask & READ_ONCE(fi->inval_mask))
1039                sync = true;
1040        else
1041                sync = time_before64(fi->i_time, get_jiffies_64());
1042
1043        if (sync) {
1044                forget_all_cached_acls(inode);
1045                err = fuse_do_getattr(inode, stat, file);
1046        } else if (stat) {
1047                generic_fillattr(&init_user_ns, inode, stat);
1048                stat->mode = fi->orig_i_mode;
1049                stat->ino = fi->orig_ino;
1050        }
1051
1052        return err;
1053}
1054
1055int fuse_update_attributes(struct inode *inode, struct file *file)
1056{
1057        /* Do *not* need to get atime for internal purposes */
1058        return fuse_update_get_attr(inode, file, NULL,
1059                                    STATX_BASIC_STATS & ~STATX_ATIME, 0);
1060}
1061
1062int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
1063                             u64 child_nodeid, struct qstr *name)
1064{
1065        int err = -ENOTDIR;
1066        struct inode *parent;
1067        struct dentry *dir;
1068        struct dentry *entry;
1069
1070        parent = fuse_ilookup(fc, parent_nodeid, NULL);
1071        if (!parent)
1072                return -ENOENT;
1073
1074        inode_lock(parent);
1075        if (!S_ISDIR(parent->i_mode))
1076                goto unlock;
1077
1078        err = -ENOENT;
1079        dir = d_find_alias(parent);
1080        if (!dir)
1081                goto unlock;
1082
1083        name->hash = full_name_hash(dir, name->name, name->len);
1084        entry = d_lookup(dir, name);
1085        dput(dir);
1086        if (!entry)
1087                goto unlock;
1088
1089        fuse_dir_changed(parent);
1090        fuse_invalidate_entry(entry);
1091
1092        if (child_nodeid != 0 && d_really_is_positive(entry)) {
1093                inode_lock(d_inode(entry));
1094                if (get_node_id(d_inode(entry)) != child_nodeid) {
1095                        err = -ENOENT;
1096                        goto badentry;
1097                }
1098                if (d_mountpoint(entry)) {
1099                        err = -EBUSY;
1100                        goto badentry;
1101                }
1102                if (d_is_dir(entry)) {
1103                        shrink_dcache_parent(entry);
1104                        if (!simple_empty(entry)) {
1105                                err = -ENOTEMPTY;
1106                                goto badentry;
1107                        }
1108                        d_inode(entry)->i_flags |= S_DEAD;
1109                }
1110                dont_mount(entry);
1111                clear_nlink(d_inode(entry));
1112                err = 0;
1113 badentry:
1114                inode_unlock(d_inode(entry));
1115                if (!err)
1116                        d_delete(entry);
1117        } else {
1118                err = 0;
1119        }
1120        dput(entry);
1121
1122 unlock:
1123        inode_unlock(parent);
1124        iput(parent);
1125        return err;
1126}
1127
1128/*
1129 * Calling into a user-controlled filesystem gives the filesystem
1130 * daemon ptrace-like capabilities over the current process.  This
1131 * means, that the filesystem daemon is able to record the exact
1132 * filesystem operations performed, and can also control the behavior
1133 * of the requester process in otherwise impossible ways.  For example
1134 * it can delay the operation for arbitrary length of time allowing
1135 * DoS against the requester.
1136 *
1137 * For this reason only those processes can call into the filesystem,
1138 * for which the owner of the mount has ptrace privilege.  This
1139 * excludes processes started by other users, suid or sgid processes.
1140 */
1141int fuse_allow_current_process(struct fuse_conn *fc)
1142{
1143        const struct cred *cred;
1144
1145        if (fc->allow_other)
1146                return current_in_userns(fc->user_ns);
1147
1148        cred = current_cred();
1149        if (uid_eq(cred->euid, fc->user_id) &&
1150            uid_eq(cred->suid, fc->user_id) &&
1151            uid_eq(cred->uid,  fc->user_id) &&
1152            gid_eq(cred->egid, fc->group_id) &&
1153            gid_eq(cred->sgid, fc->group_id) &&
1154            gid_eq(cred->gid,  fc->group_id))
1155                return 1;
1156
1157        return 0;
1158}
1159
1160static int fuse_access(struct inode *inode, int mask)
1161{
1162        struct fuse_mount *fm = get_fuse_mount(inode);
1163        FUSE_ARGS(args);
1164        struct fuse_access_in inarg;
1165        int err;
1166
1167        BUG_ON(mask & MAY_NOT_BLOCK);
1168
1169        if (fm->fc->no_access)
1170                return 0;
1171
1172        memset(&inarg, 0, sizeof(inarg));
1173        inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1174        args.opcode = FUSE_ACCESS;
1175        args.nodeid = get_node_id(inode);
1176        args.in_numargs = 1;
1177        args.in_args[0].size = sizeof(inarg);
1178        args.in_args[0].value = &inarg;
1179        err = fuse_simple_request(fm, &args);
1180        if (err == -ENOSYS) {
1181                fm->fc->no_access = 1;
1182                err = 0;
1183        }
1184        return err;
1185}
1186
1187static int fuse_perm_getattr(struct inode *inode, int mask)
1188{
1189        if (mask & MAY_NOT_BLOCK)
1190                return -ECHILD;
1191
1192        forget_all_cached_acls(inode);
1193        return fuse_do_getattr(inode, NULL, NULL);
1194}
1195
1196/*
1197 * Check permission.  The two basic access models of FUSE are:
1198 *
1199 * 1) Local access checking ('default_permissions' mount option) based
1200 * on file mode.  This is the plain old disk filesystem permission
1201 * modell.
1202 *
1203 * 2) "Remote" access checking, where server is responsible for
1204 * checking permission in each inode operation.  An exception to this
1205 * is if ->permission() was invoked from sys_access() in which case an
1206 * access request is sent.  Execute permission is still checked
1207 * locally based on file mode.
1208 */
1209static int fuse_permission(struct user_namespace *mnt_userns,
1210                           struct inode *inode, int mask)
1211{
1212        struct fuse_conn *fc = get_fuse_conn(inode);
1213        bool refreshed = false;
1214        int err = 0;
1215
1216        if (fuse_is_bad(inode))
1217                return -EIO;
1218
1219        if (!fuse_allow_current_process(fc))
1220                return -EACCES;
1221
1222        /*
1223         * If attributes are needed, refresh them before proceeding
1224         */
1225        if (fc->default_permissions ||
1226            ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1227                struct fuse_inode *fi = get_fuse_inode(inode);
1228                u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1229
1230                if (perm_mask & READ_ONCE(fi->inval_mask) ||
1231                    time_before64(fi->i_time, get_jiffies_64())) {
1232                        refreshed = true;
1233
1234                        err = fuse_perm_getattr(inode, mask);
1235                        if (err)
1236                                return err;
1237                }
1238        }
1239
1240        if (fc->default_permissions) {
1241                err = generic_permission(&init_user_ns, inode, mask);
1242
1243                /* If permission is denied, try to refresh file
1244                   attributes.  This is also needed, because the root
1245                   node will at first have no permissions */
1246                if (err == -EACCES && !refreshed) {
1247                        err = fuse_perm_getattr(inode, mask);
1248                        if (!err)
1249                                err = generic_permission(&init_user_ns,
1250                                                         inode, mask);
1251                }
1252
1253                /* Note: the opposite of the above test does not
1254                   exist.  So if permissions are revoked this won't be
1255                   noticed immediately, only after the attribute
1256                   timeout has expired */
1257        } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1258                err = fuse_access(inode, mask);
1259        } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1260                if (!(inode->i_mode & S_IXUGO)) {
1261                        if (refreshed)
1262                                return -EACCES;
1263
1264                        err = fuse_perm_getattr(inode, mask);
1265                        if (!err && !(inode->i_mode & S_IXUGO))
1266                                return -EACCES;
1267                }
1268        }
1269        return err;
1270}
1271
1272static int fuse_readlink_page(struct inode *inode, struct page *page)
1273{
1274        struct fuse_mount *fm = get_fuse_mount(inode);
1275        struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
1276        struct fuse_args_pages ap = {
1277                .num_pages = 1,
1278                .pages = &page,
1279                .descs = &desc,
1280        };
1281        char *link;
1282        ssize_t res;
1283
1284        ap.args.opcode = FUSE_READLINK;
1285        ap.args.nodeid = get_node_id(inode);
1286        ap.args.out_pages = true;
1287        ap.args.out_argvar = true;
1288        ap.args.page_zeroing = true;
1289        ap.args.out_numargs = 1;
1290        ap.args.out_args[0].size = desc.length;
1291        res = fuse_simple_request(fm, &ap.args);
1292
1293        fuse_invalidate_atime(inode);
1294
1295        if (res < 0)
1296                return res;
1297
1298        if (WARN_ON(res >= PAGE_SIZE))
1299                return -EIO;
1300
1301        link = page_address(page);
1302        link[res] = '\0';
1303
1304        return 0;
1305}
1306
1307static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1308                                 struct delayed_call *callback)
1309{
1310        struct fuse_conn *fc = get_fuse_conn(inode);
1311        struct page *page;
1312        int err;
1313
1314        err = -EIO;
1315        if (fuse_is_bad(inode))
1316                goto out_err;
1317
1318        if (fc->cache_symlinks)
1319                return page_get_link(dentry, inode, callback);
1320
1321        err = -ECHILD;
1322        if (!dentry)
1323                goto out_err;
1324
1325        page = alloc_page(GFP_KERNEL);
1326        err = -ENOMEM;
1327        if (!page)
1328                goto out_err;
1329
1330        err = fuse_readlink_page(inode, page);
1331        if (err) {
1332                __free_page(page);
1333                goto out_err;
1334        }
1335
1336        set_delayed_call(callback, page_put_link, page);
1337
1338        return page_address(page);
1339
1340out_err:
1341        return ERR_PTR(err);
1342}
1343
1344static int fuse_dir_open(struct inode *inode, struct file *file)
1345{
1346        return fuse_open_common(inode, file, true);
1347}
1348
1349static int fuse_dir_release(struct inode *inode, struct file *file)
1350{
1351        fuse_release_common(file, true);
1352
1353        return 0;
1354}
1355
1356static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1357                          int datasync)
1358{
1359        struct inode *inode = file->f_mapping->host;
1360        struct fuse_conn *fc = get_fuse_conn(inode);
1361        int err;
1362
1363        if (fuse_is_bad(inode))
1364                return -EIO;
1365
1366        if (fc->no_fsyncdir)
1367                return 0;
1368
1369        inode_lock(inode);
1370        err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
1371        if (err == -ENOSYS) {
1372                fc->no_fsyncdir = 1;
1373                err = 0;
1374        }
1375        inode_unlock(inode);
1376
1377        return err;
1378}
1379
1380static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1381                            unsigned long arg)
1382{
1383        struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1384
1385        /* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1386        if (fc->minor < 18)
1387                return -ENOTTY;
1388
1389        return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1390}
1391
1392static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1393                                   unsigned long arg)
1394{
1395        struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1396
1397        if (fc->minor < 18)
1398                return -ENOTTY;
1399
1400        return fuse_ioctl_common(file, cmd, arg,
1401                                 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1402}
1403
1404static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1405{
1406        /* Always update if mtime is explicitly set  */
1407        if (ivalid & ATTR_MTIME_SET)
1408                return true;
1409
1410        /* Or if kernel i_mtime is the official one */
1411        if (trust_local_mtime)
1412                return true;
1413
1414        /* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1415        if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1416                return false;
1417
1418        /* In all other cases update */
1419        return true;
1420}
1421
1422static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
1423                           struct fuse_setattr_in *arg, bool trust_local_cmtime)
1424{
1425        unsigned ivalid = iattr->ia_valid;
1426
1427        if (ivalid & ATTR_MODE)
1428                arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1429        if (ivalid & ATTR_UID)
1430                arg->valid |= FATTR_UID,    arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
1431        if (ivalid & ATTR_GID)
1432                arg->valid |= FATTR_GID,    arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
1433        if (ivalid & ATTR_SIZE)
1434                arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1435        if (ivalid & ATTR_ATIME) {
1436                arg->valid |= FATTR_ATIME;
1437                arg->atime = iattr->ia_atime.tv_sec;
1438                arg->atimensec = iattr->ia_atime.tv_nsec;
1439                if (!(ivalid & ATTR_ATIME_SET))
1440                        arg->valid |= FATTR_ATIME_NOW;
1441        }
1442        if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1443                arg->valid |= FATTR_MTIME;
1444                arg->mtime = iattr->ia_mtime.tv_sec;
1445                arg->mtimensec = iattr->ia_mtime.tv_nsec;
1446                if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1447                        arg->valid |= FATTR_MTIME_NOW;
1448        }
1449        if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1450                arg->valid |= FATTR_CTIME;
1451                arg->ctime = iattr->ia_ctime.tv_sec;
1452                arg->ctimensec = iattr->ia_ctime.tv_nsec;
1453        }
1454}
1455
1456/*
1457 * Prevent concurrent writepages on inode
1458 *
1459 * This is done by adding a negative bias to the inode write counter
1460 * and waiting for all pending writes to finish.
1461 */
1462void fuse_set_nowrite(struct inode *inode)
1463{
1464        struct fuse_inode *fi = get_fuse_inode(inode);
1465
1466        BUG_ON(!inode_is_locked(inode));
1467
1468        spin_lock(&fi->lock);
1469        BUG_ON(fi->writectr < 0);
1470        fi->writectr += FUSE_NOWRITE;
1471        spin_unlock(&fi->lock);
1472        wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1473}
1474
1475/*
1476 * Allow writepages on inode
1477 *
1478 * Remove the bias from the writecounter and send any queued
1479 * writepages.
1480 */
1481static void __fuse_release_nowrite(struct inode *inode)
1482{
1483        struct fuse_inode *fi = get_fuse_inode(inode);
1484
1485        BUG_ON(fi->writectr != FUSE_NOWRITE);
1486        fi->writectr = 0;
1487        fuse_flush_writepages(inode);
1488}
1489
1490void fuse_release_nowrite(struct inode *inode)
1491{
1492        struct fuse_inode *fi = get_fuse_inode(inode);
1493
1494        spin_lock(&fi->lock);
1495        __fuse_release_nowrite(inode);
1496        spin_unlock(&fi->lock);
1497}
1498
1499static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1500                              struct inode *inode,
1501                              struct fuse_setattr_in *inarg_p,
1502                              struct fuse_attr_out *outarg_p)
1503{
1504        args->opcode = FUSE_SETATTR;
1505        args->nodeid = get_node_id(inode);
1506        args->in_numargs = 1;
1507        args->in_args[0].size = sizeof(*inarg_p);
1508        args->in_args[0].value = inarg_p;
1509        args->out_numargs = 1;
1510        args->out_args[0].size = sizeof(*outarg_p);
1511        args->out_args[0].value = outarg_p;
1512}
1513
1514/*
1515 * Flush inode->i_mtime to the server
1516 */
1517int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1518{
1519        struct fuse_mount *fm = get_fuse_mount(inode);
1520        FUSE_ARGS(args);
1521        struct fuse_setattr_in inarg;
1522        struct fuse_attr_out outarg;
1523
1524        memset(&inarg, 0, sizeof(inarg));
1525        memset(&outarg, 0, sizeof(outarg));
1526
1527        inarg.valid = FATTR_MTIME;
1528        inarg.mtime = inode->i_mtime.tv_sec;
1529        inarg.mtimensec = inode->i_mtime.tv_nsec;
1530        if (fm->fc->minor >= 23) {
1531                inarg.valid |= FATTR_CTIME;
1532                inarg.ctime = inode->i_ctime.tv_sec;
1533                inarg.ctimensec = inode->i_ctime.tv_nsec;
1534        }
1535        if (ff) {
1536                inarg.valid |= FATTR_FH;
1537                inarg.fh = ff->fh;
1538        }
1539        fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1540
1541        return fuse_simple_request(fm, &args);
1542}
1543
1544/*
1545 * Set attributes, and at the same time refresh them.
1546 *
1547 * Truncation is slightly complicated, because the 'truncate' request
1548 * may fail, in which case we don't want to touch the mapping.
1549 * vmtruncate() doesn't allow for this case, so do the rlimit checking
1550 * and the actual truncation by hand.
1551 */
1552int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1553                    struct file *file)
1554{
1555        struct inode *inode = d_inode(dentry);
1556        struct fuse_mount *fm = get_fuse_mount(inode);
1557        struct fuse_conn *fc = fm->fc;
1558        struct fuse_inode *fi = get_fuse_inode(inode);
1559        struct address_space *mapping = inode->i_mapping;
1560        FUSE_ARGS(args);
1561        struct fuse_setattr_in inarg;
1562        struct fuse_attr_out outarg;
1563        bool is_truncate = false;
1564        bool is_wb = fc->writeback_cache;
1565        loff_t oldsize;
1566        int err;
1567        bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1568        bool fault_blocked = false;
1569
1570        if (!fc->default_permissions)
1571                attr->ia_valid |= ATTR_FORCE;
1572
1573        err = setattr_prepare(&init_user_ns, dentry, attr);
1574        if (err)
1575                return err;
1576
1577        if (attr->ia_valid & ATTR_SIZE) {
1578                if (WARN_ON(!S_ISREG(inode->i_mode)))
1579                        return -EIO;
1580                is_truncate = true;
1581        }
1582
1583        if (FUSE_IS_DAX(inode) && is_truncate) {
1584                filemap_invalidate_lock(mapping);
1585                fault_blocked = true;
1586                err = fuse_dax_break_layouts(inode, 0, 0);
1587                if (err) {
1588                        filemap_invalidate_unlock(mapping);
1589                        return err;
1590                }
1591        }
1592
1593        if (attr->ia_valid & ATTR_OPEN) {
1594                /* This is coming from open(..., ... | O_TRUNC); */
1595                WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1596                WARN_ON(attr->ia_size != 0);
1597                if (fc->atomic_o_trunc) {
1598                        /*
1599                         * No need to send request to userspace, since actual
1600                         * truncation has already been done by OPEN.  But still
1601                         * need to truncate page cache.
1602                         */
1603                        i_size_write(inode, 0);
1604                        truncate_pagecache(inode, 0);
1605                        goto out;
1606                }
1607                file = NULL;
1608        }
1609
1610        /* Flush dirty data/metadata before non-truncate SETATTR */
1611        if (is_wb && S_ISREG(inode->i_mode) &&
1612            attr->ia_valid &
1613                        (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1614                         ATTR_TIMES_SET)) {
1615                err = write_inode_now(inode, true);
1616                if (err)
1617                        return err;
1618
1619                fuse_set_nowrite(inode);
1620                fuse_release_nowrite(inode);
1621        }
1622
1623        if (is_truncate) {
1624                fuse_set_nowrite(inode);
1625                set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1626                if (trust_local_cmtime && attr->ia_size != inode->i_size)
1627                        attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1628        }
1629
1630        memset(&inarg, 0, sizeof(inarg));
1631        memset(&outarg, 0, sizeof(outarg));
1632        iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1633        if (file) {
1634                struct fuse_file *ff = file->private_data;
1635                inarg.valid |= FATTR_FH;
1636                inarg.fh = ff->fh;
1637        }
1638
1639        /* Kill suid/sgid for non-directory chown unconditionally */
1640        if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
1641            attr->ia_valid & (ATTR_UID | ATTR_GID))
1642                inarg.valid |= FATTR_KILL_SUIDGID;
1643
1644        if (attr->ia_valid & ATTR_SIZE) {
1645                /* For mandatory locking in truncate */
1646                inarg.valid |= FATTR_LOCKOWNER;
1647                inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1648
1649                /* Kill suid/sgid for truncate only if no CAP_FSETID */
1650                if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
1651                        inarg.valid |= FATTR_KILL_SUIDGID;
1652        }
1653        fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1654        err = fuse_simple_request(fm, &args);
1655        if (err) {
1656                if (err == -EINTR)
1657                        fuse_invalidate_attr(inode);
1658                goto error;
1659        }
1660
1661        if (fuse_invalid_attr(&outarg.attr) ||
1662            inode_wrong_type(inode, outarg.attr.mode)) {
1663                fuse_make_bad(inode);
1664                err = -EIO;
1665                goto error;
1666        }
1667
1668        spin_lock(&fi->lock);
1669        /* the kernel maintains i_mtime locally */
1670        if (trust_local_cmtime) {
1671                if (attr->ia_valid & ATTR_MTIME)
1672                        inode->i_mtime = attr->ia_mtime;
1673                if (attr->ia_valid & ATTR_CTIME)
1674                        inode->i_ctime = attr->ia_ctime;
1675                /* FIXME: clear I_DIRTY_SYNC? */
1676        }
1677
1678        fuse_change_attributes_common(inode, &outarg.attr,
1679                                      attr_timeout(&outarg));
1680        oldsize = inode->i_size;
1681        /* see the comment in fuse_change_attributes() */
1682        if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
1683                i_size_write(inode, outarg.attr.size);
1684
1685        if (is_truncate) {
1686                /* NOTE: this may release/reacquire fi->lock */
1687                __fuse_release_nowrite(inode);
1688        }
1689        spin_unlock(&fi->lock);
1690
1691        /*
1692         * Only call invalidate_inode_pages2() after removing
1693         * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1694         */
1695        if ((is_truncate || !is_wb) &&
1696            S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1697                truncate_pagecache(inode, outarg.attr.size);
1698                invalidate_inode_pages2(mapping);
1699        }
1700
1701        clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1702out:
1703        if (fault_blocked)
1704                filemap_invalidate_unlock(mapping);
1705
1706        return 0;
1707
1708error:
1709        if (is_truncate)
1710                fuse_release_nowrite(inode);
1711
1712        clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1713
1714        if (fault_blocked)
1715                filemap_invalidate_unlock(mapping);
1716        return err;
1717}
1718
1719static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry,
1720                        struct iattr *attr)
1721{
1722        struct inode *inode = d_inode(entry);
1723        struct fuse_conn *fc = get_fuse_conn(inode);
1724        struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
1725        int ret;
1726
1727        if (fuse_is_bad(inode))
1728                return -EIO;
1729
1730        if (!fuse_allow_current_process(get_fuse_conn(inode)))
1731                return -EACCES;
1732
1733        if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
1734                attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
1735                                    ATTR_MODE);
1736
1737                /*
1738                 * The only sane way to reliably kill suid/sgid is to do it in
1739                 * the userspace filesystem
1740                 *
1741                 * This should be done on write(), truncate() and chown().
1742                 */
1743                if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
1744                        /*
1745                         * ia_mode calculation may have used stale i_mode.
1746                         * Refresh and recalculate.
1747                         */
1748                        ret = fuse_do_getattr(inode, NULL, file);
1749                        if (ret)
1750                                return ret;
1751
1752                        attr->ia_mode = inode->i_mode;
1753                        if (inode->i_mode & S_ISUID) {
1754                                attr->ia_valid |= ATTR_MODE;
1755                                attr->ia_mode &= ~S_ISUID;
1756                        }
1757                        if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1758                                attr->ia_valid |= ATTR_MODE;
1759                                attr->ia_mode &= ~S_ISGID;
1760                        }
1761                }
1762        }
1763        if (!attr->ia_valid)
1764                return 0;
1765
1766        ret = fuse_do_setattr(entry, attr, file);
1767        if (!ret) {
1768                /*
1769                 * If filesystem supports acls it may have updated acl xattrs in
1770                 * the filesystem, so forget cached acls for the inode.
1771                 */
1772                if (fc->posix_acl)
1773                        forget_all_cached_acls(inode);
1774
1775                /* Directory mode changed, may need to revalidate access */
1776                if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
1777                        fuse_invalidate_entry_cache(entry);
1778        }
1779        return ret;
1780}
1781
1782static int fuse_getattr(struct user_namespace *mnt_userns,
1783                        const struct path *path, struct kstat *stat,
1784                        u32 request_mask, unsigned int flags)
1785{
1786        struct inode *inode = d_inode(path->dentry);
1787        struct fuse_conn *fc = get_fuse_conn(inode);
1788
1789        if (fuse_is_bad(inode))
1790                return -EIO;
1791
1792        if (!fuse_allow_current_process(fc)) {
1793                if (!request_mask) {
1794                        /*
1795                         * If user explicitly requested *nothing* then don't
1796                         * error out, but return st_dev only.
1797                         */
1798                        stat->result_mask = 0;
1799                        stat->dev = inode->i_sb->s_dev;
1800                        return 0;
1801                }
1802                return -EACCES;
1803        }
1804
1805        return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
1806}
1807
1808static const struct inode_operations fuse_dir_inode_operations = {
1809        .lookup         = fuse_lookup,
1810        .mkdir          = fuse_mkdir,
1811        .symlink        = fuse_symlink,
1812        .unlink         = fuse_unlink,
1813        .rmdir          = fuse_rmdir,
1814        .rename         = fuse_rename2,
1815        .link           = fuse_link,
1816        .setattr        = fuse_setattr,
1817        .create         = fuse_create,
1818        .atomic_open    = fuse_atomic_open,
1819        .mknod          = fuse_mknod,
1820        .permission     = fuse_permission,
1821        .getattr        = fuse_getattr,
1822        .listxattr      = fuse_listxattr,
1823        .get_acl        = fuse_get_acl,
1824        .set_acl        = fuse_set_acl,
1825        .fileattr_get   = fuse_fileattr_get,
1826        .fileattr_set   = fuse_fileattr_set,
1827};
1828
1829static const struct file_operations fuse_dir_operations = {
1830        .llseek         = generic_file_llseek,
1831        .read           = generic_read_dir,
1832        .iterate_shared = fuse_readdir,
1833        .open           = fuse_dir_open,
1834        .release        = fuse_dir_release,
1835        .fsync          = fuse_dir_fsync,
1836        .unlocked_ioctl = fuse_dir_ioctl,
1837        .compat_ioctl   = fuse_dir_compat_ioctl,
1838};
1839
1840static const struct inode_operations fuse_common_inode_operations = {
1841        .setattr        = fuse_setattr,
1842        .permission     = fuse_permission,
1843        .getattr        = fuse_getattr,
1844        .listxattr      = fuse_listxattr,
1845        .get_acl        = fuse_get_acl,
1846        .set_acl        = fuse_set_acl,
1847        .fileattr_get   = fuse_fileattr_get,
1848        .fileattr_set   = fuse_fileattr_set,
1849};
1850
1851static const struct inode_operations fuse_symlink_inode_operations = {
1852        .setattr        = fuse_setattr,
1853        .get_link       = fuse_get_link,
1854        .getattr        = fuse_getattr,
1855        .listxattr      = fuse_listxattr,
1856};
1857
1858void fuse_init_common(struct inode *inode)
1859{
1860        inode->i_op = &fuse_common_inode_operations;
1861}
1862
1863void fuse_init_dir(struct inode *inode)
1864{
1865        struct fuse_inode *fi = get_fuse_inode(inode);
1866
1867        inode->i_op = &fuse_dir_inode_operations;
1868        inode->i_fop = &fuse_dir_operations;
1869
1870        spin_lock_init(&fi->rdc.lock);
1871        fi->rdc.cached = false;
1872        fi->rdc.size = 0;
1873        fi->rdc.pos = 0;
1874        fi->rdc.version = 0;
1875}
1876
1877static int fuse_symlink_readpage(struct file *null, struct page *page)
1878{
1879        int err = fuse_readlink_page(page->mapping->host, page);
1880
1881        if (!err)
1882                SetPageUptodate(page);
1883
1884        unlock_page(page);
1885
1886        return err;
1887}
1888
1889static const struct address_space_operations fuse_symlink_aops = {
1890        .readpage       = fuse_symlink_readpage,
1891};
1892
1893void fuse_init_symlink(struct inode *inode)
1894{
1895        inode->i_op = &fuse_symlink_inode_operations;
1896        inode->i_data.a_ops = &fuse_symlink_aops;
1897        inode_nohighmem(inode);
1898}
1899