linux/fs/open.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *  linux/fs/open.c
   4 *
   5 *  Copyright (C) 1991, 1992  Linus Torvalds
   6 */
   7
   8#include <linux/string.h>
   9#include <linux/mm.h>
  10#include <linux/file.h>
  11#include <linux/fdtable.h>
  12#include <linux/fsnotify.h>
  13#include <linux/module.h>
  14#include <linux/tty.h>
  15#include <linux/namei.h>
  16#include <linux/backing-dev.h>
  17#include <linux/capability.h>
  18#include <linux/securebits.h>
  19#include <linux/security.h>
  20#include <linux/mount.h>
  21#include <linux/fcntl.h>
  22#include <linux/slab.h>
  23#include <linux/uaccess.h>
  24#include <linux/fs.h>
  25#include <linux/personality.h>
  26#include <linux/pagemap.h>
  27#include <linux/syscalls.h>
  28#include <linux/rcupdate.h>
  29#include <linux/audit.h>
  30#include <linux/falloc.h>
  31#include <linux/fs_struct.h>
  32#include <linux/ima.h>
  33#include <linux/dnotify.h>
  34#include <linux/compat.h>
  35
  36#include "internal.h"
  37
  38int do_truncate(struct user_namespace *mnt_userns, struct dentry *dentry,
  39                loff_t length, unsigned int time_attrs, struct file *filp)
  40{
  41        int ret;
  42        struct iattr newattrs;
  43
  44        /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
  45        if (length < 0)
  46                return -EINVAL;
  47
  48        newattrs.ia_size = length;
  49        newattrs.ia_valid = ATTR_SIZE | time_attrs;
  50        if (filp) {
  51                newattrs.ia_file = filp;
  52                newattrs.ia_valid |= ATTR_FILE;
  53        }
  54
  55        /* Remove suid, sgid, and file capabilities on truncate too */
  56        ret = dentry_needs_remove_privs(dentry);
  57        if (ret < 0)
  58                return ret;
  59        if (ret)
  60                newattrs.ia_valid |= ret | ATTR_FORCE;
  61
  62        inode_lock(dentry->d_inode);
  63        /* Note any delegations or leases have already been broken: */
  64        ret = notify_change(mnt_userns, dentry, &newattrs, NULL);
  65        inode_unlock(dentry->d_inode);
  66        return ret;
  67}
  68
  69long vfs_truncate(const struct path *path, loff_t length)
  70{
  71        struct user_namespace *mnt_userns;
  72        struct inode *inode;
  73        long error;
  74
  75        inode = path->dentry->d_inode;
  76
  77        /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
  78        if (S_ISDIR(inode->i_mode))
  79                return -EISDIR;
  80        if (!S_ISREG(inode->i_mode))
  81                return -EINVAL;
  82
  83        error = mnt_want_write(path->mnt);
  84        if (error)
  85                goto out;
  86
  87        mnt_userns = mnt_user_ns(path->mnt);
  88        error = inode_permission(mnt_userns, inode, MAY_WRITE);
  89        if (error)
  90                goto mnt_drop_write_and_out;
  91
  92        error = -EPERM;
  93        if (IS_APPEND(inode))
  94                goto mnt_drop_write_and_out;
  95
  96        error = get_write_access(inode);
  97        if (error)
  98                goto mnt_drop_write_and_out;
  99
 100        /*
 101         * Make sure that there are no leases.  get_write_access() protects
 102         * against the truncate racing with a lease-granting setlease().
 103         */
 104        error = break_lease(inode, O_WRONLY);
 105        if (error)
 106                goto put_write_and_out;
 107
 108        error = security_path_truncate(path);
 109        if (!error)
 110                error = do_truncate(mnt_userns, path->dentry, length, 0, NULL);
 111
 112put_write_and_out:
 113        put_write_access(inode);
 114mnt_drop_write_and_out:
 115        mnt_drop_write(path->mnt);
 116out:
 117        return error;
 118}
 119EXPORT_SYMBOL_GPL(vfs_truncate);
 120
 121long do_sys_truncate(const char __user *pathname, loff_t length)
 122{
 123        unsigned int lookup_flags = LOOKUP_FOLLOW;
 124        struct path path;
 125        int error;
 126
 127        if (length < 0) /* sorry, but loff_t says... */
 128                return -EINVAL;
 129
 130retry:
 131        error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 132        if (!error) {
 133                error = vfs_truncate(&path, length);
 134                path_put(&path);
 135        }
 136        if (retry_estale(error, lookup_flags)) {
 137                lookup_flags |= LOOKUP_REVAL;
 138                goto retry;
 139        }
 140        return error;
 141}
 142
 143SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
 144{
 145        return do_sys_truncate(path, length);
 146}
 147
 148#ifdef CONFIG_COMPAT
 149COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length)
 150{
 151        return do_sys_truncate(path, length);
 152}
 153#endif
 154
 155long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 156{
 157        struct inode *inode;
 158        struct dentry *dentry;
 159        struct fd f;
 160        int error;
 161
 162        error = -EINVAL;
 163        if (length < 0)
 164                goto out;
 165        error = -EBADF;
 166        f = fdget(fd);
 167        if (!f.file)
 168                goto out;
 169
 170        /* explicitly opened as large or we are on 64-bit box */
 171        if (f.file->f_flags & O_LARGEFILE)
 172                small = 0;
 173
 174        dentry = f.file->f_path.dentry;
 175        inode = dentry->d_inode;
 176        error = -EINVAL;
 177        if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
 178                goto out_putf;
 179
 180        error = -EINVAL;
 181        /* Cannot ftruncate over 2^31 bytes without large file support */
 182        if (small && length > MAX_NON_LFS)
 183                goto out_putf;
 184
 185        error = -EPERM;
 186        /* Check IS_APPEND on real upper inode */
 187        if (IS_APPEND(file_inode(f.file)))
 188                goto out_putf;
 189        sb_start_write(inode->i_sb);
 190        error = security_path_truncate(&f.file->f_path);
 191        if (!error)
 192                error = do_truncate(file_mnt_user_ns(f.file), dentry, length,
 193                                    ATTR_MTIME | ATTR_CTIME, f.file);
 194        sb_end_write(inode->i_sb);
 195out_putf:
 196        fdput(f);
 197out:
 198        return error;
 199}
 200
 201SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
 202{
 203        return do_sys_ftruncate(fd, length, 1);
 204}
 205
 206#ifdef CONFIG_COMPAT
 207COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length)
 208{
 209        return do_sys_ftruncate(fd, length, 1);
 210}
 211#endif
 212
 213/* LFS versions of truncate are only needed on 32 bit machines */
 214#if BITS_PER_LONG == 32
 215SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length)
 216{
 217        return do_sys_truncate(path, length);
 218}
 219
 220SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length)
 221{
 222        return do_sys_ftruncate(fd, length, 0);
 223}
 224#endif /* BITS_PER_LONG == 32 */
 225
 226
 227int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 228{
 229        struct inode *inode = file_inode(file);
 230        long ret;
 231
 232        if (offset < 0 || len <= 0)
 233                return -EINVAL;
 234
 235        /* Return error if mode is not supported */
 236        if (mode & ~FALLOC_FL_SUPPORTED_MASK)
 237                return -EOPNOTSUPP;
 238
 239        /* Punch hole and zero range are mutually exclusive */
 240        if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
 241            (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
 242                return -EOPNOTSUPP;
 243
 244        /* Punch hole must have keep size set */
 245        if ((mode & FALLOC_FL_PUNCH_HOLE) &&
 246            !(mode & FALLOC_FL_KEEP_SIZE))
 247                return -EOPNOTSUPP;
 248
 249        /* Collapse range should only be used exclusively. */
 250        if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
 251            (mode & ~FALLOC_FL_COLLAPSE_RANGE))
 252                return -EINVAL;
 253
 254        /* Insert range should only be used exclusively. */
 255        if ((mode & FALLOC_FL_INSERT_RANGE) &&
 256            (mode & ~FALLOC_FL_INSERT_RANGE))
 257                return -EINVAL;
 258
 259        /* Unshare range should only be used with allocate mode. */
 260        if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
 261            (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
 262                return -EINVAL;
 263
 264        if (!(file->f_mode & FMODE_WRITE))
 265                return -EBADF;
 266
 267        /*
 268         * We can only allow pure fallocate on append only files
 269         */
 270        if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
 271                return -EPERM;
 272
 273        if (IS_IMMUTABLE(inode))
 274                return -EPERM;
 275
 276        /*
 277         * We cannot allow any fallocate operation on an active swapfile
 278         */
 279        if (IS_SWAPFILE(inode))
 280                return -ETXTBSY;
 281
 282        /*
 283         * Revalidate the write permissions, in case security policy has
 284         * changed since the files were opened.
 285         */
 286        ret = security_file_permission(file, MAY_WRITE);
 287        if (ret)
 288                return ret;
 289
 290        if (S_ISFIFO(inode->i_mode))
 291                return -ESPIPE;
 292
 293        if (S_ISDIR(inode->i_mode))
 294                return -EISDIR;
 295
 296        if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
 297                return -ENODEV;
 298
 299        /* Check for wrap through zero too */
 300        if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
 301                return -EFBIG;
 302
 303        if (!file->f_op->fallocate)
 304                return -EOPNOTSUPP;
 305
 306        file_start_write(file);
 307        ret = file->f_op->fallocate(file, mode, offset, len);
 308
 309        /*
 310         * Create inotify and fanotify events.
 311         *
 312         * To keep the logic simple always create events if fallocate succeeds.
 313         * This implies that events are even created if the file size remains
 314         * unchanged, e.g. when using flag FALLOC_FL_KEEP_SIZE.
 315         */
 316        if (ret == 0)
 317                fsnotify_modify(file);
 318
 319        file_end_write(file);
 320        return ret;
 321}
 322EXPORT_SYMBOL_GPL(vfs_fallocate);
 323
 324int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len)
 325{
 326        struct fd f = fdget(fd);
 327        int error = -EBADF;
 328
 329        if (f.file) {
 330                error = vfs_fallocate(f.file, mode, offset, len);
 331                fdput(f);
 332        }
 333        return error;
 334}
 335
 336SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
 337{
 338        return ksys_fallocate(fd, mode, offset, len);
 339}
 340
 341/*
 342 * access() needs to use the real uid/gid, not the effective uid/gid.
 343 * We do this by temporarily clearing all FS-related capabilities and
 344 * switching the fsuid/fsgid around to the real ones.
 345 */
 346static const struct cred *access_override_creds(void)
 347{
 348        const struct cred *old_cred;
 349        struct cred *override_cred;
 350
 351        override_cred = prepare_creds();
 352        if (!override_cred)
 353                return NULL;
 354
 355        override_cred->fsuid = override_cred->uid;
 356        override_cred->fsgid = override_cred->gid;
 357
 358        if (!issecure(SECURE_NO_SETUID_FIXUP)) {
 359                /* Clear the capabilities if we switch to a non-root user */
 360                kuid_t root_uid = make_kuid(override_cred->user_ns, 0);
 361                if (!uid_eq(override_cred->uid, root_uid))
 362                        cap_clear(override_cred->cap_effective);
 363                else
 364                        override_cred->cap_effective =
 365                                override_cred->cap_permitted;
 366        }
 367
 368        /*
 369         * The new set of credentials can *only* be used in
 370         * task-synchronous circumstances, and does not need
 371         * RCU freeing, unless somebody then takes a separate
 372         * reference to it.
 373         *
 374         * NOTE! This is _only_ true because this credential
 375         * is used purely for override_creds() that installs
 376         * it as the subjective cred. Other threads will be
 377         * accessing ->real_cred, not the subjective cred.
 378         *
 379         * If somebody _does_ make a copy of this (using the
 380         * 'get_current_cred()' function), that will clear the
 381         * non_rcu field, because now that other user may be
 382         * expecting RCU freeing. But normal thread-synchronous
 383         * cred accesses will keep things non-RCY.
 384         */
 385        override_cred->non_rcu = 1;
 386
 387        old_cred = override_creds(override_cred);
 388
 389        /* override_cred() gets its own ref */
 390        put_cred(override_cred);
 391
 392        return old_cred;
 393}
 394
 395static long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
 396{
 397        struct path path;
 398        struct inode *inode;
 399        int res;
 400        unsigned int lookup_flags = LOOKUP_FOLLOW;
 401        const struct cred *old_cred = NULL;
 402
 403        if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
 404                return -EINVAL;
 405
 406        if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
 407                return -EINVAL;
 408
 409        if (flags & AT_SYMLINK_NOFOLLOW)
 410                lookup_flags &= ~LOOKUP_FOLLOW;
 411        if (flags & AT_EMPTY_PATH)
 412                lookup_flags |= LOOKUP_EMPTY;
 413
 414        if (!(flags & AT_EACCESS)) {
 415                old_cred = access_override_creds();
 416                if (!old_cred)
 417                        return -ENOMEM;
 418        }
 419
 420retry:
 421        res = user_path_at(dfd, filename, lookup_flags, &path);
 422        if (res)
 423                goto out;
 424
 425        inode = d_backing_inode(path.dentry);
 426
 427        if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
 428                /*
 429                 * MAY_EXEC on regular files is denied if the fs is mounted
 430                 * with the "noexec" flag.
 431                 */
 432                res = -EACCES;
 433                if (path_noexec(&path))
 434                        goto out_path_release;
 435        }
 436
 437        res = inode_permission(mnt_user_ns(path.mnt), inode, mode | MAY_ACCESS);
 438        /* SuS v2 requires we report a read only fs too */
 439        if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
 440                goto out_path_release;
 441        /*
 442         * This is a rare case where using __mnt_is_readonly()
 443         * is OK without a mnt_want/drop_write() pair.  Since
 444         * no actual write to the fs is performed here, we do
 445         * not need to telegraph to that to anyone.
 446         *
 447         * By doing this, we accept that this access is
 448         * inherently racy and know that the fs may change
 449         * state before we even see this result.
 450         */
 451        if (__mnt_is_readonly(path.mnt))
 452                res = -EROFS;
 453
 454out_path_release:
 455        path_put(&path);
 456        if (retry_estale(res, lookup_flags)) {
 457                lookup_flags |= LOOKUP_REVAL;
 458                goto retry;
 459        }
 460out:
 461        if (old_cred)
 462                revert_creds(old_cred);
 463
 464        return res;
 465}
 466
 467SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
 468{
 469        return do_faccessat(dfd, filename, mode, 0);
 470}
 471
 472SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode,
 473                int, flags)
 474{
 475        return do_faccessat(dfd, filename, mode, flags);
 476}
 477
 478SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
 479{
 480        return do_faccessat(AT_FDCWD, filename, mode, 0);
 481}
 482
 483SYSCALL_DEFINE1(chdir, const char __user *, filename)
 484{
 485        struct path path;
 486        int error;
 487        unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 488retry:
 489        error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
 490        if (error)
 491                goto out;
 492
 493        error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
 494        if (error)
 495                goto dput_and_out;
 496
 497        set_fs_pwd(current->fs, &path);
 498
 499dput_and_out:
 500        path_put(&path);
 501        if (retry_estale(error, lookup_flags)) {
 502                lookup_flags |= LOOKUP_REVAL;
 503                goto retry;
 504        }
 505out:
 506        return error;
 507}
 508
 509SYSCALL_DEFINE1(fchdir, unsigned int, fd)
 510{
 511        struct fd f = fdget_raw(fd);
 512        int error;
 513
 514        error = -EBADF;
 515        if (!f.file)
 516                goto out;
 517
 518        error = -ENOTDIR;
 519        if (!d_can_lookup(f.file->f_path.dentry))
 520                goto out_putf;
 521
 522        error = file_permission(f.file, MAY_EXEC | MAY_CHDIR);
 523        if (!error)
 524                set_fs_pwd(current->fs, &f.file->f_path);
 525out_putf:
 526        fdput(f);
 527out:
 528        return error;
 529}
 530
 531SYSCALL_DEFINE1(chroot, const char __user *, filename)
 532{
 533        struct path path;
 534        int error;
 535        unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 536retry:
 537        error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
 538        if (error)
 539                goto out;
 540
 541        error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
 542        if (error)
 543                goto dput_and_out;
 544
 545        error = -EPERM;
 546        if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT))
 547                goto dput_and_out;
 548        error = security_path_chroot(&path);
 549        if (error)
 550                goto dput_and_out;
 551
 552        set_fs_root(current->fs, &path);
 553        error = 0;
 554dput_and_out:
 555        path_put(&path);
 556        if (retry_estale(error, lookup_flags)) {
 557                lookup_flags |= LOOKUP_REVAL;
 558                goto retry;
 559        }
 560out:
 561        return error;
 562}
 563
 564int chmod_common(const struct path *path, umode_t mode)
 565{
 566        struct inode *inode = path->dentry->d_inode;
 567        struct inode *delegated_inode = NULL;
 568        struct iattr newattrs;
 569        int error;
 570
 571        error = mnt_want_write(path->mnt);
 572        if (error)
 573                return error;
 574retry_deleg:
 575        inode_lock(inode);
 576        error = security_path_chmod(path, mode);
 577        if (error)
 578                goto out_unlock;
 579        newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
 580        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 581        error = notify_change(mnt_user_ns(path->mnt), path->dentry,
 582                              &newattrs, &delegated_inode);
 583out_unlock:
 584        inode_unlock(inode);
 585        if (delegated_inode) {
 586                error = break_deleg_wait(&delegated_inode);
 587                if (!error)
 588                        goto retry_deleg;
 589        }
 590        mnt_drop_write(path->mnt);
 591        return error;
 592}
 593
 594int vfs_fchmod(struct file *file, umode_t mode)
 595{
 596        audit_file(file);
 597        return chmod_common(&file->f_path, mode);
 598}
 599
 600SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
 601{
 602        struct fd f = fdget(fd);
 603        int err = -EBADF;
 604
 605        if (f.file) {
 606                err = vfs_fchmod(f.file, mode);
 607                fdput(f);
 608        }
 609        return err;
 610}
 611
 612static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
 613{
 614        struct path path;
 615        int error;
 616        unsigned int lookup_flags = LOOKUP_FOLLOW;
 617retry:
 618        error = user_path_at(dfd, filename, lookup_flags, &path);
 619        if (!error) {
 620                error = chmod_common(&path, mode);
 621                path_put(&path);
 622                if (retry_estale(error, lookup_flags)) {
 623                        lookup_flags |= LOOKUP_REVAL;
 624                        goto retry;
 625                }
 626        }
 627        return error;
 628}
 629
 630SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename,
 631                umode_t, mode)
 632{
 633        return do_fchmodat(dfd, filename, mode);
 634}
 635
 636SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
 637{
 638        return do_fchmodat(AT_FDCWD, filename, mode);
 639}
 640
 641int chown_common(const struct path *path, uid_t user, gid_t group)
 642{
 643        struct user_namespace *mnt_userns;
 644        struct inode *inode = path->dentry->d_inode;
 645        struct inode *delegated_inode = NULL;
 646        int error;
 647        struct iattr newattrs;
 648        kuid_t uid;
 649        kgid_t gid;
 650
 651        uid = make_kuid(current_user_ns(), user);
 652        gid = make_kgid(current_user_ns(), group);
 653
 654        mnt_userns = mnt_user_ns(path->mnt);
 655        uid = kuid_from_mnt(mnt_userns, uid);
 656        gid = kgid_from_mnt(mnt_userns, gid);
 657
 658retry_deleg:
 659        newattrs.ia_valid =  ATTR_CTIME;
 660        if (user != (uid_t) -1) {
 661                if (!uid_valid(uid))
 662                        return -EINVAL;
 663                newattrs.ia_valid |= ATTR_UID;
 664                newattrs.ia_uid = uid;
 665        }
 666        if (group != (gid_t) -1) {
 667                if (!gid_valid(gid))
 668                        return -EINVAL;
 669                newattrs.ia_valid |= ATTR_GID;
 670                newattrs.ia_gid = gid;
 671        }
 672        if (!S_ISDIR(inode->i_mode))
 673                newattrs.ia_valid |=
 674                        ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
 675        inode_lock(inode);
 676        error = security_path_chown(path, uid, gid);
 677        if (!error)
 678                error = notify_change(mnt_userns, path->dentry, &newattrs,
 679                                      &delegated_inode);
 680        inode_unlock(inode);
 681        if (delegated_inode) {
 682                error = break_deleg_wait(&delegated_inode);
 683                if (!error)
 684                        goto retry_deleg;
 685        }
 686        return error;
 687}
 688
 689int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
 690                int flag)
 691{
 692        struct path path;
 693        int error = -EINVAL;
 694        int lookup_flags;
 695
 696        if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
 697                goto out;
 698
 699        lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
 700        if (flag & AT_EMPTY_PATH)
 701                lookup_flags |= LOOKUP_EMPTY;
 702retry:
 703        error = user_path_at(dfd, filename, lookup_flags, &path);
 704        if (error)
 705                goto out;
 706        error = mnt_want_write(path.mnt);
 707        if (error)
 708                goto out_release;
 709        error = chown_common(&path, user, group);
 710        mnt_drop_write(path.mnt);
 711out_release:
 712        path_put(&path);
 713        if (retry_estale(error, lookup_flags)) {
 714                lookup_flags |= LOOKUP_REVAL;
 715                goto retry;
 716        }
 717out:
 718        return error;
 719}
 720
 721SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
 722                gid_t, group, int, flag)
 723{
 724        return do_fchownat(dfd, filename, user, group, flag);
 725}
 726
 727SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
 728{
 729        return do_fchownat(AT_FDCWD, filename, user, group, 0);
 730}
 731
 732SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
 733{
 734        return do_fchownat(AT_FDCWD, filename, user, group,
 735                           AT_SYMLINK_NOFOLLOW);
 736}
 737
 738int vfs_fchown(struct file *file, uid_t user, gid_t group)
 739{
 740        int error;
 741
 742        error = mnt_want_write_file(file);
 743        if (error)
 744                return error;
 745        audit_file(file);
 746        error = chown_common(&file->f_path, user, group);
 747        mnt_drop_write_file(file);
 748        return error;
 749}
 750
 751int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
 752{
 753        struct fd f = fdget(fd);
 754        int error = -EBADF;
 755
 756        if (f.file) {
 757                error = vfs_fchown(f.file, user, group);
 758                fdput(f);
 759        }
 760        return error;
 761}
 762
 763SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
 764{
 765        return ksys_fchown(fd, user, group);
 766}
 767
 768static int do_dentry_open(struct file *f,
 769                          struct inode *inode,
 770                          int (*open)(struct inode *, struct file *))
 771{
 772        static const struct file_operations empty_fops = {};
 773        int error;
 774
 775        path_get(&f->f_path);
 776        f->f_inode = inode;
 777        f->f_mapping = inode->i_mapping;
 778        f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
 779        f->f_sb_err = file_sample_sb_err(f);
 780
 781        if (unlikely(f->f_flags & O_PATH)) {
 782                f->f_mode = FMODE_PATH | FMODE_OPENED;
 783                f->f_op = &empty_fops;
 784                return 0;
 785        }
 786
 787        if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
 788                error = get_write_access(inode);
 789                if (unlikely(error))
 790                        goto cleanup_file;
 791                error = __mnt_want_write(f->f_path.mnt);
 792                if (unlikely(error)) {
 793                        put_write_access(inode);
 794                        goto cleanup_file;
 795                }
 796                f->f_mode |= FMODE_WRITER;
 797        }
 798
 799        /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
 800        if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
 801                f->f_mode |= FMODE_ATOMIC_POS;
 802
 803        f->f_op = fops_get(inode->i_fop);
 804        if (WARN_ON(!f->f_op)) {
 805                error = -ENODEV;
 806                goto cleanup_all;
 807        }
 808
 809        error = security_file_open(f);
 810        if (error)
 811                goto cleanup_all;
 812
 813        error = break_lease(locks_inode(f), f->f_flags);
 814        if (error)
 815                goto cleanup_all;
 816
 817        /* normally all 3 are set; ->open() can clear them if needed */
 818        f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
 819        if (!open)
 820                open = f->f_op->open;
 821        if (open) {
 822                error = open(inode, f);
 823                if (error)
 824                        goto cleanup_all;
 825        }
 826        f->f_mode |= FMODE_OPENED;
 827        if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 828                i_readcount_inc(inode);
 829        if ((f->f_mode & FMODE_READ) &&
 830             likely(f->f_op->read || f->f_op->read_iter))
 831                f->f_mode |= FMODE_CAN_READ;
 832        if ((f->f_mode & FMODE_WRITE) &&
 833             likely(f->f_op->write || f->f_op->write_iter))
 834                f->f_mode |= FMODE_CAN_WRITE;
 835
 836        f->f_write_hint = WRITE_LIFE_NOT_SET;
 837        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 838
 839        file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
 840
 841        /* NB: we're sure to have correct a_ops only after f_op->open */
 842        if (f->f_flags & O_DIRECT) {
 843                if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
 844                        return -EINVAL;
 845        }
 846
 847        /*
 848         * XXX: Huge page cache doesn't support writing yet. Drop all page
 849         * cache for this file before processing writes.
 850         */
 851        if (f->f_mode & FMODE_WRITE) {
 852                /*
 853                 * Paired with smp_mb() in collapse_file() to ensure nr_thps
 854                 * is up to date and the update to i_writecount by
 855                 * get_write_access() is visible. Ensures subsequent insertion
 856                 * of THPs into the page cache will fail.
 857                 */
 858                smp_mb();
 859                if (filemap_nr_thps(inode->i_mapping))
 860                        truncate_pagecache(inode, 0);
 861        }
 862
 863        return 0;
 864
 865cleanup_all:
 866        if (WARN_ON_ONCE(error > 0))
 867                error = -EINVAL;
 868        fops_put(f->f_op);
 869        if (f->f_mode & FMODE_WRITER) {
 870                put_write_access(inode);
 871                __mnt_drop_write(f->f_path.mnt);
 872        }
 873cleanup_file:
 874        path_put(&f->f_path);
 875        f->f_path.mnt = NULL;
 876        f->f_path.dentry = NULL;
 877        f->f_inode = NULL;
 878        return error;
 879}
 880
 881/**
 882 * finish_open - finish opening a file
 883 * @file: file pointer
 884 * @dentry: pointer to dentry
 885 * @open: open callback
 886 * @opened: state of open
 887 *
 888 * This can be used to finish opening a file passed to i_op->atomic_open().
 889 *
 890 * If the open callback is set to NULL, then the standard f_op->open()
 891 * filesystem callback is substituted.
 892 *
 893 * NB: the dentry reference is _not_ consumed.  If, for example, the dentry is
 894 * the return value of d_splice_alias(), then the caller needs to perform dput()
 895 * on it after finish_open().
 896 *
 897 * Returns zero on success or -errno if the open failed.
 898 */
 899int finish_open(struct file *file, struct dentry *dentry,
 900                int (*open)(struct inode *, struct file *))
 901{
 902        BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
 903
 904        file->f_path.dentry = dentry;
 905        return do_dentry_open(file, d_backing_inode(dentry), open);
 906}
 907EXPORT_SYMBOL(finish_open);
 908
 909/**
 910 * finish_no_open - finish ->atomic_open() without opening the file
 911 *
 912 * @file: file pointer
 913 * @dentry: dentry or NULL (as returned from ->lookup())
 914 *
 915 * This can be used to set the result of a successful lookup in ->atomic_open().
 916 *
 917 * NB: unlike finish_open() this function does consume the dentry reference and
 918 * the caller need not dput() it.
 919 *
 920 * Returns "0" which must be the return value of ->atomic_open() after having
 921 * called this function.
 922 */
 923int finish_no_open(struct file *file, struct dentry *dentry)
 924{
 925        file->f_path.dentry = dentry;
 926        return 0;
 927}
 928EXPORT_SYMBOL(finish_no_open);
 929
 930char *file_path(struct file *filp, char *buf, int buflen)
 931{
 932        return d_path(&filp->f_path, buf, buflen);
 933}
 934EXPORT_SYMBOL(file_path);
 935
 936/**
 937 * vfs_open - open the file at the given path
 938 * @path: path to open
 939 * @file: newly allocated file with f_flag initialized
 940 * @cred: credentials to use
 941 */
 942int vfs_open(const struct path *path, struct file *file)
 943{
 944        file->f_path = *path;
 945        return do_dentry_open(file, d_backing_inode(path->dentry), NULL);
 946}
 947
 948struct file *dentry_open(const struct path *path, int flags,
 949                         const struct cred *cred)
 950{
 951        int error;
 952        struct file *f;
 953
 954        validate_creds(cred);
 955
 956        /* We must always pass in a valid mount pointer. */
 957        BUG_ON(!path->mnt);
 958
 959        f = alloc_empty_file(flags, cred);
 960        if (!IS_ERR(f)) {
 961                error = vfs_open(path, f);
 962                if (error) {
 963                        fput(f);
 964                        f = ERR_PTR(error);
 965                }
 966        }
 967        return f;
 968}
 969EXPORT_SYMBOL(dentry_open);
 970
 971struct file *open_with_fake_path(const struct path *path, int flags,
 972                                struct inode *inode, const struct cred *cred)
 973{
 974        struct file *f = alloc_empty_file_noaccount(flags, cred);
 975        if (!IS_ERR(f)) {
 976                int error;
 977
 978                f->f_path = *path;
 979                error = do_dentry_open(f, inode, NULL);
 980                if (error) {
 981                        fput(f);
 982                        f = ERR_PTR(error);
 983                }
 984        }
 985        return f;
 986}
 987EXPORT_SYMBOL(open_with_fake_path);
 988
 989#define WILL_CREATE(flags)      (flags & (O_CREAT | __O_TMPFILE))
 990#define O_PATH_FLAGS            (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)
 991
 992inline struct open_how build_open_how(int flags, umode_t mode)
 993{
 994        struct open_how how = {
 995                .flags = flags & VALID_OPEN_FLAGS,
 996                .mode = mode & S_IALLUGO,
 997        };
 998
 999        /* O_PATH beats everything else. */
1000        if (how.flags & O_PATH)
1001                how.flags &= O_PATH_FLAGS;
1002        /* Modes should only be set for create-like flags. */
1003        if (!WILL_CREATE(how.flags))
1004                how.mode = 0;
1005        return how;
1006}
1007
1008inline int build_open_flags(const struct open_how *how, struct open_flags *op)
1009{
1010        u64 flags = how->flags;
1011        u64 strip = FMODE_NONOTIFY | O_CLOEXEC;
1012        int lookup_flags = 0;
1013        int acc_mode = ACC_MODE(flags);
1014
1015        BUILD_BUG_ON_MSG(upper_32_bits(VALID_OPEN_FLAGS),
1016                         "struct open_flags doesn't yet handle flags > 32 bits");
1017
1018        /*
1019         * Strip flags that either shouldn't be set by userspace like
1020         * FMODE_NONOTIFY or that aren't relevant in determining struct
1021         * open_flags like O_CLOEXEC.
1022         */
1023        flags &= ~strip;
1024
1025        /*
1026         * Older syscalls implicitly clear all of the invalid flags or argument
1027         * values before calling build_open_flags(), but openat2(2) checks all
1028         * of its arguments.
1029         */
1030        if (flags & ~VALID_OPEN_FLAGS)
1031                return -EINVAL;
1032        if (how->resolve & ~VALID_RESOLVE_FLAGS)
1033                return -EINVAL;
1034
1035        /* Scoping flags are mutually exclusive. */
1036        if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT))
1037                return -EINVAL;
1038
1039        /* Deal with the mode. */
1040        if (WILL_CREATE(flags)) {
1041                if (how->mode & ~S_IALLUGO)
1042                        return -EINVAL;
1043                op->mode = how->mode | S_IFREG;
1044        } else {
1045                if (how->mode != 0)
1046                        return -EINVAL;
1047                op->mode = 0;
1048        }
1049
1050        /*
1051         * In order to ensure programs get explicit errors when trying to use
1052         * O_TMPFILE on old kernels, O_TMPFILE is implemented such that it
1053         * looks like (O_DIRECTORY|O_RDWR & ~O_CREAT) to old kernels. But we
1054         * have to require userspace to explicitly set it.
1055         */
1056        if (flags & __O_TMPFILE) {
1057                if ((flags & O_TMPFILE_MASK) != O_TMPFILE)
1058                        return -EINVAL;
1059                if (!(acc_mode & MAY_WRITE))
1060                        return -EINVAL;
1061        }
1062        if (flags & O_PATH) {
1063                /* O_PATH only permits certain other flags to be set. */
1064                if (flags & ~O_PATH_FLAGS)
1065                        return -EINVAL;
1066                acc_mode = 0;
1067        }
1068
1069        /*
1070         * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
1071         * check for O_DSYNC if the need any syncing at all we enforce it's
1072         * always set instead of having to deal with possibly weird behaviour
1073         * for malicious applications setting only __O_SYNC.
1074         */
1075        if (flags & __O_SYNC)
1076                flags |= O_DSYNC;
1077
1078        op->open_flag = flags;
1079
1080        /* O_TRUNC implies we need access checks for write permissions */
1081        if (flags & O_TRUNC)
1082                acc_mode |= MAY_WRITE;
1083
1084        /* Allow the LSM permission hook to distinguish append
1085           access from general write access. */
1086        if (flags & O_APPEND)
1087                acc_mode |= MAY_APPEND;
1088
1089        op->acc_mode = acc_mode;
1090
1091        op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
1092
1093        if (flags & O_CREAT) {
1094                op->intent |= LOOKUP_CREATE;
1095                if (flags & O_EXCL) {
1096                        op->intent |= LOOKUP_EXCL;
1097                        flags |= O_NOFOLLOW;
1098                }
1099        }
1100
1101        if (flags & O_DIRECTORY)
1102                lookup_flags |= LOOKUP_DIRECTORY;
1103        if (!(flags & O_NOFOLLOW))
1104                lookup_flags |= LOOKUP_FOLLOW;
1105
1106        if (how->resolve & RESOLVE_NO_XDEV)
1107                lookup_flags |= LOOKUP_NO_XDEV;
1108        if (how->resolve & RESOLVE_NO_MAGICLINKS)
1109                lookup_flags |= LOOKUP_NO_MAGICLINKS;
1110        if (how->resolve & RESOLVE_NO_SYMLINKS)
1111                lookup_flags |= LOOKUP_NO_SYMLINKS;
1112        if (how->resolve & RESOLVE_BENEATH)
1113                lookup_flags |= LOOKUP_BENEATH;
1114        if (how->resolve & RESOLVE_IN_ROOT)
1115                lookup_flags |= LOOKUP_IN_ROOT;
1116        if (how->resolve & RESOLVE_CACHED) {
1117                /* Don't bother even trying for create/truncate/tmpfile open */
1118                if (flags & (O_TRUNC | O_CREAT | O_TMPFILE))
1119                        return -EAGAIN;
1120                lookup_flags |= LOOKUP_CACHED;
1121        }
1122
1123        op->lookup_flags = lookup_flags;
1124        return 0;
1125}
1126
1127/**
1128 * file_open_name - open file and return file pointer
1129 *
1130 * @name:       struct filename containing path to open
1131 * @flags:      open flags as per the open(2) second argument
1132 * @mode:       mode for the new file if O_CREAT is set, else ignored
1133 *
1134 * This is the helper to open a file from kernelspace if you really
1135 * have to.  But in generally you should not do this, so please move
1136 * along, nothing to see here..
1137 */
1138struct file *file_open_name(struct filename *name, int flags, umode_t mode)
1139{
1140        struct open_flags op;
1141        struct open_how how = build_open_how(flags, mode);
1142        int err = build_open_flags(&how, &op);
1143        if (err)
1144                return ERR_PTR(err);
1145        return do_filp_open(AT_FDCWD, name, &op);
1146}
1147
1148/**
1149 * filp_open - open file and return file pointer
1150 *
1151 * @filename:   path to open
1152 * @flags:      open flags as per the open(2) second argument
1153 * @mode:       mode for the new file if O_CREAT is set, else ignored
1154 *
1155 * This is the helper to open a file from kernelspace if you really
1156 * have to.  But in generally you should not do this, so please move
1157 * along, nothing to see here..
1158 */
1159struct file *filp_open(const char *filename, int flags, umode_t mode)
1160{
1161        struct filename *name = getname_kernel(filename);
1162        struct file *file = ERR_CAST(name);
1163        
1164        if (!IS_ERR(name)) {
1165                file = file_open_name(name, flags, mode);
1166                putname(name);
1167        }
1168        return file;
1169}
1170EXPORT_SYMBOL(filp_open);
1171
1172struct file *file_open_root(const struct path *root,
1173                            const char *filename, int flags, umode_t mode)
1174{
1175        struct open_flags op;
1176        struct open_how how = build_open_how(flags, mode);
1177        int err = build_open_flags(&how, &op);
1178        if (err)
1179                return ERR_PTR(err);
1180        return do_file_open_root(root, filename, &op);
1181}
1182EXPORT_SYMBOL(file_open_root);
1183
1184static long do_sys_openat2(int dfd, const char __user *filename,
1185                           struct open_how *how)
1186{
1187        struct open_flags op;
1188        int fd = build_open_flags(how, &op);
1189        struct filename *tmp;
1190
1191        if (fd)
1192                return fd;
1193
1194        tmp = getname(filename);
1195        if (IS_ERR(tmp))
1196                return PTR_ERR(tmp);
1197
1198        fd = get_unused_fd_flags(how->flags);
1199        if (fd >= 0) {
1200                struct file *f = do_filp_open(dfd, tmp, &op);
1201                if (IS_ERR(f)) {
1202                        put_unused_fd(fd);
1203                        fd = PTR_ERR(f);
1204                } else {
1205                        fsnotify_open(f);
1206                        fd_install(fd, f);
1207                }
1208        }
1209        putname(tmp);
1210        return fd;
1211}
1212
1213long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
1214{
1215        struct open_how how = build_open_how(flags, mode);
1216        return do_sys_openat2(dfd, filename, &how);
1217}
1218
1219
1220SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
1221{
1222        if (force_o_largefile())
1223                flags |= O_LARGEFILE;
1224        return do_sys_open(AT_FDCWD, filename, flags, mode);
1225}
1226
1227SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
1228                umode_t, mode)
1229{
1230        if (force_o_largefile())
1231                flags |= O_LARGEFILE;
1232        return do_sys_open(dfd, filename, flags, mode);
1233}
1234
1235SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename,
1236                struct open_how __user *, how, size_t, usize)
1237{
1238        int err;
1239        struct open_how tmp;
1240
1241        BUILD_BUG_ON(sizeof(struct open_how) < OPEN_HOW_SIZE_VER0);
1242        BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_LATEST);
1243
1244        if (unlikely(usize < OPEN_HOW_SIZE_VER0))
1245                return -EINVAL;
1246
1247        err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize);
1248        if (err)
1249                return err;
1250
1251        /* O_LARGEFILE is only allowed for non-O_PATH. */
1252        if (!(tmp.flags & O_PATH) && force_o_largefile())
1253                tmp.flags |= O_LARGEFILE;
1254
1255        return do_sys_openat2(dfd, filename, &tmp);
1256}
1257
1258#ifdef CONFIG_COMPAT
1259/*
1260 * Exactly like sys_open(), except that it doesn't set the
1261 * O_LARGEFILE flag.
1262 */
1263COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
1264{
1265        return do_sys_open(AT_FDCWD, filename, flags, mode);
1266}
1267
1268/*
1269 * Exactly like sys_openat(), except that it doesn't set the
1270 * O_LARGEFILE flag.
1271 */
1272COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
1273{
1274        return do_sys_open(dfd, filename, flags, mode);
1275}
1276#endif
1277
1278#ifndef __alpha__
1279
1280/*
1281 * For backward compatibility?  Maybe this should be moved
1282 * into arch/i386 instead?
1283 */
1284SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
1285{
1286        int flags = O_CREAT | O_WRONLY | O_TRUNC;
1287
1288        if (force_o_largefile())
1289                flags |= O_LARGEFILE;
1290        return do_sys_open(AT_FDCWD, pathname, flags, mode);
1291}
1292#endif
1293
1294/*
1295 * "id" is the POSIX thread ID. We use the
1296 * files pointer for this..
1297 */
1298int filp_close(struct file *filp, fl_owner_t id)
1299{
1300        int retval = 0;
1301
1302        if (!file_count(filp)) {
1303                printk(KERN_ERR "VFS: Close: file count is 0\n");
1304                return 0;
1305        }
1306
1307        if (filp->f_op->flush)
1308                retval = filp->f_op->flush(filp, id);
1309
1310        if (likely(!(filp->f_mode & FMODE_PATH))) {
1311                dnotify_flush(filp, id);
1312                locks_remove_posix(filp, id);
1313        }
1314        fput(filp);
1315        return retval;
1316}
1317
1318EXPORT_SYMBOL(filp_close);
1319
1320/*
1321 * Careful here! We test whether the file pointer is NULL before
1322 * releasing the fd. This ensures that one clone task can't release
1323 * an fd while another clone is opening it.
1324 */
1325SYSCALL_DEFINE1(close, unsigned int, fd)
1326{
1327        int retval = close_fd(fd);
1328
1329        /* can't restart close syscall because file table entry was cleared */
1330        if (unlikely(retval == -ERESTARTSYS ||
1331                     retval == -ERESTARTNOINTR ||
1332                     retval == -ERESTARTNOHAND ||
1333                     retval == -ERESTART_RESTARTBLOCK))
1334                retval = -EINTR;
1335
1336        return retval;
1337}
1338
1339/**
1340 * close_range() - Close all file descriptors in a given range.
1341 *
1342 * @fd:     starting file descriptor to close
1343 * @max_fd: last file descriptor to close
1344 * @flags:  reserved for future extensions
1345 *
1346 * This closes a range of file descriptors. All file descriptors
1347 * from @fd up to and including @max_fd are closed.
1348 * Currently, errors to close a given file descriptor are ignored.
1349 */
1350SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd,
1351                unsigned int, flags)
1352{
1353        return __close_range(fd, max_fd, flags);
1354}
1355
1356/*
1357 * This routine simulates a hangup on the tty, to arrange that users
1358 * are given clean terminals at login time.
1359 */
1360SYSCALL_DEFINE0(vhangup)
1361{
1362        if (capable(CAP_SYS_TTY_CONFIG)) {
1363                tty_vhangup_self();
1364                return 0;
1365        }
1366        return -EPERM;
1367}
1368
1369/*
1370 * Called when an inode is about to be open.
1371 * We use this to disallow opening large files on 32bit systems if
1372 * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
1373 * on this flag in sys_open.
1374 */
1375int generic_file_open(struct inode * inode, struct file * filp)
1376{
1377        if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1378                return -EOVERFLOW;
1379        return 0;
1380}
1381
1382EXPORT_SYMBOL(generic_file_open);
1383
1384/*
1385 * This is used by subsystems that don't want seekable
1386 * file descriptors. The function is not supposed to ever fail, the only
1387 * reason it returns an 'int' and not 'void' is so that it can be plugged
1388 * directly into file_operations structure.
1389 */
1390int nonseekable_open(struct inode *inode, struct file *filp)
1391{
1392        filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1393        return 0;
1394}
1395
1396EXPORT_SYMBOL(nonseekable_open);
1397
1398/*
1399 * stream_open is used by subsystems that want stream-like file descriptors.
1400 * Such file descriptors are not seekable and don't have notion of position
1401 * (file.f_pos is always 0 and ppos passed to .read()/.write() is always NULL).
1402 * Contrary to file descriptors of other regular files, .read() and .write()
1403 * can run simultaneously.
1404 *
1405 * stream_open never fails and is marked to return int so that it could be
1406 * directly used as file_operations.open .
1407 */
1408int stream_open(struct inode *inode, struct file *filp)
1409{
1410        filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS);
1411        filp->f_mode |= FMODE_STREAM;
1412        return 0;
1413}
1414
1415EXPORT_SYMBOL(stream_open);
1416