linux/fs/overlayfs/file.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2017 Red Hat, Inc.
   4 */
   5
   6#include <linux/cred.h>
   7#include <linux/file.h>
   8#include <linux/mount.h>
   9#include <linux/xattr.h>
  10#include <linux/uio.h>
  11#include <linux/uaccess.h>
  12#include "overlayfs.h"
  13
  14static char ovl_whatisit(struct inode *inode, struct inode *realinode)
  15{
  16        if (realinode != ovl_inode_upper(inode))
  17                return 'l';
  18        if (ovl_has_upperdata(inode))
  19                return 'u';
  20        else
  21                return 'm';
  22}
  23
  24static struct file *ovl_open_realfile(const struct file *file,
  25                                      struct inode *realinode)
  26{
  27        struct inode *inode = file_inode(file);
  28        struct file *realfile;
  29        const struct cred *old_cred;
  30        int flags = file->f_flags | O_NOATIME | FMODE_NONOTIFY;
  31
  32        old_cred = ovl_override_creds(inode->i_sb);
  33        realfile = open_with_fake_path(&file->f_path, flags, realinode,
  34                                       current_cred());
  35        revert_creds(old_cred);
  36
  37        pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
  38                 file, file, ovl_whatisit(inode, realinode), file->f_flags,
  39                 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
  40
  41        return realfile;
  42}
  43
  44#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
  45
  46static int ovl_change_flags(struct file *file, unsigned int flags)
  47{
  48        struct inode *inode = file_inode(file);
  49        int err;
  50
  51        /* No atime modificaton on underlying */
  52        flags |= O_NOATIME | FMODE_NONOTIFY;
  53
  54        /* If some flag changed that cannot be changed then something's amiss */
  55        if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
  56                return -EIO;
  57
  58        flags &= OVL_SETFL_MASK;
  59
  60        if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
  61                return -EPERM;
  62
  63        if (flags & O_DIRECT) {
  64                if (!file->f_mapping->a_ops ||
  65                    !file->f_mapping->a_ops->direct_IO)
  66                        return -EINVAL;
  67        }
  68
  69        if (file->f_op->check_flags) {
  70                err = file->f_op->check_flags(flags);
  71                if (err)
  72                        return err;
  73        }
  74
  75        spin_lock(&file->f_lock);
  76        file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
  77        spin_unlock(&file->f_lock);
  78
  79        return 0;
  80}
  81
  82static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
  83                               bool allow_meta)
  84{
  85        struct inode *inode = file_inode(file);
  86        struct inode *realinode;
  87
  88        real->flags = 0;
  89        real->file = file->private_data;
  90
  91        if (allow_meta)
  92                realinode = ovl_inode_real(inode);
  93        else
  94                realinode = ovl_inode_realdata(inode);
  95
  96        /* Has it been copied up since we'd opened it? */
  97        if (unlikely(file_inode(real->file) != realinode)) {
  98                real->flags = FDPUT_FPUT;
  99                real->file = ovl_open_realfile(file, realinode);
 100
 101                return PTR_ERR_OR_ZERO(real->file);
 102        }
 103
 104        /* Did the flags change since open? */
 105        if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME))
 106                return ovl_change_flags(real->file, file->f_flags);
 107
 108        return 0;
 109}
 110
 111static int ovl_real_fdget(const struct file *file, struct fd *real)
 112{
 113        return ovl_real_fdget_meta(file, real, false);
 114}
 115
 116static int ovl_open(struct inode *inode, struct file *file)
 117{
 118        struct file *realfile;
 119        int err;
 120
 121        err = ovl_maybe_copy_up(file_dentry(file), file->f_flags);
 122        if (err)
 123                return err;
 124
 125        /* No longer need these flags, so don't pass them on to underlying fs */
 126        file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 127
 128        realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
 129        if (IS_ERR(realfile))
 130                return PTR_ERR(realfile);
 131
 132        file->private_data = realfile;
 133
 134        return 0;
 135}
 136
 137static int ovl_release(struct inode *inode, struct file *file)
 138{
 139        fput(file->private_data);
 140
 141        return 0;
 142}
 143
 144static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
 145{
 146        struct inode *inode = file_inode(file);
 147        struct fd real;
 148        const struct cred *old_cred;
 149        ssize_t ret;
 150
 151        /*
 152         * The two special cases below do not need to involve real fs,
 153         * so we can optimizing concurrent callers.
 154         */
 155        if (offset == 0) {
 156                if (whence == SEEK_CUR)
 157                        return file->f_pos;
 158
 159                if (whence == SEEK_SET)
 160                        return vfs_setpos(file, 0, 0);
 161        }
 162
 163        ret = ovl_real_fdget(file, &real);
 164        if (ret)
 165                return ret;
 166
 167        /*
 168         * Overlay file f_pos is the master copy that is preserved
 169         * through copy up and modified on read/write, but only real
 170         * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
 171         * limitations that are more strict than ->s_maxbytes for specific
 172         * files, so we use the real file to perform seeks.
 173         */
 174        inode_lock(inode);
 175        real.file->f_pos = file->f_pos;
 176
 177        old_cred = ovl_override_creds(inode->i_sb);
 178        ret = vfs_llseek(real.file, offset, whence);
 179        revert_creds(old_cred);
 180
 181        file->f_pos = real.file->f_pos;
 182        inode_unlock(inode);
 183
 184        fdput(real);
 185
 186        return ret;
 187}
 188
 189static void ovl_file_accessed(struct file *file)
 190{
 191        struct inode *inode, *upperinode;
 192
 193        if (file->f_flags & O_NOATIME)
 194                return;
 195
 196        inode = file_inode(file);
 197        upperinode = ovl_inode_upper(inode);
 198
 199        if (!upperinode)
 200                return;
 201
 202        if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
 203             !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
 204                inode->i_mtime = upperinode->i_mtime;
 205                inode->i_ctime = upperinode->i_ctime;
 206        }
 207
 208        touch_atime(&file->f_path);
 209}
 210
 211static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
 212{
 213        int ifl = iocb->ki_flags;
 214        rwf_t flags = 0;
 215
 216        if (ifl & IOCB_NOWAIT)
 217                flags |= RWF_NOWAIT;
 218        if (ifl & IOCB_HIPRI)
 219                flags |= RWF_HIPRI;
 220        if (ifl & IOCB_DSYNC)
 221                flags |= RWF_DSYNC;
 222        if (ifl & IOCB_SYNC)
 223                flags |= RWF_SYNC;
 224
 225        return flags;
 226}
 227
 228static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 229{
 230        struct file *file = iocb->ki_filp;
 231        struct fd real;
 232        const struct cred *old_cred;
 233        ssize_t ret;
 234
 235        if (!iov_iter_count(iter))
 236                return 0;
 237
 238        ret = ovl_real_fdget(file, &real);
 239        if (ret)
 240                return ret;
 241
 242        old_cred = ovl_override_creds(file_inode(file)->i_sb);
 243        ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
 244                            ovl_iocb_to_rwf(iocb));
 245        revert_creds(old_cred);
 246
 247        ovl_file_accessed(file);
 248
 249        fdput(real);
 250
 251        return ret;
 252}
 253
 254static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 255{
 256        struct file *file = iocb->ki_filp;
 257        struct inode *inode = file_inode(file);
 258        struct fd real;
 259        const struct cred *old_cred;
 260        ssize_t ret;
 261
 262        if (!iov_iter_count(iter))
 263                return 0;
 264
 265        inode_lock(inode);
 266        /* Update mode */
 267        ovl_copyattr(ovl_inode_real(inode), inode);
 268        ret = file_remove_privs(file);
 269        if (ret)
 270                goto out_unlock;
 271
 272        ret = ovl_real_fdget(file, &real);
 273        if (ret)
 274                goto out_unlock;
 275
 276        old_cred = ovl_override_creds(file_inode(file)->i_sb);
 277        file_start_write(real.file);
 278        ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
 279                             ovl_iocb_to_rwf(iocb));
 280        file_end_write(real.file);
 281        revert_creds(old_cred);
 282
 283        /* Update size */
 284        ovl_copyattr(ovl_inode_real(inode), inode);
 285
 286        fdput(real);
 287
 288out_unlock:
 289        inode_unlock(inode);
 290
 291        return ret;
 292}
 293
 294static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 295{
 296        struct fd real;
 297        const struct cred *old_cred;
 298        int ret;
 299
 300        ret = ovl_real_fdget_meta(file, &real, !datasync);
 301        if (ret)
 302                return ret;
 303
 304        /* Don't sync lower file for fear of receiving EROFS error */
 305        if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
 306                old_cred = ovl_override_creds(file_inode(file)->i_sb);
 307                ret = vfs_fsync_range(real.file, start, end, datasync);
 308                revert_creds(old_cred);
 309        }
 310
 311        fdput(real);
 312
 313        return ret;
 314}
 315
 316static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 317{
 318        struct file *realfile = file->private_data;
 319        const struct cred *old_cred;
 320        int ret;
 321
 322        if (!realfile->f_op->mmap)
 323                return -ENODEV;
 324
 325        if (WARN_ON(file != vma->vm_file))
 326                return -EIO;
 327
 328        vma->vm_file = get_file(realfile);
 329
 330        old_cred = ovl_override_creds(file_inode(file)->i_sb);
 331        ret = call_mmap(vma->vm_file, vma);
 332        revert_creds(old_cred);
 333
 334        if (ret) {
 335                /* Drop reference count from new vm_file value */
 336                fput(realfile);
 337        } else {
 338                /* Drop reference count from previous vm_file value */
 339                fput(file);
 340        }
 341
 342        ovl_file_accessed(file);
 343
 344        return ret;
 345}
 346
 347static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 348{
 349        struct inode *inode = file_inode(file);
 350        struct fd real;
 351        const struct cred *old_cred;
 352        int ret;
 353
 354        ret = ovl_real_fdget(file, &real);
 355        if (ret)
 356                return ret;
 357
 358        old_cred = ovl_override_creds(file_inode(file)->i_sb);
 359        ret = vfs_fallocate(real.file, mode, offset, len);
 360        revert_creds(old_cred);
 361
 362        /* Update size */
 363        ovl_copyattr(ovl_inode_real(inode), inode);
 364
 365        fdput(real);
 366
 367        return ret;
 368}
 369
 370static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
 371{
 372        struct fd real;
 373        const struct cred *old_cred;
 374        int ret;
 375
 376        ret = ovl_real_fdget(file, &real);
 377        if (ret)
 378                return ret;
 379
 380        old_cred = ovl_override_creds(file_inode(file)->i_sb);
 381        ret = vfs_fadvise(real.file, offset, len, advice);
 382        revert_creds(old_cred);
 383
 384        fdput(real);
 385
 386        return ret;
 387}
 388
 389static long ovl_real_ioctl(struct file *file, unsigned int cmd,
 390                           unsigned long arg)
 391{
 392        struct fd real;
 393        const struct cred *old_cred;
 394        long ret;
 395
 396        ret = ovl_real_fdget(file, &real);
 397        if (ret)
 398                return ret;
 399
 400        old_cred = ovl_override_creds(file_inode(file)->i_sb);
 401        ret = vfs_ioctl(real.file, cmd, arg);
 402        revert_creds(old_cred);
 403
 404        fdput(real);
 405
 406        return ret;
 407}
 408
 409static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
 410                                unsigned long arg, unsigned int iflags)
 411{
 412        long ret;
 413        struct inode *inode = file_inode(file);
 414        unsigned int old_iflags;
 415
 416        if (!inode_owner_or_capable(inode))
 417                return -EACCES;
 418
 419        ret = mnt_want_write_file(file);
 420        if (ret)
 421                return ret;
 422
 423        inode_lock(inode);
 424
 425        /* Check the capability before cred override */
 426        ret = -EPERM;
 427        old_iflags = READ_ONCE(inode->i_flags);
 428        if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) &&
 429            !capable(CAP_LINUX_IMMUTABLE))
 430                goto unlock;
 431
 432        ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
 433        if (ret)
 434                goto unlock;
 435
 436        ret = ovl_real_ioctl(file, cmd, arg);
 437
 438        ovl_copyflags(ovl_inode_real(inode), inode);
 439unlock:
 440        inode_unlock(inode);
 441
 442        mnt_drop_write_file(file);
 443
 444        return ret;
 445
 446}
 447
 448static unsigned int ovl_fsflags_to_iflags(unsigned int flags)
 449{
 450        unsigned int iflags = 0;
 451
 452        if (flags & FS_SYNC_FL)
 453                iflags |= S_SYNC;
 454        if (flags & FS_APPEND_FL)
 455                iflags |= S_APPEND;
 456        if (flags & FS_IMMUTABLE_FL)
 457                iflags |= S_IMMUTABLE;
 458        if (flags & FS_NOATIME_FL)
 459                iflags |= S_NOATIME;
 460
 461        return iflags;
 462}
 463
 464static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
 465                                  unsigned long arg)
 466{
 467        unsigned int flags;
 468
 469        if (get_user(flags, (int __user *) arg))
 470                return -EFAULT;
 471
 472        return ovl_ioctl_set_flags(file, cmd, arg,
 473                                   ovl_fsflags_to_iflags(flags));
 474}
 475
 476static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags)
 477{
 478        unsigned int iflags = 0;
 479
 480        if (xflags & FS_XFLAG_SYNC)
 481                iflags |= S_SYNC;
 482        if (xflags & FS_XFLAG_APPEND)
 483                iflags |= S_APPEND;
 484        if (xflags & FS_XFLAG_IMMUTABLE)
 485                iflags |= S_IMMUTABLE;
 486        if (xflags & FS_XFLAG_NOATIME)
 487                iflags |= S_NOATIME;
 488
 489        return iflags;
 490}
 491
 492static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
 493                                   unsigned long arg)
 494{
 495        struct fsxattr fa;
 496
 497        memset(&fa, 0, sizeof(fa));
 498        if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
 499                return -EFAULT;
 500
 501        return ovl_ioctl_set_flags(file, cmd, arg,
 502                                   ovl_fsxflags_to_iflags(fa.fsx_xflags));
 503}
 504
 505static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 506{
 507        long ret;
 508
 509        switch (cmd) {
 510        case FS_IOC_GETFLAGS:
 511        case FS_IOC_FSGETXATTR:
 512                ret = ovl_real_ioctl(file, cmd, arg);
 513                break;
 514
 515        case FS_IOC_SETFLAGS:
 516                ret = ovl_ioctl_set_fsflags(file, cmd, arg);
 517                break;
 518
 519        case FS_IOC_FSSETXATTR:
 520                ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
 521                break;
 522
 523        default:
 524                ret = -ENOTTY;
 525        }
 526
 527        return ret;
 528}
 529
 530static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
 531                             unsigned long arg)
 532{
 533        switch (cmd) {
 534        case FS_IOC32_GETFLAGS:
 535                cmd = FS_IOC_GETFLAGS;
 536                break;
 537
 538        case FS_IOC32_SETFLAGS:
 539                cmd = FS_IOC_SETFLAGS;
 540                break;
 541
 542        default:
 543                return -ENOIOCTLCMD;
 544        }
 545
 546        return ovl_ioctl(file, cmd, arg);
 547}
 548
 549enum ovl_copyop {
 550        OVL_COPY,
 551        OVL_CLONE,
 552        OVL_DEDUPE,
 553};
 554
 555static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
 556                            struct file *file_out, loff_t pos_out,
 557                            loff_t len, unsigned int flags, enum ovl_copyop op)
 558{
 559        struct inode *inode_out = file_inode(file_out);
 560        struct fd real_in, real_out;
 561        const struct cred *old_cred;
 562        loff_t ret;
 563
 564        ret = ovl_real_fdget(file_out, &real_out);
 565        if (ret)
 566                return ret;
 567
 568        ret = ovl_real_fdget(file_in, &real_in);
 569        if (ret) {
 570                fdput(real_out);
 571                return ret;
 572        }
 573
 574        old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
 575        switch (op) {
 576        case OVL_COPY:
 577                ret = vfs_copy_file_range(real_in.file, pos_in,
 578                                          real_out.file, pos_out, len, flags);
 579                break;
 580
 581        case OVL_CLONE:
 582                ret = vfs_clone_file_range(real_in.file, pos_in,
 583                                           real_out.file, pos_out, len, flags);
 584                break;
 585
 586        case OVL_DEDUPE:
 587                ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
 588                                                real_out.file, pos_out, len,
 589                                                flags);
 590                break;
 591        }
 592        revert_creds(old_cred);
 593
 594        /* Update size */
 595        ovl_copyattr(ovl_inode_real(inode_out), inode_out);
 596
 597        fdput(real_in);
 598        fdput(real_out);
 599
 600        return ret;
 601}
 602
 603static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
 604                                   struct file *file_out, loff_t pos_out,
 605                                   size_t len, unsigned int flags)
 606{
 607        return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
 608                            OVL_COPY);
 609}
 610
 611static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
 612                                   struct file *file_out, loff_t pos_out,
 613                                   loff_t len, unsigned int remap_flags)
 614{
 615        enum ovl_copyop op;
 616
 617        if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
 618                return -EINVAL;
 619
 620        if (remap_flags & REMAP_FILE_DEDUP)
 621                op = OVL_DEDUPE;
 622        else
 623                op = OVL_CLONE;
 624
 625        /*
 626         * Don't copy up because of a dedupe request, this wouldn't make sense
 627         * most of the time (data would be duplicated instead of deduplicated).
 628         */
 629        if (op == OVL_DEDUPE &&
 630            (!ovl_inode_upper(file_inode(file_in)) ||
 631             !ovl_inode_upper(file_inode(file_out))))
 632                return -EPERM;
 633
 634        return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
 635                            remap_flags, op);
 636}
 637
 638const struct file_operations ovl_file_operations = {
 639        .open           = ovl_open,
 640        .release        = ovl_release,
 641        .llseek         = ovl_llseek,
 642        .read_iter      = ovl_read_iter,
 643        .write_iter     = ovl_write_iter,
 644        .fsync          = ovl_fsync,
 645        .mmap           = ovl_mmap,
 646        .fallocate      = ovl_fallocate,
 647        .fadvise        = ovl_fadvise,
 648        .unlocked_ioctl = ovl_ioctl,
 649        .compat_ioctl   = ovl_compat_ioctl,
 650
 651        .copy_file_range        = ovl_copy_file_range,
 652        .remap_file_range       = ovl_remap_file_range,
 653};
 654