linux/fs/overlayfs/file.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2017 Red Hat, Inc.
   4 */
   5
   6#include <linux/cred.h>
   7#include <linux/file.h>
   8#include <linux/mount.h>
   9#include <linux/xattr.h>
  10#include <linux/uio.h>
  11#include <linux/uaccess.h>
  12#include <linux/splice.h>
  13#include <linux/security.h>
  14#include <linux/mm.h>
  15#include <linux/fs.h>
  16#include "overlayfs.h"
  17
  18struct ovl_aio_req {
  19        struct kiocb iocb;
  20        struct kiocb *orig_iocb;
  21        struct fd fd;
  22};
  23
  24static struct kmem_cache *ovl_aio_request_cachep;
  25
  26static char ovl_whatisit(struct inode *inode, struct inode *realinode)
  27{
  28        if (realinode != ovl_inode_upper(inode))
  29                return 'l';
  30        if (ovl_has_upperdata(inode))
  31                return 'u';
  32        else
  33                return 'm';
  34}
  35
  36/* No atime modificaton nor notify on underlying */
  37#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
  38
  39static struct file *ovl_open_realfile(const struct file *file,
  40                                      struct inode *realinode)
  41{
  42        struct inode *inode = file_inode(file);
  43        struct file *realfile;
  44        const struct cred *old_cred;
  45        int flags = file->f_flags | OVL_OPEN_FLAGS;
  46        int acc_mode = ACC_MODE(flags);
  47        int err;
  48
  49        if (flags & O_APPEND)
  50                acc_mode |= MAY_APPEND;
  51
  52        old_cred = ovl_override_creds(inode->i_sb);
  53        err = inode_permission(&init_user_ns, realinode, MAY_OPEN | acc_mode);
  54        if (err) {
  55                realfile = ERR_PTR(err);
  56        } else {
  57                if (!inode_owner_or_capable(&init_user_ns, realinode))
  58                        flags &= ~O_NOATIME;
  59
  60                realfile = open_with_fake_path(&file->f_path, flags, realinode,
  61                                               current_cred());
  62        }
  63        revert_creds(old_cred);
  64
  65        pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
  66                 file, file, ovl_whatisit(inode, realinode), file->f_flags,
  67                 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
  68
  69        return realfile;
  70}
  71
  72#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
  73
  74static int ovl_change_flags(struct file *file, unsigned int flags)
  75{
  76        struct inode *inode = file_inode(file);
  77        int err;
  78
  79        flags &= OVL_SETFL_MASK;
  80
  81        if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
  82                return -EPERM;
  83
  84        if (flags & O_DIRECT) {
  85                if (!file->f_mapping->a_ops ||
  86                    !file->f_mapping->a_ops->direct_IO)
  87                        return -EINVAL;
  88        }
  89
  90        if (file->f_op->check_flags) {
  91                err = file->f_op->check_flags(flags);
  92                if (err)
  93                        return err;
  94        }
  95
  96        spin_lock(&file->f_lock);
  97        file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
  98        spin_unlock(&file->f_lock);
  99
 100        return 0;
 101}
 102
 103static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
 104                               bool allow_meta)
 105{
 106        struct inode *inode = file_inode(file);
 107        struct inode *realinode;
 108
 109        real->flags = 0;
 110        real->file = file->private_data;
 111
 112        if (allow_meta)
 113                realinode = ovl_inode_real(inode);
 114        else
 115                realinode = ovl_inode_realdata(inode);
 116
 117        /* Has it been copied up since we'd opened it? */
 118        if (unlikely(file_inode(real->file) != realinode)) {
 119                real->flags = FDPUT_FPUT;
 120                real->file = ovl_open_realfile(file, realinode);
 121
 122                return PTR_ERR_OR_ZERO(real->file);
 123        }
 124
 125        /* Did the flags change since open? */
 126        if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
 127                return ovl_change_flags(real->file, file->f_flags);
 128
 129        return 0;
 130}
 131
 132static int ovl_real_fdget(const struct file *file, struct fd *real)
 133{
 134        if (d_is_dir(file_dentry(file))) {
 135                real->flags = 0;
 136                real->file = ovl_dir_real_file(file, false);
 137
 138                return PTR_ERR_OR_ZERO(real->file);
 139        }
 140
 141        return ovl_real_fdget_meta(file, real, false);
 142}
 143
 144static int ovl_open(struct inode *inode, struct file *file)
 145{
 146        struct file *realfile;
 147        int err;
 148
 149        err = ovl_maybe_copy_up(file_dentry(file), file->f_flags);
 150        if (err)
 151                return err;
 152
 153        /* No longer need these flags, so don't pass them on to underlying fs */
 154        file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 155
 156        realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
 157        if (IS_ERR(realfile))
 158                return PTR_ERR(realfile);
 159
 160        file->private_data = realfile;
 161
 162        return 0;
 163}
 164
 165static int ovl_release(struct inode *inode, struct file *file)
 166{
 167        fput(file->private_data);
 168
 169        return 0;
 170}
 171
 172static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
 173{
 174        struct inode *inode = file_inode(file);
 175        struct fd real;
 176        const struct cred *old_cred;
 177        loff_t ret;
 178
 179        /*
 180         * The two special cases below do not need to involve real fs,
 181         * so we can optimizing concurrent callers.
 182         */
 183        if (offset == 0) {
 184                if (whence == SEEK_CUR)
 185                        return file->f_pos;
 186
 187                if (whence == SEEK_SET)
 188                        return vfs_setpos(file, 0, 0);
 189        }
 190
 191        ret = ovl_real_fdget(file, &real);
 192        if (ret)
 193                return ret;
 194
 195        /*
 196         * Overlay file f_pos is the master copy that is preserved
 197         * through copy up and modified on read/write, but only real
 198         * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
 199         * limitations that are more strict than ->s_maxbytes for specific
 200         * files, so we use the real file to perform seeks.
 201         */
 202        ovl_inode_lock(inode);
 203        real.file->f_pos = file->f_pos;
 204
 205        old_cred = ovl_override_creds(inode->i_sb);
 206        ret = vfs_llseek(real.file, offset, whence);
 207        revert_creds(old_cred);
 208
 209        file->f_pos = real.file->f_pos;
 210        ovl_inode_unlock(inode);
 211
 212        fdput(real);
 213
 214        return ret;
 215}
 216
 217static void ovl_file_accessed(struct file *file)
 218{
 219        struct inode *inode, *upperinode;
 220
 221        if (file->f_flags & O_NOATIME)
 222                return;
 223
 224        inode = file_inode(file);
 225        upperinode = ovl_inode_upper(inode);
 226
 227        if (!upperinode)
 228                return;
 229
 230        if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
 231             !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
 232                inode->i_mtime = upperinode->i_mtime;
 233                inode->i_ctime = upperinode->i_ctime;
 234        }
 235
 236        touch_atime(&file->f_path);
 237}
 238
 239static rwf_t ovl_iocb_to_rwf(int ifl)
 240{
 241        rwf_t flags = 0;
 242
 243        if (ifl & IOCB_NOWAIT)
 244                flags |= RWF_NOWAIT;
 245        if (ifl & IOCB_HIPRI)
 246                flags |= RWF_HIPRI;
 247        if (ifl & IOCB_DSYNC)
 248                flags |= RWF_DSYNC;
 249        if (ifl & IOCB_SYNC)
 250                flags |= RWF_SYNC;
 251
 252        return flags;
 253}
 254
 255static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
 256{
 257        struct kiocb *iocb = &aio_req->iocb;
 258        struct kiocb *orig_iocb = aio_req->orig_iocb;
 259
 260        if (iocb->ki_flags & IOCB_WRITE) {
 261                struct inode *inode = file_inode(orig_iocb->ki_filp);
 262
 263                /* Actually acquired in ovl_write_iter() */
 264                __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
 265                                      SB_FREEZE_WRITE);
 266                file_end_write(iocb->ki_filp);
 267                ovl_copyattr(ovl_inode_real(inode), inode);
 268        }
 269
 270        orig_iocb->ki_pos = iocb->ki_pos;
 271        fdput(aio_req->fd);
 272        kmem_cache_free(ovl_aio_request_cachep, aio_req);
 273}
 274
 275static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
 276{
 277        struct ovl_aio_req *aio_req = container_of(iocb,
 278                                                   struct ovl_aio_req, iocb);
 279        struct kiocb *orig_iocb = aio_req->orig_iocb;
 280
 281        ovl_aio_cleanup_handler(aio_req);
 282        orig_iocb->ki_complete(orig_iocb, res, res2);
 283}
 284
 285static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 286{
 287        struct file *file = iocb->ki_filp;
 288        struct fd real;
 289        const struct cred *old_cred;
 290        ssize_t ret;
 291
 292        if (!iov_iter_count(iter))
 293                return 0;
 294
 295        ret = ovl_real_fdget(file, &real);
 296        if (ret)
 297                return ret;
 298
 299        old_cred = ovl_override_creds(file_inode(file)->i_sb);
 300        if (is_sync_kiocb(iocb)) {
 301                ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
 302                                    ovl_iocb_to_rwf(iocb->ki_flags));
 303        } else {
 304                struct ovl_aio_req *aio_req;
 305
 306                ret = -ENOMEM;
 307                aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
 308                if (!aio_req)
 309                        goto out;
 310
 311                aio_req->fd = real;
 312                real.flags = 0;
 313                aio_req->orig_iocb = iocb;
 314                kiocb_clone(&aio_req->iocb, iocb, real.file);
 315                aio_req->iocb.ki_complete = ovl_aio_rw_complete;
 316                ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
 317                if (ret != -EIOCBQUEUED)
 318                        ovl_aio_cleanup_handler(aio_req);
 319        }
 320out:
 321        revert_creds(old_cred);
 322        ovl_file_accessed(file);
 323
 324        fdput(real);
 325
 326        return ret;
 327}
 328
 329static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 330{
 331        struct file *file = iocb->ki_filp;
 332        struct inode *inode = file_inode(file);
 333        struct fd real;
 334        const struct cred *old_cred;
 335        ssize_t ret;
 336        int ifl = iocb->ki_flags;
 337
 338        if (!iov_iter_count(iter))
 339                return 0;
 340
 341        inode_lock(inode);
 342        /* Update mode */
 343        ovl_copyattr(ovl_inode_real(inode), inode);
 344        ret = file_remove_privs(file);
 345        if (ret)
 346                goto out_unlock;
 347
 348        ret = ovl_real_fdget(file, &real);
 349        if (ret)
 350                goto out_unlock;
 351
 352        if (!ovl_should_sync(OVL_FS(inode->i_sb)))
 353                ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
 354
 355        old_cred = ovl_override_creds(file_inode(file)->i_sb);
 356        if (is_sync_kiocb(iocb)) {
 357                file_start_write(real.file);
 358                ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
 359                                     ovl_iocb_to_rwf(ifl));
 360                file_end_write(real.file);
 361                /* Update size */
 362                ovl_copyattr(ovl_inode_real(inode), inode);
 363        } else {
 364                struct ovl_aio_req *aio_req;
 365
 366                ret = -ENOMEM;
 367                aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
 368                if (!aio_req)
 369                        goto out;
 370
 371                file_start_write(real.file);
 372                /* Pacify lockdep, same trick as done in aio_write() */
 373                __sb_writers_release(file_inode(real.file)->i_sb,
 374                                     SB_FREEZE_WRITE);
 375                aio_req->fd = real;
 376                real.flags = 0;
 377                aio_req->orig_iocb = iocb;
 378                kiocb_clone(&aio_req->iocb, iocb, real.file);
 379                aio_req->iocb.ki_flags = ifl;
 380                aio_req->iocb.ki_complete = ovl_aio_rw_complete;
 381                ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
 382                if (ret != -EIOCBQUEUED)
 383                        ovl_aio_cleanup_handler(aio_req);
 384        }
 385out:
 386        revert_creds(old_cred);
 387        fdput(real);
 388
 389out_unlock:
 390        inode_unlock(inode);
 391
 392        return ret;
 393}
 394
 395static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 396{
 397        struct fd real;
 398        const struct cred *old_cred;
 399        int ret;
 400
 401        ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
 402        if (ret <= 0)
 403                return ret;
 404
 405        ret = ovl_real_fdget_meta(file, &real, !datasync);
 406        if (ret)
 407                return ret;
 408
 409        /* Don't sync lower file for fear of receiving EROFS error */
 410        if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
 411                old_cred = ovl_override_creds(file_inode(file)->i_sb);
 412                ret = vfs_fsync_range(real.file, start, end, datasync);
 413                revert_creds(old_cred);
 414        }
 415
 416        fdput(real);
 417
 418        return ret;
 419}
 420
 421static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 422{
 423        struct file *realfile = file->private_data;
 424        const struct cred *old_cred;
 425        int ret;
 426
 427        if (!realfile->f_op->mmap)
 428                return -ENODEV;
 429
 430        if (WARN_ON(file != vma->vm_file))
 431                return -EIO;
 432
 433        vma_set_file(vma, realfile);
 434
 435        old_cred = ovl_override_creds(file_inode(file)->i_sb);
 436        ret = call_mmap(vma->vm_file, vma);
 437        revert_creds(old_cred);
 438        ovl_file_accessed(file);
 439
 440        return ret;
 441}
 442
 443static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 444{
 445        struct inode *inode = file_inode(file);
 446        struct fd real;
 447        const struct cred *old_cred;
 448        int ret;
 449
 450        ret = ovl_real_fdget(file, &real);
 451        if (ret)
 452                return ret;
 453
 454        old_cred = ovl_override_creds(file_inode(file)->i_sb);
 455        ret = vfs_fallocate(real.file, mode, offset, len);
 456        revert_creds(old_cred);
 457
 458        /* Update size */
 459        ovl_copyattr(ovl_inode_real(inode), inode);
 460
 461        fdput(real);
 462
 463        return ret;
 464}
 465
 466static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
 467{
 468        struct fd real;
 469        const struct cred *old_cred;
 470        int ret;
 471
 472        ret = ovl_real_fdget(file, &real);
 473        if (ret)
 474                return ret;
 475
 476        old_cred = ovl_override_creds(file_inode(file)->i_sb);
 477        ret = vfs_fadvise(real.file, offset, len, advice);
 478        revert_creds(old_cred);
 479
 480        fdput(real);
 481
 482        return ret;
 483}
 484
 485enum ovl_copyop {
 486        OVL_COPY,
 487        OVL_CLONE,
 488        OVL_DEDUPE,
 489};
 490
 491static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
 492                            struct file *file_out, loff_t pos_out,
 493                            loff_t len, unsigned int flags, enum ovl_copyop op)
 494{
 495        struct inode *inode_out = file_inode(file_out);
 496        struct fd real_in, real_out;
 497        const struct cred *old_cred;
 498        loff_t ret;
 499
 500        ret = ovl_real_fdget(file_out, &real_out);
 501        if (ret)
 502                return ret;
 503
 504        ret = ovl_real_fdget(file_in, &real_in);
 505        if (ret) {
 506                fdput(real_out);
 507                return ret;
 508        }
 509
 510        old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
 511        switch (op) {
 512        case OVL_COPY:
 513                ret = vfs_copy_file_range(real_in.file, pos_in,
 514                                          real_out.file, pos_out, len, flags);
 515                break;
 516
 517        case OVL_CLONE:
 518                ret = vfs_clone_file_range(real_in.file, pos_in,
 519                                           real_out.file, pos_out, len, flags);
 520                break;
 521
 522        case OVL_DEDUPE:
 523                ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
 524                                                real_out.file, pos_out, len,
 525                                                flags);
 526                break;
 527        }
 528        revert_creds(old_cred);
 529
 530        /* Update size */
 531        ovl_copyattr(ovl_inode_real(inode_out), inode_out);
 532
 533        fdput(real_in);
 534        fdput(real_out);
 535
 536        return ret;
 537}
 538
 539static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
 540                                   struct file *file_out, loff_t pos_out,
 541                                   size_t len, unsigned int flags)
 542{
 543        return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
 544                            OVL_COPY);
 545}
 546
 547static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
 548                                   struct file *file_out, loff_t pos_out,
 549                                   loff_t len, unsigned int remap_flags)
 550{
 551        enum ovl_copyop op;
 552
 553        if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
 554                return -EINVAL;
 555
 556        if (remap_flags & REMAP_FILE_DEDUP)
 557                op = OVL_DEDUPE;
 558        else
 559                op = OVL_CLONE;
 560
 561        /*
 562         * Don't copy up because of a dedupe request, this wouldn't make sense
 563         * most of the time (data would be duplicated instead of deduplicated).
 564         */
 565        if (op == OVL_DEDUPE &&
 566            (!ovl_inode_upper(file_inode(file_in)) ||
 567             !ovl_inode_upper(file_inode(file_out))))
 568                return -EPERM;
 569
 570        return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
 571                            remap_flags, op);
 572}
 573
 574static int ovl_flush(struct file *file, fl_owner_t id)
 575{
 576        struct fd real;
 577        const struct cred *old_cred;
 578        int err;
 579
 580        err = ovl_real_fdget(file, &real);
 581        if (err)
 582                return err;
 583
 584        if (real.file->f_op->flush) {
 585                old_cred = ovl_override_creds(file_inode(file)->i_sb);
 586                err = real.file->f_op->flush(real.file, id);
 587                revert_creds(old_cred);
 588        }
 589        fdput(real);
 590
 591        return err;
 592}
 593
 594const struct file_operations ovl_file_operations = {
 595        .open           = ovl_open,
 596        .release        = ovl_release,
 597        .llseek         = ovl_llseek,
 598        .read_iter      = ovl_read_iter,
 599        .write_iter     = ovl_write_iter,
 600        .fsync          = ovl_fsync,
 601        .mmap           = ovl_mmap,
 602        .fallocate      = ovl_fallocate,
 603        .fadvise        = ovl_fadvise,
 604        .flush          = ovl_flush,
 605        .splice_read    = generic_file_splice_read,
 606        .splice_write   = iter_file_splice_write,
 607
 608        .copy_file_range        = ovl_copy_file_range,
 609        .remap_file_range       = ovl_remap_file_range,
 610};
 611
 612int __init ovl_aio_request_cache_init(void)
 613{
 614        ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
 615                                                   sizeof(struct ovl_aio_req),
 616                                                   0, SLAB_HWCACHE_ALIGN, NULL);
 617        if (!ovl_aio_request_cachep)
 618                return -ENOMEM;
 619
 620        return 0;
 621}
 622
 623void ovl_aio_request_cache_destroy(void)
 624{
 625        kmem_cache_destroy(ovl_aio_request_cachep);
 626}
 627