linux/fs/overlayfs/copy_up.c
<<
>>
Prefs
   1/*
   2 *
   3 * Copyright (C) 2011 Novell Inc.
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms of the GNU General Public License version 2 as published by
   7 * the Free Software Foundation.
   8 */
   9
  10#include <linux/module.h>
  11#include <linux/fs.h>
  12#include <linux/slab.h>
  13#include <linux/file.h>
  14#include <linux/splice.h>
  15#include <linux/xattr.h>
  16#include <linux/security.h>
  17#include <linux/uaccess.h>
  18#include <linux/sched/signal.h>
  19#include <linux/cred.h>
  20#include <linux/namei.h>
  21#include <linux/fdtable.h>
  22#include <linux/ratelimit.h>
  23#include <linux/exportfs.h>
  24#include "overlayfs.h"
  25#include "ovl_entry.h"
  26
  27#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
  28
  29static bool __read_mostly ovl_check_copy_up;
  30module_param_named(check_copy_up, ovl_check_copy_up, bool,
  31                   S_IWUSR | S_IRUGO);
  32MODULE_PARM_DESC(ovl_check_copy_up,
  33                 "Warn on copy-up when causing process also has a R/O fd open");
  34
  35static int ovl_check_fd(const void *data, struct file *f, unsigned int fd)
  36{
  37        const struct dentry *dentry = data;
  38
  39        if (file_inode(f) == d_inode(dentry))
  40                pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n",
  41                                    f, fd, current->pid, current->comm);
  42        return 0;
  43}
  44
  45/*
  46 * Check the fds open by this process and warn if something like the following
  47 * scenario is about to occur:
  48 *
  49 *      fd1 = open("foo", O_RDONLY);
  50 *      fd2 = open("foo", O_RDWR);
  51 */
  52static void ovl_do_check_copy_up(struct dentry *dentry)
  53{
  54        if (ovl_check_copy_up)
  55                iterate_fd(current->files, 0, ovl_check_fd, dentry);
  56}
  57
  58int ovl_copy_xattr(struct dentry *old, struct dentry *new)
  59{
  60        ssize_t list_size, size, value_size = 0;
  61        char *buf, *name, *value = NULL;
  62        int uninitialized_var(error);
  63        size_t slen;
  64
  65        if (!(old->d_inode->i_opflags & IOP_XATTR) ||
  66            !(new->d_inode->i_opflags & IOP_XATTR))
  67                return 0;
  68
  69        list_size = vfs_listxattr(old, NULL, 0);
  70        if (list_size <= 0) {
  71                if (list_size == -EOPNOTSUPP)
  72                        return 0;
  73                return list_size;
  74        }
  75
  76        buf = kzalloc(list_size, GFP_KERNEL);
  77        if (!buf)
  78                return -ENOMEM;
  79
  80        list_size = vfs_listxattr(old, buf, list_size);
  81        if (list_size <= 0) {
  82                error = list_size;
  83                goto out;
  84        }
  85
  86        for (name = buf; list_size; name += slen) {
  87                slen = strnlen(name, list_size) + 1;
  88
  89                /* underlying fs providing us with an broken xattr list? */
  90                if (WARN_ON(slen > list_size)) {
  91                        error = -EIO;
  92                        break;
  93                }
  94                list_size -= slen;
  95
  96                if (ovl_is_private_xattr(name))
  97                        continue;
  98retry:
  99                size = vfs_getxattr(old, name, value, value_size);
 100                if (size == -ERANGE)
 101                        size = vfs_getxattr(old, name, NULL, 0);
 102
 103                if (size < 0) {
 104                        error = size;
 105                        break;
 106                }
 107
 108                if (size > value_size) {
 109                        void *new;
 110
 111                        new = krealloc(value, size, GFP_KERNEL);
 112                        if (!new) {
 113                                error = -ENOMEM;
 114                                break;
 115                        }
 116                        value = new;
 117                        value_size = size;
 118                        goto retry;
 119                }
 120
 121                error = security_inode_copy_up_xattr(name);
 122                if (error < 0 && error != -EOPNOTSUPP)
 123                        break;
 124                if (error == 1) {
 125                        error = 0;
 126                        continue; /* Discard */
 127                }
 128                error = vfs_setxattr(new, name, value, size, 0);
 129                if (error)
 130                        break;
 131        }
 132        kfree(value);
 133out:
 134        kfree(buf);
 135        return error;
 136}
 137
 138static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 139{
 140        struct file *old_file;
 141        struct file *new_file;
 142        loff_t old_pos = 0;
 143        loff_t new_pos = 0;
 144        int error = 0;
 145
 146        if (len == 0)
 147                return 0;
 148
 149        old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
 150        if (IS_ERR(old_file))
 151                return PTR_ERR(old_file);
 152
 153        new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
 154        if (IS_ERR(new_file)) {
 155                error = PTR_ERR(new_file);
 156                goto out_fput;
 157        }
 158
 159        /* Try to use clone_file_range to clone up within the same fs */
 160        error = vfs_clone_file_range(old_file, 0, new_file, 0, len);
 161        if (!error)
 162                goto out;
 163        /* Couldn't clone, so now we try to copy the data */
 164        error = 0;
 165
 166        /* FIXME: copy up sparse files efficiently */
 167        while (len) {
 168                size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
 169                long bytes;
 170
 171                if (len < this_len)
 172                        this_len = len;
 173
 174                if (signal_pending_state(TASK_KILLABLE, current)) {
 175                        error = -EINTR;
 176                        break;
 177                }
 178
 179                bytes = do_splice_direct(old_file, &old_pos,
 180                                         new_file, &new_pos,
 181                                         this_len, SPLICE_F_MOVE);
 182                if (bytes <= 0) {
 183                        error = bytes;
 184                        break;
 185                }
 186                WARN_ON(old_pos != new_pos);
 187
 188                len -= bytes;
 189        }
 190out:
 191        if (!error)
 192                error = vfs_fsync(new_file, 0);
 193        fput(new_file);
 194out_fput:
 195        fput(old_file);
 196        return error;
 197}
 198
 199static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
 200{
 201        struct iattr attr = {
 202                .ia_valid =
 203                     ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
 204                .ia_atime = stat->atime,
 205                .ia_mtime = stat->mtime,
 206        };
 207
 208        return notify_change(upperdentry, &attr, NULL);
 209}
 210
 211int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
 212{
 213        int err = 0;
 214
 215        if (!S_ISLNK(stat->mode)) {
 216                struct iattr attr = {
 217                        .ia_valid = ATTR_MODE,
 218                        .ia_mode = stat->mode,
 219                };
 220                err = notify_change(upperdentry, &attr, NULL);
 221        }
 222        if (!err) {
 223                struct iattr attr = {
 224                        .ia_valid = ATTR_UID | ATTR_GID,
 225                        .ia_uid = stat->uid,
 226                        .ia_gid = stat->gid,
 227                };
 228                err = notify_change(upperdentry, &attr, NULL);
 229        }
 230        if (!err)
 231                ovl_set_timestamps(upperdentry, stat);
 232
 233        return err;
 234}
 235
 236struct ovl_fh *ovl_encode_fh(struct dentry *lower, bool is_upper)
 237{
 238        struct ovl_fh *fh;
 239        int fh_type, fh_len, dwords;
 240        void *buf;
 241        int buflen = MAX_HANDLE_SZ;
 242        uuid_t *uuid = &lower->d_sb->s_uuid;
 243
 244        buf = kmalloc(buflen, GFP_TEMPORARY);
 245        if (!buf)
 246                return ERR_PTR(-ENOMEM);
 247
 248        /*
 249         * We encode a non-connectable file handle for non-dir, because we
 250         * only need to find the lower inode number and we don't want to pay
 251         * the price or reconnecting the dentry.
 252         */
 253        dwords = buflen >> 2;
 254        fh_type = exportfs_encode_fh(lower, buf, &dwords, 0);
 255        buflen = (dwords << 2);
 256
 257        fh = ERR_PTR(-EIO);
 258        if (WARN_ON(fh_type < 0) ||
 259            WARN_ON(buflen > MAX_HANDLE_SZ) ||
 260            WARN_ON(fh_type == FILEID_INVALID))
 261                goto out;
 262
 263        BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
 264        fh_len = offsetof(struct ovl_fh, fid) + buflen;
 265        fh = kmalloc(fh_len, GFP_KERNEL);
 266        if (!fh) {
 267                fh = ERR_PTR(-ENOMEM);
 268                goto out;
 269        }
 270
 271        fh->version = OVL_FH_VERSION;
 272        fh->magic = OVL_FH_MAGIC;
 273        fh->type = fh_type;
 274        fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
 275        /*
 276         * When we will want to decode an overlay dentry from this handle
 277         * and all layers are on the same fs, if we get a disconncted real
 278         * dentry when we decode fid, the only way to tell if we should assign
 279         * it to upperdentry or to lowerstack is by checking this flag.
 280         */
 281        if (is_upper)
 282                fh->flags |= OVL_FH_FLAG_PATH_UPPER;
 283        fh->len = fh_len;
 284        fh->uuid = *uuid;
 285        memcpy(fh->fid, buf, buflen);
 286
 287out:
 288        kfree(buf);
 289        return fh;
 290}
 291
 292static int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 293                          struct dentry *upper)
 294{
 295        const struct ovl_fh *fh = NULL;
 296        int err;
 297
 298        /*
 299         * When lower layer doesn't support export operations store a 'null' fh,
 300         * so we can use the overlay.origin xattr to distignuish between a copy
 301         * up and a pure upper inode.
 302         */
 303        if (ovl_can_decode_fh(lower->d_sb)) {
 304                fh = ovl_encode_fh(lower, false);
 305                if (IS_ERR(fh))
 306                        return PTR_ERR(fh);
 307        }
 308
 309        /*
 310         * Do not fail when upper doesn't support xattrs.
 311         */
 312        err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
 313                                 fh ? fh->len : 0, 0);
 314        kfree(fh);
 315
 316        return err;
 317}
 318
 319struct ovl_copy_up_ctx {
 320        struct dentry *parent;
 321        struct dentry *dentry;
 322        struct path lowerpath;
 323        struct kstat stat;
 324        struct kstat pstat;
 325        const char *link;
 326        struct dentry *destdir;
 327        struct qstr destname;
 328        struct dentry *workdir;
 329        bool tmpfile;
 330        bool origin;
 331};
 332
 333static int ovl_link_up(struct ovl_copy_up_ctx *c)
 334{
 335        int err;
 336        struct dentry *upper;
 337        struct dentry *upperdir = ovl_dentry_upper(c->parent);
 338        struct inode *udir = d_inode(upperdir);
 339
 340        /* Mark parent "impure" because it may now contain non-pure upper */
 341        err = ovl_set_impure(c->parent, upperdir);
 342        if (err)
 343                return err;
 344
 345        err = ovl_set_nlink_lower(c->dentry);
 346        if (err)
 347                return err;
 348
 349        inode_lock_nested(udir, I_MUTEX_PARENT);
 350        upper = lookup_one_len(c->dentry->d_name.name, upperdir,
 351                               c->dentry->d_name.len);
 352        err = PTR_ERR(upper);
 353        if (!IS_ERR(upper)) {
 354                err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper,
 355                                  true);
 356                dput(upper);
 357
 358                if (!err) {
 359                        /* Restore timestamps on parent (best effort) */
 360                        ovl_set_timestamps(upperdir, &c->pstat);
 361                        ovl_dentry_set_upper_alias(c->dentry);
 362                }
 363        }
 364        inode_unlock(udir);
 365        ovl_set_nlink_upper(c->dentry);
 366
 367        return err;
 368}
 369
 370static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
 371                            struct dentry **newdentry)
 372{
 373        int err;
 374        struct dentry *upper;
 375        struct inode *udir = d_inode(c->destdir);
 376
 377        upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
 378        if (IS_ERR(upper))
 379                return PTR_ERR(upper);
 380
 381        if (c->tmpfile)
 382                err = ovl_do_link(temp, udir, upper, true);
 383        else
 384                err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
 385
 386        if (!err)
 387                *newdentry = dget(c->tmpfile ? upper : temp);
 388        dput(upper);
 389
 390        return err;
 391}
 392
 393static int ovl_get_tmpfile(struct ovl_copy_up_ctx *c, struct dentry **tempp)
 394{
 395        int err;
 396        struct dentry *temp;
 397        const struct cred *old_creds = NULL;
 398        struct cred *new_creds = NULL;
 399        struct cattr cattr = {
 400                /* Can't properly set mode on creation because of the umask */
 401                .mode = c->stat.mode & S_IFMT,
 402                .rdev = c->stat.rdev,
 403                .link = c->link
 404        };
 405
 406        err = security_inode_copy_up(c->dentry, &new_creds);
 407        if (err < 0)
 408                goto out;
 409
 410        if (new_creds)
 411                old_creds = override_creds(new_creds);
 412
 413        if (c->tmpfile) {
 414                temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
 415                if (IS_ERR(temp))
 416                        goto temp_err;
 417        } else {
 418                temp = ovl_lookup_temp(c->workdir);
 419                if (IS_ERR(temp))
 420                        goto temp_err;
 421
 422                err = ovl_create_real(d_inode(c->workdir), temp, &cattr,
 423                                      NULL, true);
 424                if (err) {
 425                        dput(temp);
 426                        goto out;
 427                }
 428        }
 429        err = 0;
 430        *tempp = temp;
 431out:
 432        if (new_creds) {
 433                revert_creds(old_creds);
 434                put_cred(new_creds);
 435        }
 436
 437        return err;
 438
 439temp_err:
 440        err = PTR_ERR(temp);
 441        goto out;
 442}
 443
 444static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 445{
 446        int err;
 447
 448        if (S_ISREG(c->stat.mode)) {
 449                struct path upperpath;
 450
 451                ovl_path_upper(c->dentry, &upperpath);
 452                BUG_ON(upperpath.dentry != NULL);
 453                upperpath.dentry = temp;
 454
 455                err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
 456                if (err)
 457                        return err;
 458        }
 459
 460        err = ovl_copy_xattr(c->lowerpath.dentry, temp);
 461        if (err)
 462                return err;
 463
 464        inode_lock(temp->d_inode);
 465        err = ovl_set_attr(temp, &c->stat);
 466        inode_unlock(temp->d_inode);
 467        if (err)
 468                return err;
 469
 470        /*
 471         * Store identifier of lower inode in upper inode xattr to
 472         * allow lookup of the copy up origin inode.
 473         *
 474         * Don't set origin when we are breaking the association with a lower
 475         * hard link.
 476         */
 477        if (c->origin) {
 478                err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
 479                if (err)
 480                        return err;
 481        }
 482
 483        return 0;
 484}
 485
 486static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
 487{
 488        struct inode *udir = c->destdir->d_inode;
 489        struct dentry *newdentry = NULL;
 490        struct dentry *temp = NULL;
 491        int err;
 492
 493        err = ovl_get_tmpfile(c, &temp);
 494        if (err)
 495                goto out;
 496
 497        err = ovl_copy_up_inode(c, temp);
 498        if (err)
 499                goto out_cleanup;
 500
 501        if (c->tmpfile) {
 502                inode_lock_nested(udir, I_MUTEX_PARENT);
 503                err = ovl_install_temp(c, temp, &newdentry);
 504                inode_unlock(udir);
 505        } else {
 506                err = ovl_install_temp(c, temp, &newdentry);
 507        }
 508        if (err)
 509                goto out_cleanup;
 510
 511        ovl_inode_update(d_inode(c->dentry), newdentry);
 512out:
 513        dput(temp);
 514        return err;
 515
 516out_cleanup:
 517        if (!c->tmpfile)
 518                ovl_cleanup(d_inode(c->workdir), temp);
 519        goto out;
 520}
 521
 522/*
 523 * Copy up a single dentry
 524 *
 525 * All renames start with copy up of source if necessary.  The actual
 526 * rename will only proceed once the copy up was successful.  Copy up uses
 527 * upper parent i_mutex for exclusion.  Since rename can change d_parent it
 528 * is possible that the copy up will lock the old parent.  At that point
 529 * the file will have already been copied up anyway.
 530 */
 531static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 532{
 533        int err;
 534        struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
 535        bool indexed = false;
 536
 537        if (ovl_indexdir(c->dentry->d_sb) && !S_ISDIR(c->stat.mode) &&
 538            c->stat.nlink > 1)
 539                indexed = true;
 540
 541        if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || indexed)
 542                c->origin = true;
 543
 544        if (indexed) {
 545                c->destdir = ovl_indexdir(c->dentry->d_sb);
 546                err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
 547                if (err)
 548                        return err;
 549        } else {
 550                /*
 551                 * Mark parent "impure" because it may now contain non-pure
 552                 * upper
 553                 */
 554                err = ovl_set_impure(c->parent, c->destdir);
 555                if (err)
 556                        return err;
 557        }
 558
 559        /* Should we copyup with O_TMPFILE or with workdir? */
 560        if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
 561                c->tmpfile = true;
 562                err = ovl_copy_up_locked(c);
 563        } else {
 564                err = -EIO;
 565                if (lock_rename(c->workdir, c->destdir) != NULL) {
 566                        pr_err("overlayfs: failed to lock workdir+upperdir\n");
 567                } else {
 568                        err = ovl_copy_up_locked(c);
 569                        unlock_rename(c->workdir, c->destdir);
 570                }
 571        }
 572
 573        if (indexed) {
 574                if (!err)
 575                        ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
 576                kfree(c->destname.name);
 577        } else if (!err) {
 578                struct inode *udir = d_inode(c->destdir);
 579
 580                /* Restore timestamps on parent (best effort) */
 581                inode_lock(udir);
 582                ovl_set_timestamps(c->destdir, &c->pstat);
 583                inode_unlock(udir);
 584
 585                ovl_dentry_set_upper_alias(c->dentry);
 586        }
 587
 588        return err;
 589}
 590
 591static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 592                           int flags)
 593{
 594        int err;
 595        DEFINE_DELAYED_CALL(done);
 596        struct path parentpath;
 597        struct ovl_copy_up_ctx ctx = {
 598                .parent = parent,
 599                .dentry = dentry,
 600                .workdir = ovl_workdir(dentry),
 601        };
 602
 603        if (WARN_ON(!ctx.workdir))
 604                return -EROFS;
 605
 606        ovl_path_lower(dentry, &ctx.lowerpath);
 607        err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
 608                          STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
 609        if (err)
 610                return err;
 611
 612        ovl_path_upper(parent, &parentpath);
 613        ctx.destdir = parentpath.dentry;
 614        ctx.destname = dentry->d_name;
 615
 616        err = vfs_getattr(&parentpath, &ctx.pstat,
 617                          STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT);
 618        if (err)
 619                return err;
 620
 621        /* maybe truncate regular file. this has no effect on dirs */
 622        if (flags & O_TRUNC)
 623                ctx.stat.size = 0;
 624
 625        if (S_ISLNK(ctx.stat.mode)) {
 626                ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
 627                if (IS_ERR(ctx.link))
 628                        return PTR_ERR(ctx.link);
 629        }
 630        ovl_do_check_copy_up(ctx.lowerpath.dentry);
 631
 632        err = ovl_copy_up_start(dentry);
 633        /* err < 0: interrupted, err > 0: raced with another copy-up */
 634        if (unlikely(err)) {
 635                if (err > 0)
 636                        err = 0;
 637        } else {
 638                if (!ovl_dentry_upper(dentry))
 639                        err = ovl_do_copy_up(&ctx);
 640                if (!err && !ovl_dentry_has_upper_alias(dentry))
 641                        err = ovl_link_up(&ctx);
 642                ovl_copy_up_end(dentry);
 643        }
 644        do_delayed_call(&done);
 645
 646        return err;
 647}
 648
 649int ovl_copy_up_flags(struct dentry *dentry, int flags)
 650{
 651        int err = 0;
 652        const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
 653
 654        while (!err) {
 655                struct dentry *next;
 656                struct dentry *parent;
 657
 658                /*
 659                 * Check if copy-up has happened as well as for upper alias (in
 660                 * case of hard links) is there.
 661                 *
 662                 * Both checks are lockless:
 663                 *  - false negatives: will recheck under oi->lock
 664                 *  - false positives:
 665                 *    + ovl_dentry_upper() uses memory barriers to ensure the
 666                 *      upper dentry is up-to-date
 667                 *    + ovl_dentry_has_upper_alias() relies on locking of
 668                 *      upper parent i_rwsem to prevent reordering copy-up
 669                 *      with rename.
 670                 */
 671                if (ovl_dentry_upper(dentry) &&
 672                    ovl_dentry_has_upper_alias(dentry))
 673                        break;
 674
 675                next = dget(dentry);
 676                /* find the topmost dentry not yet copied up */
 677                for (;;) {
 678                        parent = dget_parent(next);
 679
 680                        if (ovl_dentry_upper(parent))
 681                                break;
 682
 683                        dput(next);
 684                        next = parent;
 685                }
 686
 687                err = ovl_copy_up_one(parent, next, flags);
 688
 689                dput(parent);
 690                dput(next);
 691        }
 692        revert_creds(old_cred);
 693
 694        return err;
 695}
 696
 697int ovl_copy_up(struct dentry *dentry)
 698{
 699        return ovl_copy_up_flags(dentry, 0);
 700}
 701