linux/fs/overlayfs/dir.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *
   4 * Copyright (C) 2011 Novell Inc.
   5 */
   6
   7#include <linux/fs.h>
   8#include <linux/namei.h>
   9#include <linux/xattr.h>
  10#include <linux/security.h>
  11#include <linux/cred.h>
  12#include <linux/module.h>
  13#include <linux/posix_acl.h>
  14#include <linux/posix_acl_xattr.h>
  15#include <linux/atomic.h>
  16#include <linux/ratelimit.h>
  17#include "overlayfs.h"
  18
/*
 * "redirect_max" module parameter (mode 0644): maximum length of an
 * absolute redirect xattr value, defaulting to 256.
 */
static unsigned short ovl_redirect_max = 256;
module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
MODULE_PARM_DESC(redirect_max,
                 "Maximum length of absolute redirect xattr value");

/* Forward declaration: called by ovl_set_link_redirect() before its definition. */
static int ovl_set_redirect(struct dentry *dentry, bool samedir);
  25
  26int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
  27{
  28        int err;
  29
  30        dget(wdentry);
  31        if (d_is_dir(wdentry))
  32                err = ovl_do_rmdir(wdir, wdentry);
  33        else
  34                err = ovl_do_unlink(wdir, wdentry);
  35        dput(wdentry);
  36
  37        if (err) {
  38                pr_err("cleanup of '%pd2' failed (%i)\n",
  39                       wdentry, err);
  40        }
  41
  42        return err;
  43}
  44
  45struct dentry *ovl_lookup_temp(struct dentry *workdir)
  46{
  47        struct dentry *temp;
  48        char name[20];
  49        static atomic_t temp_id = ATOMIC_INIT(0);
  50
  51        /* counter is allowed to wrap, since temp dentries are ephemeral */
  52        snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
  53
  54        temp = lookup_one_len(name, workdir, strlen(name));
  55        if (!IS_ERR(temp) && temp->d_inode) {
  56                pr_err("workdir/%s already exists\n", name);
  57                dput(temp);
  58                temp = ERR_PTR(-EIO);
  59        }
  60
  61        return temp;
  62}
  63
/*
 * Hand out a whiteout dentry located in ofs->workdir.
 *
 * One whiteout is kept cached in ofs->whiteout (created here on demand).
 * When ofs->share_whiteout is set, a new temp name is hardlinked to the
 * cached whiteout and that link is returned, leaving the cached one in
 * place.  Otherwise, or when linking fails, the cached whiteout itself is
 * returned and the cache is emptied.
 *
 * Returns the whiteout dentry, or an ERR_PTR() when a temp lookup or the
 * whiteout creation fails.
 *
 * caller holds i_mutex on workdir
 */
static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
{
        int err;
        struct dentry *whiteout;
        struct dentry *workdir = ofs->workdir;
        struct inode *wdir = workdir->d_inode;

        /* Nothing cached yet: create a whiteout under a temp name and cache it */
        if (!ofs->whiteout) {
                whiteout = ovl_lookup_temp(workdir);
                if (IS_ERR(whiteout))
                        goto out;

                err = ovl_do_whiteout(wdir, whiteout);
                if (err) {
                        dput(whiteout);
                        whiteout = ERR_PTR(err);
                        goto out;
                }
                ofs->whiteout = whiteout;
        }

        if (ofs->share_whiteout) {
                whiteout = ovl_lookup_temp(workdir);
                if (IS_ERR(whiteout))
                        goto out;

                /* On success the new link is returned; the cache is untouched */
                err = ovl_do_link(ofs->whiteout, wdir, whiteout);
                if (!err)
                        goto out;

                /* Any failure other than -EMLINK turns sharing off for good */
                if (err != -EMLINK) {
                        pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
                                ofs->whiteout->d_inode->i_nlink, err);
                        ofs->share_whiteout = false;
                }
                dput(whiteout);
        }
        /* Fall back to handing the cached whiteout itself to the caller */
        whiteout = ofs->whiteout;
        ofs->whiteout = NULL;
out:
        return whiteout;
}
 107
 108/* Caller must hold i_mutex on both workdir and dir */
 109int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
 110                             struct dentry *dentry)
 111{
 112        struct inode *wdir = ofs->workdir->d_inode;
 113        struct dentry *whiteout;
 114        int err;
 115        int flags = 0;
 116
 117        whiteout = ovl_whiteout(ofs);
 118        err = PTR_ERR(whiteout);
 119        if (IS_ERR(whiteout))
 120                return err;
 121
 122        if (d_is_dir(dentry))
 123                flags = RENAME_EXCHANGE;
 124
 125        err = ovl_do_rename(wdir, whiteout, dir, dentry, flags);
 126        if (err)
 127                goto kill_whiteout;
 128        if (flags)
 129                ovl_cleanup(wdir, dentry);
 130
 131out:
 132        dput(whiteout);
 133        return err;
 134
 135kill_whiteout:
 136        ovl_cleanup(wdir, whiteout);
 137        goto out;
 138}
 139
 140static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry,
 141                          umode_t mode)
 142{
 143        int err;
 144        struct dentry *d, *dentry = *newdentry;
 145
 146        err = ovl_do_mkdir(dir, dentry, mode);
 147        if (err)
 148                return err;
 149
 150        if (likely(!d_unhashed(dentry)))
 151                return 0;
 152
 153        /*
 154         * vfs_mkdir() may succeed and leave the dentry passed
 155         * to it unhashed and negative. If that happens, try to
 156         * lookup a new hashed and positive dentry.
 157         */
 158        d = lookup_one_len(dentry->d_name.name, dentry->d_parent,
 159                           dentry->d_name.len);
 160        if (IS_ERR(d)) {
 161                pr_warn("failed lookup after mkdir (%pd2, err=%i).\n",
 162                        dentry, err);
 163                return PTR_ERR(d);
 164        }
 165        dput(dentry);
 166        *newdentry = d;
 167
 168        return 0;
 169}
 170
 171struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
 172                               struct ovl_cattr *attr)
 173{
 174        int err;
 175
 176        if (IS_ERR(newdentry))
 177                return newdentry;
 178
 179        err = -ESTALE;
 180        if (newdentry->d_inode)
 181                goto out;
 182
 183        if (attr->hardlink) {
 184                err = ovl_do_link(attr->hardlink, dir, newdentry);
 185        } else {
 186                switch (attr->mode & S_IFMT) {
 187                case S_IFREG:
 188                        err = ovl_do_create(dir, newdentry, attr->mode);
 189                        break;
 190
 191                case S_IFDIR:
 192                        /* mkdir is special... */
 193                        err =  ovl_mkdir_real(dir, &newdentry, attr->mode);
 194                        break;
 195
 196                case S_IFCHR:
 197                case S_IFBLK:
 198                case S_IFIFO:
 199                case S_IFSOCK:
 200                        err = ovl_do_mknod(dir, newdentry, attr->mode,
 201                                           attr->rdev);
 202                        break;
 203
 204                case S_IFLNK:
 205                        err = ovl_do_symlink(dir, newdentry, attr->link);
 206                        break;
 207
 208                default:
 209                        err = -EPERM;
 210                }
 211        }
 212        if (!err && WARN_ON(!newdentry->d_inode)) {
 213                /*
 214                 * Not quite sure if non-instantiated dentry is legal or not.
 215                 * VFS doesn't seem to care so check and warn here.
 216                 */
 217                err = -EIO;
 218        }
 219out:
 220        if (err) {
 221                dput(newdentry);
 222                return ERR_PTR(err);
 223        }
 224        return newdentry;
 225}
 226
 227struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr)
 228{
 229        return ovl_create_real(d_inode(workdir), ovl_lookup_temp(workdir),
 230                               attr);
 231}
 232
 233static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
 234                               int xerr)
 235{
 236        struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 237        int err;
 238
 239        err = ovl_check_setxattr(ofs, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
 240        if (!err)
 241                ovl_dentry_set_opaque(dentry);
 242
 243        return err;
 244}
 245
 246static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
 247{
 248        /*
 249         * Fail with -EIO when trying to create opaque dir and upper doesn't
 250         * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to
 251         * return a specific error for noxattr case.
 252         */
 253        return ovl_set_opaque_xerr(dentry, upperdentry, -EIO);
 254}
 255
/*
 * Common operations required to be done after creation of file on upper.
 * If @hardlink is false, then @inode is a pre-allocated inode, we may or
 * may not use to instantiate the new dentry.
 *
 * @dentry:    overlay dentry being instantiated
 * @inode:     pre-allocated overlay inode (!hardlink) or the existing
 *             overlay inode being linked to (hardlink)
 * @newdentry: the real upper dentry that was just created
 * @hardlink:  true when the new upper object is a hardlink
 *
 * Returns 0 on success, or the error from ovl_get_inode().
 */
static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
                           struct dentry *newdentry, bool hardlink)
{
        struct ovl_inode_params oip = {
                .upperdentry = newdentry,
                .newinode = inode,
        };

        ovl_dir_modified(dentry->d_parent, false);
        ovl_dentry_set_upper_alias(dentry);
        ovl_dentry_update_reval(dentry, newdentry,
                        DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);

        if (!hardlink) {
                /*
                 * ovl_obtain_alias() can be called after ovl_create_real()
                 * and before we get here, so we may get an inode from cache
                 * with the same real upperdentry that is not the inode we
                 * pre-allocated.  In this case we will use the cached inode
                 * to instantiate the new dentry.
                 *
                 * XXX: if we ever use ovl_obtain_alias() to decode directory
                 * file handles, need to use ovl_get_inode_locked() and
                 * d_instantiate_new() here to prevent from creating two
                 * hashed directory inode aliases.
                 */
                inode = ovl_get_inode(dentry->d_sb, &oip);
                if (IS_ERR(inode))
                        return PTR_ERR(inode);
                /* Only flag OVL_UPPERDATA when our pre-allocated inode was used */
                if (inode == oip.newinode)
                        ovl_set_flag(OVL_UPPERDATA, inode);
        } else {
                /* The overlay inode must already be backed by the linked real inode */
                WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
                dput(newdentry);
                inc_nlink(inode);
        }

        d_instantiate(dentry, inode);
        if (inode != oip.newinode) {
                pr_warn_ratelimited("newly created inode found in cache (%pd2)\n",
                                    dentry);
        }

        /* Force lookup of new upper hardlink to find its lower */
        if (hardlink)
                d_drop(dentry);

        return 0;
}
 310
 311static bool ovl_type_merge(struct dentry *dentry)
 312{
 313        return OVL_TYPE_MERGE(ovl_path_type(dentry));
 314}
 315
 316static bool ovl_type_origin(struct dentry *dentry)
 317{
 318        return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
 319}
 320
/*
 * Create the new object directly in the upper directory of @dentry's
 * parent, then instantiate the overlay @dentry with it.
 *
 * @dentry: overlay dentry to create
 * @inode:  overlay inode handed on to ovl_instantiate()
 * @attr:   creation attributes; attr->mode may be modified (umask applied)
 *
 * The upper parent inode is locked for the duration of the lookup, the
 * creation and the instantiation.  Returns 0 or a negative error code.
 */
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
                            struct ovl_cattr *attr)
{
        struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
        struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
        struct inode *udir = upperdir->d_inode;
        struct dentry *newdentry;
        int err;

        /* Apply the umask here unless linking or the upper dir has POSIX ACLs */
        if (!attr->hardlink && !IS_POSIXACL(udir))
                attr->mode &= ~current_umask();

        inode_lock_nested(udir, I_MUTEX_PARENT);
        /* ovl_create_real() copes with an ERR_PTR() from the lookup */
        newdentry = ovl_create_real(udir,
                                    lookup_one_len(dentry->d_name.name,
                                                   upperdir,
                                                   dentry->d_name.len),
                                    attr);
        err = PTR_ERR(newdentry);
        if (IS_ERR(newdentry))
                goto out_unlock;

        if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) &&
            !ovl_allow_offline_changes(ofs)) {
                /* Setting opaque here is just an optimization, allow to fail */
                ovl_set_opaque(dentry, newdentry);
        }

        err = ovl_instantiate(dentry, inode, newdentry, !!attr->hardlink);
        if (err)
                goto out_cleanup;
out_unlock:
        inode_unlock(udir);
        return err;

out_cleanup:
        /* Instantiation failed: remove the freshly created upper object */
        ovl_cleanup(udir, newdentry);
        dput(newdentry);
        goto out_unlock;
}
 361
 362static struct dentry *ovl_clear_empty(struct dentry *dentry,
 363                                      struct list_head *list)
 364{
 365        struct dentry *workdir = ovl_workdir(dentry);
 366        struct inode *wdir = workdir->d_inode;
 367        struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
 368        struct inode *udir = upperdir->d_inode;
 369        struct path upperpath;
 370        struct dentry *upper;
 371        struct dentry *opaquedir;
 372        struct kstat stat;
 373        int err;
 374
 375        if (WARN_ON(!workdir))
 376                return ERR_PTR(-EROFS);
 377
 378        err = ovl_lock_rename_workdir(workdir, upperdir);
 379        if (err)
 380                goto out;
 381
 382        ovl_path_upper(dentry, &upperpath);
 383        err = vfs_getattr(&upperpath, &stat,
 384                          STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
 385        if (err)
 386                goto out_unlock;
 387
 388        err = -ESTALE;
 389        if (!S_ISDIR(stat.mode))
 390                goto out_unlock;
 391        upper = upperpath.dentry;
 392        if (upper->d_parent->d_inode != udir)
 393                goto out_unlock;
 394
 395        opaquedir = ovl_create_temp(workdir, OVL_CATTR(stat.mode));
 396        err = PTR_ERR(opaquedir);
 397        if (IS_ERR(opaquedir))
 398                goto out_unlock;
 399
 400        err = ovl_copy_xattr(dentry->d_sb, upper, opaquedir);
 401        if (err)
 402                goto out_cleanup;
 403
 404        err = ovl_set_opaque(dentry, opaquedir);
 405        if (err)
 406                goto out_cleanup;
 407
 408        inode_lock(opaquedir->d_inode);
 409        err = ovl_set_attr(opaquedir, &stat);
 410        inode_unlock(opaquedir->d_inode);
 411        if (err)
 412                goto out_cleanup;
 413
 414        err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
 415        if (err)
 416                goto out_cleanup;
 417
 418        ovl_cleanup_whiteouts(upper, list);
 419        ovl_cleanup(wdir, upper);
 420        unlock_rename(workdir, upperdir);
 421
 422        /* dentry's upper doesn't match now, get rid of it */
 423        d_drop(dentry);
 424
 425        return opaquedir;
 426
 427out_cleanup:
 428        ovl_cleanup(wdir, opaquedir);
 429        dput(opaquedir);
 430out_unlock:
 431        unlock_rename(workdir, upperdir);
 432out:
 433        return ERR_PTR(err);
 434}
 435
 436static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
 437                             const struct posix_acl *acl)
 438{
 439        void *buffer;
 440        size_t size;
 441        int err;
 442
 443        if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
 444                return 0;
 445
 446        size = posix_acl_xattr_size(acl->a_count);
 447        buffer = kmalloc(size, GFP_KERNEL);
 448        if (!buffer)
 449                return -ENOMEM;
 450
 451        err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
 452        if (err < 0)
 453                goto out_free;
 454
 455        err = vfs_setxattr(&init_user_ns, upperdentry, name, buffer, size, XATTR_CREATE);
 456out_free:
 457        kfree(buffer);
 458        return err;
 459}
 460
 461static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
 462                                    struct ovl_cattr *cattr)
 463{
 464        struct dentry *workdir = ovl_workdir(dentry);
 465        struct inode *wdir = workdir->d_inode;
 466        struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
 467        struct inode *udir = upperdir->d_inode;
 468        struct dentry *upper;
 469        struct dentry *newdentry;
 470        int err;
 471        struct posix_acl *acl, *default_acl;
 472        bool hardlink = !!cattr->hardlink;
 473
 474        if (WARN_ON(!workdir))
 475                return -EROFS;
 476
 477        if (!hardlink) {
 478                err = posix_acl_create(dentry->d_parent->d_inode,
 479                                       &cattr->mode, &default_acl, &acl);
 480                if (err)
 481                        return err;
 482        }
 483
 484        err = ovl_lock_rename_workdir(workdir, upperdir);
 485        if (err)
 486                goto out;
 487
 488        upper = lookup_one_len(dentry->d_name.name, upperdir,
 489                               dentry->d_name.len);
 490        err = PTR_ERR(upper);
 491        if (IS_ERR(upper))
 492                goto out_unlock;
 493
 494        err = -ESTALE;
 495        if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
 496                goto out_dput;
 497
 498        newdentry = ovl_create_temp(workdir, cattr);
 499        err = PTR_ERR(newdentry);
 500        if (IS_ERR(newdentry))
 501                goto out_dput;
 502
 503        /*
 504         * mode could have been mutilated due to umask (e.g. sgid directory)
 505         */
 506        if (!hardlink &&
 507            !S_ISLNK(cattr->mode) &&
 508            newdentry->d_inode->i_mode != cattr->mode) {
 509                struct iattr attr = {
 510                        .ia_valid = ATTR_MODE,
 511                        .ia_mode = cattr->mode,
 512                };
 513                inode_lock(newdentry->d_inode);
 514                err = notify_change(&init_user_ns, newdentry, &attr, NULL);
 515                inode_unlock(newdentry->d_inode);
 516                if (err)
 517                        goto out_cleanup;
 518        }
 519        if (!hardlink) {
 520                err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
 521                                        acl);
 522                if (err)
 523                        goto out_cleanup;
 524
 525                err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
 526                                        default_acl);
 527                if (err)
 528                        goto out_cleanup;
 529        }
 530
 531        if (!hardlink && S_ISDIR(cattr->mode)) {
 532                err = ovl_set_opaque(dentry, newdentry);
 533                if (err)
 534                        goto out_cleanup;
 535
 536                err = ovl_do_rename(wdir, newdentry, udir, upper,
 537                                    RENAME_EXCHANGE);
 538                if (err)
 539                        goto out_cleanup;
 540
 541                ovl_cleanup(wdir, upper);
 542        } else {
 543                err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
 544                if (err)
 545                        goto out_cleanup;
 546        }
 547        err = ovl_instantiate(dentry, inode, newdentry, hardlink);
 548        if (err) {
 549                ovl_cleanup(udir, newdentry);
 550                dput(newdentry);
 551        }
 552out_dput:
 553        dput(upper);
 554out_unlock:
 555        unlock_rename(workdir, upperdir);
 556out:
 557        if (!hardlink) {
 558                posix_acl_release(acl);
 559                posix_acl_release(default_acl);
 560        }
 561        return err;
 562
 563out_cleanup:
 564        ovl_cleanup(wdir, newdentry);
 565        dput(newdentry);
 566        goto out_dput;
 567}
 568
/*
 * Common entry for creating a new object or a hardlink at @dentry.
 *
 * @dentry: overlay dentry to create
 * @inode:  overlay inode; its i_uid/i_gid become the fsuid/fsgid used for
 *          the creation
 * @attr:   creation attributes (attr->hardlink set for a link)
 * @origin: when true, the parent's upper dir is marked "impure" first
 *
 * Copies up the parent, switches to the overlay's override credentials
 * (further adjusted as described above) and dispatches to
 * ovl_create_upper() or ovl_create_over_whiteout().  The credentials that
 * were active on entry are restored before returning.
 *
 * Returns 0 on success or a negative error code (-ENOMEM if the new
 * credentials cannot be prepared).
 */
static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
                              struct ovl_cattr *attr, bool origin)
{
        int err;
        const struct cred *old_cred;
        struct cred *override_cred;
        struct dentry *parent = dentry->d_parent;

        err = ovl_copy_up(parent);
        if (err)
                return err;

        old_cred = ovl_override_creds(dentry->d_sb);

        /*
         * When linking a file with copy up origin into a new parent, mark the
         * new parent dir "impure".
         */
        if (origin) {
                err = ovl_set_impure(parent, ovl_dentry_upper(parent));
                if (err)
                        goto out_revert_creds;
        }

        /* err stays -ENOMEM if prepare_creds() fails */
        err = -ENOMEM;
        override_cred = prepare_creds();
        if (override_cred) {
                override_cred->fsuid = inode->i_uid;
                override_cred->fsgid = inode->i_gid;
                if (!attr->hardlink) {
                        err = security_dentry_create_files_as(dentry,
                                        attr->mode, &dentry->d_name, old_cred,
                                        override_cred);
                        if (err) {
                                put_cred(override_cred);
                                goto out_revert_creds;
                        }
                }
                /*
                 * Install the adjusted creds, dropping the reference on
                 * whatever override_creds() returns, then drop our own
                 * reference on override_cred.
                 */
                put_cred(override_creds(override_cred));
                put_cred(override_cred);

                if (!ovl_dentry_is_whiteout(dentry))
                        err = ovl_create_upper(dentry, inode, attr);
                else
                        err = ovl_create_over_whiteout(dentry, inode, attr);
        }
out_revert_creds:
        revert_creds(old_cred);
        return err;
}
 619
/*
 * Shared implementation behind ovl_create(), ovl_mkdir(), ovl_mknod() and
 * ovl_symlink().
 *
 * @dentry: overlay dentry to create
 * @mode:   file type and permission bits of the new object
 * @rdev:   device number, stored in the creation attributes
 * @link:   symlink target, stored in the creation attributes
 *
 * Takes write access via ovl_want_write(), pre-allocates an overlay inode
 * and hands over to ovl_create_or_link().  Returns 0 on success, -ENOMEM
 * if the inode cannot be allocated, or another negative error code.
 */
static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
                             const char *link)
{
        int err;
        struct inode *inode;
        struct ovl_cattr attr = {
                .rdev = rdev,
                .link = link,
        };

        err = ovl_want_write(dentry);
        if (err)
                goto out;

        /* Preallocate inode to be used by ovl_get_inode() */
        err = -ENOMEM;
        inode = ovl_new_inode(dentry->d_sb, mode, rdev);
        if (!inode)
                goto out_drop_write;

        spin_lock(&inode->i_lock);
        inode->i_state |= I_CREATING;
        spin_unlock(&inode->i_lock);

        /* Use the mode computed by inode_init_owner() for the real creation */
        inode_init_owner(&init_user_ns, inode, dentry->d_parent->d_inode, mode);
        attr.mode = inode->i_mode;

        err = ovl_create_or_link(dentry, inode, &attr, false);
        /* Did we end up using the preallocated inode? */
        if (inode != d_inode(dentry))
                iput(inode);

out_drop_write:
        ovl_drop_write(dentry);
out:
        return err;
}
 657
 658static int ovl_create(struct user_namespace *mnt_userns, struct inode *dir,
 659                      struct dentry *dentry, umode_t mode, bool excl)
 660{
 661        return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
 662}
 663
 664static int ovl_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
 665                     struct dentry *dentry, umode_t mode)
 666{
 667        return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
 668}
 669
 670static int ovl_mknod(struct user_namespace *mnt_userns, struct inode *dir,
 671                     struct dentry *dentry, umode_t mode, dev_t rdev)
 672{
 673        /* Don't allow creation of "whiteout" on overlay */
 674        if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
 675                return -EPERM;
 676
 677        return ovl_create_object(dentry, mode, rdev, NULL);
 678}
 679
 680static int ovl_symlink(struct user_namespace *mnt_userns, struct inode *dir,
 681                       struct dentry *dentry, const char *link)
 682{
 683        return ovl_create_object(dentry, S_IFLNK, 0, link);
 684}
 685
 686static int ovl_set_link_redirect(struct dentry *dentry)
 687{
 688        const struct cred *old_cred;
 689        int err;
 690
 691        old_cred = ovl_override_creds(dentry->d_sb);
 692        err = ovl_set_redirect(dentry, false);
 693        revert_creds(old_cred);
 694
 695        return err;
 696}
 697
/*
 * Create a hardlink @new to the existing overlay dentry @old.
 *
 * @old:    existing overlay dentry to link to
 * @newdir: not used here
 * @new:    overlay dentry for the new name
 *
 * Both @old and the parent of @new are copied up first.  If @old is a
 * metacopy dentry, a redirect is set on it before linking.  Returns 0 on
 * success or a negative error code.
 */
static int ovl_link(struct dentry *old, struct inode *newdir,
                    struct dentry *new)
{
        int err;
        struct inode *inode;

        err = ovl_want_write(old);
        if (err)
                goto out;

        err = ovl_copy_up(old);
        if (err)
                goto out_drop_write;

        err = ovl_copy_up(new->d_parent);
        if (err)
                goto out_drop_write;

        if (ovl_is_metacopy_dentry(old)) {
                err = ovl_set_link_redirect(old);
                if (err)
                        goto out_drop_write;
        }

        err = ovl_nlink_start(old);
        if (err)
                goto out_drop_write;

        /* Extra inode reference for the new dentry; dropped again on failure */
        inode = d_inode(old);
        ihold(inode);

        err = ovl_create_or_link(new, inode,
                        &(struct ovl_cattr) {.hardlink = ovl_dentry_upper(old)},
                        ovl_type_origin(old));
        if (err)
                iput(inode);

        ovl_nlink_end(old);
out_drop_write:
        ovl_drop_write(old);
out:
        return err;
}
 741
 742static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
 743{
 744        return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
 745}
 746
/*
 * Remove @dentry by putting a whiteout in its place in the upper dir.
 *
 * @dentry: overlay dentry being removed
 * @list:   when non-empty, the upper directory is first replaced by an
 *          empty opaque one via ovl_clear_empty()
 *
 * Returns 0 on success, -EROFS when there is no workdir, -ESTALE when the
 * upper entry found under the lock is not the expected one, or another
 * negative error code.
 */
static int ovl_remove_and_whiteout(struct dentry *dentry,
                                   struct list_head *list)
{
        struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
        struct dentry *workdir = ovl_workdir(dentry);
        struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
        struct dentry *upper;
        struct dentry *opaquedir = NULL;
        int err;

        if (WARN_ON(!workdir))
                return -EROFS;

        if (!list_empty(list)) {
                opaquedir = ovl_clear_empty(dentry, list);
                err = PTR_ERR(opaquedir);
                if (IS_ERR(opaquedir))
                        goto out;
        }

        err = ovl_lock_rename_workdir(workdir, upperdir);
        if (err)
                goto out_dput;

        upper = lookup_one_len(dentry->d_name.name, upperdir,
                               dentry->d_name.len);
        err = PTR_ERR(upper);
        if (IS_ERR(upper))
                goto out_unlock;

        /*
         * The looked-up entry must be the opaque dir created above, or, if
         * none was created and the overlay dentry has an upper, that upper.
         */
        err = -ESTALE;
        if ((opaquedir && upper != opaquedir) ||
            (!opaquedir && ovl_dentry_upper(dentry) &&
             !ovl_matches_upper(dentry, upper))) {
                goto out_dput_upper;
        }

        err = ovl_cleanup_and_whiteout(ofs, d_inode(upperdir), upper);
        if (err)
                goto out_d_drop;

        ovl_dir_modified(dentry->d_parent, true);
out_d_drop:
        /* Reached on success and on whiteout failure alike */
        d_drop(dentry);
out_dput_upper:
        dput(upper);
out_unlock:
        unlock_rename(workdir, upperdir);
out_dput:
        dput(opaquedir);
out:
        return err;
}
 800
/*
 * Remove @dentry by removing its entry from the upper dir (no whiteout).
 *
 * @dentry: overlay dentry being removed
 * @is_dir: use rmdir instead of unlink
 * @list:   when non-empty, the upper directory is first replaced by an
 *          empty opaque one via ovl_clear_empty()
 *
 * Returns 0 on success, -ESTALE when the upper entry found under the lock
 * is not the expected one, or another negative error code.
 */
static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
                            struct list_head *list)
{
        struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
        struct inode *dir = upperdir->d_inode;
        struct dentry *upper;
        struct dentry *opaquedir = NULL;
        int err;

        if (!list_empty(list)) {
                opaquedir = ovl_clear_empty(dentry, list);
                err = PTR_ERR(opaquedir);
                if (IS_ERR(opaquedir))
                        goto out;
        }

        inode_lock_nested(dir, I_MUTEX_PARENT);
        upper = lookup_one_len(dentry->d_name.name, upperdir,
                               dentry->d_name.len);
        err = PTR_ERR(upper);
        if (IS_ERR(upper))
                goto out_unlock;

        /* The looked-up entry must be the opaque dir or the known upper */
        err = -ESTALE;
        if ((opaquedir && upper != opaquedir) ||
            (!opaquedir && !ovl_matches_upper(dentry, upper)))
                goto out_dput_upper;

        if (is_dir)
                err = vfs_rmdir(&init_user_ns, dir, upper);
        else
                err = vfs_unlink(&init_user_ns, dir, upper, NULL);
        /* Called regardless of whether the removal succeeded */
        ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry));

        /*
         * Keeping this dentry hashed would mean having to release
         * upperpath/lowerpath, which could only be done if we are the
         * sole user of this dentry.  Too tricky...  Just unhash for
         * now.
         */
        if (!err)
                d_drop(dentry);
out_dput_upper:
        dput(upper);
out_unlock:
        inode_unlock(dir);
        dput(opaquedir);
out:
        return err;
}
 851
 852static bool ovl_pure_upper(struct dentry *dentry)
 853{
 854        return !ovl_dentry_lower(dentry) &&
 855               !ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
 856}
 857
/*
 * Decrement the link count of @dentry's inode, but never below the number
 * of links implied by another hashed alias (0 or 1).
 */
static void ovl_drop_nlink(struct dentry *dentry)
{
        struct inode *inode = d_inode(dentry);
        struct dentry *alias;

        /* Try to find another, hashed alias */
        spin_lock(&inode->i_lock);
        hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
                if (alias != dentry && !d_unhashed(alias))
                        break;
        }
        spin_unlock(&inode->i_lock);

        /*
         * Changes to underlying layers may cause i_nlink to lose sync with
         * reality.  In this case prevent the link count from going to zero
         * prematurely.
         *
         * NOTE(review): the !!alias test relies on hlist_for_each_entry()
         * leaving the cursor NULL when the list is exhausted - confirm.
         */
        if (inode->i_nlink > !!alias)
                drop_nlink(inode);
}
 879
/*
 * Shared implementation of ovl_unlink() and ovl_rmdir().
 *
 * @dentry: overlay dentry to remove
 * @is_dir: true for rmdir, false for unlink
 *
 * Depending on ovl_lower_positive(), the entry is either simply removed
 * from upper or replaced by a whiteout.  On success the overlay inode's
 * link count is cleared (directory) or dropped (non-directory).
 *
 * Returns 0 on success or a negative error code.
 */
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
        int err;
        const struct cred *old_cred;
        struct dentry *upperdentry;
        bool lower_positive = ovl_lower_positive(dentry);
        LIST_HEAD(list);

        /* No need to clean pure upper removed by vfs_rmdir() */
        if (is_dir && (lower_positive || !ovl_pure_upper(dentry))) {
                err = ovl_check_empty_dir(dentry, &list);
                if (err)
                        goto out;
        }

        err = ovl_want_write(dentry);
        if (err)
                goto out;

        err = ovl_copy_up(dentry->d_parent);
        if (err)
                goto out_drop_write;

        err = ovl_nlink_start(dentry);
        if (err)
                goto out_drop_write;

        /* The actual removal runs with the overlay's override credentials */
        old_cred = ovl_override_creds(dentry->d_sb);
        if (!lower_positive)
                err = ovl_remove_upper(dentry, is_dir, &list);
        else
                err = ovl_remove_and_whiteout(dentry, &list);
        revert_creds(old_cred);
        if (!err) {
                if (is_dir)
                        clear_nlink(dentry->d_inode);
                else
                        ovl_drop_nlink(dentry);
        }
        ovl_nlink_end(dentry);

        /*
         * Copy ctime
         *
         * Note: we fail to update ctime if there was no copy-up, only a
         * whiteout
         */
        upperdentry = ovl_dentry_upper(dentry);
        if (upperdentry)
                ovl_copyattr(d_inode(upperdentry), d_inode(dentry));

out_drop_write:
        ovl_drop_write(dentry);
out:
        /* Free whatever ovl_check_empty_dir() may have put on the list */
        ovl_cache_free(&list);
        return err;
}
 937
 938static int ovl_unlink(struct inode *dir, struct dentry *dentry)
 939{
 940        return ovl_do_remove(dentry, false);
 941}
 942
 943static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
 944{
 945        return ovl_do_remove(dentry, true);
 946}
 947
 948static bool ovl_type_merge_or_lower(struct dentry *dentry)
 949{
 950        enum ovl_path_type type = ovl_path_type(dentry);
 951
 952        return OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type);
 953}
 954
 955static bool ovl_can_move(struct dentry *dentry)
 956{
 957        return ovl_redirect_dir(dentry->d_sb) ||
 958                !d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
 959}
 960
 961static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect)
 962{
 963        char *buf, *ret;
 964        struct dentry *d, *tmp;
 965        int buflen = ovl_redirect_max + 1;
 966
 967        if (!abs_redirect) {
 968                ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
 969                               GFP_KERNEL);
 970                goto out;
 971        }
 972
 973        buf = ret = kmalloc(buflen, GFP_KERNEL);
 974        if (!buf)
 975                goto out;
 976
 977        buflen--;
 978        buf[buflen] = '\0';
 979        for (d = dget(dentry); !IS_ROOT(d);) {
 980                const char *name;
 981                int thislen;
 982
 983                spin_lock(&d->d_lock);
 984                name = ovl_dentry_get_redirect(d);
 985                if (name) {
 986                        thislen = strlen(name);
 987                } else {
 988                        name = d->d_name.name;
 989                        thislen = d->d_name.len;
 990                }
 991
 992                /* If path is too long, fall back to userspace move */
 993                if (thislen + (name[0] != '/') > buflen) {
 994                        ret = ERR_PTR(-EXDEV);
 995                        spin_unlock(&d->d_lock);
 996                        goto out_put;
 997                }
 998
 999                buflen -= thislen;
1000                memcpy(&buf[buflen], name, thislen);
1001                spin_unlock(&d->d_lock);
1002                tmp = dget_parent(d);
1003
1004                dput(d);
1005                d = tmp;
1006
1007                /* Absolute redirect: finished */
1008                if (buf[buflen] == '/')
1009                        break;
1010                buflen--;
1011                buf[buflen] = '/';
1012        }
1013        ret = kstrdup(&buf[buflen], GFP_KERNEL);
1014out_put:
1015        dput(d);
1016        kfree(buf);
1017out:
1018        return ret ? ret : ERR_PTR(-ENOMEM);
1019}
1020
1021static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir)
1022{
1023        struct dentry *lowerdentry;
1024
1025        if (!samedir)
1026                return true;
1027
1028        if (d_is_dir(dentry))
1029                return false;
1030
1031        /*
1032         * For non-dir hardlinked files, we need absolute redirects
1033         * in general as two upper hardlinks could be in different
1034         * dirs. We could put a relative redirect now and convert
1035         * it to absolute redirect later. But when nlink > 1 and
1036         * indexing is on, that means relative redirect needs to be
1037         * converted to absolute during copy up of another lower
1038         * hardllink as well.
1039         *
1040         * So without optimizing too much, just check if lower is
1041         * a hard link or not. If lower is hard link, put absolute
1042         * redirect.
1043         */
1044        lowerdentry = ovl_dentry_lower(dentry);
1045        return (d_inode(lowerdentry)->i_nlink > 1);
1046}
1047
1048static int ovl_set_redirect(struct dentry *dentry, bool samedir)
1049{
1050        int err;
1051        struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
1052        const char *redirect = ovl_dentry_get_redirect(dentry);
1053        bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir);
1054
1055        if (redirect && (!absolute_redirect || redirect[0] == '/'))
1056                return 0;
1057
1058        redirect = ovl_get_redirect(dentry, absolute_redirect);
1059        if (IS_ERR(redirect))
1060                return PTR_ERR(redirect);
1061
1062        err = ovl_check_setxattr(ofs, ovl_dentry_upper(dentry),
1063                                 OVL_XATTR_REDIRECT,
1064                                 redirect, strlen(redirect), -EXDEV);
1065        if (!err) {
1066                spin_lock(&dentry->d_lock);
1067                ovl_dentry_set_redirect(dentry, redirect);
1068                spin_unlock(&dentry->d_lock);
1069        } else {
1070                kfree(redirect);
1071                pr_warn_ratelimited("failed to set redirect (%i)\n",
1072                                    err);
1073                /* Fall back to userspace copy-up */
1074                err = -EXDEV;
1075        }
1076        return err;
1077}
1078
1079static int ovl_rename(struct user_namespace *mnt_userns, struct inode *olddir,
1080                      struct dentry *old, struct inode *newdir,
1081                      struct dentry *new, unsigned int flags)
1082{
1083        int err;
1084        struct dentry *old_upperdir;
1085        struct dentry *new_upperdir;
1086        struct dentry *olddentry;
1087        struct dentry *newdentry;
1088        struct dentry *trap;
1089        bool old_opaque;
1090        bool new_opaque;
1091        bool cleanup_whiteout = false;
1092        bool update_nlink = false;
1093        bool overwrite = !(flags & RENAME_EXCHANGE);
1094        bool is_dir = d_is_dir(old);
1095        bool new_is_dir = d_is_dir(new);
1096        bool samedir = olddir == newdir;
1097        struct dentry *opaquedir = NULL;
1098        const struct cred *old_cred = NULL;
1099        LIST_HEAD(list);
1100
1101        err = -EINVAL;
1102        if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
1103                goto out;
1104
1105        flags &= ~RENAME_NOREPLACE;
1106
1107        /* Don't copy up directory trees */
1108        err = -EXDEV;
1109        if (!ovl_can_move(old))
1110                goto out;
1111        if (!overwrite && !ovl_can_move(new))
1112                goto out;
1113
1114        if (overwrite && new_is_dir && !ovl_pure_upper(new)) {
1115                err = ovl_check_empty_dir(new, &list);
1116                if (err)
1117                        goto out;
1118        }
1119
1120        if (overwrite) {
1121                if (ovl_lower_positive(old)) {
1122                        if (!ovl_dentry_is_whiteout(new)) {
1123                                /* Whiteout source */
1124                                flags |= RENAME_WHITEOUT;
1125                        } else {
1126                                /* Switch whiteouts */
1127                                flags |= RENAME_EXCHANGE;
1128                        }
1129                } else if (is_dir && ovl_dentry_is_whiteout(new)) {
1130                        flags |= RENAME_EXCHANGE;
1131                        cleanup_whiteout = true;
1132                }
1133        }
1134
1135        err = ovl_want_write(old);
1136        if (err)
1137                goto out;
1138
1139        err = ovl_copy_up(old);
1140        if (err)
1141                goto out_drop_write;
1142
1143        err = ovl_copy_up(new->d_parent);
1144        if (err)
1145                goto out_drop_write;
1146        if (!overwrite) {
1147                err = ovl_copy_up(new);
1148                if (err)
1149                        goto out_drop_write;
1150        } else if (d_inode(new)) {
1151                err = ovl_nlink_start(new);
1152                if (err)
1153                        goto out_drop_write;
1154
1155                update_nlink = true;
1156        }
1157
1158        old_cred = ovl_override_creds(old->d_sb);
1159
1160        if (!list_empty(&list)) {
1161                opaquedir = ovl_clear_empty(new, &list);
1162                err = PTR_ERR(opaquedir);
1163                if (IS_ERR(opaquedir)) {
1164                        opaquedir = NULL;
1165                        goto out_revert_creds;
1166                }
1167        }
1168
1169        old_upperdir = ovl_dentry_upper(old->d_parent);
1170        new_upperdir = ovl_dentry_upper(new->d_parent);
1171
1172        if (!samedir) {
1173                /*
1174                 * When moving a merge dir or non-dir with copy up origin into
1175                 * a new parent, we are marking the new parent dir "impure".
1176                 * When ovl_iterate() iterates an "impure" upper dir, it will
1177                 * lookup the origin inodes of the entries to fill d_ino.
1178                 */
1179                if (ovl_type_origin(old)) {
1180                        err = ovl_set_impure(new->d_parent, new_upperdir);
1181                        if (err)
1182                                goto out_revert_creds;
1183                }
1184                if (!overwrite && ovl_type_origin(new)) {
1185                        err = ovl_set_impure(old->d_parent, old_upperdir);
1186                        if (err)
1187                                goto out_revert_creds;
1188                }
1189        }
1190
1191        trap = lock_rename(new_upperdir, old_upperdir);
1192
1193        olddentry = lookup_one_len(old->d_name.name, old_upperdir,
1194                                   old->d_name.len);
1195        err = PTR_ERR(olddentry);
1196        if (IS_ERR(olddentry))
1197                goto out_unlock;
1198
1199        err = -ESTALE;
1200        if (!ovl_matches_upper(old, olddentry))
1201                goto out_dput_old;
1202
1203        newdentry = lookup_one_len(new->d_name.name, new_upperdir,
1204                                   new->d_name.len);
1205        err = PTR_ERR(newdentry);
1206        if (IS_ERR(newdentry))
1207                goto out_dput_old;
1208
1209        old_opaque = ovl_dentry_is_opaque(old);
1210        new_opaque = ovl_dentry_is_opaque(new);
1211
1212        err = -ESTALE;
1213        if (d_inode(new) && ovl_dentry_upper(new)) {
1214                if (opaquedir) {
1215                        if (newdentry != opaquedir)
1216                                goto out_dput;
1217                } else {
1218                        if (!ovl_matches_upper(new, newdentry))
1219                                goto out_dput;
1220                }
1221        } else {
1222                if (!d_is_negative(newdentry)) {
1223                        if (!new_opaque || !ovl_is_whiteout(newdentry))
1224                                goto out_dput;
1225                } else {
1226                        if (flags & RENAME_EXCHANGE)
1227                                goto out_dput;
1228                }
1229        }
1230
1231        if (olddentry == trap)
1232                goto out_dput;
1233        if (newdentry == trap)
1234                goto out_dput;
1235
1236        if (olddentry->d_inode == newdentry->d_inode)
1237                goto out_dput;
1238
1239        err = 0;
1240        if (ovl_type_merge_or_lower(old))
1241                err = ovl_set_redirect(old, samedir);
1242        else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent))
1243                err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
1244        if (err)
1245                goto out_dput;
1246
1247        if (!overwrite && ovl_type_merge_or_lower(new))
1248                err = ovl_set_redirect(new, samedir);
1249        else if (!overwrite && new_is_dir && !new_opaque &&
1250                 ovl_type_merge(old->d_parent))
1251                err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
1252        if (err)
1253                goto out_dput;
1254
1255        err = ovl_do_rename(old_upperdir->d_inode, olddentry,
1256                            new_upperdir->d_inode, newdentry, flags);
1257        if (err)
1258                goto out_dput;
1259
1260        if (cleanup_whiteout)
1261                ovl_cleanup(old_upperdir->d_inode, newdentry);
1262
1263        if (overwrite && d_inode(new)) {
1264                if (new_is_dir)
1265                        clear_nlink(d_inode(new));
1266                else
1267                        ovl_drop_nlink(new);
1268        }
1269
1270        ovl_dir_modified(old->d_parent, ovl_type_origin(old) ||
1271                         (!overwrite && ovl_type_origin(new)));
1272        ovl_dir_modified(new->d_parent, ovl_type_origin(old) ||
1273                         (d_inode(new) && ovl_type_origin(new)));
1274
1275        /* copy ctime: */
1276        ovl_copyattr(d_inode(olddentry), d_inode(old));
1277        if (d_inode(new) && ovl_dentry_upper(new))
1278                ovl_copyattr(d_inode(newdentry), d_inode(new));
1279
1280out_dput:
1281        dput(newdentry);
1282out_dput_old:
1283        dput(olddentry);
1284out_unlock:
1285        unlock_rename(new_upperdir, old_upperdir);
1286out_revert_creds:
1287        revert_creds(old_cred);
1288        if (update_nlink)
1289                ovl_nlink_end(new);
1290out_drop_write:
1291        ovl_drop_write(old);
1292out:
1293        dput(opaquedir);
1294        ovl_cache_free(&list);
1295        return err;
1296}
1297
1298const struct inode_operations ovl_dir_inode_operations = {
1299        .lookup         = ovl_lookup,
1300        .mkdir          = ovl_mkdir,
1301        .symlink        = ovl_symlink,
1302        .unlink         = ovl_unlink,
1303        .rmdir          = ovl_rmdir,
1304        .rename         = ovl_rename,
1305        .link           = ovl_link,
1306        .setattr        = ovl_setattr,
1307        .create         = ovl_create,
1308        .mknod          = ovl_mknod,
1309        .permission     = ovl_permission,
1310        .getattr        = ovl_getattr,
1311        .listxattr      = ovl_listxattr,
1312        .get_acl        = ovl_get_acl,
1313        .update_time    = ovl_update_time,
1314        .fileattr_get   = ovl_fileattr_get,
1315        .fileattr_set   = ovl_fileattr_set,
1316};
1317