linux/fs/overlayfs/namei.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2011 Novell Inc.
   4 * Copyright (C) 2016 Red Hat, Inc.
   5 */
   6
   7#include <linux/fs.h>
   8#include <linux/cred.h>
   9#include <linux/ctype.h>
  10#include <linux/namei.h>
  11#include <linux/xattr.h>
  12#include <linux/ratelimit.h>
  13#include <linux/mount.h>
  14#include <linux/exportfs.h>
  15#include "overlayfs.h"
  16
/*
 * State carried through a multi-layer lookup of one overlay name.
 */
struct ovl_lookup_data {
	/* Overlay super block; used to reach the overlay fs private data */
	struct super_block *sb;
	/* Name currently being looked up; repointed at @redirect once a redirect is applied */
	struct qstr name;
	/* Set once the last path element was found to be a directory */
	bool is_dir;
	/* Set when a whiteout, or an opaque dir as last element, was found */
	bool opaque;
	/* Set when lookup must not continue into further layers */
	bool stop;
	/* Set by the caller when the layer being looked up is the last one */
	bool last;
	/* kmalloc'ed redirect path, or NULL; replaced (old one freed) on each new redirect */
	char *redirect;
	/* Result of the metacopy xattr check on the last non-dir found */
	bool metacopy;
};
  27
  28static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
  29                              size_t prelen, const char *post)
  30{
  31        int res;
  32        char *buf;
  33        struct ovl_fs *ofs = OVL_FS(d->sb);
  34
  35        buf = ovl_get_redirect_xattr(ofs, dentry, prelen + strlen(post));
  36        if (IS_ERR_OR_NULL(buf))
  37                return PTR_ERR(buf);
  38
  39        if (buf[0] == '/') {
  40                /*
  41                 * One of the ancestor path elements in an absolute path
  42                 * lookup in ovl_lookup_layer() could have been opaque and
  43                 * that will stop further lookup in lower layers (d->stop=true)
  44                 * But we have found an absolute redirect in decendant path
  45                 * element and that should force continue lookup in lower
  46                 * layers (reset d->stop).
  47                 */
  48                d->stop = false;
  49        } else {
  50                res = strlen(buf) + 1;
  51                memmove(buf + prelen, buf, res);
  52                memcpy(buf, d->name.name, prelen);
  53        }
  54
  55        strcat(buf, post);
  56        kfree(d->redirect);
  57        d->redirect = buf;
  58        d->name.name = d->redirect;
  59        d->name.len = strlen(d->redirect);
  60
  61        return 0;
  62}
  63
  64static int ovl_acceptable(void *ctx, struct dentry *dentry)
  65{
  66        /*
  67         * A non-dir origin may be disconnected, which is fine, because
  68         * we only need it for its unique inode number.
  69         */
  70        if (!d_is_dir(dentry))
  71                return 1;
  72
  73        /* Don't decode a deleted empty directory */
  74        if (d_unhashed(dentry))
  75                return 0;
  76
  77        /* Check if directory belongs to the layer we are decoding from */
  78        return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
  79}
  80
  81/*
  82 * Check validity of an overlay file handle buffer.
  83 *
  84 * Return 0 for a valid file handle.
  85 * Return -ENODATA for "origin unknown".
  86 * Return <0 for an invalid file handle.
  87 */
  88int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
  89{
  90        if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
  91                return -EINVAL;
  92
  93        if (fb->magic != OVL_FH_MAGIC)
  94                return -EINVAL;
  95
  96        /* Treat larger version and unknown flags as "origin unknown" */
  97        if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
  98                return -ENODATA;
  99
 100        /* Treat endianness mismatch as "origin unknown" */
 101        if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
 102            (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
 103                return -ENODATA;
 104
 105        return 0;
 106}
 107
 108static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *dentry,
 109                                 enum ovl_xattr ox)
 110{
 111        int res, err;
 112        struct ovl_fh *fh = NULL;
 113
 114        res = ovl_do_getxattr(ofs, dentry, ox, NULL, 0);
 115        if (res < 0) {
 116                if (res == -ENODATA || res == -EOPNOTSUPP)
 117                        return NULL;
 118                goto fail;
 119        }
 120        /* Zero size value means "copied up but origin unknown" */
 121        if (res == 0)
 122                return NULL;
 123
 124        fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
 125        if (!fh)
 126                return ERR_PTR(-ENOMEM);
 127
 128        res = ovl_do_getxattr(ofs, dentry, ox, fh->buf, res);
 129        if (res < 0)
 130                goto fail;
 131
 132        err = ovl_check_fb_len(&fh->fb, res);
 133        if (err < 0) {
 134                if (err == -ENODATA)
 135                        goto out;
 136                goto invalid;
 137        }
 138
 139        return fh;
 140
 141out:
 142        kfree(fh);
 143        return NULL;
 144
 145fail:
 146        pr_warn_ratelimited("failed to get origin (%i)\n", res);
 147        goto out;
 148invalid:
 149        pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
 150        goto out;
 151}
 152
 153struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
 154                                  struct vfsmount *mnt, bool connected)
 155{
 156        struct dentry *real;
 157        int bytes;
 158
 159        if (!capable(CAP_DAC_READ_SEARCH))
 160                return NULL;
 161
 162        /*
 163         * Make sure that the stored uuid matches the uuid of the lower
 164         * layer where file handle will be decoded.
 165         * In case of uuid=off option just make sure that stored uuid is null.
 166         */
 167        if (ofs->config.uuid ? !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) :
 168                              !uuid_is_null(&fh->fb.uuid))
 169                return NULL;
 170
 171        bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
 172        real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
 173                                  bytes >> 2, (int)fh->fb.type,
 174                                  connected ? ovl_acceptable : NULL, mnt);
 175        if (IS_ERR(real)) {
 176                /*
 177                 * Treat stale file handle to lower file as "origin unknown".
 178                 * upper file handle could become stale when upper file is
 179                 * unlinked and this information is needed to handle stale
 180                 * index entries correctly.
 181                 */
 182                if (real == ERR_PTR(-ESTALE) &&
 183                    !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
 184                        real = NULL;
 185                return real;
 186        }
 187
 188        if (ovl_dentry_weird(real)) {
 189                dput(real);
 190                return NULL;
 191        }
 192
 193        return real;
 194}
 195
 196static bool ovl_is_opaquedir(struct super_block *sb, struct dentry *dentry)
 197{
 198        return ovl_check_dir_xattr(sb, dentry, OVL_XATTR_OPAQUE);
 199}
 200
 201static struct dentry *ovl_lookup_positive_unlocked(const char *name,
 202                                                   struct dentry *base, int len,
 203                                                   bool drop_negative)
 204{
 205        struct dentry *ret = lookup_one_len_unlocked(name, base, len);
 206
 207        if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
 208                if (drop_negative && ret->d_lockref.count == 1) {
 209                        spin_lock(&ret->d_lock);
 210                        /* Recheck condition under lock */
 211                        if (d_is_negative(ret) && ret->d_lockref.count == 1)
 212                                __d_drop(ret);
 213                        spin_unlock(&ret->d_lock);
 214                }
 215                dput(ret);
 216                ret = ERR_PTR(-ENOENT);
 217        }
 218        return ret;
 219}
 220
/*
 * Look up one path element @name (of length @namelen) under @base in a
 * single layer and update the lookup state in @d accordingly.
 *
 * @prelen and @post describe the parts of d->name before and after this
 * element; they are handed to ovl_check_redirect() to rebuild the name when
 * the found dentry carries a redirect.  An empty @post means this is the
 * last element of the path.
 *
 * Returns 0 and stores the found dentry in *@ret, or stores NULL there if
 * nothing usable was found (negative, name too long, whiteout, type
 * mismatch).  Returns a negative errno on failure, leaving *@ret untouched.
 */
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
			     const char *name, unsigned int namelen,
			     size_t prelen, const char *post,
			     struct dentry **ret, bool drop_negative)
{
	struct dentry *this;
	int err;
	/* Nothing follows this element in the path */
	bool last_element = !post[0];

	this = ovl_lookup_positive_unlocked(name, base, namelen, drop_negative);
	if (IS_ERR(this)) {
		err = PTR_ERR(this);
		this = NULL;
		/* "Not found" is a successful lookup with a NULL result */
		if (err == -ENOENT || err == -ENAMETOOLONG)
			goto out;
		goto out_err;
	}

	if (ovl_dentry_weird(this)) {
		/* Don't support traversing automounts and other weirdness */
		err = -EREMOTE;
		goto out_err;
	}
	if (ovl_is_whiteout(this)) {
		/* A whiteout hides everything below it */
		d->stop = d->opaque = true;
		goto put_and_out;
	}
	/*
	 * This dentry should be a regular file if previous layer lookup
	 * found a metacopy dentry.
	 */
	if (last_element && d->metacopy && !d_is_reg(this)) {
		d->stop = true;
		goto put_and_out;
	}
	if (!d_can_lookup(this)) {
		/*
		 * Not a directory: unusable if a directory was already found
		 * for this name, or if more path elements follow.
		 */
		if (d->is_dir || !last_element) {
			d->stop = true;
			goto put_and_out;
		}
		err = ovl_check_metacopy_xattr(OVL_FS(d->sb), this);
		if (err < 0)
			goto out_err;

		/* Keep descending only past a metacopy file */
		d->metacopy = err;
		d->stop = !d->metacopy;
		if (!d->metacopy || d->last)
			goto out;
	} else {
		if (ovl_lookup_trap_inode(d->sb, this)) {
			/* Caught in a trap of overlapping layers */
			err = -ELOOP;
			goto out_err;
		}

		if (last_element)
			d->is_dir = true;
		/* On the last layer, skip the opaque and redirect checks */
		if (d->last)
			goto out;

		if (ovl_is_opaquedir(d->sb, this)) {
			d->stop = true;
			if (last_element)
				d->opaque = true;
			goto out;
		}
	}
	/* May replace d->name with the redirect target for the next layers */
	err = ovl_check_redirect(this, d, prelen, post);
	if (err)
		goto out_err;
out:
	*ret = this;
	return 0;

put_and_out:
	/* Found something, but it must not be returned to the caller */
	dput(this);
	this = NULL;
	goto out;

out_err:
	/* 'this' may be NULL here; dput() is called on it regardless */
	dput(this);
	return err;
}
 304
 305static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
 306                            struct dentry **ret, bool drop_negative)
 307{
 308        /* Counting down from the end, since the prefix can change */
 309        size_t rem = d->name.len - 1;
 310        struct dentry *dentry = NULL;
 311        int err;
 312
 313        if (d->name.name[0] != '/')
 314                return ovl_lookup_single(base, d, d->name.name, d->name.len,
 315                                         0, "", ret, drop_negative);
 316
 317        while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
 318                const char *s = d->name.name + d->name.len - rem;
 319                const char *next = strchrnul(s, '/');
 320                size_t thislen = next - s;
 321                bool end = !next[0];
 322
 323                /* Verify we did not go off the rails */
 324                if (WARN_ON(s[-1] != '/'))
 325                        return -EIO;
 326
 327                err = ovl_lookup_single(base, d, s, thislen,
 328                                        d->name.len - rem, next, &base,
 329                                        drop_negative);
 330                dput(dentry);
 331                if (err)
 332                        return err;
 333                dentry = base;
 334                if (end)
 335                        break;
 336
 337                rem -= thislen + 1;
 338
 339                if (WARN_ON(rem >= d->name.len))
 340                        return -EIO;
 341        }
 342        *ret = dentry;
 343        return 0;
 344}
 345
 346
 347int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
 348                        struct dentry *upperdentry, struct ovl_path **stackp)
 349{
 350        struct dentry *origin = NULL;
 351        int i;
 352
 353        for (i = 1; i < ofs->numlayer; i++) {
 354                /*
 355                 * If lower fs uuid is not unique among lower fs we cannot match
 356                 * fh->uuid to layer.
 357                 */
 358                if (ofs->layers[i].fsid &&
 359                    ofs->layers[i].fs->bad_uuid)
 360                        continue;
 361
 362                origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt,
 363                                            connected);
 364                if (origin)
 365                        break;
 366        }
 367
 368        if (!origin)
 369                return -ESTALE;
 370        else if (IS_ERR(origin))
 371                return PTR_ERR(origin);
 372
 373        if (upperdentry && !ovl_is_whiteout(upperdentry) &&
 374            inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
 375                goto invalid;
 376
 377        if (!*stackp)
 378                *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
 379        if (!*stackp) {
 380                dput(origin);
 381                return -ENOMEM;
 382        }
 383        **stackp = (struct ovl_path){
 384                .dentry = origin,
 385                .layer = &ofs->layers[i]
 386        };
 387
 388        return 0;
 389
 390invalid:
 391        pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
 392                            upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
 393                            d_inode(origin)->i_mode & S_IFMT);
 394        dput(origin);
 395        return -ESTALE;
 396}
 397
 398static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
 399                            struct ovl_path **stackp)
 400{
 401        struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN);
 402        int err;
 403
 404        if (IS_ERR_OR_NULL(fh))
 405                return PTR_ERR(fh);
 406
 407        err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
 408        kfree(fh);
 409
 410        if (err) {
 411                if (err == -ESTALE)
 412                        return 0;
 413                return err;
 414        }
 415
 416        return 0;
 417}
 418
 419/*
 420 * Verify that @fh matches the file handle stored in xattr @name.
 421 * Return 0 on match, -ESTALE on mismatch, < 0 on error.
 422 */
 423static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
 424                         enum ovl_xattr ox, const struct ovl_fh *fh)
 425{
 426        struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox);
 427        int err = 0;
 428
 429        if (!ofh)
 430                return -ENODATA;
 431
 432        if (IS_ERR(ofh))
 433                return PTR_ERR(ofh);
 434
 435        if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
 436                err = -ESTALE;
 437
 438        kfree(ofh);
 439        return err;
 440}
 441
 442/*
 443 * Verify that @real dentry matches the file handle stored in xattr @name.
 444 *
 445 * If @set is true and there is no stored file handle, encode @real and store
 446 * file handle in xattr @name.
 447 *
 448 * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
 449 */
 450int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
 451                      enum ovl_xattr ox, struct dentry *real, bool is_upper,
 452                      bool set)
 453{
 454        struct inode *inode;
 455        struct ovl_fh *fh;
 456        int err;
 457
 458        fh = ovl_encode_real_fh(ofs, real, is_upper);
 459        err = PTR_ERR(fh);
 460        if (IS_ERR(fh)) {
 461                fh = NULL;
 462                goto fail;
 463        }
 464
 465        err = ovl_verify_fh(ofs, dentry, ox, fh);
 466        if (set && err == -ENODATA)
 467                err = ovl_do_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
 468        if (err)
 469                goto fail;
 470
 471out:
 472        kfree(fh);
 473        return err;
 474
 475fail:
 476        inode = d_inode(real);
 477        pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
 478                            is_upper ? "upper" : "origin", real,
 479                            inode ? inode->i_ino : 0, err);
 480        goto out;
 481}
 482
 483/* Get upper dentry from index */
 484struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
 485{
 486        struct ovl_fh *fh;
 487        struct dentry *upper;
 488
 489        if (!d_is_dir(index))
 490                return dget(index);
 491
 492        fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER);
 493        if (IS_ERR_OR_NULL(fh))
 494                return ERR_CAST(fh);
 495
 496        upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), true);
 497        kfree(fh);
 498
 499        if (IS_ERR_OR_NULL(upper))
 500                return upper ?: ERR_PTR(-ESTALE);
 501
 502        if (!d_is_dir(upper)) {
 503                pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
 504                                    index, upper);
 505                dput(upper);
 506                return ERR_PTR(-EIO);
 507        }
 508
 509        return upper;
 510}
 511
/*
 * Verify that an index entry name matches the origin file handle stored in
 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
 * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
 *
 * Also returns 0 for a negative index dentry, for a whiteout index and for
 * a dir index when NFS export is disabled.  Returns -ENOENT for an index
 * entry that was found to be an orphan.
 */
int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
{
	struct ovl_fh *fh = NULL;
	size_t len;
	/* Single on-stack stack entry filled in by ovl_check_origin_fh() */
	struct ovl_path origin = { };
	struct ovl_path *stack = &origin;
	struct dentry *upper = NULL;
	int err;

	if (!d_inode(index))
		return 0;

	/* The name is a hex string, so it needs two chars per handle byte */
	err = -EINVAL;
	if (index->d_name.len < sizeof(struct ovl_fb)*2)
		goto fail;

	err = -ENOMEM;
	len = index->d_name.len / 2;
	fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
	if (!fh)
		goto fail;

	/* Convert the hex name back into the binary file handle */
	err = -EINVAL;
	if (hex2bin(fh->buf, index->d_name.name, len))
		goto fail;

	err = ovl_check_fb_len(&fh->fb, len);
	if (err)
		goto fail;

	/*
	 * Whiteout index entries are used as an indication that an exported
	 * overlay file handle should be treated as stale (i.e. after unlink
	 * of the overlay inode). These entries contain no origin xattr.
	 */
	if (ovl_is_whiteout(index))
		goto out;

	/*
	 * Verifying directory index entries are not stale is expensive, so
	 * only verify stale dir index if NFS export is enabled.
	 */
	if (d_is_dir(index) && !ofs->config.nfs_export)
		goto out;

	/*
	 * Directory index entries should have 'upper' xattr pointing to the
	 * real upper dir. Non-dir index entries are hardlinks to the upper
	 * real inode. For non-dir index, we can read the copy up origin xattr
	 * directly from the index dentry, but for dir index we first need to
	 * decode the upper directory.
	 */
	upper = ovl_index_upper(ofs, index);
	if (IS_ERR_OR_NULL(upper)) {
		/* PTR_ERR(NULL) is 0, which is mapped to -ESTALE below */
		err = PTR_ERR(upper);
		/*
		 * Directory index entries with no 'upper' xattr need to be
		 * removed. When dir index entry has a stale 'upper' xattr,
		 * we assume that upper dir was removed and we treat the dir
		 * index as orphan entry that needs to be whited out.
		 */
		if (err == -ESTALE)
			goto orphan;
		else if (!err)
			err = -ESTALE;
		goto fail;
	}

	/* The handle encoded in the name must equal upper's origin xattr */
	err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh);
	dput(upper);
	if (err)
		goto fail;

	/* Check if non-dir index is orphan and don't warn before cleaning it */
	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
		if (err)
			goto fail;

		if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0)
			goto orphan;
	}

out:
	/* Common exit: origin.dentry is NULL unless the origin was decoded */
	dput(origin.dentry);
	kfree(fh);
	return err;

fail:
	pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
			    index, d_inode(index)->i_mode & S_IFMT, err);
	goto out;

orphan:
	pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
			    index, d_inode(index)->i_mode & S_IFMT,
			    d_inode(index)->i_nlink);
	err = -ENOENT;
	goto out;
}
 617
 618static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
 619{
 620        char *n, *s;
 621
 622        n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
 623        if (!n)
 624                return -ENOMEM;
 625
 626        s  = bin2hex(n, fh->buf, fh->fb.len);
 627        *name = (struct qstr) QSTR_INIT(n, s - n);
 628
 629        return 0;
 630
 631}
 632
 633/*
 634 * Lookup in indexdir for the index entry of a lower real inode or a copy up
 635 * origin inode. The index entry name is the hex representation of the lower
 636 * inode file handle.
 637 *
 638 * If the index dentry in negative, then either no lower aliases have been
 639 * copied up yet, or aliases have been copied up in older kernels and are
 640 * not indexed.
 641 *
 642 * If the index dentry for a copy up origin inode is positive, but points
 643 * to an inode different than the upper inode, then either the upper inode
 644 * has been copied up and not indexed or it was indexed, but since then
 645 * index dir was cleared. Either way, that index cannot be used to indentify
 646 * the overlay inode.
 647 */
 648int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
 649                       struct qstr *name)
 650{
 651        struct ovl_fh *fh;
 652        int err;
 653
 654        fh = ovl_encode_real_fh(ofs, origin, false);
 655        if (IS_ERR(fh))
 656                return PTR_ERR(fh);
 657
 658        err = ovl_get_index_name_fh(fh, name);
 659
 660        kfree(fh);
 661        return err;
 662}
 663
 664/* Lookup index by file handle for NFS export */
 665struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
 666{
 667        struct dentry *index;
 668        struct qstr name;
 669        int err;
 670
 671        err = ovl_get_index_name_fh(fh, &name);
 672        if (err)
 673                return ERR_PTR(err);
 674
 675        index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
 676        kfree(name.name);
 677        if (IS_ERR(index)) {
 678                if (PTR_ERR(index) == -ENOENT)
 679                        index = NULL;
 680                return index;
 681        }
 682
 683        if (ovl_is_whiteout(index))
 684                err = -ESTALE;
 685        else if (ovl_dentry_weird(index))
 686                err = -EIO;
 687        else
 688                return index;
 689
 690        dput(index);
 691        return ERR_PTR(err);
 692}
 693
/*
 * Look up the index entry for @origin in the index directory.
 *
 * @upper is the upper dentry found for the overlay object, or NULL.
 * @verify selects stricter checking: a whiteout index is then reported as a
 * bad index, and a directory index is verified against @upper.
 *
 * Returns the index dentry, NULL if there is no index entry or if the index
 * inode differs from the inode of @upper, ERR_PTR(-ESTALE) for a whiteout
 * index when !@verify, ERR_PTR(-EIO) for a bad index, or another ERR_PTR
 * when building the name or the lookup itself failed.
 */
struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
				struct dentry *origin, bool verify)
{
	struct dentry *index;
	struct inode *inode;
	struct qstr name;
	bool is_dir = d_is_dir(origin);
	int err;

	/* name.name is allocated here and freed at the 'out' label */
	err = ovl_get_index_name(ofs, origin, &name);
	if (err)
		return ERR_PTR(err);

	index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
	if (IS_ERR(index)) {
		err = PTR_ERR(index);
		/* No index entry is not an error */
		if (err == -ENOENT) {
			index = NULL;
			goto out;
		}
		pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
				    d_inode(origin)->i_ino, name.len, name.name,
				    err);
		/* The error pointer in 'index' is returned as is */
		goto out;
	}

	inode = d_inode(index);
	if (ovl_is_whiteout(index) && !verify) {
		/*
		 * When index lookup is called with !verify for decoding an
		 * overlay file handle, a whiteout index implies that decode
		 * should treat file handle as stale and no need to print a
		 * warning about it.
		 */
		dput(index);
		index = ERR_PTR(-ESTALE);
		goto out;
	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
		   inode_wrong_type(inode, d_inode(origin)->i_mode)) {
		/*
		 * Index should always be of the same file type as origin
		 * except for the case of a whiteout index. A whiteout
		 * index should only exist if all lower aliases have been
		 * unlinked, which means that finding a lower origin on lookup
		 * whose index is a whiteout should be treated as an error.
		 */
		pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
				    index, d_inode(index)->i_mode & S_IFMT,
				    d_inode(origin)->i_mode & S_IFMT);
		goto fail;
	} else if (is_dir && verify) {
		/* A dir index without an upper dir to check it against */
		if (!upper) {
			pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
					    origin, index);
			goto fail;
		}

		/* Verify that dir index 'upper' xattr points to upper dir */
		err = ovl_verify_upper(ofs, index, upper, false);
		if (err) {
			if (err == -ESTALE) {
				pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
						    upper, origin, index);
			}
			goto fail;
		}
	} else if (upper && d_inode(upper) != inode) {
		/* Index belongs to a different inode than upper: ignore it */
		goto out_dput;
	}
out:
	kfree(name.name);
	return index;

out_dput:
	dput(index);
	index = NULL;
	goto out;

fail:
	/* Every bad-index case is reported to the caller as -EIO */
	dput(index);
	index = ERR_PTR(-EIO);
	goto out;
}
 778
 779/*
 780 * Returns next layer in stack starting from top.
 781 * Returns -1 if this is the last layer.
 782 */
 783int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
 784{
 785        struct ovl_entry *oe = dentry->d_fsdata;
 786
 787        BUG_ON(idx < 0);
 788        if (idx == 0) {
 789                ovl_path_upper(dentry, path);
 790                if (path->dentry)
 791                        return oe->numlower ? 1 : -1;
 792                idx++;
 793        }
 794        BUG_ON(idx > oe->numlower);
 795        path->dentry = oe->lowerstack[idx - 1].dentry;
 796        path->mnt = oe->lowerstack[idx - 1].layer->mnt;
 797
 798        return (idx < oe->numlower) ? idx + 1 : -1;
 799}
 800
 801/* Fix missing 'origin' xattr */
 802static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
 803                          struct dentry *lower, struct dentry *upper)
 804{
 805        int err;
 806
 807        if (ovl_check_origin_xattr(ofs, upper))
 808                return 0;
 809
 810        err = ovl_want_write(dentry);
 811        if (err)
 812                return err;
 813
 814        err = ovl_set_origin(ofs, lower, upper);
 815        if (!err)
 816                err = ovl_set_impure(dentry->d_parent, upper->d_parent);
 817
 818        ovl_drop_write(dentry);
 819        return err;
 820}
 821
 822struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 823                          unsigned int flags)
 824{
 825        struct ovl_entry *oe;
 826        const struct cred *old_cred;
 827        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 828        struct ovl_entry *poe = dentry->d_parent->d_fsdata;
 829        struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
 830        struct ovl_path *stack = NULL, *origin_path = NULL;
 831        struct dentry *upperdir, *upperdentry = NULL;
 832        struct dentry *origin = NULL;
 833        struct dentry *index = NULL;
 834        unsigned int ctr = 0;
 835        struct inode *inode = NULL;
 836        bool upperopaque = false;
 837        char *upperredirect = NULL;
 838        struct dentry *this;
 839        unsigned int i;
 840        int err;
 841        bool uppermetacopy = false;
 842        struct ovl_lookup_data d = {
 843                .sb = dentry->d_sb,
 844                .name = dentry->d_name,
 845                .is_dir = false,
 846                .opaque = false,
 847                .stop = false,
 848                .last = ofs->config.redirect_follow ? false : !poe->numlower,
 849                .redirect = NULL,
 850                .metacopy = false,
 851        };
 852
 853        if (dentry->d_name.len > ofs->namelen)
 854                return ERR_PTR(-ENAMETOOLONG);
 855
 856        old_cred = ovl_override_creds(dentry->d_sb);
 857        upperdir = ovl_dentry_upper(dentry->d_parent);
 858        if (upperdir) {
 859                err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
 860                if (err)
 861                        goto out;
 862
 863                if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) {
 864                        dput(upperdentry);
 865                        err = -EREMOTE;
 866                        goto out;
 867                }
 868                if (upperdentry && !d.is_dir) {
 869                        /*
 870                         * Lookup copy up origin by decoding origin file handle.
 871                         * We may get a disconnected dentry, which is fine,
 872                         * because we only need to hold the origin inode in
 873                         * cache and use its inode number.  We may even get a
 874                         * connected dentry, that is not under any of the lower
 875                         * layers root.  That is also fine for using it's inode
 876                         * number - it's the same as if we held a reference
 877                         * to a dentry in lower layer that was moved under us.
 878                         */
 879                        err = ovl_check_origin(ofs, upperdentry, &origin_path);
 880                        if (err)
 881                                goto out_put_upper;
 882
 883                        if (d.metacopy)
 884                                uppermetacopy = true;
 885                }
 886
 887                if (d.redirect) {
 888                        err = -ENOMEM;
 889                        upperredirect = kstrdup(d.redirect, GFP_KERNEL);
 890                        if (!upperredirect)
 891                                goto out_put_upper;
 892                        if (d.redirect[0] == '/')
 893                                poe = roe;
 894                }
 895                upperopaque = d.opaque;
 896        }
 897
 898        if (!d.stop && poe->numlower) {
 899                err = -ENOMEM;
 900                stack = kcalloc(ofs->numlayer - 1, sizeof(struct ovl_path),
 901                                GFP_KERNEL);
 902                if (!stack)
 903                        goto out_put_upper;
 904        }
 905
 906        for (i = 0; !d.stop && i < poe->numlower; i++) {
 907                struct ovl_path lower = poe->lowerstack[i];
 908
 909                if (!ofs->config.redirect_follow)
 910                        d.last = i == poe->numlower - 1;
 911                else
 912                        d.last = lower.layer->idx == roe->numlower;
 913
 914                err = ovl_lookup_layer(lower.dentry, &d, &this, false);
 915                if (err)
 916                        goto out_put;
 917
 918                if (!this)
 919                        continue;
 920
 921                if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
 922                        dput(this);
 923                        err = -EPERM;
 924                        pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
 925                        goto out_put;
 926                }
 927
 928                /*
 929                 * If no origin fh is stored in upper of a merge dir, store fh
 930                 * of lower dir and set upper parent "impure".
 931                 */
 932                if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
 933                        err = ovl_fix_origin(ofs, dentry, this, upperdentry);
 934                        if (err) {
 935                                dput(this);
 936                                goto out_put;
 937                        }
 938                }
 939
 940                /*
 941                 * When "verify_lower" feature is enabled, do not merge with a
 942                 * lower dir that does not match a stored origin xattr. In any
 943                 * case, only verified origin is used for index lookup.
 944                 *
 945                 * For non-dir dentry, if index=on, then ensure origin
 946                 * matches the dentry found using path based lookup,
 947                 * otherwise error out.
 948                 */
 949                if (upperdentry && !ctr &&
 950                    ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
 951                     (!d.is_dir && ofs->config.index && origin_path))) {
 952                        err = ovl_verify_origin(ofs, upperdentry, this, false);
 953                        if (err) {
 954                                dput(this);
 955                                if (d.is_dir)
 956                                        break;
 957                                goto out_put;
 958                        }
 959                        origin = this;
 960                }
 961
 962                if (d.metacopy && ctr) {
 963                        /*
 964                         * Do not store intermediate metacopy dentries in
 965                         * lower chain, except top most lower metacopy dentry.
 966                         * Continue the loop so that if there is an absolute
 967                         * redirect on this dentry, poe can be reset to roe.
 968                         */
 969                        dput(this);
 970                        this = NULL;
 971                } else {
 972                        stack[ctr].dentry = this;
 973                        stack[ctr].layer = lower.layer;
 974                        ctr++;
 975                }
 976
 977                /*
 978                 * Following redirects can have security consequences: it's like
 979                 * a symlink into the lower layer without the permission checks.
 980                 * This is only a problem if the upper layer is untrusted (e.g
 981                 * comes from an USB drive).  This can allow a non-readable file
 982                 * or directory to become readable.
 983                 *
 984                 * Only following redirects when redirects are enabled disables
 985                 * this attack vector when not necessary.
 986                 */
 987                err = -EPERM;
 988                if (d.redirect && !ofs->config.redirect_follow) {
 989                        pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
 990                                            dentry);
 991                        goto out_put;
 992                }
 993
 994                if (d.stop)
 995                        break;
 996
 997                if (d.redirect && d.redirect[0] == '/' && poe != roe) {
 998                        poe = roe;
 999                        /* Find the current layer on the root dentry */
1000                        i = lower.layer->idx - 1;
1001                }
1002        }
1003
1004        /*
1005         * For regular non-metacopy upper dentries, there is no lower
1006         * path based lookup, hence ctr will be zero. If a dentry is found
1007         * using ORIGIN xattr on upper, install it in stack.
1008         *
1009         * For metacopy dentry, path based lookup will find lower dentries.
1010         * Just make sure a corresponding data dentry has been found.
1011         */
1012        if (d.metacopy || (uppermetacopy && !ctr)) {
1013                pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
1014                                    dentry);
1015                err = -EIO;
1016                goto out_put;
1017        } else if (!d.is_dir && upperdentry && !ctr && origin_path) {
1018                if (WARN_ON(stack != NULL)) {
1019                        err = -EIO;
1020                        goto out_put;
1021                }
1022                stack = origin_path;
1023                ctr = 1;
1024                origin = origin_path->dentry;
1025                origin_path = NULL;
1026        }
1027
1028        /*
1029         * Always lookup index if there is no-upperdentry.
1030         *
1031         * For the case of upperdentry, we have set origin by now if it
1032         * needed to be set. There are basically three cases.
1033         *
1034         * For directories, lookup index by lower inode and verify it matches
1035         * upper inode. We only trust dir index if we verified that lower dir
1036         * matches origin, otherwise dir index entries may be inconsistent
1037         * and we ignore them.
1038         *
1039         * For regular upper, we already set origin if upper had ORIGIN
1040         * xattr. There is no verification though as there is no path
1041         * based dentry lookup in lower in this case.
1042         *
1043         * For metacopy upper, we set a verified origin already if index
1044         * is enabled and if upper had an ORIGIN xattr.
1045         *
1046         */
1047        if (!upperdentry && ctr)
1048                origin = stack[0].dentry;
1049
1050        if (origin && ovl_indexdir(dentry->d_sb) &&
1051            (!d.is_dir || ovl_index_all(dentry->d_sb))) {
1052                index = ovl_lookup_index(ofs, upperdentry, origin, true);
1053                if (IS_ERR(index)) {
1054                        err = PTR_ERR(index);
1055                        index = NULL;
1056                        goto out_put;
1057                }
1058        }
1059
1060        oe = ovl_alloc_entry(ctr);
1061        err = -ENOMEM;
1062        if (!oe)
1063                goto out_put;
1064
1065        memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
1066        dentry->d_fsdata = oe;
1067
1068        if (upperopaque)
1069                ovl_dentry_set_opaque(dentry);
1070
1071        if (upperdentry)
1072                ovl_dentry_set_upper_alias(dentry);
1073        else if (index) {
1074                upperdentry = dget(index);
1075                upperredirect = ovl_get_redirect_xattr(ofs, upperdentry, 0);
1076                if (IS_ERR(upperredirect)) {
1077                        err = PTR_ERR(upperredirect);
1078                        upperredirect = NULL;
1079                        goto out_free_oe;
1080                }
1081                err = ovl_check_metacopy_xattr(ofs, upperdentry);
1082                if (err < 0)
1083                        goto out_free_oe;
1084                uppermetacopy = err;
1085        }
1086
1087        if (upperdentry || ctr) {
1088                struct ovl_inode_params oip = {
1089                        .upperdentry = upperdentry,
1090                        .lowerpath = stack,
1091                        .index = index,
1092                        .numlower = ctr,
1093                        .redirect = upperredirect,
1094                        .lowerdata = (ctr > 1 && !d.is_dir) ?
1095                                      stack[ctr - 1].dentry : NULL,
1096                };
1097
1098                inode = ovl_get_inode(dentry->d_sb, &oip);
1099                err = PTR_ERR(inode);
1100                if (IS_ERR(inode))
1101                        goto out_free_oe;
1102                if (upperdentry && !uppermetacopy)
1103                        ovl_set_flag(OVL_UPPERDATA, inode);
1104        }
1105
1106        ovl_dentry_update_reval(dentry, upperdentry,
1107                        DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
1108
1109        revert_creds(old_cred);
1110        if (origin_path) {
1111                dput(origin_path->dentry);
1112                kfree(origin_path);
1113        }
1114        dput(index);
1115        kfree(stack);
1116        kfree(d.redirect);
1117        return d_splice_alias(inode, dentry);
1118
1119out_free_oe:
1120        dentry->d_fsdata = NULL;
1121        kfree(oe);
1122out_put:
1123        dput(index);
1124        for (i = 0; i < ctr; i++)
1125                dput(stack[i].dentry);
1126        kfree(stack);
1127out_put_upper:
1128        if (origin_path) {
1129                dput(origin_path->dentry);
1130                kfree(origin_path);
1131        }
1132        dput(upperdentry);
1133        kfree(upperredirect);
1134out:
1135        kfree(d.redirect);
1136        revert_creds(old_cred);
1137        return ERR_PTR(err);
1138}
1139
1140bool ovl_lower_positive(struct dentry *dentry)
1141{
1142        struct ovl_entry *poe = dentry->d_parent->d_fsdata;
1143        const struct qstr *name = &dentry->d_name;
1144        const struct cred *old_cred;
1145        unsigned int i;
1146        bool positive = false;
1147        bool done = false;
1148
1149        /*
1150         * If dentry is negative, then lower is positive iff this is a
1151         * whiteout.
1152         */
1153        if (!dentry->d_inode)
1154                return ovl_dentry_is_opaque(dentry);
1155
1156        /* Negative upper -> positive lower */
1157        if (!ovl_dentry_upper(dentry))
1158                return true;
1159
1160        old_cred = ovl_override_creds(dentry->d_sb);
1161        /* Positive upper -> have to look up lower to see whether it exists */
1162        for (i = 0; !done && !positive && i < poe->numlower; i++) {
1163                struct dentry *this;
1164                struct dentry *lowerdir = poe->lowerstack[i].dentry;
1165
1166                this = lookup_positive_unlocked(name->name, lowerdir,
1167                                               name->len);
1168                if (IS_ERR(this)) {
1169                        switch (PTR_ERR(this)) {
1170                        case -ENOENT:
1171                        case -ENAMETOOLONG:
1172                                break;
1173
1174                        default:
1175                                /*
1176                                 * Assume something is there, we just couldn't
1177                                 * access it.
1178                                 */
1179                                positive = true;
1180                                break;
1181                        }
1182                } else {
1183                        positive = !ovl_is_whiteout(this);
1184                        done = true;
1185                        dput(this);
1186                }
1187        }
1188        revert_creds(old_cred);
1189
1190        return positive;
1191}
1192