linux/fs/overlayfs/namei.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2011 Novell Inc.
   4 * Copyright (C) 2016 Red Hat, Inc.
   5 */
   6
   7#include <linux/fs.h>
   8#include <linux/cred.h>
   9#include <linux/ctype.h>
  10#include <linux/namei.h>
  11#include <linux/xattr.h>
  12#include <linux/ratelimit.h>
  13#include <linux/mount.h>
  14#include <linux/exportfs.h>
  15#include "overlayfs.h"
  16
/*
 * State shared by the per-layer lookups of one name: ovl_lookup() fills it
 * in and ovl_lookup_layer()/ovl_lookup_single() update it as they go.
 */
struct ovl_lookup_data {
        /* Overlay super block; handed to ovl_lookup_trap_inode() */
        struct super_block *sb;
        /* Name to look up; repointed at @redirect once a redirect is found */
        struct qstr name;
        /* Set when the last path element found was a directory */
        bool is_dir;
        /* Set on a whiteout, or on an opaque directory as the last element */
        bool opaque;
        /* Set when the lookup must not continue in further layers */
        bool stop;
        /*
         * Set by ovl_lookup() for each layer; when true, ovl_lookup_single()
         * skips the opaque and redirect checks.
         */
        bool last;
        /* Buffer returned by ovl_get_redirect_xattr(), or NULL; kfree()d on replace */
        char *redirect;
        /* Result of ovl_check_metacopy_xattr() for the last non-dir found */
        bool metacopy;
};
  27
  28static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
  29                              size_t prelen, const char *post)
  30{
  31        int res;
  32        char *buf;
  33
  34        buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post));
  35        if (IS_ERR_OR_NULL(buf))
  36                return PTR_ERR(buf);
  37
  38        if (buf[0] == '/') {
  39                /*
  40                 * One of the ancestor path elements in an absolute path
  41                 * lookup in ovl_lookup_layer() could have been opaque and
  42                 * that will stop further lookup in lower layers (d->stop=true)
  43                 * But we have found an absolute redirect in decendant path
  44                 * element and that should force continue lookup in lower
  45                 * layers (reset d->stop).
  46                 */
  47                d->stop = false;
  48        } else {
  49                res = strlen(buf) + 1;
  50                memmove(buf + prelen, buf, res);
  51                memcpy(buf, d->name.name, prelen);
  52        }
  53
  54        strcat(buf, post);
  55        kfree(d->redirect);
  56        d->redirect = buf;
  57        d->name.name = d->redirect;
  58        d->name.len = strlen(d->redirect);
  59
  60        return 0;
  61}
  62
  63static int ovl_acceptable(void *ctx, struct dentry *dentry)
  64{
  65        /*
  66         * A non-dir origin may be disconnected, which is fine, because
  67         * we only need it for its unique inode number.
  68         */
  69        if (!d_is_dir(dentry))
  70                return 1;
  71
  72        /* Don't decode a deleted empty directory */
  73        if (d_unhashed(dentry))
  74                return 0;
  75
  76        /* Check if directory belongs to the layer we are decoding from */
  77        return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
  78}
  79
  80/*
  81 * Check validity of an overlay file handle buffer.
  82 *
  83 * Return 0 for a valid file handle.
  84 * Return -ENODATA for "origin unknown".
  85 * Return <0 for an invalid file handle.
  86 */
  87int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
  88{
  89        if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
  90                return -EINVAL;
  91
  92        if (fb->magic != OVL_FH_MAGIC)
  93                return -EINVAL;
  94
  95        /* Treat larger version and unknown flags as "origin unknown" */
  96        if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
  97                return -ENODATA;
  98
  99        /* Treat endianness mismatch as "origin unknown" */
 100        if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
 101            (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
 102                return -ENODATA;
 103
 104        return 0;
 105}
 106
 107static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
 108{
 109        int res, err;
 110        struct ovl_fh *fh = NULL;
 111
 112        res = vfs_getxattr(dentry, name, NULL, 0);
 113        if (res < 0) {
 114                if (res == -ENODATA || res == -EOPNOTSUPP)
 115                        return NULL;
 116                goto fail;
 117        }
 118        /* Zero size value means "copied up but origin unknown" */
 119        if (res == 0)
 120                return NULL;
 121
 122        fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
 123        if (!fh)
 124                return ERR_PTR(-ENOMEM);
 125
 126        res = vfs_getxattr(dentry, name, fh->buf, res);
 127        if (res < 0)
 128                goto fail;
 129
 130        err = ovl_check_fb_len(&fh->fb, res);
 131        if (err < 0) {
 132                if (err == -ENODATA)
 133                        goto out;
 134                goto invalid;
 135        }
 136
 137        return fh;
 138
 139out:
 140        kfree(fh);
 141        return NULL;
 142
 143fail:
 144        pr_warn_ratelimited("failed to get origin (%i)\n", res);
 145        goto out;
 146invalid:
 147        pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
 148        goto out;
 149}
 150
 151struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
 152                                  bool connected)
 153{
 154        struct dentry *real;
 155        int bytes;
 156
 157        /*
 158         * Make sure that the stored uuid matches the uuid of the lower
 159         * layer where file handle will be decoded.
 160         */
 161        if (!uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid))
 162                return NULL;
 163
 164        bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
 165        real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
 166                                  bytes >> 2, (int)fh->fb.type,
 167                                  connected ? ovl_acceptable : NULL, mnt);
 168        if (IS_ERR(real)) {
 169                /*
 170                 * Treat stale file handle to lower file as "origin unknown".
 171                 * upper file handle could become stale when upper file is
 172                 * unlinked and this information is needed to handle stale
 173                 * index entries correctly.
 174                 */
 175                if (real == ERR_PTR(-ESTALE) &&
 176                    !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
 177                        real = NULL;
 178                return real;
 179        }
 180
 181        if (ovl_dentry_weird(real)) {
 182                dput(real);
 183                return NULL;
 184        }
 185
 186        return real;
 187}
 188
 189static bool ovl_is_opaquedir(struct dentry *dentry)
 190{
 191        return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE);
 192}
 193
/*
 * Look up one path element @name (of length @namelen) under @base and update
 * the lookup state @d according to what was found.
 *
 * @prelen and @post are passed on to ovl_check_redirect(); an empty @post
 * marks @name as the last element of the path.
 *
 * Return 0 with *@ret set to the found dentry, or to NULL when nothing usable
 * was found (no such entry, name too long, whiteout, or an entry of a type
 * that ends the lookup).
 * Return a negative errno on failure; *@ret is not written in that case.
 */
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
                             const char *name, unsigned int namelen,
                             size_t prelen, const char *post,
                             struct dentry **ret)
{
        struct dentry *this;
        int err;
        bool last_element = !post[0];

        this = lookup_positive_unlocked(name, base, namelen);
        if (IS_ERR(this)) {
                err = PTR_ERR(this);
                this = NULL;
                /* Not found in this layer: report success with a NULL dentry */
                if (err == -ENOENT || err == -ENAMETOOLONG)
                        goto out;
                goto out_err;
        }

        if (ovl_dentry_weird(this)) {
                /* Don't support traversing automounts and other weirdness */
                err = -EREMOTE;
                goto out_err;
        }
        if (ovl_is_whiteout(this)) {
                /* A whiteout hides everything below and ends the lookup */
                d->stop = d->opaque = true;
                goto put_and_out;
        }
        /*
         * This dentry should be a regular file if previous layer lookup
         * found a metacopy dentry.
         */
        if (last_element && d->metacopy && !d_is_reg(this)) {
                d->stop = true;
                goto put_and_out;
        }
        if (!d_can_lookup(this)) {
                /*
                 * Not a directory: unusable if an upper layer already found
                 * a directory or if more path elements follow.
                 */
                if (d->is_dir || !last_element) {
                        d->stop = true;
                        goto put_and_out;
                }
                err = ovl_check_metacopy_xattr(this);
                if (err < 0)
                        goto out_err;

                /* Non-negative result is the metacopy flag; stop unless set */
                d->metacopy = err;
                d->stop = !d->metacopy;
                if (!d->metacopy || d->last)
                        goto out;
        } else {
                if (ovl_lookup_trap_inode(d->sb, this)) {
                        /* Caught in a trap of overlapping layers */
                        err = -ELOOP;
                        goto out_err;
                }

                if (last_element)
                        d->is_dir = true;
                /* d->last skips the opaque and redirect checks below */
                if (d->last)
                        goto out;

                if (ovl_is_opaquedir(this)) {
                        d->stop = true;
                        if (last_element)
                                d->opaque = true;
                        goto out;
                }
        }
        /* May replace d->name with the redirect found on this dentry */
        err = ovl_check_redirect(this, d, prelen, post);
        if (err)
                goto out_err;
out:
        *ret = this;
        return 0;

put_and_out:
        /* Drop the dentry and report "nothing found" */
        dput(this);
        this = NULL;
        goto out;

out_err:
        /* this was set to NULL above when the lookup itself failed */
        dput(this);
        return err;
}
 277
 278static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
 279                            struct dentry **ret)
 280{
 281        /* Counting down from the end, since the prefix can change */
 282        size_t rem = d->name.len - 1;
 283        struct dentry *dentry = NULL;
 284        int err;
 285
 286        if (d->name.name[0] != '/')
 287                return ovl_lookup_single(base, d, d->name.name, d->name.len,
 288                                         0, "", ret);
 289
 290        while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
 291                const char *s = d->name.name + d->name.len - rem;
 292                const char *next = strchrnul(s, '/');
 293                size_t thislen = next - s;
 294                bool end = !next[0];
 295
 296                /* Verify we did not go off the rails */
 297                if (WARN_ON(s[-1] != '/'))
 298                        return -EIO;
 299
 300                err = ovl_lookup_single(base, d, s, thislen,
 301                                        d->name.len - rem, next, &base);
 302                dput(dentry);
 303                if (err)
 304                        return err;
 305                dentry = base;
 306                if (end)
 307                        break;
 308
 309                rem -= thislen + 1;
 310
 311                if (WARN_ON(rem >= d->name.len))
 312                        return -EIO;
 313        }
 314        *ret = dentry;
 315        return 0;
 316}
 317
 318
 319int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
 320                        struct dentry *upperdentry, struct ovl_path **stackp)
 321{
 322        struct dentry *origin = NULL;
 323        int i;
 324
 325        for (i = 1; i < ofs->numlayer; i++) {
 326                /*
 327                 * If lower fs uuid is not unique among lower fs we cannot match
 328                 * fh->uuid to layer.
 329                 */
 330                if (ofs->layers[i].fsid &&
 331                    ofs->layers[i].fs->bad_uuid)
 332                        continue;
 333
 334                origin = ovl_decode_real_fh(fh, ofs->layers[i].mnt,
 335                                            connected);
 336                if (origin)
 337                        break;
 338        }
 339
 340        if (!origin)
 341                return -ESTALE;
 342        else if (IS_ERR(origin))
 343                return PTR_ERR(origin);
 344
 345        if (upperdentry && !ovl_is_whiteout(upperdentry) &&
 346            ((d_inode(origin)->i_mode ^ d_inode(upperdentry)->i_mode) & S_IFMT))
 347                goto invalid;
 348
 349        if (!*stackp)
 350                *stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
 351        if (!*stackp) {
 352                dput(origin);
 353                return -ENOMEM;
 354        }
 355        **stackp = (struct ovl_path){
 356                .dentry = origin,
 357                .layer = &ofs->layers[i]
 358        };
 359
 360        return 0;
 361
 362invalid:
 363        pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
 364                            upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
 365                            d_inode(origin)->i_mode & S_IFMT);
 366        dput(origin);
 367        return -EIO;
 368}
 369
 370static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
 371                            struct ovl_path **stackp, unsigned int *ctrp)
 372{
 373        struct ovl_fh *fh = ovl_get_fh(upperdentry, OVL_XATTR_ORIGIN);
 374        int err;
 375
 376        if (IS_ERR_OR_NULL(fh))
 377                return PTR_ERR(fh);
 378
 379        err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
 380        kfree(fh);
 381
 382        if (err) {
 383                if (err == -ESTALE)
 384                        return 0;
 385                return err;
 386        }
 387
 388        if (WARN_ON(*ctrp))
 389                return -EIO;
 390
 391        *ctrp = 1;
 392        return 0;
 393}
 394
 395/*
 396 * Verify that @fh matches the file handle stored in xattr @name.
 397 * Return 0 on match, -ESTALE on mismatch, < 0 on error.
 398 */
 399static int ovl_verify_fh(struct dentry *dentry, const char *name,
 400                         const struct ovl_fh *fh)
 401{
 402        struct ovl_fh *ofh = ovl_get_fh(dentry, name);
 403        int err = 0;
 404
 405        if (!ofh)
 406                return -ENODATA;
 407
 408        if (IS_ERR(ofh))
 409                return PTR_ERR(ofh);
 410
 411        if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
 412                err = -ESTALE;
 413
 414        kfree(ofh);
 415        return err;
 416}
 417
 418/*
 419 * Verify that @real dentry matches the file handle stored in xattr @name.
 420 *
 421 * If @set is true and there is no stored file handle, encode @real and store
 422 * file handle in xattr @name.
 423 *
 424 * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
 425 */
 426int ovl_verify_set_fh(struct dentry *dentry, const char *name,
 427                      struct dentry *real, bool is_upper, bool set)
 428{
 429        struct inode *inode;
 430        struct ovl_fh *fh;
 431        int err;
 432
 433        fh = ovl_encode_real_fh(real, is_upper);
 434        err = PTR_ERR(fh);
 435        if (IS_ERR(fh)) {
 436                fh = NULL;
 437                goto fail;
 438        }
 439
 440        err = ovl_verify_fh(dentry, name, fh);
 441        if (set && err == -ENODATA)
 442                err = ovl_do_setxattr(dentry, name, fh->buf, fh->fb.len, 0);
 443        if (err)
 444                goto fail;
 445
 446out:
 447        kfree(fh);
 448        return err;
 449
 450fail:
 451        inode = d_inode(real);
 452        pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
 453                            is_upper ? "upper" : "origin", real,
 454                            inode ? inode->i_ino : 0, err);
 455        goto out;
 456}
 457
 458/* Get upper dentry from index */
 459struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
 460{
 461        struct ovl_fh *fh;
 462        struct dentry *upper;
 463
 464        if (!d_is_dir(index))
 465                return dget(index);
 466
 467        fh = ovl_get_fh(index, OVL_XATTR_UPPER);
 468        if (IS_ERR_OR_NULL(fh))
 469                return ERR_CAST(fh);
 470
 471        upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
 472        kfree(fh);
 473
 474        if (IS_ERR_OR_NULL(upper))
 475                return upper ?: ERR_PTR(-ESTALE);
 476
 477        if (!d_is_dir(upper)) {
 478                pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
 479                                    index, upper);
 480                dput(upper);
 481                return ERR_PTR(-EIO);
 482        }
 483
 484        return upper;
 485}
 486
 487/* Is this a leftover from create/whiteout of directory index entry? */
 488static bool ovl_is_temp_index(struct dentry *index)
 489{
 490        return index->d_name.name[0] == '#';
 491}
 492
/*
 * Verify that an index entry name matches the origin file handle stored in
 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
 *
 * The index entry name is the hex encoding of a file handle; it is decoded
 * here and compared against the xattr of the upper dentry that the index
 * refers to.
 *
 * Return 0 on match (and for a negative @index), -ESTALE on mismatch or
 * stale origin, -ENOENT for an orphan index entry, < 0 on error.
 */
int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
{
        struct ovl_fh *fh = NULL;
        size_t len;
        struct ovl_path origin = { };
        /* Point at the on-stack entry so ovl_check_origin_fh() won't allocate */
        struct ovl_path *stack = &origin;
        struct dentry *upper = NULL;
        int err;

        if (!d_inode(index))
                return 0;

        /* Cleanup leftover from index create/cleanup attempt */
        err = -ESTALE;
        if (ovl_is_temp_index(index))
                goto fail;

        /* Name must be long enough to hex-encode at least a struct ovl_fb */
        err = -EINVAL;
        if (index->d_name.len < sizeof(struct ovl_fb)*2)
                goto fail;

        /* Two hex digits of the name encode one byte of file handle */
        err = -ENOMEM;
        len = index->d_name.len / 2;
        fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
        if (!fh)
                goto fail;

        err = -EINVAL;
        if (hex2bin(fh->buf, index->d_name.name, len))
                goto fail;

        /* err is 0 from here on unless a later step sets it */
        err = ovl_check_fb_len(&fh->fb, len);
        if (err)
                goto fail;

        /*
         * Whiteout index entries are used as an indication that an exported
         * overlay file handle should be treated as stale (i.e. after unlink
         * of the overlay inode). These entries contain no origin xattr.
         */
        if (ovl_is_whiteout(index))
                goto out;

        /*
         * Verifying directory index entries are not stale is expensive, so
         * only verify stale dir index if NFS export is enabled.
         */
        if (d_is_dir(index) && !ofs->config.nfs_export)
                goto out;

        /*
         * Directory index entries should have 'upper' xattr pointing to the
         * real upper dir. Non-dir index entries are hardlinks to the upper
         * real inode. For non-dir index, we can read the copy up origin xattr
         * directly from the index dentry, but for dir index we first need to
         * decode the upper directory.
         */
        upper = ovl_index_upper(ofs, index);
        if (IS_ERR_OR_NULL(upper)) {
                /* PTR_ERR(NULL) is 0, i.e. "no 'upper' xattr" */
                err = PTR_ERR(upper);
                /*
                 * Directory index entries with no 'upper' xattr need to be
                 * removed. When dir index entry has a stale 'upper' xattr,
                 * we assume that upper dir was removed and we treat the dir
                 * index as orphan entry that needs to be whited out.
                 */
                if (err == -ESTALE)
                        goto orphan;
                else if (!err)
                        err = -ESTALE;
                goto fail;
        }

        /* The handle decoded from the name must equal the stored origin */
        err = ovl_verify_fh(upper, OVL_XATTR_ORIGIN, fh);
        dput(upper);
        if (err)
                goto fail;

        /* Check if non-dir index is orphan and don't warn before cleaning it */
        if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
                /* On success origin.dentry holds a reference, dropped at out */
                err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
                if (err)
                        goto fail;

                if (ovl_get_nlink(origin.dentry, index, 0) == 0)
                        goto orphan;
        }

out:
        /* origin.dentry is still NULL unless it was set just above */
        dput(origin.dentry);
        kfree(fh);
        return err;

fail:
        pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
                            index, d_inode(index)->i_mode & S_IFMT, err);
        goto out;

orphan:
        pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
                            index, d_inode(index)->i_mode & S_IFMT,
                            d_inode(index)->i_nlink);
        err = -ENOENT;
        goto out;
}
 603
 604static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
 605{
 606        char *n, *s;
 607
 608        n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
 609        if (!n)
 610                return -ENOMEM;
 611
 612        s  = bin2hex(n, fh->buf, fh->fb.len);
 613        *name = (struct qstr) QSTR_INIT(n, s - n);
 614
 615        return 0;
 616
 617}
 618
 619/*
 620 * Lookup in indexdir for the index entry of a lower real inode or a copy up
 621 * origin inode. The index entry name is the hex representation of the lower
 622 * inode file handle.
 623 *
 624 * If the index dentry in negative, then either no lower aliases have been
 625 * copied up yet, or aliases have been copied up in older kernels and are
 626 * not indexed.
 627 *
 628 * If the index dentry for a copy up origin inode is positive, but points
 629 * to an inode different than the upper inode, then either the upper inode
 630 * has been copied up and not indexed or it was indexed, but since then
 631 * index dir was cleared. Either way, that index cannot be used to indentify
 632 * the overlay inode.
 633 */
 634int ovl_get_index_name(struct dentry *origin, struct qstr *name)
 635{
 636        struct ovl_fh *fh;
 637        int err;
 638
 639        fh = ovl_encode_real_fh(origin, false);
 640        if (IS_ERR(fh))
 641                return PTR_ERR(fh);
 642
 643        err = ovl_get_index_name_fh(fh, name);
 644
 645        kfree(fh);
 646        return err;
 647}
 648
 649/* Lookup index by file handle for NFS export */
 650struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
 651{
 652        struct dentry *index;
 653        struct qstr name;
 654        int err;
 655
 656        err = ovl_get_index_name_fh(fh, &name);
 657        if (err)
 658                return ERR_PTR(err);
 659
 660        index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
 661        kfree(name.name);
 662        if (IS_ERR(index)) {
 663                if (PTR_ERR(index) == -ENOENT)
 664                        index = NULL;
 665                return index;
 666        }
 667
 668        if (ovl_is_whiteout(index))
 669                err = -ESTALE;
 670        else if (ovl_dentry_weird(index))
 671                err = -EIO;
 672        else
 673                return index;
 674
 675        dput(index);
 676        return ERR_PTR(err);
 677}
 678
 679struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
 680                                struct dentry *origin, bool verify)
 681{
 682        struct dentry *index;
 683        struct inode *inode;
 684        struct qstr name;
 685        bool is_dir = d_is_dir(origin);
 686        int err;
 687
 688        err = ovl_get_index_name(origin, &name);
 689        if (err)
 690                return ERR_PTR(err);
 691
 692        index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
 693        if (IS_ERR(index)) {
 694                err = PTR_ERR(index);
 695                if (err == -ENOENT) {
 696                        index = NULL;
 697                        goto out;
 698                }
 699                pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
 700                                    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
 701                                    d_inode(origin)->i_ino, name.len, name.name,
 702                                    err);
 703                goto out;
 704        }
 705
 706        inode = d_inode(index);
 707        if (ovl_is_whiteout(index) && !verify) {
 708                /*
 709                 * When index lookup is called with !verify for decoding an
 710                 * overlay file handle, a whiteout index implies that decode
 711                 * should treat file handle as stale and no need to print a
 712                 * warning about it.
 713                 */
 714                dput(index);
 715                index = ERR_PTR(-ESTALE);
 716                goto out;
 717        } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
 718                   ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) {
 719                /*
 720                 * Index should always be of the same file type as origin
 721                 * except for the case of a whiteout index. A whiteout
 722                 * index should only exist if all lower aliases have been
 723                 * unlinked, which means that finding a lower origin on lookup
 724                 * whose index is a whiteout should be treated as an error.
 725                 */
 726                pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
 727                                    index, d_inode(index)->i_mode & S_IFMT,
 728                                    d_inode(origin)->i_mode & S_IFMT);
 729                goto fail;
 730        } else if (is_dir && verify) {
 731                if (!upper) {
 732                        pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
 733                                            origin, index);
 734                        goto fail;
 735                }
 736
 737                /* Verify that dir index 'upper' xattr points to upper dir */
 738                err = ovl_verify_upper(index, upper, false);
 739                if (err) {
 740                        if (err == -ESTALE) {
 741                                pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
 742                                                    upper, origin, index);
 743                        }
 744                        goto fail;
 745                }
 746        } else if (upper && d_inode(upper) != inode) {
 747                goto out_dput;
 748        }
 749out:
 750        kfree(name.name);
 751        return index;
 752
 753out_dput:
 754        dput(index);
 755        index = NULL;
 756        goto out;
 757
 758fail:
 759        dput(index);
 760        index = ERR_PTR(-EIO);
 761        goto out;
 762}
 763
 764/*
 765 * Returns next layer in stack starting from top.
 766 * Returns -1 if this is the last layer.
 767 */
 768int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
 769{
 770        struct ovl_entry *oe = dentry->d_fsdata;
 771
 772        BUG_ON(idx < 0);
 773        if (idx == 0) {
 774                ovl_path_upper(dentry, path);
 775                if (path->dentry)
 776                        return oe->numlower ? 1 : -1;
 777                idx++;
 778        }
 779        BUG_ON(idx > oe->numlower);
 780        path->dentry = oe->lowerstack[idx - 1].dentry;
 781        path->mnt = oe->lowerstack[idx - 1].layer->mnt;
 782
 783        return (idx < oe->numlower) ? idx + 1 : -1;
 784}
 785
 786/* Fix missing 'origin' xattr */
 787static int ovl_fix_origin(struct dentry *dentry, struct dentry *lower,
 788                          struct dentry *upper)
 789{
 790        int err;
 791
 792        if (ovl_check_origin_xattr(upper))
 793                return 0;
 794
 795        err = ovl_want_write(dentry);
 796        if (err)
 797                return err;
 798
 799        err = ovl_set_origin(dentry, lower, upper);
 800        if (!err)
 801                err = ovl_set_impure(dentry->d_parent, upper->d_parent);
 802
 803        ovl_drop_write(dentry);
 804        return err;
 805}
 806
 807struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 808                          unsigned int flags)
 809{
 810        struct ovl_entry *oe;
 811        const struct cred *old_cred;
 812        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 813        struct ovl_entry *poe = dentry->d_parent->d_fsdata;
 814        struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
 815        struct ovl_path *stack = NULL, *origin_path = NULL;
 816        struct dentry *upperdir, *upperdentry = NULL;
 817        struct dentry *origin = NULL;
 818        struct dentry *index = NULL;
 819        unsigned int ctr = 0;
 820        struct inode *inode = NULL;
 821        bool upperopaque = false;
 822        char *upperredirect = NULL;
 823        struct dentry *this;
 824        unsigned int i;
 825        int err;
 826        bool metacopy = false;
 827        struct ovl_lookup_data d = {
 828                .sb = dentry->d_sb,
 829                .name = dentry->d_name,
 830                .is_dir = false,
 831                .opaque = false,
 832                .stop = false,
 833                .last = ofs->config.redirect_follow ? false : !poe->numlower,
 834                .redirect = NULL,
 835                .metacopy = false,
 836        };
 837
 838        if (dentry->d_name.len > ofs->namelen)
 839                return ERR_PTR(-ENAMETOOLONG);
 840
 841        old_cred = ovl_override_creds(dentry->d_sb);
 842        upperdir = ovl_dentry_upper(dentry->d_parent);
 843        if (upperdir) {
 844                err = ovl_lookup_layer(upperdir, &d, &upperdentry);
 845                if (err)
 846                        goto out;
 847
 848                if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) {
 849                        dput(upperdentry);
 850                        err = -EREMOTE;
 851                        goto out;
 852                }
 853                if (upperdentry && !d.is_dir) {
 854                        unsigned int origin_ctr = 0;
 855
 856                        /*
 857                         * Lookup copy up origin by decoding origin file handle.
 858                         * We may get a disconnected dentry, which is fine,
 859                         * because we only need to hold the origin inode in
 860                         * cache and use its inode number.  We may even get a
 861                         * connected dentry, that is not under any of the lower
 862                         * layers root.  That is also fine for using it's inode
 863                         * number - it's the same as if we held a reference
 864                         * to a dentry in lower layer that was moved under us.
 865                         */
 866                        err = ovl_check_origin(ofs, upperdentry, &origin_path,
 867                                               &origin_ctr);
 868                        if (err)
 869                                goto out_put_upper;
 870
 871                        if (d.metacopy)
 872                                metacopy = true;
 873                }
 874
 875                if (d.redirect) {
 876                        err = -ENOMEM;
 877                        upperredirect = kstrdup(d.redirect, GFP_KERNEL);
 878                        if (!upperredirect)
 879                                goto out_put_upper;
 880                        if (d.redirect[0] == '/')
 881                                poe = roe;
 882                }
 883                upperopaque = d.opaque;
 884        }
 885
 886        if (!d.stop && poe->numlower) {
 887                err = -ENOMEM;
 888                stack = kcalloc(ofs->numlayer - 1, sizeof(struct ovl_path),
 889                                GFP_KERNEL);
 890                if (!stack)
 891                        goto out_put_upper;
 892        }
 893
 894        for (i = 0; !d.stop && i < poe->numlower; i++) {
 895                struct ovl_path lower = poe->lowerstack[i];
 896
 897                if (!ofs->config.redirect_follow)
 898                        d.last = i == poe->numlower - 1;
 899                else
 900                        d.last = lower.layer->idx == roe->numlower;
 901
 902                err = ovl_lookup_layer(lower.dentry, &d, &this);
 903                if (err)
 904                        goto out_put;
 905
 906                if (!this)
 907                        continue;
 908
 909                /*
 910                 * If no origin fh is stored in upper of a merge dir, store fh
 911                 * of lower dir and set upper parent "impure".
 912                 */
 913                if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
 914                        err = ovl_fix_origin(dentry, this, upperdentry);
 915                        if (err) {
 916                                dput(this);
 917                                goto out_put;
 918                        }
 919                }
 920
 921                /*
 922                 * When "verify_lower" feature is enabled, do not merge with a
 923                 * lower dir that does not match a stored origin xattr. In any
 924                 * case, only verified origin is used for index lookup.
 925                 *
 926                 * For non-dir dentry, if index=on, then ensure origin
 927                 * matches the dentry found using path based lookup,
 928                 * otherwise error out.
 929                 */
 930                if (upperdentry && !ctr &&
 931                    ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
 932                     (!d.is_dir && ofs->config.index && origin_path))) {
 933                        err = ovl_verify_origin(upperdentry, this, false);
 934                        if (err) {
 935                                dput(this);
 936                                if (d.is_dir)
 937                                        break;
 938                                goto out_put;
 939                        }
 940                        origin = this;
 941                }
 942
 943                if (d.metacopy)
 944                        metacopy = true;
 945                /*
 946                 * Do not store intermediate metacopy dentries in chain,
 947                 * except top most lower metacopy dentry
 948                 */
 949                if (d.metacopy && ctr) {
 950                        dput(this);
 951                        continue;
 952                }
 953
 954                stack[ctr].dentry = this;
 955                stack[ctr].layer = lower.layer;
 956                ctr++;
 957
 958                /*
 959                 * Following redirects can have security consequences: it's like
 960                 * a symlink into the lower layer without the permission checks.
 961                 * This is only a problem if the upper layer is untrusted (e.g
 962                 * comes from an USB drive).  This can allow a non-readable file
 963                 * or directory to become readable.
 964                 *
 965                 * Only following redirects when redirects are enabled disables
 966                 * this attack vector when not necessary.
 967                 */
 968                err = -EPERM;
 969                if (d.redirect && !ofs->config.redirect_follow) {
 970                        pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
 971                                            dentry);
 972                        goto out_put;
 973                }
 974
 975                if (d.stop)
 976                        break;
 977
 978                if (d.redirect && d.redirect[0] == '/' && poe != roe) {
 979                        poe = roe;
 980                        /* Find the current layer on the root dentry */
 981                        i = lower.layer->idx - 1;
 982                }
 983        }
 984
 985        if (metacopy) {
 986                /*
 987                 * Found a metacopy dentry but did not find corresponding
 988                 * data dentry
 989                 */
 990                if (d.metacopy) {
 991                        err = -EIO;
 992                        goto out_put;
 993                }
 994
 995                err = -EPERM;
 996                if (!ofs->config.metacopy) {
 997                        pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n",
 998                                            dentry);
 999                        goto out_put;
1000                }
1001        } else if (!d.is_dir && upperdentry && !ctr && origin_path) {
1002                if (WARN_ON(stack != NULL)) {
1003                        err = -EIO;
1004                        goto out_put;
1005                }
1006                stack = origin_path;
1007                ctr = 1;
1008                origin_path = NULL;
1009        }
1010
1011        /*
1012         * Lookup index by lower inode and verify it matches upper inode.
1013         * We only trust dir index if we verified that lower dir matches
1014         * origin, otherwise dir index entries may be inconsistent and we
1015         * ignore them.
1016         *
1017         * For non-dir upper metacopy dentry, we already set "origin" if we
1018         * verified that lower matched upper origin. If upper origin was
1019         * not present (because lower layer did not support fh encode/decode),
1020         * or indexing is not enabled, do not set "origin" and skip looking up
1021         * index. This case should be handled in same way as a non-dir upper
1022         * without ORIGIN is handled.
1023         *
1024         * Always lookup index of non-dir non-metacopy and non-upper.
1025         */
1026        if (ctr && (!upperdentry || (!d.is_dir && !metacopy)))
1027                origin = stack[0].dentry;
1028
1029        if (origin && ovl_indexdir(dentry->d_sb) &&
1030            (!d.is_dir || ovl_index_all(dentry->d_sb))) {
1031                index = ovl_lookup_index(ofs, upperdentry, origin, true);
1032                if (IS_ERR(index)) {
1033                        err = PTR_ERR(index);
1034                        index = NULL;
1035                        goto out_put;
1036                }
1037        }
1038
1039        oe = ovl_alloc_entry(ctr);
1040        err = -ENOMEM;
1041        if (!oe)
1042                goto out_put;
1043
1044        memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
1045        dentry->d_fsdata = oe;
1046
1047        if (upperopaque)
1048                ovl_dentry_set_opaque(dentry);
1049
1050        if (upperdentry)
1051                ovl_dentry_set_upper_alias(dentry);
1052        else if (index) {
1053                upperdentry = dget(index);
1054                upperredirect = ovl_get_redirect_xattr(upperdentry, 0);
1055                if (IS_ERR(upperredirect)) {
1056                        err = PTR_ERR(upperredirect);
1057                        upperredirect = NULL;
1058                        goto out_free_oe;
1059                }
1060        }
1061
1062        if (upperdentry || ctr) {
1063                struct ovl_inode_params oip = {
1064                        .upperdentry = upperdentry,
1065                        .lowerpath = stack,
1066                        .index = index,
1067                        .numlower = ctr,
1068                        .redirect = upperredirect,
1069                        .lowerdata = (ctr > 1 && !d.is_dir) ?
1070                                      stack[ctr - 1].dentry : NULL,
1071                };
1072
1073                inode = ovl_get_inode(dentry->d_sb, &oip);
1074                err = PTR_ERR(inode);
1075                if (IS_ERR(inode))
1076                        goto out_free_oe;
1077        }
1078
1079        ovl_dentry_update_reval(dentry, upperdentry,
1080                        DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
1081
1082        revert_creds(old_cred);
1083        if (origin_path) {
1084                dput(origin_path->dentry);
1085                kfree(origin_path);
1086        }
1087        dput(index);
1088        kfree(stack);
1089        kfree(d.redirect);
1090        return d_splice_alias(inode, dentry);
1091
1092out_free_oe:
1093        dentry->d_fsdata = NULL;
1094        kfree(oe);
1095out_put:
1096        dput(index);
1097        for (i = 0; i < ctr; i++)
1098                dput(stack[i].dentry);
1099        kfree(stack);
1100out_put_upper:
1101        if (origin_path) {
1102                dput(origin_path->dentry);
1103                kfree(origin_path);
1104        }
1105        dput(upperdentry);
1106        kfree(upperredirect);
1107out:
1108        kfree(d.redirect);
1109        revert_creds(old_cred);
1110        return ERR_PTR(err);
1111}
1112
1113bool ovl_lower_positive(struct dentry *dentry)
1114{
1115        struct ovl_entry *poe = dentry->d_parent->d_fsdata;
1116        const struct qstr *name = &dentry->d_name;
1117        const struct cred *old_cred;
1118        unsigned int i;
1119        bool positive = false;
1120        bool done = false;
1121
1122        /*
1123         * If dentry is negative, then lower is positive iff this is a
1124         * whiteout.
1125         */
1126        if (!dentry->d_inode)
1127                return ovl_dentry_is_opaque(dentry);
1128
1129        /* Negative upper -> positive lower */
1130        if (!ovl_dentry_upper(dentry))
1131                return true;
1132
1133        old_cred = ovl_override_creds(dentry->d_sb);
1134        /* Positive upper -> have to look up lower to see whether it exists */
1135        for (i = 0; !done && !positive && i < poe->numlower; i++) {
1136                struct dentry *this;
1137                struct dentry *lowerdir = poe->lowerstack[i].dentry;
1138
1139                this = lookup_positive_unlocked(name->name, lowerdir,
1140                                               name->len);
1141                if (IS_ERR(this)) {
1142                        switch (PTR_ERR(this)) {
1143                        case -ENOENT:
1144                        case -ENAMETOOLONG:
1145                                break;
1146
1147                        default:
1148                                /*
1149                                 * Assume something is there, we just couldn't
1150                                 * access it.
1151                                 */
1152                                positive = true;
1153                                break;
1154                        }
1155                } else {
1156                        positive = !ovl_is_whiteout(this);
1157                        done = true;
1158                        dput(this);
1159                }
1160        }
1161        revert_creds(old_cred);
1162
1163        return positive;
1164}
1165