linux/fs/overlayfs/readdir.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *
   4 * Copyright (C) 2011 Novell Inc.
   5 */
   6
   7#include <linux/fs.h>
   8#include <linux/slab.h>
   9#include <linux/namei.h>
  10#include <linux/file.h>
  11#include <linux/xattr.h>
  12#include <linux/rbtree.h>
  13#include <linux/security.h>
  14#include <linux/cred.h>
  15#include <linux/ratelimit.h>
  16#include "overlayfs.h"
  17
  18struct ovl_cache_entry {
  19        unsigned int len;
  20        unsigned int type;
  21        u64 real_ino;
  22        u64 ino;
  23        struct list_head l_node;
  24        struct rb_node node;
  25        struct ovl_cache_entry *next_maybe_whiteout;
  26        bool is_upper;
  27        bool is_whiteout;
  28        char name[];
  29};
  30
  31struct ovl_dir_cache {
  32        long refcount;
  33        u64 version;
  34        struct list_head entries;
  35        struct rb_root root;
  36};
  37
  38struct ovl_readdir_data {
  39        struct dir_context ctx;
  40        struct dentry *dentry;
  41        bool is_lowest;
  42        struct rb_root *root;
  43        struct list_head *list;
  44        struct list_head middle;
  45        struct ovl_cache_entry *first_maybe_whiteout;
  46        int count;
  47        int err;
  48        bool is_upper;
  49        bool d_type_supported;
  50};
  51
  /* Per open file state of an overlay directory (file->private_data). */
  struct ovl_dir_file {
          /* ovl_dir_is_real() result: readdir is served from realfile */
          bool is_real;
          /* The directory was of upper type when it was opened */
          bool is_upper;
          /* Merged dir cache this file holds a reference on, or NULL */
          struct ovl_dir_cache *cache;
          /* Current position inside cache->entries */
          struct list_head *cursor;
          /* Real directory opened in ovl_dir_open() */
          struct file *realfile;
          /* Upper directory opened lazily by ovl_dir_real_file(), or NULL */
          struct file *upperfile;
  };
  60
  61static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
  62{
  63        return rb_entry(n, struct ovl_cache_entry, node);
  64}
  65
  66static bool ovl_cache_entry_find_link(const char *name, int len,
  67                                      struct rb_node ***link,
  68                                      struct rb_node **parent)
  69{
  70        bool found = false;
  71        struct rb_node **newp = *link;
  72
  73        while (!found && *newp) {
  74                int cmp;
  75                struct ovl_cache_entry *tmp;
  76
  77                *parent = *newp;
  78                tmp = ovl_cache_entry_from_node(*newp);
  79                cmp = strncmp(name, tmp->name, len);
  80                if (cmp > 0)
  81                        newp = &tmp->node.rb_right;
  82                else if (cmp < 0 || len < tmp->len)
  83                        newp = &tmp->node.rb_left;
  84                else
  85                        found = true;
  86        }
  87        *link = newp;
  88
  89        return found;
  90}
  91
  92static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
  93                                                    const char *name, int len)
  94{
  95        struct rb_node *node = root->rb_node;
  96        int cmp;
  97
  98        while (node) {
  99                struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
 100
 101                cmp = strncmp(name, p->name, len);
 102                if (cmp > 0)
 103                        node = p->node.rb_right;
 104                else if (cmp < 0 || len < p->len)
 105                        node = p->node.rb_left;
 106                else
 107                        return p;
 108        }
 109
 110        return NULL;
 111}
 112
 113static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
 114                           struct ovl_cache_entry *p)
 115{
 116        /* Don't care if not doing ovl_iter() */
 117        if (!rdd->dentry)
 118                return false;
 119
 120        /* Always recalc d_ino when remapping lower inode numbers */
 121        if (ovl_xino_bits(rdd->dentry->d_sb))
 122                return true;
 123
 124        /* Always recalc d_ino for parent */
 125        if (strcmp(p->name, "..") == 0)
 126                return true;
 127
 128        /* If this is lower, then native d_ino will do */
 129        if (!rdd->is_upper)
 130                return false;
 131
 132        /*
 133         * Recalc d_ino for '.' and for all entries if dir is impure (contains
 134         * copied up entries)
 135         */
 136        if ((p->name[0] == '.' && p->len == 1) ||
 137            ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
 138                return true;
 139
 140        return false;
 141}
 142
 143static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
 144                                                   const char *name, int len,
 145                                                   u64 ino, unsigned int d_type)
 146{
 147        struct ovl_cache_entry *p;
 148        size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
 149
 150        p = kmalloc(size, GFP_KERNEL);
 151        if (!p)
 152                return NULL;
 153
 154        memcpy(p->name, name, len);
 155        p->name[len] = '\0';
 156        p->len = len;
 157        p->type = d_type;
 158        p->real_ino = ino;
 159        p->ino = ino;
 160        /* Defer setting d_ino for upper entry to ovl_iterate() */
 161        if (ovl_calc_d_ino(rdd, p))
 162                p->ino = 0;
 163        p->is_upper = rdd->is_upper;
 164        p->is_whiteout = false;
 165
 166        if (d_type == DT_CHR) {
 167                p->next_maybe_whiteout = rdd->first_maybe_whiteout;
 168                rdd->first_maybe_whiteout = p;
 169        }
 170        return p;
 171}
 172
 173static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
 174                                  const char *name, int len, u64 ino,
 175                                  unsigned int d_type)
 176{
 177        struct rb_node **newp = &rdd->root->rb_node;
 178        struct rb_node *parent = NULL;
 179        struct ovl_cache_entry *p;
 180
 181        if (ovl_cache_entry_find_link(name, len, &newp, &parent))
 182                return 0;
 183
 184        p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
 185        if (p == NULL) {
 186                rdd->err = -ENOMEM;
 187                return -ENOMEM;
 188        }
 189
 190        list_add_tail(&p->l_node, rdd->list);
 191        rb_link_node(&p->node, parent, newp);
 192        rb_insert_color(&p->node, rdd->root);
 193
 194        return 0;
 195}
 196
 197static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
 198                           const char *name, int namelen,
 199                           loff_t offset, u64 ino, unsigned int d_type)
 200{
 201        struct ovl_cache_entry *p;
 202
 203        p = ovl_cache_entry_find(rdd->root, name, namelen);
 204        if (p) {
 205                list_move_tail(&p->l_node, &rdd->middle);
 206        } else {
 207                p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
 208                if (p == NULL)
 209                        rdd->err = -ENOMEM;
 210                else
 211                        list_add_tail(&p->l_node, &rdd->middle);
 212        }
 213
 214        return rdd->err;
 215}
 216
 217void ovl_cache_free(struct list_head *list)
 218{
 219        struct ovl_cache_entry *p;
 220        struct ovl_cache_entry *n;
 221
 222        list_for_each_entry_safe(p, n, list, l_node)
 223                kfree(p);
 224
 225        INIT_LIST_HEAD(list);
 226}
 227
 228void ovl_dir_cache_free(struct inode *inode)
 229{
 230        struct ovl_dir_cache *cache = ovl_dir_cache(inode);
 231
 232        if (cache) {
 233                ovl_cache_free(&cache->entries);
 234                kfree(cache);
 235        }
 236}
 237
 238static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
 239{
 240        struct ovl_dir_cache *cache = od->cache;
 241
 242        WARN_ON(cache->refcount <= 0);
 243        cache->refcount--;
 244        if (!cache->refcount) {
 245                if (ovl_dir_cache(d_inode(dentry)) == cache)
 246                        ovl_set_dir_cache(d_inode(dentry), NULL);
 247
 248                ovl_cache_free(&cache->entries);
 249                kfree(cache);
 250        }
 251}
 252
 253static int ovl_fill_merge(struct dir_context *ctx, const char *name,
 254                          int namelen, loff_t offset, u64 ino,
 255                          unsigned int d_type)
 256{
 257        struct ovl_readdir_data *rdd =
 258                container_of(ctx, struct ovl_readdir_data, ctx);
 259
 260        rdd->count++;
 261        if (!rdd->is_lowest)
 262                return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
 263        else
 264                return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
 265}
 266
 267static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
 268{
 269        int err;
 270        struct ovl_cache_entry *p;
 271        struct dentry *dentry;
 272        const struct cred *old_cred;
 273
 274        old_cred = ovl_override_creds(rdd->dentry->d_sb);
 275
 276        err = down_write_killable(&dir->d_inode->i_rwsem);
 277        if (!err) {
 278                while (rdd->first_maybe_whiteout) {
 279                        p = rdd->first_maybe_whiteout;
 280                        rdd->first_maybe_whiteout = p->next_maybe_whiteout;
 281                        dentry = lookup_one_len(p->name, dir, p->len);
 282                        if (!IS_ERR(dentry)) {
 283                                p->is_whiteout = ovl_is_whiteout(dentry);
 284                                dput(dentry);
 285                        }
 286                }
 287                inode_unlock(dir->d_inode);
 288        }
 289        revert_creds(old_cred);
 290
 291        return err;
 292}
 293
 294static inline int ovl_dir_read(struct path *realpath,
 295                               struct ovl_readdir_data *rdd)
 296{
 297        struct file *realfile;
 298        int err;
 299
 300        realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
 301        if (IS_ERR(realfile))
 302                return PTR_ERR(realfile);
 303
 304        rdd->first_maybe_whiteout = NULL;
 305        rdd->ctx.pos = 0;
 306        do {
 307                rdd->count = 0;
 308                rdd->err = 0;
 309                err = iterate_dir(realfile, &rdd->ctx);
 310                if (err >= 0)
 311                        err = rdd->err;
 312        } while (!err && rdd->count);
 313
 314        if (!err && rdd->first_maybe_whiteout && rdd->dentry)
 315                err = ovl_check_whiteouts(realpath->dentry, rdd);
 316
 317        fput(realfile);
 318
 319        return err;
 320}
 321
 322static void ovl_dir_reset(struct file *file)
 323{
 324        struct ovl_dir_file *od = file->private_data;
 325        struct ovl_dir_cache *cache = od->cache;
 326        struct dentry *dentry = file->f_path.dentry;
 327        bool is_real;
 328
 329        if (cache && ovl_dentry_version_get(dentry) != cache->version) {
 330                ovl_cache_put(od, dentry);
 331                od->cache = NULL;
 332                od->cursor = NULL;
 333        }
 334        is_real = ovl_dir_is_real(dentry);
 335        if (od->is_real != is_real) {
 336                /* is_real can only become false when dir is copied up */
 337                if (WARN_ON(is_real))
 338                        return;
 339                od->is_real = false;
 340        }
 341}
 342
 343static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
 344        struct rb_root *root)
 345{
 346        int err;
 347        struct path realpath;
 348        struct ovl_readdir_data rdd = {
 349                .ctx.actor = ovl_fill_merge,
 350                .dentry = dentry,
 351                .list = list,
 352                .root = root,
 353                .is_lowest = false,
 354        };
 355        int idx, next;
 356
 357        for (idx = 0; idx != -1; idx = next) {
 358                next = ovl_path_next(idx, dentry, &realpath);
 359                rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
 360
 361                if (next != -1) {
 362                        err = ovl_dir_read(&realpath, &rdd);
 363                        if (err)
 364                                break;
 365                } else {
 366                        /*
 367                         * Insert lowest layer entries before upper ones, this
 368                         * allows offsets to be reasonably constant
 369                         */
 370                        list_add(&rdd.middle, rdd.list);
 371                        rdd.is_lowest = true;
 372                        err = ovl_dir_read(&realpath, &rdd);
 373                        list_del(&rdd.middle);
 374                }
 375        }
 376        return err;
 377}
 378
 379static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
 380{
 381        struct list_head *p;
 382        loff_t off = 0;
 383
 384        list_for_each(p, &od->cache->entries) {
 385                if (off >= pos)
 386                        break;
 387                off++;
 388        }
 389        /* Cursor is safe since the cache is stable */
 390        od->cursor = p;
 391}
 392
 393static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
 394{
 395        int res;
 396        struct ovl_dir_cache *cache;
 397
 398        cache = ovl_dir_cache(d_inode(dentry));
 399        if (cache && ovl_dentry_version_get(dentry) == cache->version) {
 400                WARN_ON(!cache->refcount);
 401                cache->refcount++;
 402                return cache;
 403        }
 404        ovl_set_dir_cache(d_inode(dentry), NULL);
 405
 406        cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
 407        if (!cache)
 408                return ERR_PTR(-ENOMEM);
 409
 410        cache->refcount = 1;
 411        INIT_LIST_HEAD(&cache->entries);
 412        cache->root = RB_ROOT;
 413
 414        res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
 415        if (res) {
 416                ovl_cache_free(&cache->entries);
 417                kfree(cache);
 418                return ERR_PTR(res);
 419        }
 420
 421        cache->version = ovl_dentry_version_get(dentry);
 422        ovl_set_dir_cache(d_inode(dentry), cache);
 423
 424        return cache;
 425}
 426
 427/* Map inode number to lower fs unique range */
 428static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
 429                               const char *name, int namelen, bool warn)
 430{
 431        unsigned int xinoshift = 64 - xinobits;
 432
 433        if (unlikely(ino >> xinoshift)) {
 434                if (warn) {
 435                        pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
 436                                            namelen, name, ino, xinobits);
 437                }
 438                return ino;
 439        }
 440
 441        /*
 442         * The lowest xinobit is reserved for mapping the non-peresistent inode
 443         * numbers range, but this range is only exposed via st_ino, not here.
 444         */
 445        return ino | ((u64)fsid) << (xinoshift + 1);
 446}
 447
 448/*
 449 * Set d_ino for upper entries. Non-upper entries should always report
 450 * the uppermost real inode ino and should not call this function.
 451 *
 452 * When not all layer are on same fs, report real ino also for upper.
 453 *
 454 * When all layers are on the same fs, and upper has a reference to
 455 * copy up origin, call vfs_getattr() on the overlay entry to make
 456 * sure that d_ino will be consistent with st_ino from stat(2).
 457 */
 458static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
 459
 460{
 461        struct dentry *dir = path->dentry;
 462        struct dentry *this = NULL;
 463        enum ovl_path_type type;
 464        u64 ino = p->real_ino;
 465        int xinobits = ovl_xino_bits(dir->d_sb);
 466        int err = 0;
 467
 468        if (!ovl_same_dev(dir->d_sb))
 469                goto out;
 470
 471        if (p->name[0] == '.') {
 472                if (p->len == 1) {
 473                        this = dget(dir);
 474                        goto get;
 475                }
 476                if (p->len == 2 && p->name[1] == '.') {
 477                        /* we shall not be moved */
 478                        this = dget(dir->d_parent);
 479                        goto get;
 480                }
 481        }
 482        this = lookup_one_len(p->name, dir, p->len);
 483        if (IS_ERR_OR_NULL(this) || !this->d_inode) {
 484                /* Mark a stale entry */
 485                p->is_whiteout = true;
 486                if (IS_ERR(this)) {
 487                        err = PTR_ERR(this);
 488                        this = NULL;
 489                        goto fail;
 490                }
 491                goto out;
 492        }
 493
 494get:
 495        type = ovl_path_type(this);
 496        if (OVL_TYPE_ORIGIN(type)) {
 497                struct kstat stat;
 498                struct path statpath = *path;
 499
 500                statpath.dentry = this;
 501                err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
 502                if (err)
 503                        goto fail;
 504
 505                /*
 506                 * Directory inode is always on overlay st_dev.
 507                 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
 508                 * of xino bits overflow.
 509                 */
 510                WARN_ON_ONCE(S_ISDIR(stat.mode) &&
 511                             dir->d_sb->s_dev != stat.dev);
 512                ino = stat.ino;
 513        } else if (xinobits && !OVL_TYPE_UPPER(type)) {
 514                ino = ovl_remap_lower_ino(ino, xinobits,
 515                                          ovl_layer_lower(this)->fsid,
 516                                          p->name, p->len,
 517                                          ovl_xino_warn(dir->d_sb));
 518        }
 519
 520out:
 521        p->ino = ino;
 522        dput(this);
 523        return err;
 524
 525fail:
 526        pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
 527                            p->name, err);
 528        goto out;
 529}
 530
 531static int ovl_fill_plain(struct dir_context *ctx, const char *name,
 532                          int namelen, loff_t offset, u64 ino,
 533                          unsigned int d_type)
 534{
 535        struct ovl_cache_entry *p;
 536        struct ovl_readdir_data *rdd =
 537                container_of(ctx, struct ovl_readdir_data, ctx);
 538
 539        rdd->count++;
 540        p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
 541        if (p == NULL) {
 542                rdd->err = -ENOMEM;
 543                return -ENOMEM;
 544        }
 545        list_add_tail(&p->l_node, rdd->list);
 546
 547        return 0;
 548}
 549
 550static int ovl_dir_read_impure(struct path *path,  struct list_head *list,
 551                               struct rb_root *root)
 552{
 553        int err;
 554        struct path realpath;
 555        struct ovl_cache_entry *p, *n;
 556        struct ovl_readdir_data rdd = {
 557                .ctx.actor = ovl_fill_plain,
 558                .list = list,
 559                .root = root,
 560        };
 561
 562        INIT_LIST_HEAD(list);
 563        *root = RB_ROOT;
 564        ovl_path_upper(path->dentry, &realpath);
 565
 566        err = ovl_dir_read(&realpath, &rdd);
 567        if (err)
 568                return err;
 569
 570        list_for_each_entry_safe(p, n, list, l_node) {
 571                if (strcmp(p->name, ".") != 0 &&
 572                    strcmp(p->name, "..") != 0) {
 573                        err = ovl_cache_update_ino(path, p);
 574                        if (err)
 575                                return err;
 576                }
 577                if (p->ino == p->real_ino) {
 578                        list_del(&p->l_node);
 579                        kfree(p);
 580                } else {
 581                        struct rb_node **newp = &root->rb_node;
 582                        struct rb_node *parent = NULL;
 583
 584                        if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
 585                                                              &newp, &parent)))
 586                                return -EIO;
 587
 588                        rb_link_node(&p->node, parent, newp);
 589                        rb_insert_color(&p->node, root);
 590                }
 591        }
 592        return 0;
 593}
 594
 595static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
 596{
 597        int res;
 598        struct dentry *dentry = path->dentry;
 599        struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 600        struct ovl_dir_cache *cache;
 601
 602        cache = ovl_dir_cache(d_inode(dentry));
 603        if (cache && ovl_dentry_version_get(dentry) == cache->version)
 604                return cache;
 605
 606        /* Impure cache is not refcounted, free it here */
 607        ovl_dir_cache_free(d_inode(dentry));
 608        ovl_set_dir_cache(d_inode(dentry), NULL);
 609
 610        cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
 611        if (!cache)
 612                return ERR_PTR(-ENOMEM);
 613
 614        res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
 615        if (res) {
 616                ovl_cache_free(&cache->entries);
 617                kfree(cache);
 618                return ERR_PTR(res);
 619        }
 620        if (list_empty(&cache->entries)) {
 621                /*
 622                 * A good opportunity to get rid of an unneeded "impure" flag.
 623                 * Removing the "impure" xattr is best effort.
 624                 */
 625                if (!ovl_want_write(dentry)) {
 626                        ovl_do_removexattr(ofs, ovl_dentry_upper(dentry),
 627                                           OVL_XATTR_IMPURE);
 628                        ovl_drop_write(dentry);
 629                }
 630                ovl_clear_flag(OVL_IMPURE, d_inode(dentry));
 631                kfree(cache);
 632                return NULL;
 633        }
 634
 635        cache->version = ovl_dentry_version_get(dentry);
 636        ovl_set_dir_cache(d_inode(dentry), cache);
 637
 638        return cache;
 639}
 640
 641struct ovl_readdir_translate {
 642        struct dir_context *orig_ctx;
 643        struct ovl_dir_cache *cache;
 644        struct dir_context ctx;
 645        u64 parent_ino;
 646        int fsid;
 647        int xinobits;
 648        bool xinowarn;
 649};
 650
 651static int ovl_fill_real(struct dir_context *ctx, const char *name,
 652                           int namelen, loff_t offset, u64 ino,
 653                           unsigned int d_type)
 654{
 655        struct ovl_readdir_translate *rdt =
 656                container_of(ctx, struct ovl_readdir_translate, ctx);
 657        struct dir_context *orig_ctx = rdt->orig_ctx;
 658
 659        if (rdt->parent_ino && strcmp(name, "..") == 0) {
 660                ino = rdt->parent_ino;
 661        } else if (rdt->cache) {
 662                struct ovl_cache_entry *p;
 663
 664                p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
 665                if (p)
 666                        ino = p->ino;
 667        } else if (rdt->xinobits) {
 668                ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
 669                                          name, namelen, rdt->xinowarn);
 670        }
 671
 672        return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
 673}
 674
 675static bool ovl_is_impure_dir(struct file *file)
 676{
 677        struct ovl_dir_file *od = file->private_data;
 678        struct inode *dir = d_inode(file->f_path.dentry);
 679
 680        /*
 681         * Only upper dir can be impure, but if we are in the middle of
 682         * iterating a lower real dir, dir could be copied up and marked
 683         * impure. We only want the impure cache if we started iterating
 684         * a real upper dir to begin with.
 685         */
 686        return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
 687
 688}
 689
 690static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
 691{
 692        int err;
 693        struct ovl_dir_file *od = file->private_data;
 694        struct dentry *dir = file->f_path.dentry;
 695        const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
 696        struct ovl_readdir_translate rdt = {
 697                .ctx.actor = ovl_fill_real,
 698                .orig_ctx = ctx,
 699                .xinobits = ovl_xino_bits(dir->d_sb),
 700                .xinowarn = ovl_xino_warn(dir->d_sb),
 701        };
 702
 703        if (rdt.xinobits && lower_layer)
 704                rdt.fsid = lower_layer->fsid;
 705
 706        if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
 707                struct kstat stat;
 708                struct path statpath = file->f_path;
 709
 710                statpath.dentry = dir->d_parent;
 711                err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
 712                if (err)
 713                        return err;
 714
 715                WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
 716                rdt.parent_ino = stat.ino;
 717        }
 718
 719        if (ovl_is_impure_dir(file)) {
 720                rdt.cache = ovl_cache_get_impure(&file->f_path);
 721                if (IS_ERR(rdt.cache))
 722                        return PTR_ERR(rdt.cache);
 723        }
 724
 725        err = iterate_dir(od->realfile, &rdt.ctx);
 726        ctx->pos = rdt.ctx.pos;
 727
 728        return err;
 729}
 730
 731
 732static int ovl_iterate(struct file *file, struct dir_context *ctx)
 733{
 734        struct ovl_dir_file *od = file->private_data;
 735        struct dentry *dentry = file->f_path.dentry;
 736        struct ovl_cache_entry *p;
 737        const struct cred *old_cred;
 738        int err;
 739
 740        old_cred = ovl_override_creds(dentry->d_sb);
 741        if (!ctx->pos)
 742                ovl_dir_reset(file);
 743
 744        if (od->is_real) {
 745                /*
 746                 * If parent is merge, then need to adjust d_ino for '..', if
 747                 * dir is impure then need to adjust d_ino for copied up
 748                 * entries.
 749                 */
 750                if (ovl_xino_bits(dentry->d_sb) ||
 751                    (ovl_same_fs(dentry->d_sb) &&
 752                     (ovl_is_impure_dir(file) ||
 753                      OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
 754                        err = ovl_iterate_real(file, ctx);
 755                } else {
 756                        err = iterate_dir(od->realfile, ctx);
 757                }
 758                goto out;
 759        }
 760
 761        if (!od->cache) {
 762                struct ovl_dir_cache *cache;
 763
 764                cache = ovl_cache_get(dentry);
 765                err = PTR_ERR(cache);
 766                if (IS_ERR(cache))
 767                        goto out;
 768
 769                od->cache = cache;
 770                ovl_seek_cursor(od, ctx->pos);
 771        }
 772
 773        while (od->cursor != &od->cache->entries) {
 774                p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
 775                if (!p->is_whiteout) {
 776                        if (!p->ino) {
 777                                err = ovl_cache_update_ino(&file->f_path, p);
 778                                if (err)
 779                                        goto out;
 780                        }
 781                }
 782                /* ovl_cache_update_ino() sets is_whiteout on stale entry */
 783                if (!p->is_whiteout) {
 784                        if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
 785                                break;
 786                }
 787                od->cursor = p->l_node.next;
 788                ctx->pos++;
 789        }
 790        err = 0;
 791out:
 792        revert_creds(old_cred);
 793        return err;
 794}
 795
 796static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
 797{
 798        loff_t res;
 799        struct ovl_dir_file *od = file->private_data;
 800
 801        inode_lock(file_inode(file));
 802        if (!file->f_pos)
 803                ovl_dir_reset(file);
 804
 805        if (od->is_real) {
 806                res = vfs_llseek(od->realfile, offset, origin);
 807                file->f_pos = od->realfile->f_pos;
 808        } else {
 809                res = -EINVAL;
 810
 811                switch (origin) {
 812                case SEEK_CUR:
 813                        offset += file->f_pos;
 814                        break;
 815                case SEEK_SET:
 816                        break;
 817                default:
 818                        goto out_unlock;
 819                }
 820                if (offset < 0)
 821                        goto out_unlock;
 822
 823                if (offset != file->f_pos) {
 824                        file->f_pos = offset;
 825                        if (od->cache)
 826                                ovl_seek_cursor(od, offset);
 827                }
 828                res = offset;
 829        }
 830out_unlock:
 831        inode_unlock(file_inode(file));
 832
 833        return res;
 834}
 835
 836static struct file *ovl_dir_open_realfile(const struct file *file,
 837                                          struct path *realpath)
 838{
 839        struct file *res;
 840        const struct cred *old_cred;
 841
 842        old_cred = ovl_override_creds(file_inode(file)->i_sb);
 843        res = ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
 844        revert_creds(old_cred);
 845
 846        return res;
 847}
 848
 849/*
 850 * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
 851 * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
 852 *
 853 * TODO: use same abstract type for file->private_data of dir and file so
 854 * upperfile could also be cached for files as well.
 855 */
 856struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
 857{
 858
 859        struct ovl_dir_file *od = file->private_data;
 860        struct dentry *dentry = file->f_path.dentry;
 861        struct file *old, *realfile = od->realfile;
 862
 863        if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
 864                return want_upper ? NULL : realfile;
 865
 866        /*
 867         * Need to check if we started out being a lower dir, but got copied up
 868         */
 869        if (!od->is_upper) {
 870                realfile = READ_ONCE(od->upperfile);
 871                if (!realfile) {
 872                        struct path upperpath;
 873
 874                        ovl_path_upper(dentry, &upperpath);
 875                        realfile = ovl_dir_open_realfile(file, &upperpath);
 876                        if (IS_ERR(realfile))
 877                                return realfile;
 878
 879                        old = cmpxchg_release(&od->upperfile, NULL, realfile);
 880                        if (old) {
 881                                fput(realfile);
 882                                realfile = old;
 883                        }
 884                }
 885        }
 886
 887        return realfile;
 888}
 889
 890static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
 891                         int datasync)
 892{
 893        struct file *realfile;
 894        int err;
 895
 896        err = ovl_sync_status(OVL_FS(file->f_path.dentry->d_sb));
 897        if (err <= 0)
 898                return err;
 899
 900        realfile = ovl_dir_real_file(file, true);
 901        err = PTR_ERR_OR_ZERO(realfile);
 902
 903        /* Nothing to sync for lower */
 904        if (!realfile || err)
 905                return err;
 906
 907        return vfs_fsync_range(realfile, start, end, datasync);
 908}
 909
 910static int ovl_dir_release(struct inode *inode, struct file *file)
 911{
 912        struct ovl_dir_file *od = file->private_data;
 913
 914        if (od->cache) {
 915                inode_lock(inode);
 916                ovl_cache_put(od, file->f_path.dentry);
 917                inode_unlock(inode);
 918        }
 919        fput(od->realfile);
 920        if (od->upperfile)
 921                fput(od->upperfile);
 922        kfree(od);
 923
 924        return 0;
 925}
 926
 927static int ovl_dir_open(struct inode *inode, struct file *file)
 928{
 929        struct path realpath;
 930        struct file *realfile;
 931        struct ovl_dir_file *od;
 932        enum ovl_path_type type;
 933
 934        od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
 935        if (!od)
 936                return -ENOMEM;
 937
 938        type = ovl_path_real(file->f_path.dentry, &realpath);
 939        realfile = ovl_dir_open_realfile(file, &realpath);
 940        if (IS_ERR(realfile)) {
 941                kfree(od);
 942                return PTR_ERR(realfile);
 943        }
 944        od->realfile = realfile;
 945        od->is_real = ovl_dir_is_real(file->f_path.dentry);
 946        od->is_upper = OVL_TYPE_UPPER(type);
 947        file->private_data = od;
 948
 949        return 0;
 950}
 951
 952const struct file_operations ovl_dir_operations = {
 953        .read           = generic_read_dir,
 954        .open           = ovl_dir_open,
 955        .iterate        = ovl_iterate,
 956        .llseek         = ovl_dir_llseek,
 957        .fsync          = ovl_dir_fsync,
 958        .release        = ovl_dir_release,
 959};
 960
 961int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
 962{
 963        int err;
 964        struct ovl_cache_entry *p, *n;
 965        struct rb_root root = RB_ROOT;
 966        const struct cred *old_cred;
 967
 968        old_cred = ovl_override_creds(dentry->d_sb);
 969        err = ovl_dir_read_merged(dentry, list, &root);
 970        revert_creds(old_cred);
 971        if (err)
 972                return err;
 973
 974        err = 0;
 975
 976        list_for_each_entry_safe(p, n, list, l_node) {
 977                /*
 978                 * Select whiteouts in upperdir, they should
 979                 * be cleared when deleting this directory.
 980                 */
 981                if (p->is_whiteout) {
 982                        if (p->is_upper)
 983                                continue;
 984                        goto del_entry;
 985                }
 986
 987                if (p->name[0] == '.') {
 988                        if (p->len == 1)
 989                                goto del_entry;
 990                        if (p->len == 2 && p->name[1] == '.')
 991                                goto del_entry;
 992                }
 993                err = -ENOTEMPTY;
 994                break;
 995
 996del_entry:
 997                list_del(&p->l_node);
 998                kfree(p);
 999        }
1000
1001        return err;
1002}
1003
1004void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
1005{
1006        struct ovl_cache_entry *p;
1007
1008        inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
1009        list_for_each_entry(p, list, l_node) {
1010                struct dentry *dentry;
1011
1012                if (WARN_ON(!p->is_whiteout || !p->is_upper))
1013                        continue;
1014
1015                dentry = lookup_one_len(p->name, upper, p->len);
1016                if (IS_ERR(dentry)) {
1017                        pr_err("lookup '%s/%.*s' failed (%i)\n",
1018                               upper->d_name.name, p->len, p->name,
1019                               (int) PTR_ERR(dentry));
1020                        continue;
1021                }
1022                if (dentry->d_inode)
1023                        ovl_cleanup(upper->d_inode, dentry);
1024                dput(dentry);
1025        }
1026        inode_unlock(upper->d_inode);
1027}
1028
1029static int ovl_check_d_type(struct dir_context *ctx, const char *name,
1030                          int namelen, loff_t offset, u64 ino,
1031                          unsigned int d_type)
1032{
1033        struct ovl_readdir_data *rdd =
1034                container_of(ctx, struct ovl_readdir_data, ctx);
1035
1036        /* Even if d_type is not supported, DT_DIR is returned for . and .. */
1037        if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
1038                return 0;
1039
1040        if (d_type != DT_UNKNOWN)
1041                rdd->d_type_supported = true;
1042
1043        return 0;
1044}
1045
1046/*
1047 * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
1048 * if error is encountered.
1049 */
1050int ovl_check_d_type_supported(struct path *realpath)
1051{
1052        int err;
1053        struct ovl_readdir_data rdd = {
1054                .ctx.actor = ovl_check_d_type,
1055                .d_type_supported = false,
1056        };
1057
1058        err = ovl_dir_read(realpath, &rdd);
1059        if (err)
1060                return err;
1061
1062        return rdd.d_type_supported;
1063}
1064
1065#define OVL_INCOMPATDIR_NAME "incompat"
1066
1067static int ovl_workdir_cleanup_recurse(struct path *path, int level)
1068{
1069        int err;
1070        struct inode *dir = path->dentry->d_inode;
1071        LIST_HEAD(list);
1072        struct rb_root root = RB_ROOT;
1073        struct ovl_cache_entry *p;
1074        struct ovl_readdir_data rdd = {
1075                .ctx.actor = ovl_fill_merge,
1076                .dentry = NULL,
1077                .list = &list,
1078                .root = &root,
1079                .is_lowest = false,
1080        };
1081        bool incompat = false;
1082
1083        /*
1084         * The "work/incompat" directory is treated specially - if it is not
1085         * empty, instead of printing a generic error and mounting read-only,
1086         * we will error about incompat features and fail the mount.
1087         *
1088         * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
1089         * starts with '#'.
1090         */
1091        if (level == 2 &&
1092            !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
1093                incompat = true;
1094
1095        err = ovl_dir_read(path, &rdd);
1096        if (err)
1097                goto out;
1098
1099        inode_lock_nested(dir, I_MUTEX_PARENT);
1100        list_for_each_entry(p, &list, l_node) {
1101                struct dentry *dentry;
1102
1103                if (p->name[0] == '.') {
1104                        if (p->len == 1)
1105                                continue;
1106                        if (p->len == 2 && p->name[1] == '.')
1107                                continue;
1108                } else if (incompat) {
1109                        pr_err("overlay with incompat feature '%s' cannot be mounted\n",
1110                                p->name);
1111                        err = -EINVAL;
1112                        break;
1113                }
1114                dentry = lookup_one_len(p->name, path->dentry, p->len);
1115                if (IS_ERR(dentry))
1116                        continue;
1117                if (dentry->d_inode)
1118                        err = ovl_workdir_cleanup(dir, path->mnt, dentry, level);
1119                dput(dentry);
1120                if (err)
1121                        break;
1122        }
1123        inode_unlock(dir);
1124out:
1125        ovl_cache_free(&list);
1126        return err;
1127}
1128
1129int ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
1130                         struct dentry *dentry, int level)
1131{
1132        int err;
1133
1134        if (!d_is_dir(dentry) || level > 1) {
1135                return ovl_cleanup(dir, dentry);
1136        }
1137
1138        err = ovl_do_rmdir(dir, dentry);
1139        if (err) {
1140                struct path path = { .mnt = mnt, .dentry = dentry };
1141
1142                inode_unlock(dir);
1143                err = ovl_workdir_cleanup_recurse(&path, level + 1);
1144                inode_lock_nested(dir, I_MUTEX_PARENT);
1145                if (!err)
1146                        err = ovl_cleanup(dir, dentry);
1147        }
1148
1149        return err;
1150}
1151
1152int ovl_indexdir_cleanup(struct ovl_fs *ofs)
1153{
1154        int err;
1155        struct dentry *indexdir = ofs->indexdir;
1156        struct dentry *index = NULL;
1157        struct inode *dir = indexdir->d_inode;
1158        struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
1159        LIST_HEAD(list);
1160        struct rb_root root = RB_ROOT;
1161        struct ovl_cache_entry *p;
1162        struct ovl_readdir_data rdd = {
1163                .ctx.actor = ovl_fill_merge,
1164                .dentry = NULL,
1165                .list = &list,
1166                .root = &root,
1167                .is_lowest = false,
1168        };
1169
1170        err = ovl_dir_read(&path, &rdd);
1171        if (err)
1172                goto out;
1173
1174        inode_lock_nested(dir, I_MUTEX_PARENT);
1175        list_for_each_entry(p, &list, l_node) {
1176                if (p->name[0] == '.') {
1177                        if (p->len == 1)
1178                                continue;
1179                        if (p->len == 2 && p->name[1] == '.')
1180                                continue;
1181                }
1182                index = lookup_one_len(p->name, indexdir, p->len);
1183                if (IS_ERR(index)) {
1184                        err = PTR_ERR(index);
1185                        index = NULL;
1186                        break;
1187                }
1188                /* Cleanup leftover from index create/cleanup attempt */
1189                if (index->d_name.name[0] == '#') {
1190                        err = ovl_workdir_cleanup(dir, path.mnt, index, 1);
1191                        if (err)
1192                                break;
1193                        goto next;
1194                }
1195                err = ovl_verify_index(ofs, index);
1196                if (!err) {
1197                        goto next;
1198                } else if (err == -ESTALE) {
1199                        /* Cleanup stale index entries */
1200                        err = ovl_cleanup(dir, index);
1201                } else if (err != -ENOENT) {
1202                        /*
1203                         * Abort mount to avoid corrupting the index if
1204                         * an incompatible index entry was found or on out
1205                         * of memory.
1206                         */
1207                        break;
1208                } else if (ofs->config.nfs_export) {
1209                        /*
1210                         * Whiteout orphan index to block future open by
1211                         * handle after overlay nlink dropped to zero.
1212                         */
1213                        err = ovl_cleanup_and_whiteout(ofs, dir, index);
1214                } else {
1215                        /* Cleanup orphan index entries */
1216                        err = ovl_cleanup(dir, index);
1217                }
1218
1219                if (err)
1220                        break;
1221
1222next:
1223                dput(index);
1224                index = NULL;
1225        }
1226        dput(index);
1227        inode_unlock(dir);
1228out:
1229        ovl_cache_free(&list);
1230        if (err)
1231                pr_err("failed index dir cleanup (%i)\n", err);
1232        return err;
1233}
1234