linux/fs/overlayfs/readdir.c
<<
>>
Prefs
   1/*
   2 *
   3 * Copyright (C) 2011 Novell Inc.
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms of the GNU General Public License version 2 as published by
   7 * the Free Software Foundation.
   8 */
   9
  10#include <linux/fs.h>
  11#include <linux/slab.h>
  12#include <linux/namei.h>
  13#include <linux/file.h>
  14#include <linux/xattr.h>
  15#include <linux/rbtree.h>
  16#include <linux/security.h>
  17#include <linux/cred.h>
  18#include "overlayfs.h"
  19
  20struct ovl_cache_entry {
  21        unsigned int len;
  22        unsigned int type;
  23        u64 ino;
  24        struct list_head l_node;
  25        struct rb_node node;
  26        struct ovl_cache_entry *next_maybe_whiteout;
  27        bool is_whiteout;
  28        char name[];
  29};
  30
  31struct ovl_dir_cache {
  32        long refcount;
  33        u64 version;
  34        struct list_head entries;
  35};
  36
  37struct ovl_readdir_data {
  38        struct dir_context ctx;
  39        bool is_lowest;
  40        struct rb_root root;
  41        struct list_head *list;
  42        struct list_head middle;
  43        struct ovl_cache_entry *first_maybe_whiteout;
  44        int count;
  45        int err;
  46        bool d_type_supported;
  47};
  48
  49struct ovl_dir_file {
  50        bool is_real;
  51        bool is_upper;
  52        struct ovl_dir_cache *cache;
  53        struct list_head *cursor;
  54        struct file *realfile;
  55        struct file *upperfile;
  56};
  57
  58static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
  59{
  60        return container_of(n, struct ovl_cache_entry, node);
  61}
  62
  63static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
  64                                                    const char *name, int len)
  65{
  66        struct rb_node *node = root->rb_node;
  67        int cmp;
  68
  69        while (node) {
  70                struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
  71
  72                cmp = strncmp(name, p->name, len);
  73                if (cmp > 0)
  74                        node = p->node.rb_right;
  75                else if (cmp < 0 || len < p->len)
  76                        node = p->node.rb_left;
  77                else
  78                        return p;
  79        }
  80
  81        return NULL;
  82}
  83
  84static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
  85                                                   const char *name, int len,
  86                                                   u64 ino, unsigned int d_type)
  87{
  88        struct ovl_cache_entry *p;
  89        size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
  90
  91        p = kmalloc(size, GFP_KERNEL);
  92        if (!p)
  93                return NULL;
  94
  95        memcpy(p->name, name, len);
  96        p->name[len] = '\0';
  97        p->len = len;
  98        p->type = d_type;
  99        p->ino = ino;
 100        p->is_whiteout = false;
 101
 102        if (d_type == DT_CHR) {
 103                p->next_maybe_whiteout = rdd->first_maybe_whiteout;
 104                rdd->first_maybe_whiteout = p;
 105        }
 106        return p;
 107}
 108
 109static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
 110                                  const char *name, int len, u64 ino,
 111                                  unsigned int d_type)
 112{
 113        struct rb_node **newp = &rdd->root.rb_node;
 114        struct rb_node *parent = NULL;
 115        struct ovl_cache_entry *p;
 116
 117        while (*newp) {
 118                int cmp;
 119                struct ovl_cache_entry *tmp;
 120
 121                parent = *newp;
 122                tmp = ovl_cache_entry_from_node(*newp);
 123                cmp = strncmp(name, tmp->name, len);
 124                if (cmp > 0)
 125                        newp = &tmp->node.rb_right;
 126                else if (cmp < 0 || len < tmp->len)
 127                        newp = &tmp->node.rb_left;
 128                else
 129                        return 0;
 130        }
 131
 132        p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
 133        if (p == NULL)
 134                return -ENOMEM;
 135
 136        list_add_tail(&p->l_node, rdd->list);
 137        rb_link_node(&p->node, parent, newp);
 138        rb_insert_color(&p->node, &rdd->root);
 139
 140        return 0;
 141}
 142
 143static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
 144                           const char *name, int namelen,
 145                           loff_t offset, u64 ino, unsigned int d_type)
 146{
 147        struct ovl_cache_entry *p;
 148
 149        p = ovl_cache_entry_find(&rdd->root, name, namelen);
 150        if (p) {
 151                list_move_tail(&p->l_node, &rdd->middle);
 152        } else {
 153                p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
 154                if (p == NULL)
 155                        rdd->err = -ENOMEM;
 156                else
 157                        list_add_tail(&p->l_node, &rdd->middle);
 158        }
 159
 160        return rdd->err;
 161}
 162
 163void ovl_cache_free(struct list_head *list)
 164{
 165        struct ovl_cache_entry *p;
 166        struct ovl_cache_entry *n;
 167
 168        list_for_each_entry_safe(p, n, list, l_node)
 169                kfree(p);
 170
 171        INIT_LIST_HEAD(list);
 172}
 173
 174static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
 175{
 176        struct ovl_dir_cache *cache = od->cache;
 177
 178        WARN_ON(cache->refcount <= 0);
 179        cache->refcount--;
 180        if (!cache->refcount) {
 181                if (ovl_dir_cache(dentry) == cache)
 182                        ovl_set_dir_cache(dentry, NULL);
 183
 184                ovl_cache_free(&cache->entries);
 185                kfree(cache);
 186        }
 187}
 188
 189static int ovl_fill_merge(struct dir_context *ctx, const char *name,
 190                          int namelen, loff_t offset, u64 ino,
 191                          unsigned int d_type)
 192{
 193        struct ovl_readdir_data *rdd =
 194                container_of(ctx, struct ovl_readdir_data, ctx);
 195
 196        rdd->count++;
 197        if (!rdd->is_lowest)
 198                return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
 199        else
 200                return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
 201}
 202
 203static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
 204{
 205        int err;
 206        struct ovl_cache_entry *p;
 207        struct dentry *dentry;
 208        const struct cred *old_cred;
 209        struct cred *override_cred;
 210
 211        override_cred = prepare_creds();
 212        if (!override_cred)
 213                return -ENOMEM;
 214
 215        /*
 216         * CAP_DAC_OVERRIDE for lookup
 217         */
 218        cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
 219        old_cred = override_creds(override_cred);
 220
 221        err = mutex_lock_killable(&dir->d_inode->i_mutex);
 222        if (!err) {
 223                while (rdd->first_maybe_whiteout) {
 224                        p = rdd->first_maybe_whiteout;
 225                        rdd->first_maybe_whiteout = p->next_maybe_whiteout;
 226                        dentry = lookup_one_len(p->name, dir, p->len);
 227                        if (!IS_ERR(dentry)) {
 228                                p->is_whiteout = ovl_is_whiteout(dentry);
 229                                dput(dentry);
 230                        }
 231                }
 232                inode_unlock(dir->d_inode);
 233        }
 234        revert_creds(old_cred);
 235        put_cred(override_cred);
 236
 237        return err;
 238}
 239
 240static inline int ovl_dir_read(struct path *realpath,
 241                               struct ovl_readdir_data *rdd)
 242{
 243        struct file *realfile;
 244        int err;
 245
 246        realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
 247        if (IS_ERR(realfile))
 248                return PTR_ERR(realfile);
 249
 250        rdd->first_maybe_whiteout = NULL;
 251        rdd->ctx.pos = 0;
 252        do {
 253                rdd->count = 0;
 254                rdd->err = 0;
 255                err = iterate_dir(realfile, &rdd->ctx);
 256                if (err >= 0)
 257                        err = rdd->err;
 258        } while (!err && rdd->count);
 259
 260        if (!err && rdd->first_maybe_whiteout)
 261                err = ovl_check_whiteouts(realpath->dentry, rdd);
 262
 263        fput(realfile);
 264
 265        return err;
 266}
 267
 268static void ovl_dir_reset(struct file *file)
 269{
 270        struct ovl_dir_file *od = file->private_data;
 271        struct ovl_dir_cache *cache = od->cache;
 272        struct dentry *dentry = file->f_path.dentry;
 273        enum ovl_path_type type = ovl_path_type(dentry);
 274
 275        if (cache && ovl_dentry_version_get(dentry) != cache->version) {
 276                ovl_cache_put(od, dentry);
 277                od->cache = NULL;
 278                od->cursor = NULL;
 279        }
 280        WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
 281        if (od->is_real && OVL_TYPE_MERGE(type))
 282                od->is_real = false;
 283}
 284
 285static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
 286{
 287        int err;
 288        struct path realpath;
 289        struct ovl_readdir_data rdd = {
 290                .ctx.actor = ovl_fill_merge,
 291                .list = list,
 292                .root = RB_ROOT,
 293                .is_lowest = false,
 294        };
 295        int idx, next;
 296
 297        for (idx = 0; idx != -1; idx = next) {
 298                next = ovl_path_next(idx, dentry, &realpath);
 299
 300                if (next != -1) {
 301                        err = ovl_dir_read(&realpath, &rdd);
 302                        if (err)
 303                                break;
 304                } else {
 305                        /*
 306                         * Insert lowest layer entries before upper ones, this
 307                         * allows offsets to be reasonably constant
 308                         */
 309                        list_add(&rdd.middle, rdd.list);
 310                        rdd.is_lowest = true;
 311                        err = ovl_dir_read(&realpath, &rdd);
 312                        list_del(&rdd.middle);
 313                }
 314        }
 315        return err;
 316}
 317
 318static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
 319{
 320        struct list_head *p;
 321        loff_t off = 0;
 322
 323        list_for_each(p, &od->cache->entries) {
 324                if (off >= pos)
 325                        break;
 326                off++;
 327        }
 328        /* Cursor is safe since the cache is stable */
 329        od->cursor = p;
 330}
 331
 332static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
 333{
 334        int res;
 335        struct ovl_dir_cache *cache;
 336
 337        cache = ovl_dir_cache(dentry);
 338        if (cache && ovl_dentry_version_get(dentry) == cache->version) {
 339                cache->refcount++;
 340                return cache;
 341        }
 342        ovl_set_dir_cache(dentry, NULL);
 343
 344        cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
 345        if (!cache)
 346                return ERR_PTR(-ENOMEM);
 347
 348        cache->refcount = 1;
 349        INIT_LIST_HEAD(&cache->entries);
 350
 351        res = ovl_dir_read_merged(dentry, &cache->entries);
 352        if (res) {
 353                ovl_cache_free(&cache->entries);
 354                kfree(cache);
 355                return ERR_PTR(res);
 356        }
 357
 358        cache->version = ovl_dentry_version_get(dentry);
 359        ovl_set_dir_cache(dentry, cache);
 360
 361        return cache;
 362}
 363
 364static int ovl_iterate(struct file *file, struct dir_context *ctx)
 365{
 366        struct ovl_dir_file *od = file->private_data;
 367        struct dentry *dentry = file->f_path.dentry;
 368        struct ovl_cache_entry *p;
 369
 370        if (!ctx->pos)
 371                ovl_dir_reset(file);
 372
 373        if (od->is_real)
 374                return iterate_dir(od->realfile, ctx);
 375
 376        if (!od->cache) {
 377                struct ovl_dir_cache *cache;
 378
 379                cache = ovl_cache_get(dentry);
 380                if (IS_ERR(cache))
 381                        return PTR_ERR(cache);
 382
 383                od->cache = cache;
 384                ovl_seek_cursor(od, ctx->pos);
 385        }
 386
 387        while (od->cursor != &od->cache->entries) {
 388                p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
 389                if (!p->is_whiteout)
 390                        if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
 391                                break;
 392                od->cursor = p->l_node.next;
 393                ctx->pos++;
 394        }
 395        return 0;
 396}
 397
 398static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
 399{
 400        loff_t res;
 401        struct ovl_dir_file *od = file->private_data;
 402
 403        inode_lock(file_inode(file));
 404        if (!file->f_pos)
 405                ovl_dir_reset(file);
 406
 407        if (od->is_real) {
 408                res = vfs_llseek(od->realfile, offset, origin);
 409                file->f_pos = od->realfile->f_pos;
 410        } else {
 411                res = -EINVAL;
 412
 413                switch (origin) {
 414                case SEEK_CUR:
 415                        offset += file->f_pos;
 416                        break;
 417                case SEEK_SET:
 418                        break;
 419                default:
 420                        goto out_unlock;
 421                }
 422                if (offset < 0)
 423                        goto out_unlock;
 424
 425                if (offset != file->f_pos) {
 426                        file->f_pos = offset;
 427                        if (od->cache)
 428                                ovl_seek_cursor(od, offset);
 429                }
 430                res = offset;
 431        }
 432out_unlock:
 433        inode_unlock(file_inode(file));
 434
 435        return res;
 436}
 437
 438static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
 439                         int datasync)
 440{
 441        struct ovl_dir_file *od = file->private_data;
 442        struct dentry *dentry = file->f_path.dentry;
 443        struct file *realfile = od->realfile;
 444
 445        /*
 446         * Need to check if we started out being a lower dir, but got copied up
 447         */
 448        if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
 449                struct inode *inode = file_inode(file);
 450
 451                realfile = lockless_dereference(od->upperfile);
 452                if (!realfile) {
 453                        struct path upperpath;
 454
 455                        ovl_path_upper(dentry, &upperpath);
 456                        realfile = ovl_path_open(&upperpath, O_RDONLY);
 457                        smp_mb__before_spinlock();
 458                        inode_lock(inode);
 459                        if (!od->upperfile) {
 460                                if (IS_ERR(realfile)) {
 461                                        inode_unlock(inode);
 462                                        return PTR_ERR(realfile);
 463                                }
 464                                od->upperfile = realfile;
 465                        } else {
 466                                /* somebody has beaten us to it */
 467                                if (!IS_ERR(realfile))
 468                                        fput(realfile);
 469                                realfile = od->upperfile;
 470                        }
 471                        inode_unlock(inode);
 472                }
 473        }
 474
 475        return vfs_fsync_range(realfile, start, end, datasync);
 476}
 477
 478static int ovl_dir_release(struct inode *inode, struct file *file)
 479{
 480        struct ovl_dir_file *od = file->private_data;
 481
 482        if (od->cache) {
 483                inode_lock(inode);
 484                ovl_cache_put(od, file->f_path.dentry);
 485                inode_unlock(inode);
 486        }
 487        fput(od->realfile);
 488        if (od->upperfile)
 489                fput(od->upperfile);
 490        kfree(od);
 491
 492        return 0;
 493}
 494
 495static int ovl_dir_open(struct inode *inode, struct file *file)
 496{
 497        struct path realpath;
 498        struct file *realfile;
 499        struct ovl_dir_file *od;
 500        enum ovl_path_type type;
 501
 502        od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
 503        if (!od)
 504                return -ENOMEM;
 505
 506        type = ovl_path_real(file->f_path.dentry, &realpath);
 507        realfile = ovl_path_open(&realpath, file->f_flags);
 508        if (IS_ERR(realfile)) {
 509                kfree(od);
 510                return PTR_ERR(realfile);
 511        }
 512        od->realfile = realfile;
 513        od->is_real = !OVL_TYPE_MERGE(type);
 514        od->is_upper = OVL_TYPE_UPPER(type);
 515        file->private_data = od;
 516
 517        return 0;
 518}
 519
 520const struct file_operations ovl_dir_operations = {
 521        .read           = generic_read_dir,
 522        .open           = ovl_dir_open,
 523        .iterate        = ovl_iterate,
 524        .llseek         = ovl_dir_llseek,
 525        .fsync          = ovl_dir_fsync,
 526        .release        = ovl_dir_release,
 527};
 528
 529int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
 530{
 531        int err;
 532        struct ovl_cache_entry *p;
 533
 534        err = ovl_dir_read_merged(dentry, list);
 535        if (err)
 536                return err;
 537
 538        err = 0;
 539
 540        list_for_each_entry(p, list, l_node) {
 541                if (p->is_whiteout)
 542                        continue;
 543
 544                if (p->name[0] == '.') {
 545                        if (p->len == 1)
 546                                continue;
 547                        if (p->len == 2 && p->name[1] == '.')
 548                                continue;
 549                }
 550                err = -ENOTEMPTY;
 551                break;
 552        }
 553
 554        return err;
 555}
 556
 557void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
 558{
 559        struct ovl_cache_entry *p;
 560
 561        inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
 562        list_for_each_entry(p, list, l_node) {
 563                struct dentry *dentry;
 564
 565                if (!p->is_whiteout)
 566                        continue;
 567
 568                dentry = lookup_one_len(p->name, upper, p->len);
 569                if (IS_ERR(dentry)) {
 570                        pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
 571                               upper->d_name.name, p->len, p->name,
 572                               (int) PTR_ERR(dentry));
 573                        continue;
 574                }
 575                if (dentry->d_inode)
 576                        ovl_cleanup(upper->d_inode, dentry);
 577                dput(dentry);
 578        }
 579        inode_unlock(upper->d_inode);
 580}
 581
 582static int ovl_check_d_type(struct dir_context *ctx, const char *name,
 583                          int namelen, loff_t offset, u64 ino,
 584                          unsigned int d_type)
 585{
 586        struct ovl_readdir_data *rdd =
 587                container_of(ctx, struct ovl_readdir_data, ctx);
 588
 589        /* Even if d_type is not supported, DT_DIR is returned for . and .. */
 590        if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
 591                return 0;
 592
 593        if (d_type != DT_UNKNOWN)
 594                rdd->d_type_supported = true;
 595
 596        return 0;
 597}
 598
 599/*
 600 * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
 601 * if error is encountered.
 602 */
 603int ovl_check_d_type_supported(struct path *realpath)
 604{
 605        int err;
 606        struct ovl_readdir_data rdd = {
 607                .ctx.actor = ovl_check_d_type,
 608                .d_type_supported = false,
 609        };
 610
 611        err = ovl_dir_read(realpath, &rdd);
 612        if (err)
 613                return err;
 614
 615        return rdd.d_type_supported;
 616}
 617