linux/fs/overlayfs/super.c
<<
>>
Prefs
   1/*
   2 *
   3 * Copyright (C) 2011 Novell Inc.
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms of the GNU General Public License version 2 as published by
   7 * the Free Software Foundation.
   8 */
   9
  10#include <linux/fs.h>
  11#include <linux/namei.h>
  12#include <linux/pagemap.h>
  13#include <linux/xattr.h>
  14#include <linux/security.h>
  15#include <linux/mount.h>
  16#include <linux/slab.h>
  17#include <linux/parser.h>
  18#include <linux/module.h>
  19#include <linux/sched.h>
  20#include <linux/statfs.h>
  21#include <linux/seq_file.h>
  22#include <linux/posix_acl_xattr.h>
  23#include "overlayfs.h"
  24
  25MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
  26MODULE_DESCRIPTION("Overlay filesystem");
  27MODULE_LICENSE("GPL");
  28
  29struct ovl_config {
  30        char *lowerdir;
  31        char *upperdir;
  32        char *workdir;
  33        bool default_permissions;
  34};
  35
  36/* private information held for overlayfs's superblock */
  37struct ovl_fs {
  38        struct vfsmount *upper_mnt;
  39        unsigned numlower;
  40        struct vfsmount **lower_mnt;
  41        struct dentry *workdir;
  42        long lower_namelen;
  43        /* pathnames of lower and upper dirs, for show_options */
  44        struct ovl_config config;
  45        /* creds of process who forced instantiation of super block */
  46        const struct cred *creator_cred;
  47};
  48
  49struct ovl_dir_cache;
  50
  51/* private information held for every overlayfs dentry */
  52struct ovl_entry {
  53        struct dentry *__upperdentry;
  54        struct ovl_dir_cache *cache;
  55        union {
  56                struct {
  57                        u64 version;
  58                        bool opaque;
  59                };
  60                struct rcu_head rcu;
  61        };
  62        unsigned numlower;
  63        struct path lowerstack[];
  64};
   65
     /*
      * NOTE(review): not referenced in the visible part of this file;
      * presumably the upper bound on the number of stacked lower layers
      * accepted at mount time -- confirm against the mount code.
      */
   66#define OVL_MAX_STACK 500
  67
  68static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe)
  69{
  70        return oe->numlower ? oe->lowerstack[0].dentry : NULL;
  71}
  72
  73enum ovl_path_type ovl_path_type(struct dentry *dentry)
  74{
  75        struct ovl_entry *oe = dentry->d_fsdata;
  76        enum ovl_path_type type = 0;
  77
  78        if (oe->__upperdentry) {
  79                type = __OVL_PATH_UPPER;
  80
  81                /*
  82                 * Non-dir dentry can hold lower dentry from previous
  83                 * location. Its purity depends only on opaque flag.
  84                 */
  85                if (oe->numlower && S_ISDIR(dentry->d_inode->i_mode))
  86                        type |= __OVL_PATH_MERGE;
  87                else if (!oe->opaque)
  88                        type |= __OVL_PATH_PURE;
  89        } else {
  90                if (oe->numlower > 1)
  91                        type |= __OVL_PATH_MERGE;
  92        }
  93        return type;
  94}
  95
  96static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
  97{
  98        return lockless_dereference(oe->__upperdentry);
  99}
 100
 101void ovl_path_upper(struct dentry *dentry, struct path *path)
 102{
 103        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 104        struct ovl_entry *oe = dentry->d_fsdata;
 105
 106        path->mnt = ofs->upper_mnt;
 107        path->dentry = ovl_upperdentry_dereference(oe);
 108}
 109
 110enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
 111{
 112        enum ovl_path_type type = ovl_path_type(dentry);
 113
 114        if (!OVL_TYPE_UPPER(type))
 115                ovl_path_lower(dentry, path);
 116        else
 117                ovl_path_upper(dentry, path);
 118
 119        return type;
 120}
 121
 122struct dentry *ovl_dentry_upper(struct dentry *dentry)
 123{
 124        struct ovl_entry *oe = dentry->d_fsdata;
 125
 126        return ovl_upperdentry_dereference(oe);
 127}
 128
 129struct dentry *ovl_dentry_lower(struct dentry *dentry)
 130{
 131        struct ovl_entry *oe = dentry->d_fsdata;
 132
 133        return __ovl_dentry_lower(oe);
 134}
 135
 136struct dentry *ovl_dentry_real(struct dentry *dentry)
 137{
 138        struct ovl_entry *oe = dentry->d_fsdata;
 139        struct dentry *realdentry;
 140
 141        realdentry = ovl_upperdentry_dereference(oe);
 142        if (!realdentry)
 143                realdentry = __ovl_dentry_lower(oe);
 144
 145        return realdentry;
 146}
 147
 148static void ovl_inode_init(struct inode *inode, struct inode *realinode,
 149                           bool is_upper)
 150{
 151        WRITE_ONCE(inode->i_private, (unsigned long) realinode |
 152                   (is_upper ? OVL_ISUPPER_MASK : 0));
 153}
 154
 155struct vfsmount *ovl_entry_mnt_real(struct ovl_entry *oe, struct inode *inode,
 156                                    bool is_upper)
 157{
 158        if (is_upper) {
 159                struct ovl_fs *ofs = inode->i_sb->s_fs_info;
 160
 161                return ofs->upper_mnt;
 162        } else {
 163                return oe->numlower ? oe->lowerstack[0].mnt : NULL;
 164        }
 165}
 166
 167struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
 168{
 169        struct ovl_entry *oe = dentry->d_fsdata;
 170
 171        return oe->cache;
 172}
 173
 174void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
 175{
 176        struct ovl_entry *oe = dentry->d_fsdata;
 177
 178        oe->cache = cache;
 179}
 180
 181void ovl_path_lower(struct dentry *dentry, struct path *path)
 182{
 183        struct ovl_entry *oe = dentry->d_fsdata;
 184
 185        *path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL };
 186}
 187
 188int ovl_want_write(struct dentry *dentry)
 189{
 190        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 191        return mnt_want_write(ofs->upper_mnt);
 192}
 193
 194void ovl_drop_write(struct dentry *dentry)
 195{
 196        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 197        mnt_drop_write(ofs->upper_mnt);
 198}
 199
 200struct dentry *ovl_workdir(struct dentry *dentry)
 201{
 202        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 203        return ofs->workdir;
 204}
 205
 206bool ovl_dentry_is_opaque(struct dentry *dentry)
 207{
 208        struct ovl_entry *oe = dentry->d_fsdata;
 209        return oe->opaque;
 210}
 211
 212void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque)
 213{
 214        struct ovl_entry *oe = dentry->d_fsdata;
 215        oe->opaque = opaque;
 216}
 217
 218void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
 219{
 220        struct ovl_entry *oe = dentry->d_fsdata;
 221
 222        WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
 223        WARN_ON(oe->__upperdentry);
 224        /*
 225         * Make sure upperdentry is consistent before making it visible to
 226         * ovl_upperdentry_dereference().
 227         */
 228        smp_wmb();
 229        oe->__upperdentry = upperdentry;
 230}
 231
 232void ovl_inode_update(struct inode *inode, struct inode *upperinode)
 233{
 234        WARN_ON(!upperinode);
 235        WARN_ON(!inode_unhashed(inode));
 236        WRITE_ONCE(inode->i_private,
 237                   (unsigned long) upperinode | OVL_ISUPPER_MASK);
 238        if (!S_ISDIR(upperinode->i_mode))
 239                __insert_inode_hash(inode, (unsigned long) upperinode);
 240}
 241
 242void ovl_dentry_version_inc(struct dentry *dentry)
 243{
 244        struct ovl_entry *oe = dentry->d_fsdata;
 245
 246        WARN_ON(!inode_is_locked(dentry->d_inode));
 247        oe->version++;
 248}
 249
 250u64 ovl_dentry_version_get(struct dentry *dentry)
 251{
 252        struct ovl_entry *oe = dentry->d_fsdata;
 253
 254        WARN_ON(!inode_is_locked(dentry->d_inode));
 255        return oe->version;
 256}
 257
 258bool ovl_is_whiteout(struct dentry *dentry)
 259{
 260        struct inode *inode = dentry->d_inode;
 261
 262        return inode && IS_WHITEOUT(inode);
 263}
 264
 265const struct cred *ovl_override_creds(struct super_block *sb)
 266{
 267        struct ovl_fs *ofs = sb->s_fs_info;
 268
 269        return override_creds(ofs->creator_cred);
 270}
 271
 272static bool ovl_is_opaquedir(struct dentry *dentry)
 273{
 274        int res;
 275        char val;
 276
 277        if (!d_is_dir(dentry))
 278                return false;
 279
 280        res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1);
 281        if (res == 1 && val == 'y')
 282                return true;
 283
 284        return false;
 285}
 286
 287static void ovl_dentry_release(struct dentry *dentry)
 288{
 289        struct ovl_entry *oe = dentry->d_fsdata;
 290
 291        if (oe) {
 292                unsigned int i;
 293
 294                dput(oe->__upperdentry);
 295                for (i = 0; i < oe->numlower; i++)
 296                        dput(oe->lowerstack[i].dentry);
 297                kfree_rcu(oe, rcu);
 298        }
 299}
 300
 301static struct dentry *ovl_d_real(struct dentry *dentry,
 302                                 const struct inode *inode,
 303                                 unsigned int open_flags)
 304{
 305        struct dentry *real;
 306
 307        if (d_is_dir(dentry)) {
 308                if (!inode || inode == d_inode(dentry))
 309                        return dentry;
 310                goto bug;
 311        }
 312
 313        if (d_is_negative(dentry))
 314                return dentry;
 315
 316        if (open_flags) {
 317                int err = ovl_open_maybe_copy_up(dentry, open_flags);
 318
 319                if (err)
 320                        return ERR_PTR(err);
 321        }
 322
 323        real = ovl_dentry_upper(dentry);
 324        if (real && (!inode || inode == d_inode(real)))
 325                return real;
 326
 327        real = ovl_dentry_lower(dentry);
 328        if (!real)
 329                goto bug;
 330
 331        /* Handle recursion */
 332        real = d_real(real, inode, open_flags);
 333
 334        if (!inode || inode == d_inode(real))
 335                return real;
 336bug:
 337        WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
 338             inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
 339        return dentry;
 340}
 341
 342static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
 343{
 344        struct ovl_entry *oe = dentry->d_fsdata;
 345        unsigned int i;
 346        int ret = 1;
 347
 348        for (i = 0; i < oe->numlower; i++) {
 349                struct dentry *d = oe->lowerstack[i].dentry;
 350
 351                if (d->d_flags & DCACHE_OP_REVALIDATE) {
 352                        ret = d->d_op->d_revalidate(d, flags);
 353                        if (ret < 0)
 354                                return ret;
 355                        if (!ret) {
 356                                if (!(flags & LOOKUP_RCU))
 357                                        d_invalidate(d);
 358                                return -ESTALE;
 359                        }
 360                }
 361        }
 362        return 1;
 363}
 364
 365static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
 366{
 367        struct ovl_entry *oe = dentry->d_fsdata;
 368        unsigned int i;
 369        int ret = 1;
 370
 371        for (i = 0; i < oe->numlower; i++) {
 372                struct dentry *d = oe->lowerstack[i].dentry;
 373
 374                if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) {
 375                        ret = d->d_op->d_weak_revalidate(d, flags);
 376                        if (ret <= 0)
 377                                break;
 378                }
 379        }
 380        return ret;
 381}
 382
 383static const struct dentry_operations ovl_dentry_operations = {
 384        .d_release = ovl_dentry_release,
 385        .d_real = ovl_d_real,
 386};
 387
 388static const struct dentry_operations ovl_reval_dentry_operations = {
 389        .d_release = ovl_dentry_release,
 390        .d_real = ovl_d_real,
 391        .d_revalidate = ovl_dentry_revalidate,
 392        .d_weak_revalidate = ovl_dentry_weak_revalidate,
 393};
 394
 395static struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
 396{
 397        size_t size = offsetof(struct ovl_entry, lowerstack[numlower]);
 398        struct ovl_entry *oe = kzalloc(size, GFP_KERNEL);
 399
 400        if (oe)
 401                oe->numlower = numlower;
 402
 403        return oe;
 404}
 405
 406static bool ovl_dentry_remote(struct dentry *dentry)
 407{
 408        return dentry->d_flags &
 409                (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE |
 410                 DCACHE_OP_REAL);
 411}
 412
 413static bool ovl_dentry_weird(struct dentry *dentry)
 414{
 415        return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT |
 416                                  DCACHE_MANAGE_TRANSIT |
 417                                  DCACHE_OP_HASH |
 418                                  DCACHE_OP_COMPARE);
 419}
 420
 421static inline struct dentry *ovl_lookup_real(struct dentry *dir,
 422                                             const struct qstr *name)
 423{
 424        struct dentry *dentry;
 425
 426        dentry = lookup_one_len_unlocked(name->name, dir, name->len);
 427
 428        if (IS_ERR(dentry)) {
 429                if (PTR_ERR(dentry) == -ENOENT)
 430                        dentry = NULL;
 431        } else if (!dentry->d_inode) {
 432                dput(dentry);
 433                dentry = NULL;
 434        } else if (ovl_dentry_weird(dentry)) {
 435                dput(dentry);
 436                /* Don't support traversing automounts and other weirdness */
 437                dentry = ERR_PTR(-EREMOTE);
 438        }
 439        return dentry;
 440}
 441
 442/*
 443 * Returns next layer in stack starting from top.
 444 * Returns -1 if this is the last layer.
 445 */
 446int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
 447{
 448        struct ovl_entry *oe = dentry->d_fsdata;
 449
 450        BUG_ON(idx < 0);
 451        if (idx == 0) {
 452                ovl_path_upper(dentry, path);
 453                if (path->dentry)
 454                        return oe->numlower ? 1 : -1;
 455                idx++;
 456        }
 457        BUG_ON(idx > oe->numlower);
 458        *path = oe->lowerstack[idx - 1];
 459
 460        return (idx < oe->numlower) ? idx + 1 : -1;
 461}
  462
     /*
      * ovl_lookup - look up a name in an overlay directory
      * @dir:    inode of the parent directory (not referenced in the body)
      * @dentry: overlay dentry being looked up; its parent's ovl_entry
      *          supplies the upper directory and the lower stack to search
      * @flags:  lookup flags (not referenced in the body)
      *
      * Looks the name up in the parent's upper directory and then in each
      * of the parent's lower layers, builds an ovl_entry holding the upper
      * dentry and the stack of lower paths found, creates the overlay inode
      * if anything was found, and attaches both to @dentry with d_add().
      * The real lookups run with the credentials from ovl_override_creds().
      *
      * Returns NULL on success -- including when nothing was found, in
      * which case d_add() is called with a NULL inode -- or an ERR_PTR.
      * On success the references to the real dentries are kept in the
      * ovl_entry; ovl_dentry_release() drops them.
      */
  463struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
  464                          unsigned int flags)
  465{
  466        struct ovl_entry *oe;
  467        const struct cred *old_cred;
             /* overlay entry of the parent directory */
  468        struct ovl_entry *poe = dentry->d_parent->d_fsdata;
             /* lower paths found so far; stack[0..ctr) are valid */
  469        struct path *stack = NULL;
  470        struct dentry *upperdir, *upperdentry = NULL;
  471        unsigned int ctr = 0;
  472        struct inode *inode = NULL;
             /* true once nothing below the upper layer may show through */
  473        bool upperopaque = false;
             /* prev: the most recently accepted real dentry (upper or lower) */
  474        struct dentry *this, *prev = NULL;
  475        unsigned int i;
  476        int err;
  477
  478        old_cred = ovl_override_creds(dentry->d_sb);
             /* Upper layer first, if the parent has an upper directory. */
  479        upperdir = ovl_upperdentry_dereference(poe);
  480        if (upperdir) {
  481                this = ovl_lookup_real(upperdir, &dentry->d_name);
  482                err = PTR_ERR(this);
  483                if (IS_ERR(this))
  484                        goto out;
  485
  486                if (this) {
  487                        if (unlikely(ovl_dentry_remote(this))) {
  488                                dput(this);
  489                                err = -EREMOTE;
  490                                goto out;
  491                        }
                             /*
                              * A whiteout in upper is dropped and the lower
                              * layers are not searched; an opaque upper
                              * directory is kept, also without searching them.
                              */
  492                        if (ovl_is_whiteout(this)) {
  493                                dput(this);
  494                                this = NULL;
  495                                upperopaque = true;
  496                        } else if (poe->numlower && ovl_is_opaquedir(this)) {
  497                                upperopaque = true;
  498                        }
  499                }
  500                upperdentry = prev = this;
  501        }
  502
             /* One slot per parent lower layer is the most that can be needed. */
  503        if (!upperopaque && poe->numlower) {
  504                err = -ENOMEM;
  505                stack = kcalloc(poe->numlower, sizeof(struct path), GFP_KERNEL);
  506                if (!stack)
  507                        goto out_put_upper;
  508        }
  509
             /* Walk the parent's lower layers from the top down. */
  510        for (i = 0; !upperopaque && i < poe->numlower; i++) {
  511                bool opaque = false;
  512                struct path lowerpath = poe->lowerstack[i];
  513
  514                this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name);
  515                err = PTR_ERR(this);
  516                if (IS_ERR(this)) {
  517                        /*
  518                         * If it's positive, then treat ENAMETOOLONG as ENOENT.
  519                         */
  520                        if (err == -ENAMETOOLONG && (upperdentry || ctr))
  521                                continue;
  522                        goto out_put;
  523                }
  524                if (!this)
  525                        continue;
                     /* A whiteout here ends the search; it is not recorded. */
  526                if (ovl_is_whiteout(this)) {
  527                        dput(this);
  528                        break;
  529                }
  530                /*
  531                 * Only makes sense to check opaque dir if this is not the
  532                 * lowermost layer.
  533                 */
  534                if (i < poe->numlower - 1 && ovl_is_opaquedir(this))
  535                        opaque = true;
  536
                     /*
                      * If either the previously accepted dentry or this one
                      * is a non-directory, this one is dropped and the walk
                      * ends.
                      */
  537                if (prev && (!S_ISDIR(prev->d_inode->i_mode) ||
  538                             !S_ISDIR(this->d_inode->i_mode))) {
  539                        /*
  540                         * FIXME: check for upper-opaqueness maybe better done
  541                         * in remove code.
  542                         */
  543                        if (prev == upperdentry)
  544                                upperopaque = true;
  545                        dput(this);
  546                        break;
  547                }
  548                /*
  549                 * If this is a non-directory then stop here.
  550                 */
  551                if (!S_ISDIR(this->d_inode->i_mode))
  552                        opaque = true;
  553
                     /* Record this layer; the stack slot now holds the reference. */
  554                stack[ctr].dentry = this;
  555                stack[ctr].mnt = lowerpath.mnt;
  556                ctr++;
  557                prev = this;
  558                if (opaque)
  559                        break;
  560        }
  561
  562        oe = ovl_alloc_entry(ctr);
  563        err = -ENOMEM;
  564        if (!oe)
  565                goto out_put;
  566
             /* Something was found: create the overlay inode for it. */
  567        if (upperdentry || ctr) {
  568                struct dentry *realdentry;
  569                struct inode *realinode;
  570
  571                realdentry = upperdentry ? upperdentry : stack[0].dentry;
  572                realinode = d_inode(realdentry);
  573
  574                err = -ENOMEM;
                     /*
                      * A non-directory upper goes through ovl_get_inode();
                      * everything else gets a new inode initialised here.
                      */
  575                if (upperdentry && !d_is_dir(upperdentry)) {
  576                        inode = ovl_get_inode(dentry->d_sb, realinode);
  577                } else {
  578                        inode = ovl_new_inode(dentry->d_sb, realinode->i_mode);
  579                        if (inode)
  580                                ovl_inode_init(inode, realinode, !!upperdentry);
  581                }
  582                if (!inode)
  583                        goto out_free_oe;
  584                ovl_copyattr(realdentry->d_inode, inode);
  585        }
  586
             /* Success: move the collected state into oe and attach it. */
  587        revert_creds(old_cred);
  588        oe->opaque = upperopaque;
  589        oe->__upperdentry = upperdentry;
  590        memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
  591        kfree(stack);
  592        dentry->d_fsdata = oe;
  593        d_add(dentry, inode);
  594
  595        return NULL;
  596
             /* Error unwinding: each label undoes one more acquisition. */
  597out_free_oe:
  598        kfree(oe);
  599out_put:
  600        for (i = 0; i < ctr; i++)
  601                dput(stack[i].dentry);
  602        kfree(stack);
  603out_put_upper:
  604        dput(upperdentry);
  605out:
  606        revert_creds(old_cred);
  607        return ERR_PTR(err);
  608}
 609
 610struct file *ovl_path_open(struct path *path, int flags)
 611{
 612        return dentry_open(path, flags | O_NOATIME, current_cred());
 613}
 614
 615static void ovl_put_super(struct super_block *sb)
 616{
 617        struct ovl_fs *ufs = sb->s_fs_info;
 618        unsigned i;
 619
 620        dput(ufs->workdir);
 621        mntput(ufs->upper_mnt);
 622        for (i = 0; i < ufs->numlower; i++)
 623                mntput(ufs->lower_mnt[i]);
 624        kfree(ufs->lower_mnt);
 625
 626        kfree(ufs->config.lowerdir);
 627        kfree(ufs->config.upperdir);
 628        kfree(ufs->config.workdir);
 629        put_cred(ufs->creator_cred);
 630        kfree(ufs);
 631}
 632
 633/**
 634 * ovl_statfs
 635 * @sb: The overlayfs super block
 636 * @buf: The struct kstatfs to fill in with stats
 637 *
 638 * Get the filesystem statistics.  As writes always target the upper layer
 639 * filesystem pass the statfs to the upper filesystem (if it exists)
 640 */
 641static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
 642{
 643        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 644        struct dentry *root_dentry = dentry->d_sb->s_root;
 645        struct path path;
 646        int err;
 647
 648        ovl_path_real(root_dentry, &path);
 649
 650        err = vfs_statfs(&path, buf);
 651        if (!err) {
 652                buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen);
 653                buf->f_type = OVERLAYFS_SUPER_MAGIC;
 654        }
 655
 656        return err;
 657}
 658
 659/**
 660 * ovl_show_options
 661 *
 662 * Prints the mount options for a given superblock.
 663 * Returns zero; does not fail.
 664 */
 665static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 666{
 667        struct super_block *sb = dentry->d_sb;
 668        struct ovl_fs *ufs = sb->s_fs_info;
 669
 670        seq_show_option(m, "lowerdir", ufs->config.lowerdir);
 671        if (ufs->config.upperdir) {
 672                seq_show_option(m, "upperdir", ufs->config.upperdir);
 673                seq_show_option(m, "workdir", ufs->config.workdir);
 674        }
 675        if (ufs->config.default_permissions)
 676                seq_puts(m, ",default_permissions");
 677        return 0;
 678}
 679
 680static int ovl_remount(struct super_block *sb, int *flags, char *data)
 681{
 682        struct ovl_fs *ufs = sb->s_fs_info;
 683
 684        if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
 685                return -EROFS;
 686
 687        return 0;
 688}
 689
 690static const struct super_operations ovl_super_operations = {
 691        .put_super      = ovl_put_super,
 692        .statfs         = ovl_statfs,
 693        .show_options   = ovl_show_options,
 694        .remount_fs     = ovl_remount,
 695        .drop_inode     = generic_delete_inode,
 696};
 697
 698enum {
 699        OPT_LOWERDIR,
 700        OPT_UPPERDIR,
 701        OPT_WORKDIR,
 702        OPT_DEFAULT_PERMISSIONS,
 703        OPT_ERR,
 704};
 705
 706static const match_table_t ovl_tokens = {
 707        {OPT_LOWERDIR,                  "lowerdir=%s"},
 708        {OPT_UPPERDIR,                  "upperdir=%s"},
 709        {OPT_WORKDIR,                   "workdir=%s"},
 710        {OPT_DEFAULT_PERMISSIONS,       "default_permissions"},
 711        {OPT_ERR,                       NULL}
 712};
 713
 714static char *ovl_next_opt(char **s)
 715{
 716        char *sbegin = *s;
 717        char *p;
 718
 719        if (sbegin == NULL)
 720                return NULL;
 721
 722        for (p = sbegin; *p; p++) {
 723                if (*p == '\\') {
 724                        p++;
 725                        if (!*p)
 726                                break;
 727                } else if (*p == ',') {
 728                        *p = '\0';
 729                        *s = p + 1;
 730                        return sbegin;
 731                }
 732        }
 733        *s = NULL;
 734        return sbegin;
 735}
 736
 737static int ovl_parse_opt(char *opt, struct ovl_config *config)
 738{
 739        char *p;
 740
 741        while ((p = ovl_next_opt(&opt)) != NULL) {
 742                int token;
 743                substring_t args[MAX_OPT_ARGS];
 744
 745                if (!*p)
 746                        continue;
 747
 748                token = match_token(p, ovl_tokens, args);
 749                switch (token) {
 750                case OPT_UPPERDIR:
 751                        kfree(config->upperdir);
 752                        config->upperdir = match_strdup(&args[0]);
 753                        if (!config->upperdir)
 754                                return -ENOMEM;
 755                        break;
 756
 757                case OPT_LOWERDIR:
 758                        kfree(config->lowerdir);
 759                        config->lowerdir = match_strdup(&args[0]);
 760                        if (!config->lowerdir)
 761                                return -ENOMEM;
 762                        break;
 763
 764                case OPT_WORKDIR:
 765                        kfree(config->workdir);
 766                        config->workdir = match_strdup(&args[0]);
 767                        if (!config->workdir)
 768                                return -ENOMEM;
 769                        break;
 770
 771                case OPT_DEFAULT_PERMISSIONS:
 772                        config->default_permissions = true;
 773                        break;
 774
 775                default:
 776                        pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
 777                        return -EINVAL;
 778                }
 779        }
 780
 781        /* Workdir is useless in non-upper mount */
 782        if (!config->upperdir && config->workdir) {
 783                pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
 784                        config->workdir);
 785                kfree(config->workdir);
 786                config->workdir = NULL;
 787        }
 788
 789        return 0;
 790}
 791
 792#define OVL_WORKDIR_NAME "work"
 793
 794static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
 795                                         struct dentry *dentry)
 796{
 797        struct inode *dir = dentry->d_inode;
 798        struct dentry *work;
 799        int err;
 800        bool retried = false;
 801
 802        err = mnt_want_write(mnt);
 803        if (err)
 804                return ERR_PTR(err);
 805
 806        inode_lock_nested(dir, I_MUTEX_PARENT);
 807retry:
 808        work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
 809                              strlen(OVL_WORKDIR_NAME));
 810
 811        if (!IS_ERR(work)) {
 812                struct kstat stat = {
 813                        .mode = S_IFDIR | 0,
 814                };
 815                struct iattr attr = {
 816                        .ia_valid = ATTR_MODE,
 817                        .ia_mode = stat.mode,
 818                };
 819
 820                if (work->d_inode) {
 821                        err = -EEXIST;
 822                        if (retried)
 823                                goto out_dput;
 824
 825                        retried = true;
 826                        ovl_workdir_cleanup(dir, mnt, work, 0);
 827                        dput(work);
 828                        goto retry;
 829                }
 830
 831                err = ovl_create_real(dir, work, &stat, NULL, NULL, true);
 832                if (err)
 833                        goto out_dput;
 834
 835                /*
 836                 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
 837                 *
 838                 * a) success (there was a POSIX ACL xattr and was removed)
 839                 * b) -ENODATA (there was no POSIX ACL xattr)
 840                 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
 841                 *
 842                 * There are various other error values that could effectively
 843                 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
 844                 * if the xattr name is too long), but the set of filesystems
 845                 * allowed as upper are limited to "normal" ones, where checking
 846                 * for the above two errors is sufficient.
 847                 */
 848                err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
 849                if (err && err != -ENODATA && err != -EOPNOTSUPP)
 850                        goto out_dput;
 851
 852                err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
 853                if (err && err != -ENODATA && err != -EOPNOTSUPP)
 854                        goto out_dput;
 855
 856                /* Clear any inherited mode bits */
 857                inode_lock(work->d_inode);
 858                err = notify_change(work, &attr, NULL);
 859                inode_unlock(work->d_inode);
 860                if (err)
 861                        goto out_dput;
 862        }
 863out_unlock:
 864        inode_unlock(dir);
 865        mnt_drop_write(mnt);
 866
 867        return work;
 868
 869out_dput:
 870        dput(work);
 871        work = ERR_PTR(err);
 872        goto out_unlock;
 873}
 874
 875static void ovl_unescape(char *s)
 876{
 877        char *d = s;
 878
 879        for (;; s++, d++) {
 880                if (*s == '\\')
 881                        s++;
 882                *d = *s;
 883                if (!*s)
 884                        break;
 885        }
 886}
 887
 888static int ovl_mount_dir_noesc(const char *name, struct path *path)
 889{
 890        int err = -EINVAL;
 891
 892        if (!*name) {
 893                pr_err("overlayfs: empty lowerdir\n");
 894                goto out;
 895        }
 896        err = kern_path(name, LOOKUP_FOLLOW, path);
 897        if (err) {
 898                pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
 899                goto out;
 900        }
 901        err = -EINVAL;
 902        if (ovl_dentry_weird(path->dentry)) {
 903                pr_err("overlayfs: filesystem on '%s' not supported\n", name);
 904                goto out_put;
 905        }
 906        if (!S_ISDIR(path->dentry->d_inode->i_mode)) {
 907                pr_err("overlayfs: '%s' not a directory\n", name);
 908                goto out_put;
 909        }
 910        return 0;
 911
 912out_put:
 913        path_put(path);
 914out:
 915        return err;
 916}
 917
 918static int ovl_mount_dir(const char *name, struct path *path)
 919{
 920        int err = -ENOMEM;
 921        char *tmp = kstrdup(name, GFP_KERNEL);
 922
 923        if (tmp) {
 924                ovl_unescape(tmp);
 925                err = ovl_mount_dir_noesc(tmp, path);
 926
 927                if (!err)
 928                        if (ovl_dentry_remote(path->dentry)) {
 929                                pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
 930                                       tmp);
 931                                path_put(path);
 932                                err = -EINVAL;
 933                        }
 934                kfree(tmp);
 935        }
 936        return err;
 937}
 938
 939static int ovl_lower_dir(const char *name, struct path *path, long *namelen,
 940                         int *stack_depth, bool *remote)
 941{
 942        int err;
 943        struct kstatfs statfs;
 944
 945        err = ovl_mount_dir_noesc(name, path);
 946        if (err)
 947                goto out;
 948
 949        err = vfs_statfs(path, &statfs);
 950        if (err) {
 951                pr_err("overlayfs: statfs failed on '%s'\n", name);
 952                goto out_put;
 953        }
 954        *namelen = max(*namelen, statfs.f_namelen);
 955        *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
 956
 957        if (ovl_dentry_remote(path->dentry))
 958                *remote = true;
 959
 960        return 0;
 961
 962out_put:
 963        path_put(path);
 964out:
 965        return err;
 966}
 967
 968/* Workdir should not be subdir of upperdir and vice versa */
 969static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
 970{
 971        bool ok = false;
 972
 973        if (workdir != upperdir) {
 974                ok = (lock_rename(workdir, upperdir) == NULL);
 975                unlock_rename(workdir, upperdir);
 976        }
 977        return ok;
 978}
 979
 980static unsigned int ovl_split_lowerdirs(char *str)
 981{
 982        unsigned int ctr = 1;
 983        char *s, *d;
 984
 985        for (s = d = str;; s++, d++) {
 986                if (*s == '\\') {
 987                        s++;
 988                } else if (*s == ':') {
 989                        *d = '\0';
 990                        ctr++;
 991                        continue;
 992                }
 993                *d = *s;
 994                if (!*s)
 995                        break;
 996        }
 997        return ctr;
 998}
 999
1000static int __maybe_unused
1001ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
1002                        struct dentry *dentry, struct inode *inode,
1003                        const char *name, void *buffer, size_t size)
1004{
1005        return ovl_xattr_get(dentry, handler->name, buffer, size);
1006}
1007
1008static int __maybe_unused
1009ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
1010                        struct dentry *dentry, struct inode *inode,
1011                        const char *name, const void *value,
1012                        size_t size, int flags)
1013{
1014        struct dentry *workdir = ovl_workdir(dentry);
1015        struct inode *realinode = ovl_inode_real(inode, NULL);
1016        struct posix_acl *acl = NULL;
1017        int err;
1018
1019        /* Check that everything is OK before copy-up */
1020        if (value) {
1021                acl = posix_acl_from_xattr(&init_user_ns, value, size);
1022                if (IS_ERR(acl))
1023                        return PTR_ERR(acl);
1024        }
1025        err = -EOPNOTSUPP;
1026        if (!IS_POSIXACL(d_inode(workdir)))
1027                goto out_acl_release;
1028        if (!realinode->i_op->set_acl)
1029                goto out_acl_release;
1030        if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
1031                err = acl ? -EACCES : 0;
1032                goto out_acl_release;
1033        }
1034        err = -EPERM;
1035        if (!inode_owner_or_capable(inode))
1036                goto out_acl_release;
1037
1038        posix_acl_release(acl);
1039
1040        /*
1041         * Check if sgid bit needs to be cleared (actual setacl operation will
1042         * be done with mounter's capabilities and so that won't do it for us).
1043         */
1044        if (unlikely(inode->i_mode & S_ISGID) &&
1045            handler->flags == ACL_TYPE_ACCESS &&
1046            !in_group_p(inode->i_gid) &&
1047            !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
1048                struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
1049
1050                err = ovl_setattr(dentry, &iattr);
1051                if (err)
1052                        return err;
1053        }
1054
1055        err = ovl_xattr_set(dentry, handler->name, value, size, flags);
1056        if (!err)
1057                ovl_copyattr(ovl_inode_real(inode, NULL), inode);
1058
1059        return err;
1060
1061out_acl_release:
1062        posix_acl_release(acl);
1063        return err;
1064}
1065
1066static int ovl_own_xattr_get(const struct xattr_handler *handler,
1067                             struct dentry *dentry, struct inode *inode,
1068                             const char *name, void *buffer, size_t size)
1069{
1070        return -EPERM;
1071}
1072
1073static int ovl_own_xattr_set(const struct xattr_handler *handler,
1074                             struct dentry *dentry, struct inode *inode,
1075                             const char *name, const void *value,
1076                             size_t size, int flags)
1077{
1078        return -EPERM;
1079}
1080
1081static int ovl_other_xattr_get(const struct xattr_handler *handler,
1082                               struct dentry *dentry, struct inode *inode,
1083                               const char *name, void *buffer, size_t size)
1084{
1085        return ovl_xattr_get(dentry, name, buffer, size);
1086}
1087
1088static int ovl_other_xattr_set(const struct xattr_handler *handler,
1089                               struct dentry *dentry, struct inode *inode,
1090                               const char *name, const void *value,
1091                               size_t size, int flags)
1092{
1093        return ovl_xattr_set(dentry, name, value, size, flags);
1094}
1095
1096static const struct xattr_handler __maybe_unused
1097ovl_posix_acl_access_xattr_handler = {
1098        .name = XATTR_NAME_POSIX_ACL_ACCESS,
1099        .flags = ACL_TYPE_ACCESS,
1100        .get = ovl_posix_acl_xattr_get,
1101        .set = ovl_posix_acl_xattr_set,
1102};
1103
1104static const struct xattr_handler __maybe_unused
1105ovl_posix_acl_default_xattr_handler = {
1106        .name = XATTR_NAME_POSIX_ACL_DEFAULT,
1107        .flags = ACL_TYPE_DEFAULT,
1108        .get = ovl_posix_acl_xattr_get,
1109        .set = ovl_posix_acl_xattr_set,
1110};
1111
1112static const struct xattr_handler ovl_own_xattr_handler = {
1113        .prefix = OVL_XATTR_PREFIX,
1114        .get = ovl_own_xattr_get,
1115        .set = ovl_own_xattr_set,
1116};
1117
1118static const struct xattr_handler ovl_other_xattr_handler = {
1119        .prefix = "", /* catch all */
1120        .get = ovl_other_xattr_get,
1121        .set = ovl_other_xattr_set,
1122};
1123
1124static const struct xattr_handler *ovl_xattr_handlers[] = {
1125#ifdef CONFIG_FS_POSIX_ACL
1126        &ovl_posix_acl_access_xattr_handler,
1127        &ovl_posix_acl_default_xattr_handler,
1128#endif
1129        &ovl_own_xattr_handler,
1130        &ovl_other_xattr_handler,
1131        NULL
1132};
1133
1134static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1135{
1136        struct path upperpath = { NULL, NULL };
1137        struct path workpath = { NULL, NULL };
1138        struct dentry *root_dentry;
1139        struct inode *realinode;
1140        struct ovl_entry *oe;
1141        struct ovl_fs *ufs;
1142        struct path *stack = NULL;
1143        char *lowertmp;
1144        char *lower;
1145        unsigned int numlower;
1146        unsigned int stacklen = 0;
1147        unsigned int i;
1148        bool remote = false;
1149        int err;
1150
1151        err = -ENOMEM;
1152        ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
1153        if (!ufs)
1154                goto out;
1155
1156        err = ovl_parse_opt((char *) data, &ufs->config);
1157        if (err)
1158                goto out_free_config;
1159
1160        err = -EINVAL;
1161        if (!ufs->config.lowerdir) {
1162                if (!silent)
1163                        pr_err("overlayfs: missing 'lowerdir'\n");
1164                goto out_free_config;
1165        }
1166
1167        sb->s_stack_depth = 0;
1168        sb->s_maxbytes = MAX_LFS_FILESIZE;
1169        if (ufs->config.upperdir) {
1170                if (!ufs->config.workdir) {
1171                        pr_err("overlayfs: missing 'workdir'\n");
1172                        goto out_free_config;
1173                }
1174
1175                err = ovl_mount_dir(ufs->config.upperdir, &upperpath);
1176                if (err)
1177                        goto out_free_config;
1178
1179                /* Upper fs should not be r/o */
1180                if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) {
1181                        pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
1182                        err = -EINVAL;
1183                        goto out_put_upperpath;
1184                }
1185
1186                err = ovl_mount_dir(ufs->config.workdir, &workpath);
1187                if (err)
1188                        goto out_put_upperpath;
1189
1190                err = -EINVAL;
1191                if (upperpath.mnt != workpath.mnt) {
1192                        pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
1193                        goto out_put_workpath;
1194                }
1195                if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) {
1196                        pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
1197                        goto out_put_workpath;
1198                }
1199                sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth;
1200        }
1201        err = -ENOMEM;
1202        lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL);
1203        if (!lowertmp)
1204                goto out_put_workpath;
1205
1206        err = -EINVAL;
1207        stacklen = ovl_split_lowerdirs(lowertmp);
1208        if (stacklen > OVL_MAX_STACK) {
1209                pr_err("overlayfs: too many lower directories, limit is %d\n",
1210                       OVL_MAX_STACK);
1211                goto out_free_lowertmp;
1212        } else if (!ufs->config.upperdir && stacklen == 1) {
1213                pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
1214                goto out_free_lowertmp;
1215        }
1216
1217        stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
1218        if (!stack)
1219                goto out_free_lowertmp;
1220
1221        lower = lowertmp;
1222        for (numlower = 0; numlower < stacklen; numlower++) {
1223                err = ovl_lower_dir(lower, &stack[numlower],
1224                                    &ufs->lower_namelen, &sb->s_stack_depth,
1225                                    &remote);
1226                if (err)
1227                        goto out_put_lowerpath;
1228
1229                lower = strchr(lower, '\0') + 1;
1230        }
1231
1232        err = -EINVAL;
1233        sb->s_stack_depth++;
1234        if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
1235                pr_err("overlayfs: maximum fs stacking depth exceeded\n");
1236                goto out_put_lowerpath;
1237        }
1238
1239        if (ufs->config.upperdir) {
1240                ufs->upper_mnt = clone_private_mount(&upperpath);
1241                err = PTR_ERR(ufs->upper_mnt);
1242                if (IS_ERR(ufs->upper_mnt)) {
1243                        pr_err("overlayfs: failed to clone upperpath\n");
1244                        goto out_put_lowerpath;
1245                }
1246                /* Don't inherit atime flags */
1247                ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
1248
1249                sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran;
1250
1251                ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
1252                err = PTR_ERR(ufs->workdir);
1253                if (IS_ERR(ufs->workdir)) {
1254                        pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
1255                                ufs->config.workdir, OVL_WORKDIR_NAME, -err);
1256                        sb->s_flags |= MS_RDONLY;
1257                        ufs->workdir = NULL;
1258                }
1259
1260                /*
1261                 * Upper should support d_type, else whiteouts are visible.
1262                 * Given workdir and upper are on same fs, we can do
1263                 * iterate_dir() on workdir. This check requires successful
1264                 * creation of workdir in previous step.
1265                 */
1266                if (ufs->workdir) {
1267                        err = ovl_check_d_type_supported(&workpath);
1268                        if (err < 0)
1269                                goto out_put_workdir;
1270
1271                        /*
1272                         * We allowed this configuration and don't want to
1273                         * break users over kernel upgrade. So warn instead
1274                         * of erroring out.
1275                         */
1276                        if (!err)
1277                                pr_warn("overlayfs: upper fs needs to support d_type.\n");
1278                }
1279        }
1280
1281        err = -ENOMEM;
1282        ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL);
1283        if (ufs->lower_mnt == NULL)
1284                goto out_put_workdir;
1285        for (i = 0; i < numlower; i++) {
1286                struct vfsmount *mnt = clone_private_mount(&stack[i]);
1287
1288                err = PTR_ERR(mnt);
1289                if (IS_ERR(mnt)) {
1290                        pr_err("overlayfs: failed to clone lowerpath\n");
1291                        goto out_put_lower_mnt;
1292                }
1293                /*
1294                 * Make lower_mnt R/O.  That way fchmod/fchown on lower file
1295                 * will fail instead of modifying lower fs.
1296                 */
1297                mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
1298
1299                ufs->lower_mnt[ufs->numlower] = mnt;
1300                ufs->numlower++;
1301        }
1302
1303        /* If the upper fs is nonexistent, we mark overlayfs r/o too */
1304        if (!ufs->upper_mnt)
1305                sb->s_flags |= MS_RDONLY;
1306
1307        if (remote)
1308                sb->s_d_op = &ovl_reval_dentry_operations;
1309        else
1310                sb->s_d_op = &ovl_dentry_operations;
1311
1312        ufs->creator_cred = prepare_creds();
1313        if (!ufs->creator_cred)
1314                goto out_put_lower_mnt;
1315
1316        err = -ENOMEM;
1317        oe = ovl_alloc_entry(numlower);
1318        if (!oe)
1319                goto out_put_cred;
1320
1321        sb->s_magic = OVERLAYFS_SUPER_MAGIC;
1322        sb->s_op = &ovl_super_operations;
1323        sb->s_xattr = ovl_xattr_handlers;
1324        sb->s_fs_info = ufs;
1325        sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
1326
1327        root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR));
1328        if (!root_dentry)
1329                goto out_free_oe;
1330
1331        mntput(upperpath.mnt);
1332        for (i = 0; i < numlower; i++)
1333                mntput(stack[i].mnt);
1334        path_put(&workpath);
1335        kfree(lowertmp);
1336
1337        oe->__upperdentry = upperpath.dentry;
1338        for (i = 0; i < numlower; i++) {
1339                oe->lowerstack[i].dentry = stack[i].dentry;
1340                oe->lowerstack[i].mnt = ufs->lower_mnt[i];
1341        }
1342        kfree(stack);
1343
1344        root_dentry->d_fsdata = oe;
1345
1346        realinode = d_inode(ovl_dentry_real(root_dentry));
1347        ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry);
1348        ovl_copyattr(realinode, d_inode(root_dentry));
1349
1350        sb->s_root = root_dentry;
1351
1352        return 0;
1353
1354out_free_oe:
1355        kfree(oe);
1356out_put_cred:
1357        put_cred(ufs->creator_cred);
1358out_put_lower_mnt:
1359        for (i = 0; i < ufs->numlower; i++)
1360                mntput(ufs->lower_mnt[i]);
1361        kfree(ufs->lower_mnt);
1362out_put_workdir:
1363        dput(ufs->workdir);
1364        mntput(ufs->upper_mnt);
1365out_put_lowerpath:
1366        for (i = 0; i < numlower; i++)
1367                path_put(&stack[i]);
1368        kfree(stack);
1369out_free_lowertmp:
1370        kfree(lowertmp);
1371out_put_workpath:
1372        path_put(&workpath);
1373out_put_upperpath:
1374        path_put(&upperpath);
1375out_free_config:
1376        kfree(ufs->config.lowerdir);
1377        kfree(ufs->config.upperdir);
1378        kfree(ufs->config.workdir);
1379        kfree(ufs);
1380out:
1381        return err;
1382}
1383
1384static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
1385                                const char *dev_name, void *raw_data)
1386{
1387        return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
1388}
1389
1390static struct file_system_type ovl_fs_type = {
1391        .owner          = THIS_MODULE,
1392        .name           = "overlay",
1393        .mount          = ovl_mount,
1394        .kill_sb        = kill_anon_super,
1395};
1396MODULE_ALIAS_FS("overlay");
1397
1398static int __init ovl_init(void)
1399{
1400        return register_filesystem(&ovl_fs_type);
1401}
1402
1403static void __exit ovl_exit(void)
1404{
1405        unregister_filesystem(&ovl_fs_type);
1406}
1407
/* Hook the init and exit routines into module load and unload. */
module_init(ovl_init);
module_exit(ovl_exit);
1410