linux/fs/overlayfs/super.c
<<
>>
Prefs
   1/*
   2 *
   3 * Copyright (C) 2011 Novell Inc.
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms of the GNU General Public License version 2 as published by
   7 * the Free Software Foundation.
   8 */
   9
  10#include <uapi/linux/magic.h>
  11#include <linux/fs.h>
  12#include <linux/namei.h>
  13#include <linux/xattr.h>
  14#include <linux/mount.h>
  15#include <linux/parser.h>
  16#include <linux/module.h>
  17#include <linux/statfs.h>
  18#include <linux/seq_file.h>
  19#include <linux/posix_acl_xattr.h>
  20#include <linux/exportfs.h>
  21#include "overlayfs.h"
  22
  23MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
  24MODULE_DESCRIPTION("Overlay filesystem");
  25MODULE_LICENSE("GPL");
  26
  27
  28struct ovl_dir_cache;
  29
  30#define OVL_MAX_STACK 500
  31
  32static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
  33module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
  34MODULE_PARM_DESC(ovl_redirect_dir_def,
  35                 "Default to on or off for the redirect_dir feature");
  36
  37static bool ovl_redirect_always_follow =
  38        IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
  39module_param_named(redirect_always_follow, ovl_redirect_always_follow,
  40                   bool, 0644);
  41MODULE_PARM_DESC(ovl_redirect_always_follow,
  42                 "Follow redirects even if redirect_dir feature is turned off");
  43
  44static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
  45module_param_named(index, ovl_index_def, bool, 0644);
  46MODULE_PARM_DESC(ovl_index_def,
  47                 "Default to on or off for the inodes index feature");
  48
  49static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
  50module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
  51MODULE_PARM_DESC(ovl_nfs_export_def,
  52                 "Default to on or off for the NFS export feature");
  53
  54static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
  55module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
  56MODULE_PARM_DESC(ovl_xino_auto_def,
  57                 "Auto enable xino feature");
  58
  59static void ovl_entry_stack_free(struct ovl_entry *oe)
  60{
  61        unsigned int i;
  62
  63        for (i = 0; i < oe->numlower; i++)
  64                dput(oe->lowerstack[i].dentry);
  65}
  66
  67static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
  68module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
  69MODULE_PARM_DESC(ovl_metacopy_def,
  70                 "Default to on or off for the metadata only copy up feature");
  71
  72static void ovl_dentry_release(struct dentry *dentry)
  73{
  74        struct ovl_entry *oe = dentry->d_fsdata;
  75
  76        if (oe) {
  77                ovl_entry_stack_free(oe);
  78                kfree_rcu(oe, rcu);
  79        }
  80}
  81
/*
 * .d_real callback: map an overlay dentry to a "real" dentry.
 *
 * @dentry: the overlay dentry
 * @inode:  inode the caller wants a matching dentry for, or NULL
 *
 * Returns @dentry itself when @inode is the overlay inode or when @dentry is
 * not a regular file; otherwise the upper dentry (when its inode matches
 * @inode, or when @inode is NULL and the overlay inode has upper data), or
 * else whatever d_real() resolves the lower data dentry to.  If nothing
 * matches, a warning is emitted and @dentry is returned.
 */
static struct dentry *ovl_d_real(struct dentry *dentry,
				 const struct inode *inode)
{
	struct dentry *real;

	/* It's an overlay file */
	if (inode && d_inode(dentry) == inode)
		return dentry;

	/* Non-regular files have no separate real dentry to hand out */
	if (!d_is_reg(dentry)) {
		if (!inode || inode == d_inode(dentry))
			return dentry;
		goto bug;
	}

	/* Prefer the upper dentry when it is the one being asked for */
	real = ovl_dentry_upper(dentry);
	if (real && (inode == d_inode(real)))
		return real;

	/* No specific inode requested: upper is only usable if it has data */
	if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
		return real;

	real = ovl_dentry_lowerdata(dentry);
	if (!real)
		goto bug;

	/* Handle recursion */
	real = d_real(real, inode);

	if (!inode || inode == d_inode(real))
		return real;
bug:
	/* Falls back to the overlay dentry after warning */
	WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
	     inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
	return dentry;
}
 118
 119static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
 120{
 121        struct ovl_entry *oe = dentry->d_fsdata;
 122        unsigned int i;
 123        int ret = 1;
 124
 125        for (i = 0; i < oe->numlower; i++) {
 126                struct dentry *d = oe->lowerstack[i].dentry;
 127
 128                if (d->d_flags & DCACHE_OP_REVALIDATE) {
 129                        ret = d->d_op->d_revalidate(d, flags);
 130                        if (ret < 0)
 131                                return ret;
 132                        if (!ret) {
 133                                if (!(flags & LOOKUP_RCU))
 134                                        d_invalidate(d);
 135                                return -ESTALE;
 136                        }
 137                }
 138        }
 139        return 1;
 140}
 141
 142static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
 143{
 144        struct ovl_entry *oe = dentry->d_fsdata;
 145        unsigned int i;
 146        int ret = 1;
 147
 148        for (i = 0; i < oe->numlower; i++) {
 149                struct dentry *d = oe->lowerstack[i].dentry;
 150
 151                if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) {
 152                        ret = d->d_op->d_weak_revalidate(d, flags);
 153                        if (ret <= 0)
 154                                break;
 155                }
 156        }
 157        return ret;
 158}
 159
/* Dentry operations without any revalidation hooks. */
static const struct dentry_operations ovl_dentry_operations = {
	.d_release = ovl_dentry_release,
	.d_real = ovl_d_real,
};
 164
/*
 * Same as ovl_dentry_operations, plus the revalidation hooks that forward
 * to the lower dentries.
 */
static const struct dentry_operations ovl_reval_dentry_operations = {
	.d_release = ovl_dentry_release,
	.d_real = ovl_d_real,
	.d_revalidate = ovl_dentry_revalidate,
	.d_weak_revalidate = ovl_dentry_weak_revalidate,
};
 171
/* Slab cache that struct ovl_inode objects are allocated from and freed to. */
static struct kmem_cache *ovl_inode_cachep;
 173
 174static struct inode *ovl_alloc_inode(struct super_block *sb)
 175{
 176        struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
 177
 178        if (!oi)
 179                return NULL;
 180
 181        oi->cache = NULL;
 182        oi->redirect = NULL;
 183        oi->version = 0;
 184        oi->flags = 0;
 185        oi->__upperdentry = NULL;
 186        oi->lower = NULL;
 187        oi->lowerdata = NULL;
 188        mutex_init(&oi->lock);
 189
 190        return &oi->vfs_inode;
 191}
 192
 193static void ovl_i_callback(struct rcu_head *head)
 194{
 195        struct inode *inode = container_of(head, struct inode, i_rcu);
 196
 197        kmem_cache_free(ovl_inode_cachep, OVL_I(inode));
 198}
 199
 200static void ovl_destroy_inode(struct inode *inode)
 201{
 202        struct ovl_inode *oi = OVL_I(inode);
 203
 204        dput(oi->__upperdentry);
 205        iput(oi->lower);
 206        if (S_ISDIR(inode->i_mode))
 207                ovl_dir_cache_free(inode);
 208        else
 209                iput(oi->lowerdata);
 210        kfree(oi->redirect);
 211        mutex_destroy(&oi->lock);
 212
 213        call_rcu(&inode->i_rcu, ovl_i_callback);
 214}
 215
 216static void ovl_free_fs(struct ovl_fs *ofs)
 217{
 218        unsigned i;
 219
 220        dput(ofs->indexdir);
 221        dput(ofs->workdir);
 222        if (ofs->workdir_locked)
 223                ovl_inuse_unlock(ofs->workbasedir);
 224        dput(ofs->workbasedir);
 225        if (ofs->upperdir_locked)
 226                ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
 227        mntput(ofs->upper_mnt);
 228        for (i = 0; i < ofs->numlower; i++)
 229                mntput(ofs->lower_layers[i].mnt);
 230        for (i = 0; i < ofs->numlowerfs; i++)
 231                free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
 232        kfree(ofs->lower_layers);
 233        kfree(ofs->lower_fs);
 234
 235        kfree(ofs->config.lowerdir);
 236        kfree(ofs->config.upperdir);
 237        kfree(ofs->config.workdir);
 238        kfree(ofs->config.redirect_mode);
 239        if (ofs->creator_cred)
 240                put_cred(ofs->creator_cred);
 241        kfree(ofs);
 242}
 243
 244static void ovl_put_super(struct super_block *sb)
 245{
 246        struct ovl_fs *ofs = sb->s_fs_info;
 247
 248        ovl_free_fs(ofs);
 249}
 250
 251/* Sync real dirty inodes in upper filesystem (if it exists) */
 252static int ovl_sync_fs(struct super_block *sb, int wait)
 253{
 254        struct ovl_fs *ofs = sb->s_fs_info;
 255        struct super_block *upper_sb;
 256        int ret;
 257
 258        if (!ofs->upper_mnt)
 259                return 0;
 260
 261        /*
 262         * If this is a sync(2) call or an emergency sync, all the super blocks
 263         * will be iterated, including upper_sb, so no need to do anything.
 264         *
 265         * If this is a syncfs(2) call, then we do need to call
 266         * sync_filesystem() on upper_sb, but enough if we do it when being
 267         * called with wait == 1.
 268         */
 269        if (!wait)
 270                return 0;
 271
 272        upper_sb = ofs->upper_mnt->mnt_sb;
 273
 274        down_read(&upper_sb->s_umount);
 275        ret = sync_filesystem(upper_sb);
 276        up_read(&upper_sb->s_umount);
 277
 278        return ret;
 279}
 280
/**
 * ovl_statfs
 * @dentry: A dentry on the overlayfs super block
 * @buf: The struct kstatfs to fill in with stats
 *
 * Get the filesystem statistics.  As writes always target the upper layer
 * filesystem pass the statfs to the upper filesystem (if it exists)
 *
 * Returns the result of vfs_statfs() on the real path of the overlay root.
 * On success f_namelen and f_type are overridden with overlayfs values.
 */
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct dentry *root_dentry = dentry->d_sb->s_root;
	struct path path;
	int err;

	/* Statistics are always taken from the real path of the overlay root */
	ovl_path_real(root_dentry, &path);

	err = vfs_statfs(&path, buf);
	if (!err) {
		/* Report overlayfs' own name length limit and magic number */
		buf->f_namelen = ofs->namelen;
		buf->f_type = OVERLAYFS_SUPER_MAGIC;
	}

	return err;
}
 306
 307/* Will this overlay be forced to mount/remount ro? */
 308static bool ovl_force_readonly(struct ovl_fs *ofs)
 309{
 310        return (!ofs->upper_mnt || !ofs->workdir);
 311}
 312
 313static const char *ovl_redirect_mode_def(void)
 314{
 315        return ovl_redirect_dir_def ? "on" : "off";
 316}
 317
/* Values of the xino mount option as stored in the mount config. */
enum {
	OVL_XINO_OFF,
	OVL_XINO_AUTO,
	OVL_XINO_ON,
};

/* Option strings for the xino values; indexed by the OVL_XINO_* constants. */
static const char * const ovl_xino_str[] = {
	"off",
	"auto",
	"on",
};
 329
 330static inline int ovl_xino_def(void)
 331{
 332        return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
 333}
 334
 335/**
 336 * ovl_show_options
 337 *
 338 * Prints the mount options for a given superblock.
 339 * Returns zero; does not fail.
 340 */
 341static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 342{
 343        struct super_block *sb = dentry->d_sb;
 344        struct ovl_fs *ofs = sb->s_fs_info;
 345
 346        seq_show_option(m, "lowerdir", ofs->config.lowerdir);
 347        if (ofs->config.upperdir) {
 348                seq_show_option(m, "upperdir", ofs->config.upperdir);
 349                seq_show_option(m, "workdir", ofs->config.workdir);
 350        }
 351        if (ofs->config.default_permissions)
 352                seq_puts(m, ",default_permissions");
 353        if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
 354                seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
 355        if (ofs->config.index != ovl_index_def)
 356                seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
 357        if (ofs->config.nfs_export != ovl_nfs_export_def)
 358                seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
 359                                                "on" : "off");
 360        if (ofs->config.xino != ovl_xino_def())
 361                seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
 362        if (ofs->config.metacopy != ovl_metacopy_def)
 363                seq_printf(m, ",metacopy=%s",
 364                           ofs->config.metacopy ? "on" : "off");
 365        return 0;
 366}
 367
 368static int ovl_remount(struct super_block *sb, int *flags, char *data)
 369{
 370        struct ovl_fs *ofs = sb->s_fs_info;
 371
 372        if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
 373                return -EROFS;
 374
 375        return 0;
 376}
 377
/* Super block operations for overlayfs. */
static const struct super_operations ovl_super_operations = {
	.alloc_inode	= ovl_alloc_inode,
	.destroy_inode	= ovl_destroy_inode,
	.drop_inode	= generic_delete_inode,
	.put_super	= ovl_put_super,
	.sync_fs	= ovl_sync_fs,
	.statfs		= ovl_statfs,
	.show_options	= ovl_show_options,
	.remount_fs	= ovl_remount,
};
 388
/*
 * Token ids for the mount options in ovl_tokens.  OPT_ERR is the id of the
 * terminating table entry.
 */
enum {
	OPT_LOWERDIR,
	OPT_UPPERDIR,
	OPT_WORKDIR,
	OPT_DEFAULT_PERMISSIONS,
	OPT_REDIRECT_DIR,
	OPT_INDEX_ON,
	OPT_INDEX_OFF,
	OPT_NFS_EXPORT_ON,
	OPT_NFS_EXPORT_OFF,
	OPT_XINO_ON,
	OPT_XINO_OFF,
	OPT_XINO_AUTO,
	OPT_METACOPY_ON,
	OPT_METACOPY_OFF,
	OPT_ERR,
};
 406
/*
 * Mount option patterns handed to match_token() by ovl_parse_opt().
 * Options taking a free-form value use "%s"; on/off style options have one
 * entry per accepted value.
 */
static const match_table_t ovl_tokens = {
	{OPT_LOWERDIR,			"lowerdir=%s"},
	{OPT_UPPERDIR,			"upperdir=%s"},
	{OPT_WORKDIR,			"workdir=%s"},
	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},
	{OPT_INDEX_ON,			"index=on"},
	{OPT_INDEX_OFF,			"index=off"},
	{OPT_NFS_EXPORT_ON,		"nfs_export=on"},
	{OPT_NFS_EXPORT_OFF,		"nfs_export=off"},
	{OPT_XINO_ON,			"xino=on"},
	{OPT_XINO_OFF,			"xino=off"},
	{OPT_XINO_AUTO,			"xino=auto"},
	{OPT_METACOPY_ON,		"metacopy=on"},
	{OPT_METACOPY_OFF,		"metacopy=off"},
	{OPT_ERR,			NULL}
};
 424
 425static char *ovl_next_opt(char **s)
 426{
 427        char *sbegin = *s;
 428        char *p;
 429
 430        if (sbegin == NULL)
 431                return NULL;
 432
 433        for (p = sbegin; *p; p++) {
 434                if (*p == '\\') {
 435                        p++;
 436                        if (!*p)
 437                                break;
 438                } else if (*p == ',') {
 439                        *p = '\0';
 440                        *s = p + 1;
 441                        return sbegin;
 442                }
 443        }
 444        *s = NULL;
 445        return sbegin;
 446}
 447
 448static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
 449{
 450        if (strcmp(mode, "on") == 0) {
 451                config->redirect_dir = true;
 452                /*
 453                 * Does not make sense to have redirect creation without
 454                 * redirect following.
 455                 */
 456                config->redirect_follow = true;
 457        } else if (strcmp(mode, "follow") == 0) {
 458                config->redirect_follow = true;
 459        } else if (strcmp(mode, "off") == 0) {
 460                if (ovl_redirect_always_follow)
 461                        config->redirect_follow = true;
 462        } else if (strcmp(mode, "nofollow") != 0) {
 463                pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
 464                       mode);
 465                return -EINVAL;
 466        }
 467
 468        return 0;
 469}
 470
/*
 * Parse the comma separated mount option string @opt into @config.
 *
 * @opt is modified in place by ovl_next_opt().  String valued options are
 * duplicated into @config; a repeated option frees the earlier value.
 *
 * Returns 0 on success, -ENOMEM if a string could not be duplicated,
 * -EINVAL for an unrecognized option or an invalid redirect_dir mode.
 * On error, strings already stored in @config are left there for the
 * caller to free.
 */
static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
	char *p;
	int err;

	/* Start from the module default; "redirect_dir=" below replaces it */
	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
	if (!config->redirect_mode)
		return -ENOMEM;

	while ((p = ovl_next_opt(&opt)) != NULL) {
		int token;
		substring_t args[MAX_OPT_ARGS];

		/* Skip empty options, e.g. produced by ",," */
		if (!*p)
			continue;

		token = match_token(p, ovl_tokens, args);
		switch (token) {
		case OPT_UPPERDIR:
			kfree(config->upperdir);
			config->upperdir = match_strdup(&args[0]);
			if (!config->upperdir)
				return -ENOMEM;
			break;

		case OPT_LOWERDIR:
			kfree(config->lowerdir);
			config->lowerdir = match_strdup(&args[0]);
			if (!config->lowerdir)
				return -ENOMEM;
			break;

		case OPT_WORKDIR:
			kfree(config->workdir);
			config->workdir = match_strdup(&args[0]);
			if (!config->workdir)
				return -ENOMEM;
			break;

		case OPT_DEFAULT_PERMISSIONS:
			config->default_permissions = true;
			break;

		case OPT_REDIRECT_DIR:
			/* Only stored here; validated after the loop */
			kfree(config->redirect_mode);
			config->redirect_mode = match_strdup(&args[0]);
			if (!config->redirect_mode)
				return -ENOMEM;
			break;

		case OPT_INDEX_ON:
			config->index = true;
			break;

		case OPT_INDEX_OFF:
			config->index = false;
			break;

		case OPT_NFS_EXPORT_ON:
			config->nfs_export = true;
			break;

		case OPT_NFS_EXPORT_OFF:
			config->nfs_export = false;
			break;

		case OPT_XINO_ON:
			config->xino = OVL_XINO_ON;
			break;

		case OPT_XINO_OFF:
			config->xino = OVL_XINO_OFF;
			break;

		case OPT_XINO_AUTO:
			config->xino = OVL_XINO_AUTO;
			break;

		case OPT_METACOPY_ON:
			config->metacopy = true;
			break;

		case OPT_METACOPY_OFF:
			config->metacopy = false;
			break;

		default:
			pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
			return -EINVAL;
		}
	}

	/* Workdir is useless in non-upper mount */
	if (!config->upperdir && config->workdir) {
		pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
			config->workdir);
		kfree(config->workdir);
		config->workdir = NULL;
	}

	/* Turn the final redirect_dir string into the redirect flags */
	err = ovl_parse_redirect_mode(config, config->redirect_mode);
	if (err)
		return err;

	/* metacopy feature with upper requires redirect_dir=on */
	if (config->upperdir && config->metacopy && !config->redirect_dir) {
		pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=on\", falling back to metacopy=off.\n");
		config->metacopy = false;
	} else if (config->metacopy && !config->redirect_follow) {
		pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=follow\" on non-upper mount, falling back to metacopy=off.\n");
		config->metacopy = false;
	}

	return 0;
}
 586
/*
 * Directory names for the work and index directories.
 * NOTE(review): presumably these are passed as @name to
 * ovl_workdir_create(), i.e. created under the workdir base - confirm
 * against the callers.
 */
#define OVL_WORKDIR_NAME "work"
#define OVL_INDEXDIR_NAME "index"
 589
 590static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
 591                                         const char *name, bool persist)
 592{
 593        struct inode *dir =  ofs->workbasedir->d_inode;
 594        struct vfsmount *mnt = ofs->upper_mnt;
 595        struct dentry *work;
 596        int err;
 597        bool retried = false;
 598        bool locked = false;
 599
 600        inode_lock_nested(dir, I_MUTEX_PARENT);
 601        locked = true;
 602
 603retry:
 604        work = lookup_one_len(name, ofs->workbasedir, strlen(name));
 605
 606        if (!IS_ERR(work)) {
 607                struct iattr attr = {
 608                        .ia_valid = ATTR_MODE,
 609                        .ia_mode = S_IFDIR | 0,
 610                };
 611
 612                if (work->d_inode) {
 613                        err = -EEXIST;
 614                        if (retried)
 615                                goto out_dput;
 616
 617                        if (persist)
 618                                goto out_unlock;
 619
 620                        retried = true;
 621                        ovl_workdir_cleanup(dir, mnt, work, 0);
 622                        dput(work);
 623                        goto retry;
 624                }
 625
 626                work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode));
 627                err = PTR_ERR(work);
 628                if (IS_ERR(work))
 629                        goto out_err;
 630
 631                /*
 632                 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
 633                 *
 634                 * a) success (there was a POSIX ACL xattr and was removed)
 635                 * b) -ENODATA (there was no POSIX ACL xattr)
 636                 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
 637                 *
 638                 * There are various other error values that could effectively
 639                 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
 640                 * if the xattr name is too long), but the set of filesystems
 641                 * allowed as upper are limited to "normal" ones, where checking
 642                 * for the above two errors is sufficient.
 643                 */
 644                err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
 645                if (err && err != -ENODATA && err != -EOPNOTSUPP)
 646                        goto out_dput;
 647
 648                err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
 649                if (err && err != -ENODATA && err != -EOPNOTSUPP)
 650                        goto out_dput;
 651
 652                /* Clear any inherited mode bits */
 653                inode_lock(work->d_inode);
 654                err = notify_change(work, &attr, NULL);
 655                inode_unlock(work->d_inode);
 656                if (err)
 657                        goto out_dput;
 658        } else {
 659                err = PTR_ERR(work);
 660                goto out_err;
 661        }
 662out_unlock:
 663        if (locked)
 664                inode_unlock(dir);
 665
 666        return work;
 667
 668out_dput:
 669        dput(work);
 670out_err:
 671        pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
 672                ofs->config.workdir, name, -err);
 673        work = NULL;
 674        goto out_unlock;
 675}
 676
 677static void ovl_unescape(char *s)
 678{
 679        char *d = s;
 680
 681        for (;; s++, d++) {
 682                if (*s == '\\')
 683                        s++;
 684                *d = *s;
 685                if (!*s)
 686                        break;
 687        }
 688}
 689
 690static int ovl_mount_dir_noesc(const char *name, struct path *path)
 691{
 692        int err = -EINVAL;
 693
 694        if (!*name) {
 695                pr_err("overlayfs: empty lowerdir\n");
 696                goto out;
 697        }
 698        err = kern_path(name, LOOKUP_FOLLOW, path);
 699        if (err) {
 700                pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
 701                goto out;
 702        }
 703        err = -EINVAL;
 704        if (ovl_dentry_weird(path->dentry)) {
 705                pr_err("overlayfs: filesystem on '%s' not supported\n", name);
 706                goto out_put;
 707        }
 708        if (!d_is_dir(path->dentry)) {
 709                pr_err("overlayfs: '%s' not a directory\n", name);
 710                goto out_put;
 711        }
 712        return 0;
 713
 714out_put:
 715        path_put_init(path);
 716out:
 717        return err;
 718}
 719
 720static int ovl_mount_dir(const char *name, struct path *path)
 721{
 722        int err = -ENOMEM;
 723        char *tmp = kstrdup(name, GFP_KERNEL);
 724
 725        if (tmp) {
 726                ovl_unescape(tmp);
 727                err = ovl_mount_dir_noesc(tmp, path);
 728
 729                if (!err)
 730                        if (ovl_dentry_remote(path->dentry)) {
 731                                pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
 732                                       tmp);
 733                                path_put_init(path);
 734                                err = -EINVAL;
 735                        }
 736                kfree(tmp);
 737        }
 738        return err;
 739}
 740
 741static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
 742                             const char *name)
 743{
 744        struct kstatfs statfs;
 745        int err = vfs_statfs(path, &statfs);
 746
 747        if (err)
 748                pr_err("overlayfs: statfs failed on '%s'\n", name);
 749        else
 750                ofs->namelen = max(ofs->namelen, statfs.f_namelen);
 751
 752        return err;
 753}
 754
 755static int ovl_lower_dir(const char *name, struct path *path,
 756                         struct ovl_fs *ofs, int *stack_depth, bool *remote)
 757{
 758        int fh_type;
 759        int err;
 760
 761        err = ovl_mount_dir_noesc(name, path);
 762        if (err)
 763                goto out;
 764
 765        err = ovl_check_namelen(path, ofs, name);
 766        if (err)
 767                goto out_put;
 768
 769        *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
 770
 771        if (ovl_dentry_remote(path->dentry))
 772                *remote = true;
 773
 774        /*
 775         * The inodes index feature and NFS export need to encode and decode
 776         * file handles, so they require that all layers support them.
 777         */
 778        fh_type = ovl_can_decode_fh(path->dentry->d_sb);
 779        if ((ofs->config.nfs_export ||
 780             (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
 781                ofs->config.index = false;
 782                ofs->config.nfs_export = false;
 783                pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
 784                        name);
 785        }
 786
 787        /* Check if lower fs has 32bit inode numbers */
 788        if (fh_type != FILEID_INO32_GEN)
 789                ofs->xino_bits = 0;
 790
 791        return 0;
 792
 793out_put:
 794        path_put_init(path);
 795out:
 796        return err;
 797}
 798
 799/* Workdir should not be subdir of upperdir and vice versa */
 800static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
 801{
 802        bool ok = false;
 803
 804        if (workdir != upperdir) {
 805                ok = (lock_rename(workdir, upperdir) == NULL);
 806                unlock_rename(workdir, upperdir);
 807        }
 808        return ok;
 809}
 810
 811static unsigned int ovl_split_lowerdirs(char *str)
 812{
 813        unsigned int ctr = 1;
 814        char *s, *d;
 815
 816        for (s = d = str;; s++, d++) {
 817                if (*s == '\\') {
 818                        s++;
 819                } else if (*s == ':') {
 820                        *d = '\0';
 821                        ctr++;
 822                        continue;
 823                }
 824                *d = *s;
 825                if (!*s)
 826                        break;
 827        }
 828        return ctr;
 829}
 830
/*
 * xattr .get for the POSIX ACL handlers: forwards to ovl_xattr_get() using
 * the handler's full xattr name (handler->name) rather than @name.
 */
static int __maybe_unused
ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, void *buffer, size_t size)
{
	return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
}
 838
/*
 * xattr .set for the POSIX ACL handlers.
 *
 * Validates the ACL value and the caller's permission before handing the
 * xattr to ovl_xattr_set() under the handler's full name, then refreshes the
 * overlay inode attributes from the real inode.
 *
 * Returns 0 on success, or:
 *   the posix_acl_from_xattr() error if @value cannot be parsed,
 *   -EOPNOTSUPP if the workdir inode is not IS_POSIXACL or the real inode
 *               has no set_acl operation,
 *   -EACCES     for a non-NULL default ACL on a non-directory (0 if NULL),
 *   -EPERM      if the caller is neither owner nor capable,
 *   or the error from ovl_setattr() / ovl_xattr_set().
 */
static int __maybe_unused
ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, const void *value,
			size_t size, int flags)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *realinode = ovl_inode_real(inode);
	struct posix_acl *acl = NULL;
	int err;

	/* Check that everything is OK before copy-up */
	if (value) {
		acl = posix_acl_from_xattr(&init_user_ns, value, size);
		if (IS_ERR(acl))
			return PTR_ERR(acl);
	}
	err = -EOPNOTSUPP;
	if (!IS_POSIXACL(d_inode(workdir)))
		goto out_acl_release;
	if (!realinode->i_op->set_acl)
		goto out_acl_release;
	/* Default ACLs only make sense on directories */
	if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
		err = acl ? -EACCES : 0;
		goto out_acl_release;
	}
	err = -EPERM;
	if (!inode_owner_or_capable(inode))
		goto out_acl_release;

	/* The parsed ACL was only needed for validation */
	posix_acl_release(acl);

	/*
	 * Check if sgid bit needs to be cleared (actual setacl operation will
	 * be done with mounter's capabilities and so that won't do it for us).
	 */
	if (unlikely(inode->i_mode & S_ISGID) &&
	    handler->flags == ACL_TYPE_ACCESS &&
	    !in_group_p(inode->i_gid) &&
	    !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
		struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };

		err = ovl_setattr(dentry, &iattr);
		if (err)
			return err;
	}

	err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
	if (!err)
		ovl_copyattr(ovl_inode_real(inode), inode);

	return err;

out_acl_release:
	posix_acl_release(acl);
	return err;
}
 896
/*
 * xattr .get for names under OVL_XATTR_PREFIX: always fails with
 * -EOPNOTSUPP, so these xattrs cannot be read through this handler.
 */
static int ovl_own_xattr_get(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, void *buffer, size_t size)
{
	return -EOPNOTSUPP;
}
 903
 904static int ovl_own_xattr_set(const struct xattr_handler *handler,
 905                             struct dentry *dentry, struct inode *inode,
 906                             const char *name, const void *value,
 907                             size_t size, int flags)
 908{
 909        return -EOPNOTSUPP;
 910}
 911
 912static int ovl_other_xattr_get(const struct xattr_handler *handler,
 913                               struct dentry *dentry, struct inode *inode,
 914                               const char *name, void *buffer, size_t size)
 915{
 916        return ovl_xattr_get(dentry, inode, name, buffer, size);
 917}
 918
 919static int ovl_other_xattr_set(const struct xattr_handler *handler,
 920                               struct dentry *dentry, struct inode *inode,
 921                               const char *name, const void *value,
 922                               size_t size, int flags)
 923{
 924        return ovl_xattr_set(dentry, inode, name, value, size, flags);
 925}
 926
 927static const struct xattr_handler __maybe_unused
 928ovl_posix_acl_access_xattr_handler = {
 929        .name = XATTR_NAME_POSIX_ACL_ACCESS,
 930        .flags = ACL_TYPE_ACCESS,
 931        .get = ovl_posix_acl_xattr_get,
 932        .set = ovl_posix_acl_xattr_set,
 933};
 934
 935static const struct xattr_handler __maybe_unused
 936ovl_posix_acl_default_xattr_handler = {
 937        .name = XATTR_NAME_POSIX_ACL_DEFAULT,
 938        .flags = ACL_TYPE_DEFAULT,
 939        .get = ovl_posix_acl_xattr_get,
 940        .set = ovl_posix_acl_xattr_set,
 941};
 942
 943static const struct xattr_handler ovl_own_xattr_handler = {
 944        .prefix = OVL_XATTR_PREFIX,
 945        .get = ovl_own_xattr_get,
 946        .set = ovl_own_xattr_set,
 947};
 948
 949static const struct xattr_handler ovl_other_xattr_handler = {
 950        .prefix = "", /* catch all */
 951        .get = ovl_other_xattr_get,
 952        .set = ovl_other_xattr_set,
 953};
 954
 955static const struct xattr_handler *ovl_xattr_handlers[] = {
 956#ifdef CONFIG_FS_POSIX_ACL
 957        &ovl_posix_acl_access_xattr_handler,
 958        &ovl_posix_acl_default_xattr_handler,
 959#endif
 960        &ovl_own_xattr_handler,
 961        &ovl_other_xattr_handler,
 962        NULL
 963};
 964
 965static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
 966{
 967        struct vfsmount *upper_mnt;
 968        int err;
 969
 970        err = ovl_mount_dir(ofs->config.upperdir, upperpath);
 971        if (err)
 972                goto out;
 973
 974        /* Upper fs should not be r/o */
 975        if (sb_rdonly(upperpath->mnt->mnt_sb)) {
 976                pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
 977                err = -EINVAL;
 978                goto out;
 979        }
 980
 981        err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
 982        if (err)
 983                goto out;
 984
 985        upper_mnt = clone_private_mount(upperpath);
 986        err = PTR_ERR(upper_mnt);
 987        if (IS_ERR(upper_mnt)) {
 988                pr_err("overlayfs: failed to clone upperpath\n");
 989                goto out;
 990        }
 991
 992        /* Don't inherit atime flags */
 993        upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
 994        ofs->upper_mnt = upper_mnt;
 995
 996        err = -EBUSY;
 997        if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
 998                ofs->upperdir_locked = true;
 999        } else if (ofs->config.index) {
1000                pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
1001                goto out;
1002        } else {
1003                pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
1004        }
1005
1006        err = 0;
1007out:
1008        return err;
1009}
1010
/*
 * Create the work directory and probe the upper/work filesystem for the
 * features overlayfs wants (d_type, O_TMPFILE, trusted.overlay.* xattrs,
 * file handles), turning off the mount options that cannot be honoured.
 *
 * @ofs:      overlay instance; ofs->upper_mnt must already be set
 * @workpath: resolved path of the configured workdir, used for the d_type
 *            probe
 *
 * Write access to the upper mount is held (mnt_want_write) for the whole
 * function and dropped again at "out".
 *
 * Returns 0 on success -- including the case where ovl_workdir_create()
 * yields no directory, which leaves ofs->workdir NULL -- or a negative error
 * from mnt_want_write() or ovl_check_d_type_supported().
 */
1011static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
1012{
1013        struct vfsmount *mnt = ofs->upper_mnt;
1014        struct dentry *temp;
1015        int fh_type;
1016        int err;
1017
1018        err = mnt_want_write(mnt);
1019        if (err)
1020                return err;
1021
1022        ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
            /*
             * err is still 0 here, so a missing workdir is not reported as a
             * failure; ovl_fill_super() checks ofs->workdir and forces the
             * mount read-only instead.
             */
1023        if (!ofs->workdir)
1024                goto out;
1025
1026        /*
1027         * Upper should support d_type, else whiteouts are visible.  Given
1028         * workdir and upper are on same fs, we can do iterate_dir() on
1029         * workdir. This check requires successful creation of workdir in
1030         * previous step.
1031         */
1032        err = ovl_check_d_type_supported(workpath);
            /* negative: the probe itself failed; 0: d_type is not supported */
1033        if (err < 0)
1034                goto out;
1035
1036        /*
1037         * We allowed this configuration and don't want to break users over
1038         * kernel upgrade. So warn instead of erroring out.
1039         */
1040        if (!err)
1041                pr_warn("overlayfs: upper fs needs to support d_type.\n");
1042
1043        /* Check if upper/work fs supports O_TMPFILE */
1044        temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
1045        ofs->tmpfile = !IS_ERR(temp);
1046        if (ofs->tmpfile)
1047                dput(temp);
1048        else
1049                pr_warn("overlayfs: upper fs does not support tmpfile.\n");
1050
1051        /*
1052         * Check if upper/work fs supports trusted.overlay.* xattr
1053         */
1054        err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
1055        if (err) {
                    /* Not fatal: record it, disable dependent features, clear err */
1056                ofs->noxattr = true;
1057                ofs->config.index = false;
1058                ofs->config.metacopy = false;
1059                pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
1060                err = 0;
1061        } else {
                    /* Remove the probe xattr again; the result is deliberately ignored */
1062                vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
1063        }
1064
1065        /* Check if upper/work fs supports file handles */
1066        fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
1067        if (ofs->config.index && !fh_type) {
1068                ofs->config.index = false;
1069                pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
1070        }
1071
1072        /* Check if upper fs has 32bit inode numbers */
1073        if (fh_type != FILEID_INO32_GEN)
1074                ofs->xino_bits = 0;
1075
1076        /* NFS export of r/w mount depends on index */
1077        if (ofs->config.nfs_export && !ofs->config.index) {
1078                pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
1079                ofs->config.nfs_export = false;
1080        }
1081out:
            /* Common exit: pairs with the mnt_want_write() at the top */
1082        mnt_drop_write(mnt);
1083        return err;
1084}
1085
1086static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
1087{
1088        int err;
1089        struct path workpath = { };
1090
1091        err = ovl_mount_dir(ofs->config.workdir, &workpath);
1092        if (err)
1093                goto out;
1094
1095        err = -EINVAL;
1096        if (upperpath->mnt != workpath.mnt) {
1097                pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
1098                goto out;
1099        }
1100        if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
1101                pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
1102                goto out;
1103        }
1104
1105        ofs->workbasedir = dget(workpath.dentry);
1106
1107        err = -EBUSY;
1108        if (ovl_inuse_trylock(ofs->workbasedir)) {
1109                ofs->workdir_locked = true;
1110        } else if (ofs->config.index) {
1111                pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
1112                goto out;
1113        } else {
1114                pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
1115        }
1116
1117        err = ovl_make_workdir(ofs, &workpath);
1118        if (err)
1119                goto out;
1120
1121        err = 0;
1122out:
1123        path_put(&workpath);
1124
1125        return err;
1126}
1127
1128static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
1129                            struct path *upperpath)
1130{
1131        struct vfsmount *mnt = ofs->upper_mnt;
1132        int err;
1133
1134        err = mnt_want_write(mnt);
1135        if (err)
1136                return err;
1137
1138        /* Verify lower root is upper root origin */
1139        err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
1140                                true);
1141        if (err) {
1142                pr_err("overlayfs: failed to verify upper root origin\n");
1143                goto out;
1144        }
1145
1146        ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
1147        if (ofs->indexdir) {
1148                /*
1149                 * Verify upper root is exclusively associated with index dir.
1150                 * Older kernels stored upper fh in "trusted.overlay.origin"
1151                 * xattr. If that xattr exists, verify that it is a match to
1152                 * upper dir file handle. In any case, verify or set xattr
1153                 * "trusted.overlay.upper" to indicate that index may have
1154                 * directory entries.
1155                 */
1156                if (ovl_check_origin_xattr(ofs->indexdir)) {
1157                        err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
1158                                                upperpath->dentry, true, false);
1159                        if (err)
1160                                pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
1161                }
1162                err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
1163                if (err)
1164                        pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
1165
1166                /* Cleanup bad/stale/orphan index entries */
1167                if (!err)
1168                        err = ovl_indexdir_cleanup(ofs);
1169        }
1170        if (err || !ofs->indexdir)
1171                pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
1172
1173out:
1174        mnt_drop_write(mnt);
1175        return err;
1176}
1177
1178/* Get a unique fsid for the layer */
1179static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
1180{
1181        unsigned int i;
1182        dev_t dev;
1183        int err;
1184
1185        /* fsid 0 is reserved for upper fs even with non upper overlay */
1186        if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
1187                return 0;
1188
1189        for (i = 0; i < ofs->numlowerfs; i++) {
1190                if (ofs->lower_fs[i].sb == sb)
1191                        return i + 1;
1192        }
1193
1194        err = get_anon_bdev(&dev);
1195        if (err) {
1196                pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
1197                return err;
1198        }
1199
1200        ofs->lower_fs[ofs->numlowerfs].sb = sb;
1201        ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
1202        ofs->numlowerfs++;
1203
1204        return ofs->numlowerfs;
1205}
1206
/*
 * Set up ofs->lower_layers[] and ofs->lower_fs[] from the resolved lower
 * paths, then settle the xino configuration.
 *
 * @ofs:      overlay instance being mounted
 * @stack:    resolved lower paths, topmost first as parsed by the caller
 * @numlower: number of entries in @stack
 *
 * Each lower path gets a private clone of its mount, flagged read-only and
 * noatime, plus the fsid of the super block it lives on.
 *
 * Returns 0 on success, -ENOMEM if an array cannot be allocated, or the error
 * of ovl_get_fsid() / clone_private_mount(). Nothing set up so far is undone
 * here on failure; presumably ovl_free_fs(), called from ovl_fill_super()'s
 * error path, takes care of it -- confirm there.
 */
1207static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
1208                                unsigned int numlower)
1209{
1210        int err;
1211        unsigned int i;
1212
1213        err = -ENOMEM;
1214        ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
1215                                    GFP_KERNEL);
1216        if (ofs->lower_layers == NULL)
1217                goto out;
1218
            /* Sized for the worst case of every layer on its own filesystem */
1219        ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb),
1220                                GFP_KERNEL);
1221        if (ofs->lower_fs == NULL)
1222                goto out;
1223
1224        for (i = 0; i < numlower; i++) {
1225                struct vfsmount *mnt;
1226                int fsid;
1227
                    /* fsid >= 0 on success (0: same sb as upper), < 0 is an error */
1228                err = fsid = ovl_get_fsid(ofs, stack[i].mnt->mnt_sb);
1229                if (err < 0)
1230                        goto out;
1231
1232                mnt = clone_private_mount(&stack[i]);
1233                err = PTR_ERR(mnt);
1234                if (IS_ERR(mnt)) {
1235                        pr_err("overlayfs: failed to clone lowerpath\n");
1236                        goto out;
1237                }
1238
1239                /*
1240                 * Make lower layers R/O.  That way fchmod/fchown on lower file
1241                 * will fail instead of modifying lower fs.
1242                 */
1243                mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
1244
                    /* Layer indices are 1-based (i + 1) */
1245                ofs->lower_layers[ofs->numlower].mnt = mnt;
1246                ofs->lower_layers[ofs->numlower].idx = i + 1;
1247                ofs->lower_layers[ofs->numlower].fsid = fsid;
                    /* With fsid 0 the .fs pointer keeps its zeroed (NULL) value */
1248                if (fsid) {
1249                        ofs->lower_layers[ofs->numlower].fs =
1250                                &ofs->lower_fs[fsid - 1];
1251                }
1252                ofs->numlower++;
1253        }
1254
1255        /*
1256         * When all layers on same fs, overlay can use real inode numbers.
1257         * With mount option "xino=on", mounter declares that there are enough
1258         * free high bits in underlying fs to hold the unique fsid.
1259         * If overlayfs does encounter underlying inodes using the high xino
1260         * bits reserved for fsid, it emits a warning and uses the original
1261         * inode number.
1262         */
            /*
             * numlowerfs == 0: every lower layer sits on the upper sb.
             * numlowerfs == 1 without upper: all lower layers share one sb.
             */
1263        if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) {
1264                ofs->xino_bits = 0;
1265                ofs->config.xino = OVL_XINO_OFF;
1266        } else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) {
1267                /*
1268                 * This is a roundup of number of bits needed for numlowerfs+1
1269                 * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for
1270                 * upper fs even with non upper overlay.
1271                 */
1272                BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
1273                ofs->xino_bits = ilog2(ofs->numlowerfs) + 1;
1274        }
1275
1276        if (ofs->xino_bits) {
1277                pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n",
1278                        ofs->xino_bits);
1279        }
1280
1281        err = 0;
1282out:
1283        return err;
1284}
1285
/*
 * Parse ofs->config.lowerdir, resolve every lower directory and build the
 * root overlay entry describing the lower stack.
 *
 * @sb:  super block being filled; s_stack_depth and s_d_op are updated
 * @ofs: overlay instance; receives the lower layers through
 *       ovl_get_lower_layers() and may have config.nfs_export cleared
 *
 * Returns the new ovl_entry on success or an ERR_PTR() on failure. The
 * temporary copy of the option string and the path references in the local
 * stack are released on both paths: the error label sets the return value
 * and jumps back into the common cleanup at "out".
 */
1286static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
1287                                            struct ovl_fs *ofs)
1288{
1289        int err;
1290        char *lowertmp, *lower;
1291        struct path *stack = NULL;
1292        unsigned int stacklen, numlower = 0, i;
1293        bool remote = false;
1294        struct ovl_entry *oe;
1295
1296        err = -ENOMEM;
            /* Work on a private copy; ovl_split_lowerdirs() is given a writable string */
1297        lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
1298        if (!lowertmp)
1299                goto out_err;
1300
1301        err = -EINVAL;
1302        stacklen = ovl_split_lowerdirs(lowertmp);
1303        if (stacklen > OVL_MAX_STACK) {
1304                pr_err("overlayfs: too many lower directories, limit is %d\n",
1305                       OVL_MAX_STACK);
1306                goto out_err;
1307        } else if (!ofs->config.upperdir && stacklen == 1) {
1308                pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
1309                goto out_err;
1310        } else if (!ofs->config.upperdir && ofs->config.nfs_export &&
1311                   ofs->config.redirect_follow) {
                    /* Not an error: only downgrade the option and carry on */
1312                pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
1313                ofs->config.nfs_export = false;
1314        }
1315
1316        err = -ENOMEM;
1317        stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
1318        if (!stack)
1319                goto out_err;
1320
1321        err = -EINVAL;
1322        lower = lowertmp;
            /*
             * numlower counts the entries resolved so far, which is exactly
             * what the cleanup loop at "out" has to release.
             */
1323        for (numlower = 0; numlower < stacklen; numlower++) {
1324                err = ovl_lower_dir(lower, &stack[numlower], ofs,
1325                                    &sb->s_stack_depth, &remote);
1326                if (err)
1327                        goto out_err;
1328
                    /* Step over the terminating NUL to the next component */
1329                lower = strchr(lower, '\0') + 1;
1330        }
1331
1332        err = -EINVAL;
1333        sb->s_stack_depth++;
1334        if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
1335                pr_err("overlayfs: maximum fs stacking depth exceeded\n");
1336                goto out_err;
1337        }
1338
1339        err = ovl_get_lower_layers(ofs, stack, numlower);
1340        if (err)
1341                goto out_err;
1342
1343        err = -ENOMEM;
1344        oe = ovl_alloc_entry(numlower);
1345        if (!oe)
1346                goto out_err;
1347
            /* The entry takes its own dentry references; stack[] is dropped below */
1348        for (i = 0; i < numlower; i++) {
1349                oe->lowerstack[i].dentry = dget(stack[i].dentry);
1350                oe->lowerstack[i].layer = &ofs->lower_layers[i];
1351        }
1352
            /* "remote" is an out-parameter of ovl_lower_dir() above */
1353        if (remote)
1354                sb->s_d_op = &ovl_reval_dentry_operations;
1355        else
1356                sb->s_d_op = &ovl_dentry_operations;
1357
1358out:
            /* Shared by success and failure; with stack == NULL numlower is 0 */
1359        for (i = 0; i < numlower; i++)
1360                path_put(&stack[i]);
1361        kfree(stack);
1362        kfree(lowertmp);
1363
1364        return oe;
1365
1366out_err:
            /* Turn the pending error into the return value, then clean up */
1367        oe = ERR_PTR(err);
1368        goto out;
1369}
1370
/*
 * Fill in the super block of a new overlay mount; passed to mount_nodev()
 * by ovl_mount().
 *
 * @sb:     super block to initialise
 * @data:   mount option string, handed to ovl_parse_opt()
 * @silent: when non-zero the "missing 'lowerdir'" message is suppressed
 *
 * Order of work: allocate the ovl_fs and mounter credentials, parse options,
 * set up upper and work dir (if an upperdir was given), build the lower
 * stack, set up the index dir (if enabled and the mount is not forced
 * read-only), settle the remaining feature flags and finally create the root
 * dentry.
 *
 * Error labels: "out_free_oe" releases the root overlay entry and falls
 * through to "out_err", which drops upperpath and calls ovl_free_fs().
 *
 * Returns 0 on success or a negative error.
 */
1371static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1372{
1373        struct path upperpath = { };
1374        struct dentry *root_dentry;
1375        struct ovl_entry *oe;
1376        struct ovl_fs *ofs;
1377        struct cred *cred;
1378        int err;
1379
1380        err = -ENOMEM;
1381        ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
1382        if (!ofs)
1383                goto out;
1384
            /* err is still -ENOMEM for this failure */
1385        ofs->creator_cred = cred = prepare_creds();
1386        if (!cred)
1387                goto out_err;
1388
            /* Module-parameter defaults first; ovl_parse_opt() may override them */
1389        ofs->config.index = ovl_index_def;
1390        ofs->config.nfs_export = ovl_nfs_export_def;
1391        ofs->config.xino = ovl_xino_def();
1392        ofs->config.metacopy = ovl_metacopy_def;
1393        err = ovl_parse_opt((char *) data, &ofs->config);
1394        if (err)
1395                goto out_err;
1396
            /* -EINVAL also covers the missing 'workdir' case further down */
1397        err = -EINVAL;
1398        if (!ofs->config.lowerdir) {
1399                if (!silent)
1400                        pr_err("overlayfs: missing 'lowerdir'\n");
1401                goto out_err;
1402        }
1403
1404        sb->s_stack_depth = 0;
1405        sb->s_maxbytes = MAX_LFS_FILESIZE;
1406        /* Assume underlying fs uses 32bit inodes unless proven otherwise */
1407        if (ofs->config.xino != OVL_XINO_OFF)
1408                ofs->xino_bits = BITS_PER_LONG - 32;
1409
1410        if (ofs->config.upperdir) {
1411                if (!ofs->config.workdir) {
1412                        pr_err("overlayfs: missing 'workdir'\n");
1413                        goto out_err;
1414                }
1415
1416                err = ovl_get_upper(ofs, &upperpath);
1417                if (err)
1418                        goto out_err;
1419
1420                err = ovl_get_workdir(ofs, &upperpath);
1421                if (err)
1422                        goto out_err;
1423
                    /*
                     * ovl_get_workdir() can succeed without producing a work
                     * directory; such a mount is forced read-only.
                     */
1424                if (!ofs->workdir)
1425                        sb->s_flags |= SB_RDONLY;
1426
1427                sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
1428                sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
1429
1430        }
1431        oe = ovl_get_lowerstack(sb, ofs);
            /*
             * On success err holds a meaningless value here; it is assigned
             * again before every later jump to an error label.
             */
1432        err = PTR_ERR(oe);
1433        if (IS_ERR(oe))
1434                goto out_err;
1435
1436        /* If the upper fs is nonexistent, we mark overlayfs r/o too */
1437        if (!ofs->upper_mnt)
1438                sb->s_flags |= SB_RDONLY;
1439
1440        if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
1441                err = ovl_get_indexdir(ofs, oe, &upperpath);
1442                if (err)
1443                        goto out_free_oe;
1444
1445                /* Force r/o mount with no index dir */
1446                if (!ofs->indexdir) {
1447                        dput(ofs->workdir);
1448                        ofs->workdir = NULL;
1449                        sb->s_flags |= SB_RDONLY;
1450                }
1451
1452        }
1453
1454        /* Show index=off in /proc/mounts for forced r/o mount */
1455        if (!ofs->indexdir) {
1456                ofs->config.index = false;
1457                if (ofs->upper_mnt && ofs->config.nfs_export) {
1458                        pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
1459                        ofs->config.nfs_export = false;
1460                }
1461        }
1462
1463        if (ofs->config.metacopy && ofs->config.nfs_export) {
1464                pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
1465                ofs->config.nfs_export = false;
1466        }
1467
            /* Export operations are installed only if nfs_export survived the checks above */
1468        if (ofs->config.nfs_export)
1469                sb->s_export_op = &ovl_export_operations;
1470
1471        /* Never override disk quota limits or use reserved space */
1472        cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
1473
1474        sb->s_magic = OVERLAYFS_SUPER_MAGIC;
1475        sb->s_op = &ovl_super_operations;
1476        sb->s_xattr = ovl_xattr_handlers;
1477        sb->s_fs_info = ofs;
1478        sb->s_flags |= SB_POSIXACL;
1479
1480        err = -ENOMEM;
1481        root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
1482        if (!root_dentry)
1483                goto out_free_oe;
1484
1485        root_dentry->d_fsdata = oe;
1486
            /*
             * Only the mount reference of upperpath is dropped; the dentry is
             * passed on to ovl_inode_init() below (NULL when there is no
             * upper layer, since upperpath starts out zeroed).
             */
1487        mntput(upperpath.mnt);
1488        if (upperpath.dentry) {
1489                ovl_dentry_set_upper_alias(root_dentry);
1490                if (ovl_is_impuredir(upperpath.dentry))
1491                        ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
1492        }
1493
1494        /* Root is always merge -> can have whiteouts */
1495        ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
1496        ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
1497        ovl_set_upperdata(d_inode(root_dentry));
1498        ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
1499                       ovl_dentry_lower(root_dentry), NULL);
1500
1501        sb->s_root = root_dentry;
1502
1503        return 0;
1504
1505out_free_oe:
            /* Release the lower stack of the root entry, then the entry itself */
1506        ovl_entry_stack_free(oe);
1507        kfree(oe);
1508out_err:
            /* upperpath is still zeroed if the upper layer was never resolved */
1509        path_put(&upperpath);
1510        ovl_free_fs(ofs);
1511out:
1512        return err;
1513}
1514
1515static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
1516                                const char *dev_name, void *raw_data)
1517{
1518        return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
1519}
1520
1521static struct file_system_type ovl_fs_type = {
1522        .owner          = THIS_MODULE,
1523        .name           = "overlay",
1524        .mount          = ovl_mount,
1525        .kill_sb        = kill_anon_super,
1526};
1527MODULE_ALIAS_FS("overlay");
1528
1529static void ovl_inode_init_once(void *foo)
1530{
1531        struct ovl_inode *oi = foo;
1532
1533        inode_init_once(&oi->vfs_inode);
1534}
1535
1536static int __init ovl_init(void)
1537{
1538        int err;
1539
1540        ovl_inode_cachep = kmem_cache_create("ovl_inode",
1541                                             sizeof(struct ovl_inode), 0,
1542                                             (SLAB_RECLAIM_ACCOUNT|
1543                                              SLAB_MEM_SPREAD|SLAB_ACCOUNT),
1544                                             ovl_inode_init_once);
1545        if (ovl_inode_cachep == NULL)
1546                return -ENOMEM;
1547
1548        err = register_filesystem(&ovl_fs_type);
1549        if (err)
1550                kmem_cache_destroy(ovl_inode_cachep);
1551
1552        return err;
1553}
1554
1555static void __exit ovl_exit(void)
1556{
1557        unregister_filesystem(&ovl_fs_type);
1558
1559        /*
1560         * Make sure all delayed rcu free inodes are flushed before we
1561         * destroy cache.
1562         */
1563        rcu_barrier();
1564        kmem_cache_destroy(ovl_inode_cachep);
1565
1566}
1567
/* Hook ovl_init() / ovl_exit() up as the module's entry and exit points */
1568module_init(ovl_init);
1569module_exit(ovl_exit);
1570