linux/fs/overlayfs/super.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *
   4 * Copyright (C) 2011 Novell Inc.
   5 */
   6
   7#include <uapi/linux/magic.h>
   8#include <linux/fs.h>
   9#include <linux/namei.h>
  10#include <linux/xattr.h>
  11#include <linux/mount.h>
  12#include <linux/parser.h>
  13#include <linux/module.h>
  14#include <linux/statfs.h>
  15#include <linux/seq_file.h>
  16#include <linux/posix_acl_xattr.h>
  17#include <linux/exportfs.h>
  18#include "overlayfs.h"
  19
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
MODULE_LICENSE("GPL");


/* Forward declaration only; the structure is defined elsewhere. */
struct ovl_dir_cache;

/* NOTE(review): presumably the maximum number of stacked layers - confirm against its users. */
#define OVL_MAX_STACK 500

/*
 * Module parameters.
 *
 * Each parameter below takes its initial value from the corresponding
 * CONFIG_OVERLAY_FS_* build option and is registered with mode 0644.
 * The *_def variables act as defaults: ovl_show_options() only prints an
 * option when the mounted configuration differs from the default.
 */

/* Default for the "redirect_dir" mount option ("on" or "off"). */
static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
MODULE_PARM_DESC(redirect_dir,
                 "Default to on or off for the redirect_dir feature");

/* When set, "redirect_dir=off" still enables following of redirects. */
static bool ovl_redirect_always_follow =
        IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
module_param_named(redirect_always_follow, ovl_redirect_always_follow,
                   bool, 0644);
MODULE_PARM_DESC(redirect_always_follow,
                 "Follow redirects even if redirect_dir feature is turned off");

/* Default for the "index" mount option. */
static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
module_param_named(index, ovl_index_def, bool, 0644);
MODULE_PARM_DESC(index,
                 "Default to on or off for the inodes index feature");

/* Default for the "nfs_export" mount option. */
static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
MODULE_PARM_DESC(nfs_export,
                 "Default to on or off for the NFS export feature");

/* Selects OVL_XINO_AUTO instead of OVL_XINO_OFF as the "xino" default. */
static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
MODULE_PARM_DESC(xino_auto,
                 "Auto enable xino feature");
  55
  56static void ovl_entry_stack_free(struct ovl_entry *oe)
  57{
  58        unsigned int i;
  59
  60        for (i = 0; i < oe->numlower; i++)
  61                dput(oe->lowerstack[i].dentry);
  62}
  63
/*
 * Default for the "metacopy" mount option; initialised from
 * CONFIG_OVERLAY_FS_METACOPY and registered as module parameter "metacopy".
 */
static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
MODULE_PARM_DESC(metacopy,
                 "Default to on or off for the metadata only copy up feature");
  68
  69static void ovl_dentry_release(struct dentry *dentry)
  70{
  71        struct ovl_entry *oe = dentry->d_fsdata;
  72
  73        if (oe) {
  74                ovl_entry_stack_free(oe);
  75                kfree_rcu(oe, rcu);
  76        }
  77}
  78
/*
 * ->d_real: map an overlay dentry to a real (underlying) dentry.
 *
 * @dentry: the overlay dentry
 * @inode:  if non-NULL, the inode the returned dentry must refer to
 *
 * Returns @dentry itself, the upper dentry, or the lower data dentry
 * (resolved recursively through d_real()).  If no dentry matching @inode
 * is found, a warning is emitted and @dentry is returned.
 */
static struct dentry *ovl_d_real(struct dentry *dentry,
                                 const struct inode *inode)
{
        struct dentry *real = NULL, *lower;

        /* It's an overlay file */
        if (inode && d_inode(dentry) == inode)
                return dentry;

        /* Non-regular files have no separate real dentry to hand out */
        if (!d_is_reg(dentry)) {
                if (!inode || inode == d_inode(dentry))
                        return dentry;
                goto bug;
        }

        /* The caller asked for the upper inode explicitly */
        real = ovl_dentry_upper(dentry);
        if (real && (inode == d_inode(real)))
                return real;

        /* No specific inode requested: use upper if it holds the data */
        if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
                return real;

        lower = ovl_dentry_lowerdata(dentry);
        if (!lower)
                goto bug;
        real = lower;

        /* Handle recursion */
        real = d_real(real, inode);

        if (!inode || inode == d_inode(real))
                return real;
bug:
        /* "real" is whatever candidate was examined last (may be NULL) */
        WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
             __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
             inode ? inode->i_ino : 0, real,
             real && d_inode(real) ? d_inode(real)->i_ino : 0);
        return dentry;
}
 118
 119static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
 120{
 121        int ret = 1;
 122
 123        if (weak) {
 124                if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE)
 125                        ret =  d->d_op->d_weak_revalidate(d, flags);
 126        } else if (d->d_flags & DCACHE_OP_REVALIDATE) {
 127                ret = d->d_op->d_revalidate(d, flags);
 128                if (!ret) {
 129                        if (!(flags & LOOKUP_RCU))
 130                                d_invalidate(d);
 131                        ret = -ESTALE;
 132                }
 133        }
 134        return ret;
 135}
 136
 137static int ovl_dentry_revalidate_common(struct dentry *dentry,
 138                                        unsigned int flags, bool weak)
 139{
 140        struct ovl_entry *oe = dentry->d_fsdata;
 141        struct dentry *upper;
 142        unsigned int i;
 143        int ret = 1;
 144
 145        upper = ovl_dentry_upper(dentry);
 146        if (upper)
 147                ret = ovl_revalidate_real(upper, flags, weak);
 148
 149        for (i = 0; ret > 0 && i < oe->numlower; i++) {
 150                ret = ovl_revalidate_real(oe->lowerstack[i].dentry, flags,
 151                                          weak);
 152        }
 153        return ret;
 154}
 155
/* ->d_revalidate: full (non-weak) revalidation of all real dentries. */
static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
        return ovl_dentry_revalidate_common(dentry, flags, false);
}
 160
/* ->d_weak_revalidate: weak revalidation of all real dentries. */
static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
{
        return ovl_dentry_revalidate_common(dentry, flags, true);
}
 165
/* Dentry operations implemented by the functions defined in this file. */
static const struct dentry_operations ovl_dentry_operations = {
        .d_release = ovl_dentry_release,        /* free the ovl_entry */
        .d_real = ovl_d_real,                   /* overlay -> real dentry */
        .d_revalidate = ovl_dentry_revalidate,
        .d_weak_revalidate = ovl_dentry_weak_revalidate,
};
 172
 173static struct kmem_cache *ovl_inode_cachep;
 174
 175static struct inode *ovl_alloc_inode(struct super_block *sb)
 176{
 177        struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
 178
 179        if (!oi)
 180                return NULL;
 181
 182        oi->cache = NULL;
 183        oi->redirect = NULL;
 184        oi->version = 0;
 185        oi->flags = 0;
 186        oi->__upperdentry = NULL;
 187        oi->lower = NULL;
 188        oi->lowerdata = NULL;
 189        mutex_init(&oi->lock);
 190
 191        return &oi->vfs_inode;
 192}
 193
 194static void ovl_free_inode(struct inode *inode)
 195{
 196        struct ovl_inode *oi = OVL_I(inode);
 197
 198        kfree(oi->redirect);
 199        mutex_destroy(&oi->lock);
 200        kmem_cache_free(ovl_inode_cachep, oi);
 201}
 202
 203static void ovl_destroy_inode(struct inode *inode)
 204{
 205        struct ovl_inode *oi = OVL_I(inode);
 206
 207        dput(oi->__upperdentry);
 208        iput(oi->lower);
 209        if (S_ISDIR(inode->i_mode))
 210                ovl_dir_cache_free(inode);
 211        else
 212                iput(oi->lowerdata);
 213}
 214
/*
 * Release everything owned by @ofs and free @ofs itself: trap inodes,
 * work/index dentries, in-use locks, layer mounts, per-fs anonymous
 * device numbers, the config strings and the creator credentials.
 */
static void ovl_free_fs(struct ovl_fs *ofs)
{
        struct vfsmount **mounts;
        unsigned i;

        iput(ofs->workbasedir_trap);
        iput(ofs->indexdir_trap);
        iput(ofs->workdir_trap);
        dput(ofs->whiteout);
        dput(ofs->indexdir);
        dput(ofs->workdir);
        /* Unlock before the reference on workbasedir is dropped */
        if (ofs->workdir_locked)
                ovl_inuse_unlock(ofs->workbasedir);
        dput(ofs->workbasedir);
        if (ofs->upperdir_locked)
                ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);

        /*
         * Hack!  Reuse ofs->layers as a vfsmount array before freeing it.
         *
         * Each iteration reads layers[i].trap and layers[i].mnt before
         * storing into mounts[i], which aliases the same memory.
         * NOTE(review): this assumes a layer entry is at least as large as
         * a pointer, so the store never clobbers a not-yet-read entry.
         */
        mounts = (struct vfsmount **) ofs->layers;
        for (i = 0; i < ofs->numlayer; i++) {
                iput(ofs->layers[i].trap);
                mounts[i] = ofs->layers[i].mnt;
        }
        kern_unmount_array(mounts, ofs->numlayer);
        kfree(ofs->layers);
        for (i = 0; i < ofs->numfs; i++)
                free_anon_bdev(ofs->fs[i].pseudo_dev);
        kfree(ofs->fs);

        /* Option strings allocated while parsing the mount options */
        kfree(ofs->config.lowerdir);
        kfree(ofs->config.upperdir);
        kfree(ofs->config.workdir);
        kfree(ofs->config.redirect_mode);
        /* creator_cred may be unset, so it is checked before put_cred() */
        if (ofs->creator_cred)
                put_cred(ofs->creator_cred);
        kfree(ofs);
}
 252
 253static void ovl_put_super(struct super_block *sb)
 254{
 255        struct ovl_fs *ofs = sb->s_fs_info;
 256
 257        ovl_free_fs(ofs);
 258}
 259
 260/* Sync real dirty inodes in upper filesystem (if it exists) */
 261static int ovl_sync_fs(struct super_block *sb, int wait)
 262{
 263        struct ovl_fs *ofs = sb->s_fs_info;
 264        struct super_block *upper_sb;
 265        int ret;
 266
 267        ret = ovl_sync_status(ofs);
 268        /*
 269         * We have to always set the err, because the return value isn't
 270         * checked in syncfs, and instead indirectly return an error via
 271         * the sb's writeback errseq, which VFS inspects after this call.
 272         */
 273        if (ret < 0) {
 274                errseq_set(&sb->s_wb_err, -EIO);
 275                return -EIO;
 276        }
 277
 278        if (!ret)
 279                return ret;
 280
 281        /*
 282         * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
 283         * All the super blocks will be iterated, including upper_sb.
 284         *
 285         * If this is a syncfs(2) call, then we do need to call
 286         * sync_filesystem() on upper_sb, but enough if we do it when being
 287         * called with wait == 1.
 288         */
 289        if (!wait)
 290                return 0;
 291
 292        upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
 293
 294        down_read(&upper_sb->s_umount);
 295        ret = sync_filesystem(upper_sb);
 296        up_read(&upper_sb->s_umount);
 297
 298        return ret;
 299}
 300
 301/**
 302 * ovl_statfs
 303 * @sb: The overlayfs super block
 304 * @buf: The struct kstatfs to fill in with stats
 305 *
 306 * Get the filesystem statistics.  As writes always target the upper layer
 307 * filesystem pass the statfs to the upper filesystem (if it exists)
 308 */
 309static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
 310{
 311        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 312        struct dentry *root_dentry = dentry->d_sb->s_root;
 313        struct path path;
 314        int err;
 315
 316        ovl_path_real(root_dentry, &path);
 317
 318        err = vfs_statfs(&path, buf);
 319        if (!err) {
 320                buf->f_namelen = ofs->namelen;
 321                buf->f_type = OVERLAYFS_SUPER_MAGIC;
 322        }
 323
 324        return err;
 325}
 326
 327/* Will this overlay be forced to mount/remount ro? */
 328static bool ovl_force_readonly(struct ovl_fs *ofs)
 329{
 330        return (!ovl_upper_mnt(ofs) || !ofs->workdir);
 331}
 332
 333static const char *ovl_redirect_mode_def(void)
 334{
 335        return ovl_redirect_dir_def ? "on" : "off";
 336}
 337
/*
 * Names of the "xino" modes as printed by ovl_show_options(), indexed by
 * config.xino.
 * NOTE(review): the order presumably matches OVL_XINO_OFF/AUTO/ON - confirm
 * against the enum definition.
 */
static const char * const ovl_xino_str[] = {
        "off",
        "auto",
        "on",
};
 343
 344static inline int ovl_xino_def(void)
 345{
 346        return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
 347}
 348
 349/**
 350 * ovl_show_options
 351 *
 352 * Prints the mount options for a given superblock.
 353 * Returns zero; does not fail.
 354 */
 355static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 356{
 357        struct super_block *sb = dentry->d_sb;
 358        struct ovl_fs *ofs = sb->s_fs_info;
 359
 360        seq_show_option(m, "lowerdir", ofs->config.lowerdir);
 361        if (ofs->config.upperdir) {
 362                seq_show_option(m, "upperdir", ofs->config.upperdir);
 363                seq_show_option(m, "workdir", ofs->config.workdir);
 364        }
 365        if (ofs->config.default_permissions)
 366                seq_puts(m, ",default_permissions");
 367        if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
 368                seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
 369        if (ofs->config.index != ovl_index_def)
 370                seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
 371        if (!ofs->config.uuid)
 372                seq_puts(m, ",uuid=off");
 373        if (ofs->config.nfs_export != ovl_nfs_export_def)
 374                seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
 375                                                "on" : "off");
 376        if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb))
 377                seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
 378        if (ofs->config.metacopy != ovl_metacopy_def)
 379                seq_printf(m, ",metacopy=%s",
 380                           ofs->config.metacopy ? "on" : "off");
 381        if (ofs->config.ovl_volatile)
 382                seq_puts(m, ",volatile");
 383        return 0;
 384}
 385
 386static int ovl_remount(struct super_block *sb, int *flags, char *data)
 387{
 388        struct ovl_fs *ofs = sb->s_fs_info;
 389        struct super_block *upper_sb;
 390        int ret = 0;
 391
 392        if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
 393                return -EROFS;
 394
 395        if (*flags & SB_RDONLY && !sb_rdonly(sb)) {
 396                upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
 397                if (ovl_should_sync(ofs)) {
 398                        down_read(&upper_sb->s_umount);
 399                        ret = sync_filesystem(upper_sb);
 400                        up_read(&upper_sb->s_umount);
 401                }
 402        }
 403
 404        return ret;
 405}
 406
/*
 * Super block operations.  All ovl_* callbacks are defined in this file;
 * generic_delete_inode is the generic VFS helper used for ->drop_inode.
 */
static const struct super_operations ovl_super_operations = {
        .alloc_inode    = ovl_alloc_inode,
        .free_inode     = ovl_free_inode,
        .destroy_inode  = ovl_destroy_inode,
        .drop_inode     = generic_delete_inode,
        .put_super      = ovl_put_super,
        .sync_fs        = ovl_sync_fs,
        .statfs         = ovl_statfs,
        .show_options   = ovl_show_options,
        .remount_fs     = ovl_remount,
};
 418
/*
 * Mount option tokens, used as the token values in the ovl_tokens match
 * table and as the case labels in ovl_parse_opt().
 * NOTE(review): the numeric values appear to be used only for that
 * matching, so the position of OPT_USERXATTR between the NFS export
 * entries looks harmless - confirm before reordering.
 */
enum {
        OPT_LOWERDIR,
        OPT_UPPERDIR,
        OPT_WORKDIR,
        OPT_DEFAULT_PERMISSIONS,
        OPT_REDIRECT_DIR,
        OPT_INDEX_ON,
        OPT_INDEX_OFF,
        OPT_UUID_ON,
        OPT_UUID_OFF,
        OPT_NFS_EXPORT_ON,
        OPT_USERXATTR,
        OPT_NFS_EXPORT_OFF,
        OPT_XINO_ON,
        OPT_XINO_OFF,
        OPT_XINO_AUTO,
        OPT_METACOPY_ON,
        OPT_METACOPY_OFF,
        OPT_VOLATILE,
        OPT_ERR,        /* table terminator; not a real option */
};
 440
/*
 * Pattern table passed to match_token() by ovl_parse_opt().  Entries with
 * "%s" carry a string argument; the on/off style options are spelled out
 * as separate literal patterns.  The table ends with the {OPT_ERR, NULL}
 * entry.
 */
static const match_table_t ovl_tokens = {
        {OPT_LOWERDIR,                  "lowerdir=%s"},
        {OPT_UPPERDIR,                  "upperdir=%s"},
        {OPT_WORKDIR,                   "workdir=%s"},
        {OPT_DEFAULT_PERMISSIONS,       "default_permissions"},
        {OPT_REDIRECT_DIR,              "redirect_dir=%s"},
        {OPT_INDEX_ON,                  "index=on"},
        {OPT_INDEX_OFF,                 "index=off"},
        {OPT_USERXATTR,                 "userxattr"},
        {OPT_UUID_ON,                   "uuid=on"},
        {OPT_UUID_OFF,                  "uuid=off"},
        {OPT_NFS_EXPORT_ON,             "nfs_export=on"},
        {OPT_NFS_EXPORT_OFF,            "nfs_export=off"},
        {OPT_XINO_ON,                   "xino=on"},
        {OPT_XINO_OFF,                  "xino=off"},
        {OPT_XINO_AUTO,                 "xino=auto"},
        {OPT_METACOPY_ON,               "metacopy=on"},
        {OPT_METACOPY_OFF,              "metacopy=off"},
        {OPT_VOLATILE,                  "volatile"},
        {OPT_ERR,                       NULL}
};
 462
 463static char *ovl_next_opt(char **s)
 464{
 465        char *sbegin = *s;
 466        char *p;
 467
 468        if (sbegin == NULL)
 469                return NULL;
 470
 471        for (p = sbegin; *p; p++) {
 472                if (*p == '\\') {
 473                        p++;
 474                        if (!*p)
 475                                break;
 476                } else if (*p == ',') {
 477                        *p = '\0';
 478                        *s = p + 1;
 479                        return sbegin;
 480                }
 481        }
 482        *s = NULL;
 483        return sbegin;
 484}
 485
 486static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
 487{
 488        if (strcmp(mode, "on") == 0) {
 489                config->redirect_dir = true;
 490                /*
 491                 * Does not make sense to have redirect creation without
 492                 * redirect following.
 493                 */
 494                config->redirect_follow = true;
 495        } else if (strcmp(mode, "follow") == 0) {
 496                config->redirect_follow = true;
 497        } else if (strcmp(mode, "off") == 0) {
 498                if (ovl_redirect_always_follow)
 499                        config->redirect_follow = true;
 500        } else if (strcmp(mode, "nofollow") != 0) {
 501                pr_err("bad mount option \"redirect_dir=%s\"\n",
 502                       mode);
 503                return -EINVAL;
 504        }
 505
 506        return 0;
 507}
 508
/*
 * Parse the comma-separated mount option string @opt into @config and
 * resolve the dependencies and conflicts between the parsed options.
 *
 * @opt is modified in place by ovl_next_opt().  The strings stored in
 * @config are allocated here and are not freed on the error paths of this
 * function.
 * NOTE(review): presumably the caller releases them (ovl_free_fs() frees
 * these config fields) - confirm.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or -EINVAL for an
 * unrecognized option or an explicitly requested conflicting combination.
 */
static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
        char *p;
        int err;
        /*
         * The *_opt flags record that an option was given explicitly, so
         * that a conflict with an explicit option can be told apart from
         * a conflict with a default.
         */
        bool metacopy_opt = false, redirect_opt = false;
        bool nfs_export_opt = false, index_opt = false;

        /* Start from the default mode; "redirect_dir=" may replace it */
        config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
        if (!config->redirect_mode)
                return -ENOMEM;

        while ((p = ovl_next_opt(&opt)) != NULL) {
                int token;
                substring_t args[MAX_OPT_ARGS];

                /* Skip empty options, e.g. from consecutive commas */
                if (!*p)
                        continue;

                token = match_token(p, ovl_tokens, args);
                switch (token) {
                /* String options: a later occurrence replaces an earlier one */
                case OPT_UPPERDIR:
                        kfree(config->upperdir);
                        config->upperdir = match_strdup(&args[0]);
                        if (!config->upperdir)
                                return -ENOMEM;
                        break;

                case OPT_LOWERDIR:
                        kfree(config->lowerdir);
                        config->lowerdir = match_strdup(&args[0]);
                        if (!config->lowerdir)
                                return -ENOMEM;
                        break;

                case OPT_WORKDIR:
                        kfree(config->workdir);
                        config->workdir = match_strdup(&args[0]);
                        if (!config->workdir)
                                return -ENOMEM;
                        break;

                case OPT_DEFAULT_PERMISSIONS:
                        config->default_permissions = true;
                        break;

                case OPT_REDIRECT_DIR:
                        /* The mode string is validated after the loop */
                        kfree(config->redirect_mode);
                        config->redirect_mode = match_strdup(&args[0]);
                        if (!config->redirect_mode)
                                return -ENOMEM;
                        redirect_opt = true;
                        break;

                case OPT_INDEX_ON:
                        config->index = true;
                        index_opt = true;
                        break;

                case OPT_INDEX_OFF:
                        config->index = false;
                        index_opt = true;
                        break;

                case OPT_UUID_ON:
                        config->uuid = true;
                        break;

                case OPT_UUID_OFF:
                        config->uuid = false;
                        break;

                case OPT_NFS_EXPORT_ON:
                        config->nfs_export = true;
                        nfs_export_opt = true;
                        break;

                case OPT_NFS_EXPORT_OFF:
                        config->nfs_export = false;
                        nfs_export_opt = true;
                        break;

                case OPT_XINO_ON:
                        config->xino = OVL_XINO_ON;
                        break;

                case OPT_XINO_OFF:
                        config->xino = OVL_XINO_OFF;
                        break;

                case OPT_XINO_AUTO:
                        config->xino = OVL_XINO_AUTO;
                        break;

                case OPT_METACOPY_ON:
                        config->metacopy = true;
                        metacopy_opt = true;
                        break;

                case OPT_METACOPY_OFF:
                        config->metacopy = false;
                        metacopy_opt = true;
                        break;

                case OPT_VOLATILE:
                        config->ovl_volatile = true;
                        break;

                case OPT_USERXATTR:
                        config->userxattr = true;
                        break;

                default:
                        pr_err("unrecognized mount option \"%s\" or missing value\n",
                                        p);
                        return -EINVAL;
                }
        }

        /* Workdir/index are useless in non-upper mount */
        if (!config->upperdir) {
                if (config->workdir) {
                        pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
                                config->workdir);
                        kfree(config->workdir);
                        config->workdir = NULL;
                }
                if (config->index && index_opt) {
                        pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n");
                        /* Treat index as not explicitly given from here on */
                        index_opt = false;
                }
                config->index = false;
        }

        if (!config->upperdir && config->ovl_volatile) {
                pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
                config->ovl_volatile = false;
        }

        /* Sets redirect_dir/redirect_follow; rejects unknown mode strings */
        err = ovl_parse_redirect_mode(config, config->redirect_mode);
        if (err)
                return err;

        /*
         * This is to make the logic below simpler.  It doesn't make any other
         * difference, since config->redirect_dir is only used for upper.
         */
        if (!config->upperdir && config->redirect_follow)
                config->redirect_dir = true;

        /* Resolve metacopy -> redirect_dir dependency */
        if (config->metacopy && !config->redirect_dir) {
                if (metacopy_opt && redirect_opt) {
                        pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
                               config->redirect_mode);
                        return -EINVAL;
                }
                if (redirect_opt) {
                        /*
                         * There was an explicit redirect_dir=... that resulted
                         * in this conflict.
                         */
                        pr_info("disabling metacopy due to redirect_dir=%s\n",
                                config->redirect_mode);
                        config->metacopy = false;
                } else {
                        /* Automatically enable redirect otherwise. */
                        config->redirect_follow = config->redirect_dir = true;
                }
        }

        /* Resolve nfs_export -> index dependency */
        if (config->nfs_export && !config->index) {
                if (!config->upperdir && config->redirect_follow) {
                        pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
                        config->nfs_export = false;
                } else if (nfs_export_opt && index_opt) {
                        pr_err("conflicting options: nfs_export=on,index=off\n");
                        return -EINVAL;
                } else if (index_opt) {
                        /*
                         * There was an explicit index=off that resulted
                         * in this conflict.
                         */
                        pr_info("disabling nfs_export due to index=off\n");
                        config->nfs_export = false;
                } else {
                        /* Automatically enable index otherwise. */
                        config->index = true;
                }
        }

        /* Resolve nfs_export -> !metacopy dependency */
        if (config->nfs_export && config->metacopy) {
                if (nfs_export_opt && metacopy_opt) {
                        pr_err("conflicting options: nfs_export=on,metacopy=on\n");
                        return -EINVAL;
                }
                if (metacopy_opt) {
                        /*
                         * There was an explicit metacopy=on that resulted
                         * in this conflict.
                         */
                        pr_info("disabling nfs_export due to metacopy=on\n");
                        config->nfs_export = false;
                } else {
                        /*
                         * There was an explicit nfs_export=on that resulted
                         * in this conflict.
                         */
                        pr_info("disabling metacopy due to nfs_export=on\n");
                        config->metacopy = false;
                }
        }


        /* Resolve userxattr -> !redirect && !metacopy dependency */
        if (config->userxattr) {
                if (config->redirect_follow && redirect_opt) {
                        pr_err("conflicting options: userxattr,redirect_dir=%s\n",
                               config->redirect_mode);
                        return -EINVAL;
                }
                if (config->metacopy && metacopy_opt) {
                        pr_err("conflicting options: userxattr,metacopy=on\n");
                        return -EINVAL;
                }
                /*
                 * Silently disable default setting of redirect and metacopy.
                 * This shall be the default in the future as well: these
                 * options must be explicitly enabled if used together with
                 * userxattr.
                 */
                config->redirect_dir = config->redirect_follow = false;
                config->metacopy = false;
        }

        return 0;
}
 747
/* Directory names that are looked up/created below the work base dir. */
#define OVL_WORKDIR_NAME "work"
#define OVL_INDEXDIR_NAME "index"

/*
 * Look up or create the directory @name below ofs->workbasedir.
 *
 * @persist: if true, an already existing directory is returned as it is;
 *           if false, an existing directory is cleaned up once with
 *           ovl_workdir_cleanup() and the lookup is retried.
 *
 * A newly created directory has its POSIX ACL xattrs removed and its mode
 * set to S_IFDIR | 0.
 *
 * Returns the dentry of the directory on success, ERR_PTR(-EINVAL) if the
 * cleanup of an existing directory returned -EINVAL, or NULL after logging
 * a warning for any other failure.  The parent directory inode is locked
 * for the whole operation.
 */
static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
                                         const char *name, bool persist)
{
        struct inode *dir =  ofs->workbasedir->d_inode;
        struct vfsmount *mnt = ovl_upper_mnt(ofs);
        struct dentry *work;
        int err;
        bool retried = false;

        inode_lock_nested(dir, I_MUTEX_PARENT);
retry:
        work = lookup_one_len(name, ofs->workbasedir, strlen(name));

        if (!IS_ERR(work)) {
                struct iattr attr = {
                        .ia_valid = ATTR_MODE,
                        .ia_mode = S_IFDIR | 0,
                };

                if (work->d_inode) {
                        /* The directory already exists */
                        err = -EEXIST;
                        /* Still there after one cleanup attempt: give up */
                        if (retried)
                                goto out_dput;

                        /* Persistent directories are reused as they are */
                        if (persist)
                                goto out_unlock;

                        retried = true;
                        err = ovl_workdir_cleanup(dir, mnt, work, 0);
                        dput(work);
                        if (err == -EINVAL) {
                                work = ERR_PTR(err);
                                goto out_unlock;
                        }
                        /* Any other cleanup result leads to a single retry */
                        goto retry;
                }

                work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode));
                err = PTR_ERR(work);
                if (IS_ERR(work))
                        goto out_err;

                /*
                 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
                 *
                 * a) success (there was a POSIX ACL xattr and was removed)
                 * b) -ENODATA (there was no POSIX ACL xattr)
                 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
                 *
                 * There are various other error values that could effectively
                 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
                 * if the xattr name is too long), but the set of filesystems
                 * allowed as upper are limited to "normal" ones, where checking
                 * for the above two errors is sufficient.
                 */
                err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
                if (err && err != -ENODATA && err != -EOPNOTSUPP)
                        goto out_dput;

                err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
                if (err && err != -ENODATA && err != -EOPNOTSUPP)
                        goto out_dput;

                /* Clear any inherited mode bits */
                inode_lock(work->d_inode);
                err = notify_change(work, &attr, NULL);
                inode_unlock(work->d_inode);
                if (err)
                        goto out_dput;
        } else {
                err = PTR_ERR(work);
                goto out_err;
        }
out_unlock:
        inode_unlock(dir);
        return work;

out_dput:
        dput(work);
out_err:
        /* Failure is reported through the NULL return value */
        pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
                ofs->config.workdir, name, -err);
        work = NULL;
        goto out_unlock;
}
 836
 837static void ovl_unescape(char *s)
 838{
 839        char *d = s;
 840
 841        for (;; s++, d++) {
 842                if (*s == '\\')
 843                        s++;
 844                *d = *s;
 845                if (!*s)
 846                        break;
 847        }
 848}
 849
 850static int ovl_mount_dir_noesc(const char *name, struct path *path)
 851{
 852        int err = -EINVAL;
 853
 854        if (!*name) {
 855                pr_err("empty lowerdir\n");
 856                goto out;
 857        }
 858        err = kern_path(name, LOOKUP_FOLLOW, path);
 859        if (err) {
 860                pr_err("failed to resolve '%s': %i\n", name, err);
 861                goto out;
 862        }
 863        err = -EINVAL;
 864        if (ovl_dentry_weird(path->dentry)) {
 865                pr_err("filesystem on '%s' not supported\n", name);
 866                goto out_put;
 867        }
 868        if (!d_is_dir(path->dentry)) {
 869                pr_err("'%s' not a directory\n", name);
 870                goto out_put;
 871        }
 872        return 0;
 873
 874out_put:
 875        path_put_init(path);
 876out:
 877        return err;
 878}
 879
 880static int ovl_mount_dir(const char *name, struct path *path)
 881{
 882        int err = -ENOMEM;
 883        char *tmp = kstrdup(name, GFP_KERNEL);
 884
 885        if (tmp) {
 886                ovl_unescape(tmp);
 887                err = ovl_mount_dir_noesc(tmp, path);
 888
 889                if (!err && path->dentry->d_flags & DCACHE_OP_REAL) {
 890                        pr_err("filesystem on '%s' not supported as upperdir\n",
 891                               tmp);
 892                        path_put_init(path);
 893                        err = -EINVAL;
 894                }
 895                kfree(tmp);
 896        }
 897        return err;
 898}
 899
 900static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
 901                             const char *name)
 902{
 903        struct kstatfs statfs;
 904        int err = vfs_statfs(path, &statfs);
 905
 906        if (err)
 907                pr_err("statfs failed on '%s'\n", name);
 908        else
 909                ofs->namelen = max(ofs->namelen, statfs.f_namelen);
 910
 911        return err;
 912}
 913
 914static int ovl_lower_dir(const char *name, struct path *path,
 915                         struct ovl_fs *ofs, int *stack_depth)
 916{
 917        int fh_type;
 918        int err;
 919
 920        err = ovl_mount_dir_noesc(name, path);
 921        if (err)
 922                return err;
 923
 924        err = ovl_check_namelen(path, ofs, name);
 925        if (err)
 926                return err;
 927
 928        *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
 929
 930        /*
 931         * The inodes index feature and NFS export need to encode and decode
 932         * file handles, so they require that all layers support them.
 933         */
 934        fh_type = ovl_can_decode_fh(path->dentry->d_sb);
 935        if ((ofs->config.nfs_export ||
 936             (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
 937                ofs->config.index = false;
 938                ofs->config.nfs_export = false;
 939                pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
 940                        name);
 941        }
 942
 943        /* Check if lower fs has 32bit inode numbers */
 944        if (fh_type != FILEID_INO32_GEN)
 945                ofs->xino_mode = -1;
 946
 947        return 0;
 948}
 949
 950/* Workdir should not be subdir of upperdir and vice versa */
 951static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
 952{
 953        bool ok = false;
 954
 955        if (workdir != upperdir) {
 956                ok = (lock_rename(workdir, upperdir) == NULL);
 957                unlock_rename(workdir, upperdir);
 958        }
 959        return ok;
 960}
 961
 962static unsigned int ovl_split_lowerdirs(char *str)
 963{
 964        unsigned int ctr = 1;
 965        char *s, *d;
 966
 967        for (s = d = str;; s++, d++) {
 968                if (*s == '\\') {
 969                        s++;
 970                } else if (*s == ':') {
 971                        *d = '\0';
 972                        ctr++;
 973                        continue;
 974                }
 975                *d = *s;
 976                if (!*s)
 977                        break;
 978        }
 979        return ctr;
 980}
 981
 982static int __maybe_unused
 983ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
 984                        struct dentry *dentry, struct inode *inode,
 985                        const char *name, void *buffer, size_t size)
 986{
 987        return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
 988}
 989
 990static int __maybe_unused
 991ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
 992                        struct dentry *dentry, struct inode *inode,
 993                        const char *name, const void *value,
 994                        size_t size, int flags)
 995{
 996        struct dentry *workdir = ovl_workdir(dentry);
 997        struct inode *realinode = ovl_inode_real(inode);
 998        struct posix_acl *acl = NULL;
 999        int err;
1000
1001        /* Check that everything is OK before copy-up */
1002        if (value) {
1003                acl = posix_acl_from_xattr(&init_user_ns, value, size);
1004                if (IS_ERR(acl))
1005                        return PTR_ERR(acl);
1006        }
1007        err = -EOPNOTSUPP;
1008        if (!IS_POSIXACL(d_inode(workdir)))
1009                goto out_acl_release;
1010        if (!realinode->i_op->set_acl)
1011                goto out_acl_release;
1012        if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
1013                err = acl ? -EACCES : 0;
1014                goto out_acl_release;
1015        }
1016        err = -EPERM;
1017        if (!inode_owner_or_capable(inode))
1018                goto out_acl_release;
1019
1020        posix_acl_release(acl);
1021
1022        /*
1023         * Check if sgid bit needs to be cleared (actual setacl operation will
1024         * be done with mounter's capabilities and so that won't do it for us).
1025         */
1026        if (unlikely(inode->i_mode & S_ISGID) &&
1027            handler->flags == ACL_TYPE_ACCESS &&
1028            !in_group_p(inode->i_gid) &&
1029            !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
1030                struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
1031
1032                err = ovl_setattr(dentry, &iattr);
1033                if (err)
1034                        return err;
1035        }
1036
1037        err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
1038        if (!err)
1039                ovl_copyattr(ovl_inode_real(inode), inode);
1040
1041        return err;
1042
1043out_acl_release:
1044        posix_acl_release(acl);
1045        return err;
1046}
1047
1048static int ovl_own_xattr_get(const struct xattr_handler *handler,
1049                             struct dentry *dentry, struct inode *inode,
1050                             const char *name, void *buffer, size_t size)
1051{
1052        return -EOPNOTSUPP;
1053}
1054
1055static int ovl_own_xattr_set(const struct xattr_handler *handler,
1056                             struct dentry *dentry, struct inode *inode,
1057                             const char *name, const void *value,
1058                             size_t size, int flags)
1059{
1060        return -EOPNOTSUPP;
1061}
1062
1063static int ovl_other_xattr_get(const struct xattr_handler *handler,
1064                               struct dentry *dentry, struct inode *inode,
1065                               const char *name, void *buffer, size_t size)
1066{
1067        return ovl_xattr_get(dentry, inode, name, buffer, size);
1068}
1069
1070static int ovl_other_xattr_set(const struct xattr_handler *handler,
1071                               struct dentry *dentry, struct inode *inode,
1072                               const char *name, const void *value,
1073                               size_t size, int flags)
1074{
1075        return ovl_xattr_set(dentry, inode, name, value, size, flags);
1076}
1077
1078static const struct xattr_handler __maybe_unused
1079ovl_posix_acl_access_xattr_handler = {
1080        .name = XATTR_NAME_POSIX_ACL_ACCESS,
1081        .flags = ACL_TYPE_ACCESS,
1082        .get = ovl_posix_acl_xattr_get,
1083        .set = ovl_posix_acl_xattr_set,
1084};
1085
1086static const struct xattr_handler __maybe_unused
1087ovl_posix_acl_default_xattr_handler = {
1088        .name = XATTR_NAME_POSIX_ACL_DEFAULT,
1089        .flags = ACL_TYPE_DEFAULT,
1090        .get = ovl_posix_acl_xattr_get,
1091        .set = ovl_posix_acl_xattr_set,
1092};
1093
1094static const struct xattr_handler ovl_own_trusted_xattr_handler = {
1095        .prefix = OVL_XATTR_TRUSTED_PREFIX,
1096        .get = ovl_own_xattr_get,
1097        .set = ovl_own_xattr_set,
1098};
1099
1100static const struct xattr_handler ovl_own_user_xattr_handler = {
1101        .prefix = OVL_XATTR_USER_PREFIX,
1102        .get = ovl_own_xattr_get,
1103        .set = ovl_own_xattr_set,
1104};
1105
1106static const struct xattr_handler ovl_other_xattr_handler = {
1107        .prefix = "", /* catch all */
1108        .get = ovl_other_xattr_get,
1109        .set = ovl_other_xattr_set,
1110};
1111
1112static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
1113#ifdef CONFIG_FS_POSIX_ACL
1114        &ovl_posix_acl_access_xattr_handler,
1115        &ovl_posix_acl_default_xattr_handler,
1116#endif
1117        &ovl_own_trusted_xattr_handler,
1118        &ovl_other_xattr_handler,
1119        NULL
1120};
1121
1122static const struct xattr_handler *ovl_user_xattr_handlers[] = {
1123#ifdef CONFIG_FS_POSIX_ACL
1124        &ovl_posix_acl_access_xattr_handler,
1125        &ovl_posix_acl_default_xattr_handler,
1126#endif
1127        &ovl_own_user_xattr_handler,
1128        &ovl_other_xattr_handler,
1129        NULL
1130};
1131
1132static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
1133                          struct inode **ptrap, const char *name)
1134{
1135        struct inode *trap;
1136        int err;
1137
1138        trap = ovl_get_trap_inode(sb, dir);
1139        err = PTR_ERR_OR_ZERO(trap);
1140        if (err) {
1141                if (err == -ELOOP)
1142                        pr_err("conflicting %s path\n", name);
1143                return err;
1144        }
1145
1146        *ptrap = trap;
1147        return 0;
1148}
1149
1150/*
1151 * Determine how we treat concurrent use of upperdir/workdir based on the
1152 * index feature. This is papering over mount leaks of container runtimes,
1153 * for example, an old overlay mount is leaked and now its upperdir is
1154 * attempted to be used as a lower layer in a new overlay mount.
1155 */
1156static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
1157{
1158        if (ofs->config.index) {
1159                pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
1160                       name);
1161                return -EBUSY;
1162        } else {
1163                pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
1164                        name);
1165                return 0;
1166        }
1167}
1168
1169static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
1170                         struct ovl_layer *upper_layer, struct path *upperpath)
1171{
1172        struct vfsmount *upper_mnt;
1173        int err;
1174
1175        err = ovl_mount_dir(ofs->config.upperdir, upperpath);
1176        if (err)
1177                goto out;
1178
1179        /* Upper fs should not be r/o */
1180        if (sb_rdonly(upperpath->mnt->mnt_sb)) {
1181                pr_err("upper fs is r/o, try multi-lower layers mount\n");
1182                err = -EINVAL;
1183                goto out;
1184        }
1185
1186        err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
1187        if (err)
1188                goto out;
1189
1190        err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap,
1191                             "upperdir");
1192        if (err)
1193                goto out;
1194
1195        upper_mnt = clone_private_mount(upperpath);
1196        err = PTR_ERR(upper_mnt);
1197        if (IS_ERR(upper_mnt)) {
1198                pr_err("failed to clone upperpath\n");
1199                goto out;
1200        }
1201
1202        /* Don't inherit atime flags */
1203        upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
1204        upper_layer->mnt = upper_mnt;
1205        upper_layer->idx = 0;
1206        upper_layer->fsid = 0;
1207
1208        /*
1209         * Inherit SB_NOSEC flag from upperdir.
1210         *
1211         * This optimization changes behavior when a security related attribute
1212         * (suid/sgid/security.*) is changed on an underlying layer.  This is
1213         * okay because we don't yet have guarantees in that case, but it will
1214         * need careful treatment once we want to honour changes to underlying
1215         * filesystems.
1216         */
1217        if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
1218                sb->s_flags |= SB_NOSEC;
1219
1220        if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
1221                ofs->upperdir_locked = true;
1222        } else {
1223                err = ovl_report_in_use(ofs, "upperdir");
1224                if (err)
1225                        goto out;
1226        }
1227
1228        err = 0;
1229out:
1230        return err;
1231}
1232
1233/*
1234 * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and
1235 * negative values if error is encountered.
1236 */
1237static int ovl_check_rename_whiteout(struct dentry *workdir)
1238{
1239        struct inode *dir = d_inode(workdir);
1240        struct dentry *temp;
1241        struct dentry *dest;
1242        struct dentry *whiteout;
1243        struct name_snapshot name;
1244        int err;
1245
1246        inode_lock_nested(dir, I_MUTEX_PARENT);
1247
1248        temp = ovl_create_temp(workdir, OVL_CATTR(S_IFREG | 0));
1249        err = PTR_ERR(temp);
1250        if (IS_ERR(temp))
1251                goto out_unlock;
1252
1253        dest = ovl_lookup_temp(workdir);
1254        err = PTR_ERR(dest);
1255        if (IS_ERR(dest)) {
1256                dput(temp);
1257                goto out_unlock;
1258        }
1259
1260        /* Name is inline and stable - using snapshot as a copy helper */
1261        take_dentry_name_snapshot(&name, temp);
1262        err = ovl_do_rename(dir, temp, dir, dest, RENAME_WHITEOUT);
1263        if (err) {
1264                if (err == -EINVAL)
1265                        err = 0;
1266                goto cleanup_temp;
1267        }
1268
1269        whiteout = lookup_one_len(name.name.name, workdir, name.name.len);
1270        err = PTR_ERR(whiteout);
1271        if (IS_ERR(whiteout))
1272                goto cleanup_temp;
1273
1274        err = ovl_is_whiteout(whiteout);
1275
1276        /* Best effort cleanup of whiteout and temp file */
1277        if (err)
1278                ovl_cleanup(dir, whiteout);
1279        dput(whiteout);
1280
1281cleanup_temp:
1282        ovl_cleanup(dir, temp);
1283        release_dentry_name_snapshot(&name);
1284        dput(temp);
1285        dput(dest);
1286
1287out_unlock:
1288        inode_unlock(dir);
1289
1290        return err;
1291}
1292
1293static struct dentry *ovl_lookup_or_create(struct dentry *parent,
1294                                           const char *name, umode_t mode)
1295{
1296        size_t len = strlen(name);
1297        struct dentry *child;
1298
1299        inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
1300        child = lookup_one_len(name, parent, len);
1301        if (!IS_ERR(child) && !child->d_inode)
1302                child = ovl_create_real(parent->d_inode, child,
1303                                        OVL_CATTR(mode));
1304        inode_unlock(parent->d_inode);
1305        dput(parent);
1306
1307        return child;
1308}
1309
1310/*
1311 * Creates $workdir/work/incompat/volatile/dirty file if it is not already
1312 * present.
1313 */
1314static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
1315{
1316        unsigned int ctr;
1317        struct dentry *d = dget(ofs->workbasedir);
1318        static const char *const volatile_path[] = {
1319                OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
1320        };
1321        const char *const *name = volatile_path;
1322
1323        for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
1324                d = ovl_lookup_or_create(d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
1325                if (IS_ERR(d))
1326                        return PTR_ERR(d);
1327        }
1328        dput(d);
1329        return 0;
1330}
1331
1332static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
1333                            struct path *workpath)
1334{
1335        struct vfsmount *mnt = ovl_upper_mnt(ofs);
1336        struct dentry *temp, *workdir;
1337        bool rename_whiteout;
1338        bool d_type;
1339        int fh_type;
1340        int err;
1341
1342        err = mnt_want_write(mnt);
1343        if (err)
1344                return err;
1345
1346        workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
1347        err = PTR_ERR(workdir);
1348        if (IS_ERR_OR_NULL(workdir))
1349                goto out;
1350
1351        ofs->workdir = workdir;
1352
1353        err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
1354        if (err)
1355                goto out;
1356
1357        /*
1358         * Upper should support d_type, else whiteouts are visible.  Given
1359         * workdir and upper are on same fs, we can do iterate_dir() on
1360         * workdir. This check requires successful creation of workdir in
1361         * previous step.
1362         */
1363        err = ovl_check_d_type_supported(workpath);
1364        if (err < 0)
1365                goto out;
1366
1367        d_type = err;
1368        if (!d_type)
1369                pr_warn("upper fs needs to support d_type.\n");
1370
1371        /* Check if upper/work fs supports O_TMPFILE */
1372        temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
1373        ofs->tmpfile = !IS_ERR(temp);
1374        if (ofs->tmpfile)
1375                dput(temp);
1376        else
1377                pr_warn("upper fs does not support tmpfile.\n");
1378
1379
1380        /* Check if upper/work fs supports RENAME_WHITEOUT */
1381        err = ovl_check_rename_whiteout(ofs->workdir);
1382        if (err < 0)
1383                goto out;
1384
1385        rename_whiteout = err;
1386        if (!rename_whiteout)
1387                pr_warn("upper fs does not support RENAME_WHITEOUT.\n");
1388
1389        /*
1390         * Check if upper/work fs supports (trusted|user).overlay.* xattr
1391         */
1392        err = ovl_do_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
1393        if (err) {
1394                ofs->noxattr = true;
1395                ofs->config.index = false;
1396                ofs->config.metacopy = false;
1397                pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
1398                err = 0;
1399        } else {
1400                ovl_do_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
1401        }
1402
1403        /*
1404         * We allowed sub-optimal upper fs configuration and don't want to break
1405         * users over kernel upgrade, but we never allowed remote upper fs, so
1406         * we can enforce strict requirements for remote upper fs.
1407         */
1408        if (ovl_dentry_remote(ofs->workdir) &&
1409            (!d_type || !rename_whiteout || ofs->noxattr)) {
1410                pr_err("upper fs missing required features.\n");
1411                err = -EINVAL;
1412                goto out;
1413        }
1414
1415        /*
1416         * For volatile mount, create a incompat/volatile/dirty file to keep
1417         * track of it.
1418         */
1419        if (ofs->config.ovl_volatile) {
1420                err = ovl_create_volatile_dirty(ofs);
1421                if (err < 0) {
1422                        pr_err("Failed to create volatile/dirty file.\n");
1423                        goto out;
1424                }
1425        }
1426
1427        /* Check if upper/work fs supports file handles */
1428        fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
1429        if (ofs->config.index && !fh_type) {
1430                ofs->config.index = false;
1431                pr_warn("upper fs does not support file handles, falling back to index=off.\n");
1432        }
1433
1434        /* Check if upper fs has 32bit inode numbers */
1435        if (fh_type != FILEID_INO32_GEN)
1436                ofs->xino_mode = -1;
1437
1438        /* NFS export of r/w mount depends on index */
1439        if (ofs->config.nfs_export && !ofs->config.index) {
1440                pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
1441                ofs->config.nfs_export = false;
1442        }
1443out:
1444        mnt_drop_write(mnt);
1445        return err;
1446}
1447
1448static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
1449                           struct path *upperpath)
1450{
1451        int err;
1452        struct path workpath = { };
1453
1454        err = ovl_mount_dir(ofs->config.workdir, &workpath);
1455        if (err)
1456                goto out;
1457
1458        err = -EINVAL;
1459        if (upperpath->mnt != workpath.mnt) {
1460                pr_err("workdir and upperdir must reside under the same mount\n");
1461                goto out;
1462        }
1463        if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
1464                pr_err("workdir and upperdir must be separate subtrees\n");
1465                goto out;
1466        }
1467
1468        ofs->workbasedir = dget(workpath.dentry);
1469
1470        if (ovl_inuse_trylock(ofs->workbasedir)) {
1471                ofs->workdir_locked = true;
1472        } else {
1473                err = ovl_report_in_use(ofs, "workdir");
1474                if (err)
1475                        goto out;
1476        }
1477
1478        err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
1479                             "workdir");
1480        if (err)
1481                goto out;
1482
1483        err = ovl_make_workdir(sb, ofs, &workpath);
1484
1485out:
1486        path_put(&workpath);
1487
1488        return err;
1489}
1490
1491static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
1492                            struct ovl_entry *oe, struct path *upperpath)
1493{
1494        struct vfsmount *mnt = ovl_upper_mnt(ofs);
1495        struct dentry *indexdir;
1496        int err;
1497
1498        err = mnt_want_write(mnt);
1499        if (err)
1500                return err;
1501
1502        /* Verify lower root is upper root origin */
1503        err = ovl_verify_origin(ofs, upperpath->dentry,
1504                                oe->lowerstack[0].dentry, true);
1505        if (err) {
1506                pr_err("failed to verify upper root origin\n");
1507                goto out;
1508        }
1509
1510        /* index dir will act also as workdir */
1511        iput(ofs->workdir_trap);
1512        ofs->workdir_trap = NULL;
1513        dput(ofs->workdir);
1514        ofs->workdir = NULL;
1515        indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
1516        if (IS_ERR(indexdir)) {
1517                err = PTR_ERR(indexdir);
1518        } else if (indexdir) {
1519                ofs->indexdir = indexdir;
1520                ofs->workdir = dget(indexdir);
1521
1522                err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
1523                                     "indexdir");
1524                if (err)
1525                        goto out;
1526
1527                /*
1528                 * Verify upper root is exclusively associated with index dir.
1529                 * Older kernels stored upper fh in ".overlay.origin"
1530                 * xattr. If that xattr exists, verify that it is a match to
1531                 * upper dir file handle. In any case, verify or set xattr
1532                 * ".overlay.upper" to indicate that index may have
1533                 * directory entries.
1534                 */
1535                if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
1536                        err = ovl_verify_set_fh(ofs, ofs->indexdir,
1537                                                OVL_XATTR_ORIGIN,
1538                                                upperpath->dentry, true, false);
1539                        if (err)
1540                                pr_err("failed to verify index dir 'origin' xattr\n");
1541                }
1542                err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry,
1543                                       true);
1544                if (err)
1545                        pr_err("failed to verify index dir 'upper' xattr\n");
1546
1547                /* Cleanup bad/stale/orphan index entries */
1548                if (!err)
1549                        err = ovl_indexdir_cleanup(ofs);
1550        }
1551        if (err || !ofs->indexdir)
1552                pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
1553
1554out:
1555        mnt_drop_write(mnt);
1556        return err;
1557}
1558
1559static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
1560{
1561        unsigned int i;
1562
1563        if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs))
1564                return true;
1565
1566        /*
1567         * We allow using single lower with null uuid for index and nfs_export
1568         * for example to support those features with single lower squashfs.
1569         * To avoid regressions in setups of overlay with re-formatted lower
1570         * squashfs, do not allow decoding origin with lower null uuid unless
1571         * user opted-in to one of the new features that require following the
1572         * lower inode of non-dir upper.
1573         */
1574        if (!ofs->config.index && !ofs->config.metacopy && !ofs->config.xino &&
1575            uuid_is_null(uuid))
1576                return false;
1577
1578        for (i = 0; i < ofs->numfs; i++) {
1579                /*
1580                 * We use uuid to associate an overlay lower file handle with a
1581                 * lower layer, so we can accept lower fs with null uuid as long
1582                 * as all lower layers with null uuid are on the same fs.
1583                 * if we detect multiple lower fs with the same uuid, we
1584                 * disable lower file handle decoding on all of them.
1585                 */
1586                if (ofs->fs[i].is_lower &&
1587                    uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
1588                        ofs->fs[i].bad_uuid = true;
1589                        return false;
1590                }
1591        }
1592        return true;
1593}
1594
1595/* Get a unique fsid for the layer */
1596static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
1597{
1598        struct super_block *sb = path->mnt->mnt_sb;
1599        unsigned int i;
1600        dev_t dev;
1601        int err;
1602        bool bad_uuid = false;
1603
1604        for (i = 0; i < ofs->numfs; i++) {
1605                if (ofs->fs[i].sb == sb)
1606                        return i;
1607        }
1608
1609        if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
1610                bad_uuid = true;
1611                if (ofs->config.index || ofs->config.nfs_export) {
1612                        ofs->config.index = false;
1613                        ofs->config.nfs_export = false;
1614                        pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
1615                                uuid_is_null(&sb->s_uuid) ? "null" :
1616                                                            "conflicting",
1617                                path->dentry);
1618                }
1619        }
1620
1621        err = get_anon_bdev(&dev);
1622        if (err) {
1623                pr_err("failed to get anonymous bdev for lowerpath\n");
1624                return err;
1625        }
1626
1627        ofs->fs[ofs->numfs].sb = sb;
1628        ofs->fs[ofs->numfs].pseudo_dev = dev;
1629        ofs->fs[ofs->numfs].bad_uuid = bad_uuid;
1630
1631        return ofs->numfs++;
1632}
1633
1634static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
1635                          struct path *stack, unsigned int numlower,
1636                          struct ovl_layer *layers)
1637{
1638        int err;
1639        unsigned int i;
1640
1641        err = -ENOMEM;
1642        ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL);
1643        if (ofs->fs == NULL)
1644                goto out;
1645
1646        /* idx/fsid 0 are reserved for upper fs even with lower only overlay */
1647        ofs->numfs++;
1648
1649        /*
1650         * All lower layers that share the same fs as upper layer, use the same
1651         * pseudo_dev as upper layer.  Allocate fs[0].pseudo_dev even for lower
1652         * only overlay to simplify ovl_fs_free().
1653         * is_lower will be set if upper fs is shared with a lower layer.
1654         */
1655        err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
1656        if (err) {
1657                pr_err("failed to get anonymous bdev for upper fs\n");
1658                goto out;
1659        }
1660
1661        if (ovl_upper_mnt(ofs)) {
1662                ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb;
1663                ofs->fs[0].is_lower = false;
1664        }
1665
1666        for (i = 0; i < numlower; i++) {
1667                struct vfsmount *mnt;
1668                struct inode *trap;
1669                int fsid;
1670
1671                err = fsid = ovl_get_fsid(ofs, &stack[i]);
1672                if (err < 0)
1673                        goto out;
1674
1675                /*
1676                 * Check if lower root conflicts with this overlay layers before
1677                 * checking if it is in-use as upperdir/workdir of "another"
1678                 * mount, because we do not bother to check in ovl_is_inuse() if
1679                 * the upperdir/workdir is in fact in-use by our
1680                 * upperdir/workdir.
1681                 */
1682                err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
1683                if (err)
1684                        goto out;
1685
1686                if (ovl_is_inuse(stack[i].dentry)) {
1687                        err = ovl_report_in_use(ofs, "lowerdir");
1688                        if (err) {
1689                                iput(trap);
1690                                goto out;
1691                        }
1692                }
1693
1694                mnt = clone_private_mount(&stack[i]);
1695                err = PTR_ERR(mnt);
1696                if (IS_ERR(mnt)) {
1697                        pr_err("failed to clone lowerpath\n");
1698                        iput(trap);
1699                        goto out;
1700                }
1701
1702                /*
1703                 * Make lower layers R/O.  That way fchmod/fchown on lower file
1704                 * will fail instead of modifying lower fs.
1705                 */
1706                mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
1707
1708                layers[ofs->numlayer].trap = trap;
1709                layers[ofs->numlayer].mnt = mnt;
1710                layers[ofs->numlayer].idx = ofs->numlayer;
1711                layers[ofs->numlayer].fsid = fsid;
1712                layers[ofs->numlayer].fs = &ofs->fs[fsid];
1713                ofs->numlayer++;
1714                ofs->fs[fsid].is_lower = true;
1715        }
1716
1717        /*
1718         * When all layers on same fs, overlay can use real inode numbers.
1719         * With mount option "xino=<on|auto>", mounter declares that there are
1720         * enough free high bits in underlying fs to hold the unique fsid.
1721         * If overlayfs does encounter underlying inodes using the high xino
1722         * bits reserved for fsid, it emits a warning and uses the original
1723         * inode number or a non persistent inode number allocated from a
1724         * dedicated range.
1725         */
1726        if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) {
1727                if (ofs->config.xino == OVL_XINO_ON)
1728                        pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
1729                ofs->xino_mode = 0;
1730        } else if (ofs->config.xino == OVL_XINO_OFF) {
1731                ofs->xino_mode = -1;
1732        } else if (ofs->xino_mode < 0) {
1733                /*
1734                 * This is a roundup of number of bits needed for encoding
1735                 * fsid, where fsid 0 is reserved for upper fs (even with
1736                 * lower only overlay) +1 extra bit is reserved for the non
1737                 * persistent inode number range that is used for resolving
1738                 * xino lower bits overflow.
1739                 */
1740                BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
1741                ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
1742        }
1743
1744        if (ofs->xino_mode > 0) {
1745                pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
1746                        ofs->xino_mode);
1747        }
1748
1749        err = 0;
1750out:
1751        return err;
1752}
1753
1754static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
1755                                const char *lower, unsigned int numlower,
1756                                struct ovl_fs *ofs, struct ovl_layer *layers)
1757{
1758        int err;
1759        struct path *stack = NULL;
1760        unsigned int i;
1761        struct ovl_entry *oe;
1762
1763        if (!ofs->config.upperdir && numlower == 1) {
1764                pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
1765                return ERR_PTR(-EINVAL);
1766        }
1767
1768        stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL);
1769        if (!stack)
1770                return ERR_PTR(-ENOMEM);
1771
1772        err = -EINVAL;
1773        for (i = 0; i < numlower; i++) {
1774                err = ovl_lower_dir(lower, &stack[i], ofs, &sb->s_stack_depth);
1775                if (err)
1776                        goto out_err;
1777
1778                lower = strchr(lower, '\0') + 1;
1779        }
1780
1781        err = -EINVAL;
1782        sb->s_stack_depth++;
1783        if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
1784                pr_err("maximum fs stacking depth exceeded\n");
1785                goto out_err;
1786        }
1787
1788        err = ovl_get_layers(sb, ofs, stack, numlower, layers);
1789        if (err)
1790                goto out_err;
1791
1792        err = -ENOMEM;
1793        oe = ovl_alloc_entry(numlower);
1794        if (!oe)
1795                goto out_err;
1796
1797        for (i = 0; i < numlower; i++) {
1798                oe->lowerstack[i].dentry = dget(stack[i].dentry);
1799                oe->lowerstack[i].layer = &ofs->layers[i+1];
1800        }
1801
1802out:
1803        for (i = 0; i < numlower; i++)
1804                path_put(&stack[i]);
1805        kfree(stack);
1806
1807        return oe;
1808
1809out_err:
1810        oe = ERR_PTR(err);
1811        goto out;
1812}
1813
1814/*
1815 * Check if this layer root is a descendant of:
1816 * - another layer of this overlayfs instance
1817 * - upper/work dir of any overlayfs instance
1818 */
1819static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
1820                           struct dentry *dentry, const char *name)
1821{
1822        struct dentry *next = dentry, *parent;
1823        int err = 0;
1824
1825        if (!dentry)
1826                return 0;
1827
1828        parent = dget_parent(next);
1829
1830        /* Walk back ancestors to root (inclusive) looking for traps */
1831        while (!err && parent != next) {
1832                if (ovl_lookup_trap_inode(sb, parent)) {
1833                        err = -ELOOP;
1834                        pr_err("overlapping %s path\n", name);
1835                } else if (ovl_is_inuse(parent)) {
1836                        err = ovl_report_in_use(ofs, name);
1837                }
1838                next = parent;
1839                parent = dget_parent(next);
1840                dput(next);
1841        }
1842
1843        dput(parent);
1844
1845        return err;
1846}
1847
1848/*
1849 * Check if any of the layers or work dirs overlap.
1850 */
1851static int ovl_check_overlapping_layers(struct super_block *sb,
1852                                        struct ovl_fs *ofs)
1853{
1854        int i, err;
1855
1856        if (ovl_upper_mnt(ofs)) {
1857                err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root,
1858                                      "upperdir");
1859                if (err)
1860                        return err;
1861
1862                /*
1863                 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
1864                 * this instance and covers overlapping work and index dirs,
1865                 * unless work or index dir have been moved since created inside
1866                 * workbasedir.  In that case, we already have their traps in
1867                 * inode cache and we will catch that case on lookup.
1868                 */
1869                err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir");
1870                if (err)
1871                        return err;
1872        }
1873
1874        for (i = 1; i < ofs->numlayer; i++) {
1875                err = ovl_check_layer(sb, ofs,
1876                                      ofs->layers[i].mnt->mnt_root,
1877                                      "lowerdir");
1878                if (err)
1879                        return err;
1880        }
1881
1882        return 0;
1883}
1884
1885static struct dentry *ovl_get_root(struct super_block *sb,
1886                                   struct dentry *upperdentry,
1887                                   struct ovl_entry *oe)
1888{
1889        struct dentry *root;
1890        struct ovl_path *lowerpath = &oe->lowerstack[0];
1891        unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
1892        int fsid = lowerpath->layer->fsid;
1893        struct ovl_inode_params oip = {
1894                .upperdentry = upperdentry,
1895                .lowerpath = lowerpath,
1896        };
1897
1898        root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
1899        if (!root)
1900                return NULL;
1901
1902        root->d_fsdata = oe;
1903
1904        if (upperdentry) {
1905                /* Root inode uses upper st_ino/i_ino */
1906                ino = d_inode(upperdentry)->i_ino;
1907                fsid = 0;
1908                ovl_dentry_set_upper_alias(root);
1909                if (ovl_is_impuredir(sb, upperdentry))
1910                        ovl_set_flag(OVL_IMPURE, d_inode(root));
1911        }
1912
1913        /* Root is always merge -> can have whiteouts */
1914        ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
1915        ovl_dentry_set_flag(OVL_E_CONNECTED, root);
1916        ovl_set_upperdata(d_inode(root));
1917        ovl_inode_init(d_inode(root), &oip, ino, fsid);
1918        ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE);
1919
1920        return root;
1921}
1922
1923static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1924{
1925        struct path upperpath = { };
1926        struct dentry *root_dentry;
1927        struct ovl_entry *oe;
1928        struct ovl_fs *ofs;
1929        struct ovl_layer *layers;
1930        struct cred *cred;
1931        char *splitlower = NULL;
1932        unsigned int numlower;
1933        int err;
1934
1935        err = -EIO;
1936        if (WARN_ON(sb->s_user_ns != current_user_ns()))
1937                goto out;
1938
1939        sb->s_d_op = &ovl_dentry_operations;
1940
1941        err = -ENOMEM;
1942        ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
1943        if (!ofs)
1944                goto out;
1945
1946        ofs->creator_cred = cred = prepare_creds();
1947        if (!cred)
1948                goto out_err;
1949
1950        /* Is there a reason anyone would want not to share whiteouts? */
1951        ofs->share_whiteout = true;
1952
1953        ofs->config.index = ovl_index_def;
1954        ofs->config.uuid = true;
1955        ofs->config.nfs_export = ovl_nfs_export_def;
1956        ofs->config.xino = ovl_xino_def();
1957        ofs->config.metacopy = ovl_metacopy_def;
1958        err = ovl_parse_opt((char *) data, &ofs->config);
1959        if (err)
1960                goto out_err;
1961
1962        err = -EINVAL;
1963        if (!ofs->config.lowerdir) {
1964                if (!silent)
1965                        pr_err("missing 'lowerdir'\n");
1966                goto out_err;
1967        }
1968
1969        err = -ENOMEM;
1970        splitlower = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
1971        if (!splitlower)
1972                goto out_err;
1973
1974        numlower = ovl_split_lowerdirs(splitlower);
1975        if (numlower > OVL_MAX_STACK) {
1976                pr_err("too many lower directories, limit is %d\n",
1977                       OVL_MAX_STACK);
1978                goto out_err;
1979        }
1980
1981        layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
1982        if (!layers)
1983                goto out_err;
1984
1985        ofs->layers = layers;
1986        /* Layer 0 is reserved for upper even if there's no upper */
1987        ofs->numlayer = 1;
1988
1989        sb->s_stack_depth = 0;
1990        sb->s_maxbytes = MAX_LFS_FILESIZE;
1991        atomic_long_set(&ofs->last_ino, 1);
1992        /* Assume underlaying fs uses 32bit inodes unless proven otherwise */
1993        if (ofs->config.xino != OVL_XINO_OFF) {
1994                ofs->xino_mode = BITS_PER_LONG - 32;
1995                if (!ofs->xino_mode) {
1996                        pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n");
1997                        ofs->config.xino = OVL_XINO_OFF;
1998                }
1999        }
2000
2001        /* alloc/destroy_inode needed for setting up traps in inode cache */
2002        sb->s_op = &ovl_super_operations;
2003
2004        if (ofs->config.upperdir) {
2005                struct super_block *upper_sb;
2006
2007                if (!ofs->config.workdir) {
2008                        pr_err("missing 'workdir'\n");
2009                        goto out_err;
2010                }
2011
2012                err = ovl_get_upper(sb, ofs, &layers[0], &upperpath);
2013                if (err)
2014                        goto out_err;
2015
2016                upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
2017                if (!ovl_should_sync(ofs)) {
2018                        ofs->errseq = errseq_sample(&upper_sb->s_wb_err);
2019                        if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) {
2020                                err = -EIO;
2021                                pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n");
2022                                goto out_err;
2023                        }
2024                }
2025
2026                err = ovl_get_workdir(sb, ofs, &upperpath);
2027                if (err)
2028                        goto out_err;
2029
2030                if (!ofs->workdir)
2031                        sb->s_flags |= SB_RDONLY;
2032
2033                sb->s_stack_depth = upper_sb->s_stack_depth;
2034                sb->s_time_gran = upper_sb->s_time_gran;
2035        }
2036        oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
2037        err = PTR_ERR(oe);
2038        if (IS_ERR(oe))
2039                goto out_err;
2040
2041        /* If the upper fs is nonexistent, we mark overlayfs r/o too */
2042        if (!ovl_upper_mnt(ofs))
2043                sb->s_flags |= SB_RDONLY;
2044
2045        if (!ofs->config.uuid && ofs->numfs > 1) {
2046                pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=on.\n");
2047                ofs->config.uuid = true;
2048        }
2049
2050        if (!ovl_force_readonly(ofs) && ofs->config.index) {
2051                err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
2052                if (err)
2053                        goto out_free_oe;
2054
2055                /* Force r/o mount with no index dir */
2056                if (!ofs->indexdir)
2057                        sb->s_flags |= SB_RDONLY;
2058        }
2059
2060        err = ovl_check_overlapping_layers(sb, ofs);
2061        if (err)
2062                goto out_free_oe;
2063
2064        /* Show index=off in /proc/mounts for forced r/o mount */
2065        if (!ofs->indexdir) {
2066                ofs->config.index = false;
2067                if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) {
2068                        pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
2069                        ofs->config.nfs_export = false;
2070                }
2071        }
2072
2073        if (ofs->config.metacopy && ofs->config.nfs_export) {
2074                pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
2075                ofs->config.nfs_export = false;
2076        }
2077
2078        if (ofs->config.nfs_export)
2079                sb->s_export_op = &ovl_export_operations;
2080
2081        /* Never override disk quota limits or use reserved space */
2082        cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
2083
2084        sb->s_magic = OVERLAYFS_SUPER_MAGIC;
2085        sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
2086                ovl_trusted_xattr_handlers;
2087        sb->s_fs_info = ofs;
2088        sb->s_flags |= SB_POSIXACL;
2089        sb->s_iflags |= SB_I_SKIP_SYNC;
2090
2091        err = -ENOMEM;
2092        root_dentry = ovl_get_root(sb, upperpath.dentry, oe);
2093        if (!root_dentry)
2094                goto out_free_oe;
2095
2096        mntput(upperpath.mnt);
2097        kfree(splitlower);
2098
2099        sb->s_root = root_dentry;
2100
2101        return 0;
2102
2103out_free_oe:
2104        ovl_entry_stack_free(oe);
2105        kfree(oe);
2106out_err:
2107        kfree(splitlower);
2108        path_put(&upperpath);
2109        ovl_free_fs(ofs);
2110out:
2111        return err;
2112}
2113
2114static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
2115                                const char *dev_name, void *raw_data)
2116{
2117        return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
2118}
2119
2120static struct file_system_type ovl_fs_type = {
2121        .owner          = THIS_MODULE,
2122        .name           = "overlay",
2123        .fs_flags       = FS_USERNS_MOUNT,
2124        .mount          = ovl_mount,
2125        .kill_sb        = kill_anon_super,
2126};
2127MODULE_ALIAS_FS("overlay");
2128
2129static void ovl_inode_init_once(void *foo)
2130{
2131        struct ovl_inode *oi = foo;
2132
2133        inode_init_once(&oi->vfs_inode);
2134}
2135
2136static int __init ovl_init(void)
2137{
2138        int err;
2139
2140        ovl_inode_cachep = kmem_cache_create("ovl_inode",
2141                                             sizeof(struct ovl_inode), 0,
2142                                             (SLAB_RECLAIM_ACCOUNT|
2143                                              SLAB_MEM_SPREAD|SLAB_ACCOUNT),
2144                                             ovl_inode_init_once);
2145        if (ovl_inode_cachep == NULL)
2146                return -ENOMEM;
2147
2148        err = ovl_aio_request_cache_init();
2149        if (!err) {
2150                err = register_filesystem(&ovl_fs_type);
2151                if (!err)
2152                        return 0;
2153
2154                ovl_aio_request_cache_destroy();
2155        }
2156        kmem_cache_destroy(ovl_inode_cachep);
2157
2158        return err;
2159}
2160
2161static void __exit ovl_exit(void)
2162{
2163        unregister_filesystem(&ovl_fs_type);
2164
2165        /*
2166         * Make sure all delayed rcu free inodes are flushed before we
2167         * destroy cache.
2168         */
2169        rcu_barrier();
2170        kmem_cache_destroy(ovl_inode_cachep);
2171        ovl_aio_request_cache_destroy();
2172}
2173
2174module_init(ovl_init);
2175module_exit(ovl_exit);
2176