linux/fs/overlayfs/super.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *
   4 * Copyright (C) 2011 Novell Inc.
   5 */
   6
   7#include <uapi/linux/magic.h>
   8#include <linux/fs.h>
   9#include <linux/namei.h>
  10#include <linux/xattr.h>
  11#include <linux/mount.h>
  12#include <linux/parser.h>
  13#include <linux/module.h>
  14#include <linux/statfs.h>
  15#include <linux/seq_file.h>
  16#include <linux/posix_acl_xattr.h>
  17#include <linux/exportfs.h>
  18#include "overlayfs.h"
  19
  20MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
  21MODULE_DESCRIPTION("Overlay filesystem");
  22MODULE_LICENSE("GPL");
  23
  24
  25struct ovl_dir_cache;
  26
  27#define OVL_MAX_STACK 500
  28
  29static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
  30module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
  31MODULE_PARM_DESC(redirect_dir,
  32                 "Default to on or off for the redirect_dir feature");
  33
  34static bool ovl_redirect_always_follow =
  35        IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
  36module_param_named(redirect_always_follow, ovl_redirect_always_follow,
  37                   bool, 0644);
  38MODULE_PARM_DESC(redirect_always_follow,
  39                 "Follow redirects even if redirect_dir feature is turned off");
  40
  41static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
  42module_param_named(index, ovl_index_def, bool, 0644);
  43MODULE_PARM_DESC(index,
  44                 "Default to on or off for the inodes index feature");
  45
  46static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
  47module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
  48MODULE_PARM_DESC(nfs_export,
  49                 "Default to on or off for the NFS export feature");
  50
  51static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
  52module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
  53MODULE_PARM_DESC(xino_auto,
  54                 "Auto enable xino feature");
  55
  56static void ovl_entry_stack_free(struct ovl_entry *oe)
  57{
  58        unsigned int i;
  59
  60        for (i = 0; i < oe->numlower; i++)
  61                dput(oe->lowerstack[i].dentry);
  62}
  63
  64static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
  65module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
  66MODULE_PARM_DESC(metacopy,
  67                 "Default to on or off for the metadata only copy up feature");
  68
  69static void ovl_dentry_release(struct dentry *dentry)
  70{
  71        struct ovl_entry *oe = dentry->d_fsdata;
  72
  73        if (oe) {
  74                ovl_entry_stack_free(oe);
  75                kfree_rcu(oe, rcu);
  76        }
  77}
  78
  79static struct dentry *ovl_d_real(struct dentry *dentry,
  80                                 const struct inode *inode)
  81{
  82        struct dentry *real = NULL, *lower;
  83
  84        /* It's an overlay file */
  85        if (inode && d_inode(dentry) == inode)
  86                return dentry;
  87
  88        if (!d_is_reg(dentry)) {
  89                if (!inode || inode == d_inode(dentry))
  90                        return dentry;
  91                goto bug;
  92        }
  93
  94        real = ovl_dentry_upper(dentry);
  95        if (real && (inode == d_inode(real)))
  96                return real;
  97
  98        if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
  99                return real;
 100
 101        lower = ovl_dentry_lowerdata(dentry);
 102        if (!lower)
 103                goto bug;
 104        real = lower;
 105
 106        /* Handle recursion */
 107        real = d_real(real, inode);
 108
 109        if (!inode || inode == d_inode(real))
 110                return real;
 111bug:
 112        WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
 113             __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
 114             inode ? inode->i_ino : 0, real,
 115             real && d_inode(real) ? d_inode(real)->i_ino : 0);
 116        return dentry;
 117}
 118
 119static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
 120{
 121        int ret = 1;
 122
 123        if (weak) {
 124                if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE)
 125                        ret =  d->d_op->d_weak_revalidate(d, flags);
 126        } else if (d->d_flags & DCACHE_OP_REVALIDATE) {
 127                ret = d->d_op->d_revalidate(d, flags);
 128                if (!ret) {
 129                        if (!(flags & LOOKUP_RCU))
 130                                d_invalidate(d);
 131                        ret = -ESTALE;
 132                }
 133        }
 134        return ret;
 135}
 136
 137static int ovl_dentry_revalidate_common(struct dentry *dentry,
 138                                        unsigned int flags, bool weak)
 139{
 140        struct ovl_entry *oe = dentry->d_fsdata;
 141        struct dentry *upper;
 142        unsigned int i;
 143        int ret = 1;
 144
 145        upper = ovl_dentry_upper(dentry);
 146        if (upper)
 147                ret = ovl_revalidate_real(upper, flags, weak);
 148
 149        for (i = 0; ret > 0 && i < oe->numlower; i++) {
 150                ret = ovl_revalidate_real(oe->lowerstack[i].dentry, flags,
 151                                          weak);
 152        }
 153        return ret;
 154}
 155
 156static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
 157{
 158        return ovl_dentry_revalidate_common(dentry, flags, false);
 159}
 160
 161static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
 162{
 163        return ovl_dentry_revalidate_common(dentry, flags, true);
 164}
 165
 166static const struct dentry_operations ovl_dentry_operations = {
 167        .d_release = ovl_dentry_release,
 168        .d_real = ovl_d_real,
 169        .d_revalidate = ovl_dentry_revalidate,
 170        .d_weak_revalidate = ovl_dentry_weak_revalidate,
 171};
 172
 173static struct kmem_cache *ovl_inode_cachep;
 174
 175static struct inode *ovl_alloc_inode(struct super_block *sb)
 176{
 177        struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
 178
 179        if (!oi)
 180                return NULL;
 181
 182        oi->cache = NULL;
 183        oi->redirect = NULL;
 184        oi->version = 0;
 185        oi->flags = 0;
 186        oi->__upperdentry = NULL;
 187        oi->lower = NULL;
 188        oi->lowerdata = NULL;
 189        mutex_init(&oi->lock);
 190
 191        return &oi->vfs_inode;
 192}
 193
 194static void ovl_free_inode(struct inode *inode)
 195{
 196        struct ovl_inode *oi = OVL_I(inode);
 197
 198        kfree(oi->redirect);
 199        mutex_destroy(&oi->lock);
 200        kmem_cache_free(ovl_inode_cachep, oi);
 201}
 202
 203static void ovl_destroy_inode(struct inode *inode)
 204{
 205        struct ovl_inode *oi = OVL_I(inode);
 206
 207        dput(oi->__upperdentry);
 208        iput(oi->lower);
 209        if (S_ISDIR(inode->i_mode))
 210                ovl_dir_cache_free(inode);
 211        else
 212                iput(oi->lowerdata);
 213}
 214
 215static void ovl_free_fs(struct ovl_fs *ofs)
 216{
 217        struct vfsmount **mounts;
 218        unsigned i;
 219
 220        iput(ofs->workbasedir_trap);
 221        iput(ofs->indexdir_trap);
 222        iput(ofs->workdir_trap);
 223        dput(ofs->whiteout);
 224        dput(ofs->indexdir);
 225        dput(ofs->workdir);
 226        if (ofs->workdir_locked)
 227                ovl_inuse_unlock(ofs->workbasedir);
 228        dput(ofs->workbasedir);
 229        if (ofs->upperdir_locked)
 230                ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
 231
 232        /* Hack!  Reuse ofs->layers as a vfsmount array before freeing it */
 233        mounts = (struct vfsmount **) ofs->layers;
 234        for (i = 0; i < ofs->numlayer; i++) {
 235                iput(ofs->layers[i].trap);
 236                mounts[i] = ofs->layers[i].mnt;
 237        }
 238        kern_unmount_array(mounts, ofs->numlayer);
 239        kfree(ofs->layers);
 240        for (i = 0; i < ofs->numfs; i++)
 241                free_anon_bdev(ofs->fs[i].pseudo_dev);
 242        kfree(ofs->fs);
 243
 244        kfree(ofs->config.lowerdir);
 245        kfree(ofs->config.upperdir);
 246        kfree(ofs->config.workdir);
 247        kfree(ofs->config.redirect_mode);
 248        if (ofs->creator_cred)
 249                put_cred(ofs->creator_cred);
 250        kfree(ofs);
 251}
 252
 253static void ovl_put_super(struct super_block *sb)
 254{
 255        struct ovl_fs *ofs = sb->s_fs_info;
 256
 257        ovl_free_fs(ofs);
 258}
 259
 260/* Sync real dirty inodes in upper filesystem (if it exists) */
 261static int ovl_sync_fs(struct super_block *sb, int wait)
 262{
 263        struct ovl_fs *ofs = sb->s_fs_info;
 264        struct super_block *upper_sb;
 265        int ret;
 266
 267        ret = ovl_sync_status(ofs);
 268        /*
 269         * We have to always set the err, because the return value isn't
 270         * checked in syncfs, and instead indirectly return an error via
 271         * the sb's writeback errseq, which VFS inspects after this call.
 272         */
 273        if (ret < 0) {
 274                errseq_set(&sb->s_wb_err, -EIO);
 275                return -EIO;
 276        }
 277
 278        if (!ret)
 279                return ret;
 280
 281        /*
 282         * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
 283         * All the super blocks will be iterated, including upper_sb.
 284         *
 285         * If this is a syncfs(2) call, then we do need to call
 286         * sync_filesystem() on upper_sb, but enough if we do it when being
 287         * called with wait == 1.
 288         */
 289        if (!wait)
 290                return 0;
 291
 292        upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
 293
 294        down_read(&upper_sb->s_umount);
 295        ret = sync_filesystem(upper_sb);
 296        up_read(&upper_sb->s_umount);
 297
 298        return ret;
 299}
 300
 301/**
 302 * ovl_statfs
 303 * @sb: The overlayfs super block
 304 * @buf: The struct kstatfs to fill in with stats
 305 *
 306 * Get the filesystem statistics.  As writes always target the upper layer
 307 * filesystem pass the statfs to the upper filesystem (if it exists)
 308 */
 309static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
 310{
 311        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 312        struct dentry *root_dentry = dentry->d_sb->s_root;
 313        struct path path;
 314        int err;
 315
 316        ovl_path_real(root_dentry, &path);
 317
 318        err = vfs_statfs(&path, buf);
 319        if (!err) {
 320                buf->f_namelen = ofs->namelen;
 321                buf->f_type = OVERLAYFS_SUPER_MAGIC;
 322        }
 323
 324        return err;
 325}
 326
 327/* Will this overlay be forced to mount/remount ro? */
 328static bool ovl_force_readonly(struct ovl_fs *ofs)
 329{
 330        return (!ovl_upper_mnt(ofs) || !ofs->workdir);
 331}
 332
 333static const char *ovl_redirect_mode_def(void)
 334{
 335        return ovl_redirect_dir_def ? "on" : "off";
 336}
 337
 338static const char * const ovl_xino_str[] = {
 339        "off",
 340        "auto",
 341        "on",
 342};
 343
 344static inline int ovl_xino_def(void)
 345{
 346        return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
 347}
 348
 349/**
 350 * ovl_show_options
 351 *
 352 * Prints the mount options for a given superblock.
 353 * Returns zero; does not fail.
 354 */
 355static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 356{
 357        struct super_block *sb = dentry->d_sb;
 358        struct ovl_fs *ofs = sb->s_fs_info;
 359
 360        seq_show_option(m, "lowerdir", ofs->config.lowerdir);
 361        if (ofs->config.upperdir) {
 362                seq_show_option(m, "upperdir", ofs->config.upperdir);
 363                seq_show_option(m, "workdir", ofs->config.workdir);
 364        }
 365        if (ofs->config.default_permissions)
 366                seq_puts(m, ",default_permissions");
 367        if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
 368                seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
 369        if (ofs->config.index != ovl_index_def)
 370                seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
 371        if (!ofs->config.uuid)
 372                seq_puts(m, ",uuid=off");
 373        if (ofs->config.nfs_export != ovl_nfs_export_def)
 374                seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
 375                                                "on" : "off");
 376        if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb))
 377                seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
 378        if (ofs->config.metacopy != ovl_metacopy_def)
 379                seq_printf(m, ",metacopy=%s",
 380                           ofs->config.metacopy ? "on" : "off");
 381        if (ofs->config.ovl_volatile)
 382                seq_puts(m, ",volatile");
 383        if (ofs->config.userxattr)
 384                seq_puts(m, ",userxattr");
 385        return 0;
 386}
 387
 388static int ovl_remount(struct super_block *sb, int *flags, char *data)
 389{
 390        struct ovl_fs *ofs = sb->s_fs_info;
 391        struct super_block *upper_sb;
 392        int ret = 0;
 393
 394        if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
 395                return -EROFS;
 396
 397        if (*flags & SB_RDONLY && !sb_rdonly(sb)) {
 398                upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
 399                if (ovl_should_sync(ofs)) {
 400                        down_read(&upper_sb->s_umount);
 401                        ret = sync_filesystem(upper_sb);
 402                        up_read(&upper_sb->s_umount);
 403                }
 404        }
 405
 406        return ret;
 407}
 408
 409static const struct super_operations ovl_super_operations = {
 410        .alloc_inode    = ovl_alloc_inode,
 411        .free_inode     = ovl_free_inode,
 412        .destroy_inode  = ovl_destroy_inode,
 413        .drop_inode     = generic_delete_inode,
 414        .put_super      = ovl_put_super,
 415        .sync_fs        = ovl_sync_fs,
 416        .statfs         = ovl_statfs,
 417        .show_options   = ovl_show_options,
 418        .remount_fs     = ovl_remount,
 419};
 420
 421enum {
 422        OPT_LOWERDIR,
 423        OPT_UPPERDIR,
 424        OPT_WORKDIR,
 425        OPT_DEFAULT_PERMISSIONS,
 426        OPT_REDIRECT_DIR,
 427        OPT_INDEX_ON,
 428        OPT_INDEX_OFF,
 429        OPT_UUID_ON,
 430        OPT_UUID_OFF,
 431        OPT_NFS_EXPORT_ON,
 432        OPT_USERXATTR,
 433        OPT_NFS_EXPORT_OFF,
 434        OPT_XINO_ON,
 435        OPT_XINO_OFF,
 436        OPT_XINO_AUTO,
 437        OPT_METACOPY_ON,
 438        OPT_METACOPY_OFF,
 439        OPT_VOLATILE,
 440        OPT_ERR,
 441};
 442
 443static const match_table_t ovl_tokens = {
 444        {OPT_LOWERDIR,                  "lowerdir=%s"},
 445        {OPT_UPPERDIR,                  "upperdir=%s"},
 446        {OPT_WORKDIR,                   "workdir=%s"},
 447        {OPT_DEFAULT_PERMISSIONS,       "default_permissions"},
 448        {OPT_REDIRECT_DIR,              "redirect_dir=%s"},
 449        {OPT_INDEX_ON,                  "index=on"},
 450        {OPT_INDEX_OFF,                 "index=off"},
 451        {OPT_USERXATTR,                 "userxattr"},
 452        {OPT_UUID_ON,                   "uuid=on"},
 453        {OPT_UUID_OFF,                  "uuid=off"},
 454        {OPT_NFS_EXPORT_ON,             "nfs_export=on"},
 455        {OPT_NFS_EXPORT_OFF,            "nfs_export=off"},
 456        {OPT_XINO_ON,                   "xino=on"},
 457        {OPT_XINO_OFF,                  "xino=off"},
 458        {OPT_XINO_AUTO,                 "xino=auto"},
 459        {OPT_METACOPY_ON,               "metacopy=on"},
 460        {OPT_METACOPY_OFF,              "metacopy=off"},
 461        {OPT_VOLATILE,                  "volatile"},
 462        {OPT_ERR,                       NULL}
 463};
 464
 465static char *ovl_next_opt(char **s)
 466{
 467        char *sbegin = *s;
 468        char *p;
 469
 470        if (sbegin == NULL)
 471                return NULL;
 472
 473        for (p = sbegin; *p; p++) {
 474                if (*p == '\\') {
 475                        p++;
 476                        if (!*p)
 477                                break;
 478                } else if (*p == ',') {
 479                        *p = '\0';
 480                        *s = p + 1;
 481                        return sbegin;
 482                }
 483        }
 484        *s = NULL;
 485        return sbegin;
 486}
 487
 488static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
 489{
 490        if (strcmp(mode, "on") == 0) {
 491                config->redirect_dir = true;
 492                /*
 493                 * Does not make sense to have redirect creation without
 494                 * redirect following.
 495                 */
 496                config->redirect_follow = true;
 497        } else if (strcmp(mode, "follow") == 0) {
 498                config->redirect_follow = true;
 499        } else if (strcmp(mode, "off") == 0) {
 500                if (ovl_redirect_always_follow)
 501                        config->redirect_follow = true;
 502        } else if (strcmp(mode, "nofollow") != 0) {
 503                pr_err("bad mount option \"redirect_dir=%s\"\n",
 504                       mode);
 505                return -EINVAL;
 506        }
 507
 508        return 0;
 509}
 510
 511static int ovl_parse_opt(char *opt, struct ovl_config *config)
 512{
 513        char *p;
 514        int err;
 515        bool metacopy_opt = false, redirect_opt = false;
 516        bool nfs_export_opt = false, index_opt = false;
 517
 518        config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
 519        if (!config->redirect_mode)
 520                return -ENOMEM;
 521
 522        while ((p = ovl_next_opt(&opt)) != NULL) {
 523                int token;
 524                substring_t args[MAX_OPT_ARGS];
 525
 526                if (!*p)
 527                        continue;
 528
 529                token = match_token(p, ovl_tokens, args);
 530                switch (token) {
 531                case OPT_UPPERDIR:
 532                        kfree(config->upperdir);
 533                        config->upperdir = match_strdup(&args[0]);
 534                        if (!config->upperdir)
 535                                return -ENOMEM;
 536                        break;
 537
 538                case OPT_LOWERDIR:
 539                        kfree(config->lowerdir);
 540                        config->lowerdir = match_strdup(&args[0]);
 541                        if (!config->lowerdir)
 542                                return -ENOMEM;
 543                        break;
 544
 545                case OPT_WORKDIR:
 546                        kfree(config->workdir);
 547                        config->workdir = match_strdup(&args[0]);
 548                        if (!config->workdir)
 549                                return -ENOMEM;
 550                        break;
 551
 552                case OPT_DEFAULT_PERMISSIONS:
 553                        config->default_permissions = true;
 554                        break;
 555
 556                case OPT_REDIRECT_DIR:
 557                        kfree(config->redirect_mode);
 558                        config->redirect_mode = match_strdup(&args[0]);
 559                        if (!config->redirect_mode)
 560                                return -ENOMEM;
 561                        redirect_opt = true;
 562                        break;
 563
 564                case OPT_INDEX_ON:
 565                        config->index = true;
 566                        index_opt = true;
 567                        break;
 568
 569                case OPT_INDEX_OFF:
 570                        config->index = false;
 571                        index_opt = true;
 572                        break;
 573
 574                case OPT_UUID_ON:
 575                        config->uuid = true;
 576                        break;
 577
 578                case OPT_UUID_OFF:
 579                        config->uuid = false;
 580                        break;
 581
 582                case OPT_NFS_EXPORT_ON:
 583                        config->nfs_export = true;
 584                        nfs_export_opt = true;
 585                        break;
 586
 587                case OPT_NFS_EXPORT_OFF:
 588                        config->nfs_export = false;
 589                        nfs_export_opt = true;
 590                        break;
 591
 592                case OPT_XINO_ON:
 593                        config->xino = OVL_XINO_ON;
 594                        break;
 595
 596                case OPT_XINO_OFF:
 597                        config->xino = OVL_XINO_OFF;
 598                        break;
 599
 600                case OPT_XINO_AUTO:
 601                        config->xino = OVL_XINO_AUTO;
 602                        break;
 603
 604                case OPT_METACOPY_ON:
 605                        config->metacopy = true;
 606                        metacopy_opt = true;
 607                        break;
 608
 609                case OPT_METACOPY_OFF:
 610                        config->metacopy = false;
 611                        metacopy_opt = true;
 612                        break;
 613
 614                case OPT_VOLATILE:
 615                        config->ovl_volatile = true;
 616                        break;
 617
 618                case OPT_USERXATTR:
 619                        config->userxattr = true;
 620                        break;
 621
 622                default:
 623                        pr_err("unrecognized mount option \"%s\" or missing value\n",
 624                                        p);
 625                        return -EINVAL;
 626                }
 627        }
 628
 629        /* Workdir/index are useless in non-upper mount */
 630        if (!config->upperdir) {
 631                if (config->workdir) {
 632                        pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
 633                                config->workdir);
 634                        kfree(config->workdir);
 635                        config->workdir = NULL;
 636                }
 637                if (config->index && index_opt) {
 638                        pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n");
 639                        index_opt = false;
 640                }
 641                config->index = false;
 642        }
 643
 644        if (!config->upperdir && config->ovl_volatile) {
 645                pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
 646                config->ovl_volatile = false;
 647        }
 648
 649        err = ovl_parse_redirect_mode(config, config->redirect_mode);
 650        if (err)
 651                return err;
 652
 653        /*
 654         * This is to make the logic below simpler.  It doesn't make any other
 655         * difference, since config->redirect_dir is only used for upper.
 656         */
 657        if (!config->upperdir && config->redirect_follow)
 658                config->redirect_dir = true;
 659
 660        /* Resolve metacopy -> redirect_dir dependency */
 661        if (config->metacopy && !config->redirect_dir) {
 662                if (metacopy_opt && redirect_opt) {
 663                        pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
 664                               config->redirect_mode);
 665                        return -EINVAL;
 666                }
 667                if (redirect_opt) {
 668                        /*
 669                         * There was an explicit redirect_dir=... that resulted
 670                         * in this conflict.
 671                         */
 672                        pr_info("disabling metacopy due to redirect_dir=%s\n",
 673                                config->redirect_mode);
 674                        config->metacopy = false;
 675                } else {
 676                        /* Automatically enable redirect otherwise. */
 677                        config->redirect_follow = config->redirect_dir = true;
 678                }
 679        }
 680
 681        /* Resolve nfs_export -> index dependency */
 682        if (config->nfs_export && !config->index) {
 683                if (!config->upperdir && config->redirect_follow) {
 684                        pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
 685                        config->nfs_export = false;
 686                } else if (nfs_export_opt && index_opt) {
 687                        pr_err("conflicting options: nfs_export=on,index=off\n");
 688                        return -EINVAL;
 689                } else if (index_opt) {
 690                        /*
 691                         * There was an explicit index=off that resulted
 692                         * in this conflict.
 693                         */
 694                        pr_info("disabling nfs_export due to index=off\n");
 695                        config->nfs_export = false;
 696                } else {
 697                        /* Automatically enable index otherwise. */
 698                        config->index = true;
 699                }
 700        }
 701
 702        /* Resolve nfs_export -> !metacopy dependency */
 703        if (config->nfs_export && config->metacopy) {
 704                if (nfs_export_opt && metacopy_opt) {
 705                        pr_err("conflicting options: nfs_export=on,metacopy=on\n");
 706                        return -EINVAL;
 707                }
 708                if (metacopy_opt) {
 709                        /*
 710                         * There was an explicit metacopy=on that resulted
 711                         * in this conflict.
 712                         */
 713                        pr_info("disabling nfs_export due to metacopy=on\n");
 714                        config->nfs_export = false;
 715                } else {
 716                        /*
 717                         * There was an explicit nfs_export=on that resulted
 718                         * in this conflict.
 719                         */
 720                        pr_info("disabling metacopy due to nfs_export=on\n");
 721                        config->metacopy = false;
 722                }
 723        }
 724
 725
 726        /* Resolve userxattr -> !redirect && !metacopy dependency */
 727        if (config->userxattr) {
 728                if (config->redirect_follow && redirect_opt) {
 729                        pr_err("conflicting options: userxattr,redirect_dir=%s\n",
 730                               config->redirect_mode);
 731                        return -EINVAL;
 732                }
 733                if (config->metacopy && metacopy_opt) {
 734                        pr_err("conflicting options: userxattr,metacopy=on\n");
 735                        return -EINVAL;
 736                }
 737                /*
 738                 * Silently disable default setting of redirect and metacopy.
 739                 * This shall be the default in the future as well: these
 740                 * options must be explicitly enabled if used together with
 741                 * userxattr.
 742                 */
 743                config->redirect_dir = config->redirect_follow = false;
 744                config->metacopy = false;
 745        }
 746
 747        return 0;
 748}
 749
 750#define OVL_WORKDIR_NAME "work"
 751#define OVL_INDEXDIR_NAME "index"
 752
 753static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
 754                                         const char *name, bool persist)
 755{
 756        struct inode *dir =  ofs->workbasedir->d_inode;
 757        struct vfsmount *mnt = ovl_upper_mnt(ofs);
 758        struct dentry *work;
 759        int err;
 760        bool retried = false;
 761
 762        inode_lock_nested(dir, I_MUTEX_PARENT);
 763retry:
 764        work = lookup_one_len(name, ofs->workbasedir, strlen(name));
 765
 766        if (!IS_ERR(work)) {
 767                struct iattr attr = {
 768                        .ia_valid = ATTR_MODE,
 769                        .ia_mode = S_IFDIR | 0,
 770                };
 771
 772                if (work->d_inode) {
 773                        err = -EEXIST;
 774                        if (retried)
 775                                goto out_dput;
 776
 777                        if (persist)
 778                                goto out_unlock;
 779
 780                        retried = true;
 781                        err = ovl_workdir_cleanup(dir, mnt, work, 0);
 782                        dput(work);
 783                        if (err == -EINVAL) {
 784                                work = ERR_PTR(err);
 785                                goto out_unlock;
 786                        }
 787                        goto retry;
 788                }
 789
 790                work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode));
 791                err = PTR_ERR(work);
 792                if (IS_ERR(work))
 793                        goto out_err;
 794
 795                /*
 796                 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
 797                 *
 798                 * a) success (there was a POSIX ACL xattr and was removed)
 799                 * b) -ENODATA (there was no POSIX ACL xattr)
 800                 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
 801                 *
 802                 * There are various other error values that could effectively
 803                 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
 804                 * if the xattr name is too long), but the set of filesystems
 805                 * allowed as upper are limited to "normal" ones, where checking
 806                 * for the above two errors is sufficient.
 807                 */
 808                err = vfs_removexattr(&init_user_ns, work,
 809                                      XATTR_NAME_POSIX_ACL_DEFAULT);
 810                if (err && err != -ENODATA && err != -EOPNOTSUPP)
 811                        goto out_dput;
 812
 813                err = vfs_removexattr(&init_user_ns, work,
 814                                      XATTR_NAME_POSIX_ACL_ACCESS);
 815                if (err && err != -ENODATA && err != -EOPNOTSUPP)
 816                        goto out_dput;
 817
 818                /* Clear any inherited mode bits */
 819                inode_lock(work->d_inode);
 820                err = notify_change(&init_user_ns, work, &attr, NULL);
 821                inode_unlock(work->d_inode);
 822                if (err)
 823                        goto out_dput;
 824        } else {
 825                err = PTR_ERR(work);
 826                goto out_err;
 827        }
 828out_unlock:
 829        inode_unlock(dir);
 830        return work;
 831
 832out_dput:
 833        dput(work);
 834out_err:
 835        pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
 836                ofs->config.workdir, name, -err);
 837        work = NULL;
 838        goto out_unlock;
 839}
 840
 841static void ovl_unescape(char *s)
 842{
 843        char *d = s;
 844
 845        for (;; s++, d++) {
 846                if (*s == '\\')
 847                        s++;
 848                *d = *s;
 849                if (!*s)
 850                        break;
 851        }
 852}
 853
 854static int ovl_mount_dir_noesc(const char *name, struct path *path)
 855{
 856        int err = -EINVAL;
 857
 858        if (!*name) {
 859                pr_err("empty lowerdir\n");
 860                goto out;
 861        }
 862        err = kern_path(name, LOOKUP_FOLLOW, path);
 863        if (err) {
 864                pr_err("failed to resolve '%s': %i\n", name, err);
 865                goto out;
 866        }
 867        err = -EINVAL;
 868        if (ovl_dentry_weird(path->dentry)) {
 869                pr_err("filesystem on '%s' not supported\n", name);
 870                goto out_put;
 871        }
 872        if (mnt_user_ns(path->mnt) != &init_user_ns) {
 873                pr_err("idmapped layers are currently not supported\n");
 874                goto out_put;
 875        }
 876        if (!d_is_dir(path->dentry)) {
 877                pr_err("'%s' not a directory\n", name);
 878                goto out_put;
 879        }
 880        return 0;
 881
 882out_put:
 883        path_put_init(path);
 884out:
 885        return err;
 886}
 887
 888static int ovl_mount_dir(const char *name, struct path *path)
 889{
 890        int err = -ENOMEM;
 891        char *tmp = kstrdup(name, GFP_KERNEL);
 892
 893        if (tmp) {
 894                ovl_unescape(tmp);
 895                err = ovl_mount_dir_noesc(tmp, path);
 896
 897                if (!err && path->dentry->d_flags & DCACHE_OP_REAL) {
 898                        pr_err("filesystem on '%s' not supported as upperdir\n",
 899                               tmp);
 900                        path_put_init(path);
 901                        err = -EINVAL;
 902                }
 903                kfree(tmp);
 904        }
 905        return err;
 906}
 907
 908static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
 909                             const char *name)
 910{
 911        struct kstatfs statfs;
 912        int err = vfs_statfs(path, &statfs);
 913
 914        if (err)
 915                pr_err("statfs failed on '%s'\n", name);
 916        else
 917                ofs->namelen = max(ofs->namelen, statfs.f_namelen);
 918
 919        return err;
 920}
 921
 922static int ovl_lower_dir(const char *name, struct path *path,
 923                         struct ovl_fs *ofs, int *stack_depth)
 924{
 925        int fh_type;
 926        int err;
 927
 928        err = ovl_mount_dir_noesc(name, path);
 929        if (err)
 930                return err;
 931
 932        err = ovl_check_namelen(path, ofs, name);
 933        if (err)
 934                return err;
 935
 936        *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
 937
 938        /*
 939         * The inodes index feature and NFS export need to encode and decode
 940         * file handles, so they require that all layers support them.
 941         */
 942        fh_type = ovl_can_decode_fh(path->dentry->d_sb);
 943        if ((ofs->config.nfs_export ||
 944             (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
 945                ofs->config.index = false;
 946                ofs->config.nfs_export = false;
 947                pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
 948                        name);
 949        }
 950        /*
 951         * Decoding origin file handle is required for persistent st_ino.
 952         * Without persistent st_ino, xino=auto falls back to xino=off.
 953         */
 954        if (ofs->config.xino == OVL_XINO_AUTO &&
 955            ofs->config.upperdir && !fh_type) {
 956                ofs->config.xino = OVL_XINO_OFF;
 957                pr_warn("fs on '%s' does not support file handles, falling back to xino=off.\n",
 958                        name);
 959        }
 960
 961        /* Check if lower fs has 32bit inode numbers */
 962        if (fh_type != FILEID_INO32_GEN)
 963                ofs->xino_mode = -1;
 964
 965        return 0;
 966}
 967
 968/* Workdir should not be subdir of upperdir and vice versa */
 969static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
 970{
 971        bool ok = false;
 972
 973        if (workdir != upperdir) {
 974                ok = (lock_rename(workdir, upperdir) == NULL);
 975                unlock_rename(workdir, upperdir);
 976        }
 977        return ok;
 978}
 979
 980static unsigned int ovl_split_lowerdirs(char *str)
 981{
 982        unsigned int ctr = 1;
 983        char *s, *d;
 984
 985        for (s = d = str;; s++, d++) {
 986                if (*s == '\\') {
 987                        s++;
 988                } else if (*s == ':') {
 989                        *d = '\0';
 990                        ctr++;
 991                        continue;
 992                }
 993                *d = *s;
 994                if (!*s)
 995                        break;
 996        }
 997        return ctr;
 998}
 999
1000static int __maybe_unused
1001ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
1002                        struct dentry *dentry, struct inode *inode,
1003                        const char *name, void *buffer, size_t size)
1004{
1005        return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
1006}
1007
1008static int __maybe_unused
1009ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
1010                        struct user_namespace *mnt_userns,
1011                        struct dentry *dentry, struct inode *inode,
1012                        const char *name, const void *value,
1013                        size_t size, int flags)
1014{
1015        struct dentry *workdir = ovl_workdir(dentry);
1016        struct inode *realinode = ovl_inode_real(inode);
1017        struct posix_acl *acl = NULL;
1018        int err;
1019
1020        /* Check that everything is OK before copy-up */
1021        if (value) {
1022                acl = posix_acl_from_xattr(&init_user_ns, value, size);
1023                if (IS_ERR(acl))
1024                        return PTR_ERR(acl);
1025        }
1026        err = -EOPNOTSUPP;
1027        if (!IS_POSIXACL(d_inode(workdir)))
1028                goto out_acl_release;
1029        if (!realinode->i_op->set_acl)
1030                goto out_acl_release;
1031        if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
1032                err = acl ? -EACCES : 0;
1033                goto out_acl_release;
1034        }
1035        err = -EPERM;
1036        if (!inode_owner_or_capable(&init_user_ns, inode))
1037                goto out_acl_release;
1038
1039        posix_acl_release(acl);
1040
1041        /*
1042         * Check if sgid bit needs to be cleared (actual setacl operation will
1043         * be done with mounter's capabilities and so that won't do it for us).
1044         */
1045        if (unlikely(inode->i_mode & S_ISGID) &&
1046            handler->flags == ACL_TYPE_ACCESS &&
1047            !in_group_p(inode->i_gid) &&
1048            !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) {
1049                struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
1050
1051                err = ovl_setattr(&init_user_ns, dentry, &iattr);
1052                if (err)
1053                        return err;
1054        }
1055
1056        err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
1057        return err;
1058
1059out_acl_release:
1060        posix_acl_release(acl);
1061        return err;
1062}
1063
1064static int ovl_own_xattr_get(const struct xattr_handler *handler,
1065                             struct dentry *dentry, struct inode *inode,
1066                             const char *name, void *buffer, size_t size)
1067{
1068        return -EOPNOTSUPP;
1069}
1070
1071static int ovl_own_xattr_set(const struct xattr_handler *handler,
1072                             struct user_namespace *mnt_userns,
1073                             struct dentry *dentry, struct inode *inode,
1074                             const char *name, const void *value,
1075                             size_t size, int flags)
1076{
1077        return -EOPNOTSUPP;
1078}
1079
1080static int ovl_other_xattr_get(const struct xattr_handler *handler,
1081                               struct dentry *dentry, struct inode *inode,
1082                               const char *name, void *buffer, size_t size)
1083{
1084        return ovl_xattr_get(dentry, inode, name, buffer, size);
1085}
1086
1087static int ovl_other_xattr_set(const struct xattr_handler *handler,
1088                               struct user_namespace *mnt_userns,
1089                               struct dentry *dentry, struct inode *inode,
1090                               const char *name, const void *value,
1091                               size_t size, int flags)
1092{
1093        return ovl_xattr_set(dentry, inode, name, value, size, flags);
1094}
1095
1096static const struct xattr_handler __maybe_unused
1097ovl_posix_acl_access_xattr_handler = {
1098        .name = XATTR_NAME_POSIX_ACL_ACCESS,
1099        .flags = ACL_TYPE_ACCESS,
1100        .get = ovl_posix_acl_xattr_get,
1101        .set = ovl_posix_acl_xattr_set,
1102};
1103
1104static const struct xattr_handler __maybe_unused
1105ovl_posix_acl_default_xattr_handler = {
1106        .name = XATTR_NAME_POSIX_ACL_DEFAULT,
1107        .flags = ACL_TYPE_DEFAULT,
1108        .get = ovl_posix_acl_xattr_get,
1109        .set = ovl_posix_acl_xattr_set,
1110};
1111
1112static const struct xattr_handler ovl_own_trusted_xattr_handler = {
1113        .prefix = OVL_XATTR_TRUSTED_PREFIX,
1114        .get = ovl_own_xattr_get,
1115        .set = ovl_own_xattr_set,
1116};
1117
1118static const struct xattr_handler ovl_own_user_xattr_handler = {
1119        .prefix = OVL_XATTR_USER_PREFIX,
1120        .get = ovl_own_xattr_get,
1121        .set = ovl_own_xattr_set,
1122};
1123
1124static const struct xattr_handler ovl_other_xattr_handler = {
1125        .prefix = "", /* catch all */
1126        .get = ovl_other_xattr_get,
1127        .set = ovl_other_xattr_set,
1128};
1129
1130static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
1131#ifdef CONFIG_FS_POSIX_ACL
1132        &ovl_posix_acl_access_xattr_handler,
1133        &ovl_posix_acl_default_xattr_handler,
1134#endif
1135        &ovl_own_trusted_xattr_handler,
1136        &ovl_other_xattr_handler,
1137        NULL
1138};
1139
1140static const struct xattr_handler *ovl_user_xattr_handlers[] = {
1141#ifdef CONFIG_FS_POSIX_ACL
1142        &ovl_posix_acl_access_xattr_handler,
1143        &ovl_posix_acl_default_xattr_handler,
1144#endif
1145        &ovl_own_user_xattr_handler,
1146        &ovl_other_xattr_handler,
1147        NULL
1148};
1149
1150static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
1151                          struct inode **ptrap, const char *name)
1152{
1153        struct inode *trap;
1154        int err;
1155
1156        trap = ovl_get_trap_inode(sb, dir);
1157        err = PTR_ERR_OR_ZERO(trap);
1158        if (err) {
1159                if (err == -ELOOP)
1160                        pr_err("conflicting %s path\n", name);
1161                return err;
1162        }
1163
1164        *ptrap = trap;
1165        return 0;
1166}
1167
1168/*
1169 * Determine how we treat concurrent use of upperdir/workdir based on the
1170 * index feature. This is papering over mount leaks of container runtimes,
1171 * for example, an old overlay mount is leaked and now its upperdir is
1172 * attempted to be used as a lower layer in a new overlay mount.
1173 */
1174static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
1175{
1176        if (ofs->config.index) {
1177                pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
1178                       name);
1179                return -EBUSY;
1180        } else {
1181                pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
1182                        name);
1183                return 0;
1184        }
1185}
1186
1187static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
1188                         struct ovl_layer *upper_layer, struct path *upperpath)
1189{
1190        struct vfsmount *upper_mnt;
1191        int err;
1192
1193        err = ovl_mount_dir(ofs->config.upperdir, upperpath);
1194        if (err)
1195                goto out;
1196
1197        /* Upperdir path should not be r/o */
1198        if (__mnt_is_readonly(upperpath->mnt)) {
1199                pr_err("upper fs is r/o, try multi-lower layers mount\n");
1200                err = -EINVAL;
1201                goto out;
1202        }
1203
1204        err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
1205        if (err)
1206                goto out;
1207
1208        err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap,
1209                             "upperdir");
1210        if (err)
1211                goto out;
1212
1213        upper_mnt = clone_private_mount(upperpath);
1214        err = PTR_ERR(upper_mnt);
1215        if (IS_ERR(upper_mnt)) {
1216                pr_err("failed to clone upperpath\n");
1217                goto out;
1218        }
1219
1220        /* Don't inherit atime flags */
1221        upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
1222        upper_layer->mnt = upper_mnt;
1223        upper_layer->idx = 0;
1224        upper_layer->fsid = 0;
1225
1226        /*
1227         * Inherit SB_NOSEC flag from upperdir.
1228         *
1229         * This optimization changes behavior when a security related attribute
1230         * (suid/sgid/security.*) is changed on an underlying layer.  This is
1231         * okay because we don't yet have guarantees in that case, but it will
1232         * need careful treatment once we want to honour changes to underlying
1233         * filesystems.
1234         */
1235        if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
1236                sb->s_flags |= SB_NOSEC;
1237
1238        if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
1239                ofs->upperdir_locked = true;
1240        } else {
1241                err = ovl_report_in_use(ofs, "upperdir");
1242                if (err)
1243                        goto out;
1244        }
1245
1246        err = 0;
1247out:
1248        return err;
1249}
1250
1251/*
1252 * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and
1253 * negative values if error is encountered.
1254 */
1255static int ovl_check_rename_whiteout(struct dentry *workdir)
1256{
1257        struct inode *dir = d_inode(workdir);
1258        struct dentry *temp;
1259        struct dentry *dest;
1260        struct dentry *whiteout;
1261        struct name_snapshot name;
1262        int err;
1263
1264        inode_lock_nested(dir, I_MUTEX_PARENT);
1265
1266        temp = ovl_create_temp(workdir, OVL_CATTR(S_IFREG | 0));
1267        err = PTR_ERR(temp);
1268        if (IS_ERR(temp))
1269                goto out_unlock;
1270
1271        dest = ovl_lookup_temp(workdir);
1272        err = PTR_ERR(dest);
1273        if (IS_ERR(dest)) {
1274                dput(temp);
1275                goto out_unlock;
1276        }
1277
1278        /* Name is inline and stable - using snapshot as a copy helper */
1279        take_dentry_name_snapshot(&name, temp);
1280        err = ovl_do_rename(dir, temp, dir, dest, RENAME_WHITEOUT);
1281        if (err) {
1282                if (err == -EINVAL)
1283                        err = 0;
1284                goto cleanup_temp;
1285        }
1286
1287        whiteout = lookup_one_len(name.name.name, workdir, name.name.len);
1288        err = PTR_ERR(whiteout);
1289        if (IS_ERR(whiteout))
1290                goto cleanup_temp;
1291
1292        err = ovl_is_whiteout(whiteout);
1293
1294        /* Best effort cleanup of whiteout and temp file */
1295        if (err)
1296                ovl_cleanup(dir, whiteout);
1297        dput(whiteout);
1298
1299cleanup_temp:
1300        ovl_cleanup(dir, temp);
1301        release_dentry_name_snapshot(&name);
1302        dput(temp);
1303        dput(dest);
1304
1305out_unlock:
1306        inode_unlock(dir);
1307
1308        return err;
1309}
1310
1311static struct dentry *ovl_lookup_or_create(struct dentry *parent,
1312                                           const char *name, umode_t mode)
1313{
1314        size_t len = strlen(name);
1315        struct dentry *child;
1316
1317        inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
1318        child = lookup_one_len(name, parent, len);
1319        if (!IS_ERR(child) && !child->d_inode)
1320                child = ovl_create_real(parent->d_inode, child,
1321                                        OVL_CATTR(mode));
1322        inode_unlock(parent->d_inode);
1323        dput(parent);
1324
1325        return child;
1326}
1327
1328/*
1329 * Creates $workdir/work/incompat/volatile/dirty file if it is not already
1330 * present.
1331 */
1332static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
1333{
1334        unsigned int ctr;
1335        struct dentry *d = dget(ofs->workbasedir);
1336        static const char *const volatile_path[] = {
1337                OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
1338        };
1339        const char *const *name = volatile_path;
1340
1341        for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
1342                d = ovl_lookup_or_create(d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
1343                if (IS_ERR(d))
1344                        return PTR_ERR(d);
1345        }
1346        dput(d);
1347        return 0;
1348}
1349
1350static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
1351                            struct path *workpath)
1352{
1353        struct vfsmount *mnt = ovl_upper_mnt(ofs);
1354        struct dentry *temp, *workdir;
1355        bool rename_whiteout;
1356        bool d_type;
1357        int fh_type;
1358        int err;
1359
1360        err = mnt_want_write(mnt);
1361        if (err)
1362                return err;
1363
1364        workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
1365        err = PTR_ERR(workdir);
1366        if (IS_ERR_OR_NULL(workdir))
1367                goto out;
1368
1369        ofs->workdir = workdir;
1370
1371        err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
1372        if (err)
1373                goto out;
1374
1375        /*
1376         * Upper should support d_type, else whiteouts are visible.  Given
1377         * workdir and upper are on same fs, we can do iterate_dir() on
1378         * workdir. This check requires successful creation of workdir in
1379         * previous step.
1380         */
1381        err = ovl_check_d_type_supported(workpath);
1382        if (err < 0)
1383                goto out;
1384
1385        d_type = err;
1386        if (!d_type)
1387                pr_warn("upper fs needs to support d_type.\n");
1388
1389        /* Check if upper/work fs supports O_TMPFILE */
1390        temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
1391        ofs->tmpfile = !IS_ERR(temp);
1392        if (ofs->tmpfile)
1393                dput(temp);
1394        else
1395                pr_warn("upper fs does not support tmpfile.\n");
1396
1397
1398        /* Check if upper/work fs supports RENAME_WHITEOUT */
1399        err = ovl_check_rename_whiteout(ofs->workdir);
1400        if (err < 0)
1401                goto out;
1402
1403        rename_whiteout = err;
1404        if (!rename_whiteout)
1405                pr_warn("upper fs does not support RENAME_WHITEOUT.\n");
1406
1407        /*
1408         * Check if upper/work fs supports (trusted|user).overlay.* xattr
1409         */
1410        err = ovl_do_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
1411        if (err) {
1412                ofs->noxattr = true;
1413                if (ofs->config.index || ofs->config.metacopy) {
1414                        ofs->config.index = false;
1415                        ofs->config.metacopy = false;
1416                        pr_warn("upper fs does not support xattr, falling back to index=off,metacopy=off.\n");
1417                }
1418                /*
1419                 * xattr support is required for persistent st_ino.
1420                 * Without persistent st_ino, xino=auto falls back to xino=off.
1421                 */
1422                if (ofs->config.xino == OVL_XINO_AUTO) {
1423                        ofs->config.xino = OVL_XINO_OFF;
1424                        pr_warn("upper fs does not support xattr, falling back to xino=off.\n");
1425                }
1426                err = 0;
1427        } else {
1428                ovl_do_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
1429        }
1430
1431        /*
1432         * We allowed sub-optimal upper fs configuration and don't want to break
1433         * users over kernel upgrade, but we never allowed remote upper fs, so
1434         * we can enforce strict requirements for remote upper fs.
1435         */
1436        if (ovl_dentry_remote(ofs->workdir) &&
1437            (!d_type || !rename_whiteout || ofs->noxattr)) {
1438                pr_err("upper fs missing required features.\n");
1439                err = -EINVAL;
1440                goto out;
1441        }
1442
1443        /*
1444         * For volatile mount, create a incompat/volatile/dirty file to keep
1445         * track of it.
1446         */
1447        if (ofs->config.ovl_volatile) {
1448                err = ovl_create_volatile_dirty(ofs);
1449                if (err < 0) {
1450                        pr_err("Failed to create volatile/dirty file.\n");
1451                        goto out;
1452                }
1453        }
1454
1455        /* Check if upper/work fs supports file handles */
1456        fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
1457        if (ofs->config.index && !fh_type) {
1458                ofs->config.index = false;
1459                pr_warn("upper fs does not support file handles, falling back to index=off.\n");
1460        }
1461
1462        /* Check if upper fs has 32bit inode numbers */
1463        if (fh_type != FILEID_INO32_GEN)
1464                ofs->xino_mode = -1;
1465
1466        /* NFS export of r/w mount depends on index */
1467        if (ofs->config.nfs_export && !ofs->config.index) {
1468                pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
1469                ofs->config.nfs_export = false;
1470        }
1471out:
1472        mnt_drop_write(mnt);
1473        return err;
1474}
1475
1476static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
1477                           struct path *upperpath)
1478{
1479        int err;
1480        struct path workpath = { };
1481
1482        err = ovl_mount_dir(ofs->config.workdir, &workpath);
1483        if (err)
1484                goto out;
1485
1486        err = -EINVAL;
1487        if (upperpath->mnt != workpath.mnt) {
1488                pr_err("workdir and upperdir must reside under the same mount\n");
1489                goto out;
1490        }
1491        if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
1492                pr_err("workdir and upperdir must be separate subtrees\n");
1493                goto out;
1494        }
1495
1496        ofs->workbasedir = dget(workpath.dentry);
1497
1498        if (ovl_inuse_trylock(ofs->workbasedir)) {
1499                ofs->workdir_locked = true;
1500        } else {
1501                err = ovl_report_in_use(ofs, "workdir");
1502                if (err)
1503                        goto out;
1504        }
1505
1506        err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
1507                             "workdir");
1508        if (err)
1509                goto out;
1510
1511        err = ovl_make_workdir(sb, ofs, &workpath);
1512
1513out:
1514        path_put(&workpath);
1515
1516        return err;
1517}
1518
1519static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
1520                            struct ovl_entry *oe, struct path *upperpath)
1521{
1522        struct vfsmount *mnt = ovl_upper_mnt(ofs);
1523        struct dentry *indexdir;
1524        int err;
1525
1526        err = mnt_want_write(mnt);
1527        if (err)
1528                return err;
1529
1530        /* Verify lower root is upper root origin */
1531        err = ovl_verify_origin(ofs, upperpath->dentry,
1532                                oe->lowerstack[0].dentry, true);
1533        if (err) {
1534                pr_err("failed to verify upper root origin\n");
1535                goto out;
1536        }
1537
1538        /* index dir will act also as workdir */
1539        iput(ofs->workdir_trap);
1540        ofs->workdir_trap = NULL;
1541        dput(ofs->workdir);
1542        ofs->workdir = NULL;
1543        indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
1544        if (IS_ERR(indexdir)) {
1545                err = PTR_ERR(indexdir);
1546        } else if (indexdir) {
1547                ofs->indexdir = indexdir;
1548                ofs->workdir = dget(indexdir);
1549
1550                err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
1551                                     "indexdir");
1552                if (err)
1553                        goto out;
1554
1555                /*
1556                 * Verify upper root is exclusively associated with index dir.
1557                 * Older kernels stored upper fh in ".overlay.origin"
1558                 * xattr. If that xattr exists, verify that it is a match to
1559                 * upper dir file handle. In any case, verify or set xattr
1560                 * ".overlay.upper" to indicate that index may have
1561                 * directory entries.
1562                 */
1563                if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
1564                        err = ovl_verify_set_fh(ofs, ofs->indexdir,
1565                                                OVL_XATTR_ORIGIN,
1566                                                upperpath->dentry, true, false);
1567                        if (err)
1568                                pr_err("failed to verify index dir 'origin' xattr\n");
1569                }
1570                err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry,
1571                                       true);
1572                if (err)
1573                        pr_err("failed to verify index dir 'upper' xattr\n");
1574
1575                /* Cleanup bad/stale/orphan index entries */
1576                if (!err)
1577                        err = ovl_indexdir_cleanup(ofs);
1578        }
1579        if (err || !ofs->indexdir)
1580                pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
1581
1582out:
1583        mnt_drop_write(mnt);
1584        return err;
1585}
1586
1587static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
1588{
1589        unsigned int i;
1590
1591        if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs))
1592                return true;
1593
1594        /*
1595         * We allow using single lower with null uuid for index and nfs_export
1596         * for example to support those features with single lower squashfs.
1597         * To avoid regressions in setups of overlay with re-formatted lower
1598         * squashfs, do not allow decoding origin with lower null uuid unless
1599         * user opted-in to one of the new features that require following the
1600         * lower inode of non-dir upper.
1601         */
1602        if (!ofs->config.index && !ofs->config.metacopy &&
1603            ofs->config.xino != OVL_XINO_ON &&
1604            uuid_is_null(uuid))
1605                return false;
1606
1607        for (i = 0; i < ofs->numfs; i++) {
1608                /*
1609                 * We use uuid to associate an overlay lower file handle with a
1610                 * lower layer, so we can accept lower fs with null uuid as long
1611                 * as all lower layers with null uuid are on the same fs.
1612                 * if we detect multiple lower fs with the same uuid, we
1613                 * disable lower file handle decoding on all of them.
1614                 */
1615                if (ofs->fs[i].is_lower &&
1616                    uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
1617                        ofs->fs[i].bad_uuid = true;
1618                        return false;
1619                }
1620        }
1621        return true;
1622}
1623
1624/* Get a unique fsid for the layer */
1625static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
1626{
1627        struct super_block *sb = path->mnt->mnt_sb;
1628        unsigned int i;
1629        dev_t dev;
1630        int err;
1631        bool bad_uuid = false;
1632        bool warn = false;
1633
1634        for (i = 0; i < ofs->numfs; i++) {
1635                if (ofs->fs[i].sb == sb)
1636                        return i;
1637        }
1638
1639        if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
1640                bad_uuid = true;
1641                if (ofs->config.xino == OVL_XINO_AUTO) {
1642                        ofs->config.xino = OVL_XINO_OFF;
1643                        warn = true;
1644                }
1645                if (ofs->config.index || ofs->config.nfs_export) {
1646                        ofs->config.index = false;
1647                        ofs->config.nfs_export = false;
1648                        warn = true;
1649                }
1650                if (warn) {
1651                        pr_warn("%s uuid detected in lower fs '%pd2', falling back to xino=%s,index=off,nfs_export=off.\n",
1652                                uuid_is_null(&sb->s_uuid) ? "null" :
1653                                                            "conflicting",
1654                                path->dentry, ovl_xino_str[ofs->config.xino]);
1655                }
1656        }
1657
1658        err = get_anon_bdev(&dev);
1659        if (err) {
1660                pr_err("failed to get anonymous bdev for lowerpath\n");
1661                return err;
1662        }
1663
1664        ofs->fs[ofs->numfs].sb = sb;
1665        ofs->fs[ofs->numfs].pseudo_dev = dev;
1666        ofs->fs[ofs->numfs].bad_uuid = bad_uuid;
1667
1668        return ofs->numfs++;
1669}
1670
1671static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
1672                          struct path *stack, unsigned int numlower,
1673                          struct ovl_layer *layers)
1674{
1675        int err;
1676        unsigned int i;
1677
1678        err = -ENOMEM;
1679        ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL);
1680        if (ofs->fs == NULL)
1681                goto out;
1682
1683        /* idx/fsid 0 are reserved for upper fs even with lower only overlay */
1684        ofs->numfs++;
1685
1686        /*
1687         * All lower layers that share the same fs as upper layer, use the same
1688         * pseudo_dev as upper layer.  Allocate fs[0].pseudo_dev even for lower
1689         * only overlay to simplify ovl_fs_free().
1690         * is_lower will be set if upper fs is shared with a lower layer.
1691         */
1692        err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
1693        if (err) {
1694                pr_err("failed to get anonymous bdev for upper fs\n");
1695                goto out;
1696        }
1697
1698        if (ovl_upper_mnt(ofs)) {
1699                ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb;
1700                ofs->fs[0].is_lower = false;
1701        }
1702
1703        for (i = 0; i < numlower; i++) {
1704                struct vfsmount *mnt;
1705                struct inode *trap;
1706                int fsid;
1707
1708                err = fsid = ovl_get_fsid(ofs, &stack[i]);
1709                if (err < 0)
1710                        goto out;
1711
1712                /*
1713                 * Check if lower root conflicts with this overlay layers before
1714                 * checking if it is in-use as upperdir/workdir of "another"
1715                 * mount, because we do not bother to check in ovl_is_inuse() if
1716                 * the upperdir/workdir is in fact in-use by our
1717                 * upperdir/workdir.
1718                 */
1719                err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
1720                if (err)
1721                        goto out;
1722
1723                if (ovl_is_inuse(stack[i].dentry)) {
1724                        err = ovl_report_in_use(ofs, "lowerdir");
1725                        if (err) {
1726                                iput(trap);
1727                                goto out;
1728                        }
1729                }
1730
1731                mnt = clone_private_mount(&stack[i]);
1732                err = PTR_ERR(mnt);
1733                if (IS_ERR(mnt)) {
1734                        pr_err("failed to clone lowerpath\n");
1735                        iput(trap);
1736                        goto out;
1737                }
1738
1739                /*
1740                 * Make lower layers R/O.  That way fchmod/fchown on lower file
1741                 * will fail instead of modifying lower fs.
1742                 */
1743                mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
1744
1745                layers[ofs->numlayer].trap = trap;
1746                layers[ofs->numlayer].mnt = mnt;
1747                layers[ofs->numlayer].idx = ofs->numlayer;
1748                layers[ofs->numlayer].fsid = fsid;
1749                layers[ofs->numlayer].fs = &ofs->fs[fsid];
1750                ofs->numlayer++;
1751                ofs->fs[fsid].is_lower = true;
1752        }
1753
1754        /*
1755         * When all layers on same fs, overlay can use real inode numbers.
1756         * With mount option "xino=<on|auto>", mounter declares that there are
1757         * enough free high bits in underlying fs to hold the unique fsid.
1758         * If overlayfs does encounter underlying inodes using the high xino
1759         * bits reserved for fsid, it emits a warning and uses the original
1760         * inode number or a non persistent inode number allocated from a
1761         * dedicated range.
1762         */
1763        if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) {
1764                if (ofs->config.xino == OVL_XINO_ON)
1765                        pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
1766                ofs->xino_mode = 0;
1767        } else if (ofs->config.xino == OVL_XINO_OFF) {
1768                ofs->xino_mode = -1;
1769        } else if (ofs->xino_mode < 0) {
1770                /*
1771                 * This is a roundup of number of bits needed for encoding
1772                 * fsid, where fsid 0 is reserved for upper fs (even with
1773                 * lower only overlay) +1 extra bit is reserved for the non
1774                 * persistent inode number range that is used for resolving
1775                 * xino lower bits overflow.
1776                 */
1777                BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
1778                ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
1779        }
1780
1781        if (ofs->xino_mode > 0) {
1782                pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
1783                        ofs->xino_mode);
1784        }
1785
1786        err = 0;
1787out:
1788        return err;
1789}
1790
1791static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
1792                                const char *lower, unsigned int numlower,
1793                                struct ovl_fs *ofs, struct ovl_layer *layers)
1794{
1795        int err;
1796        struct path *stack = NULL;
1797        unsigned int i;
1798        struct ovl_entry *oe;
1799
1800        if (!ofs->config.upperdir && numlower == 1) {
1801                pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
1802                return ERR_PTR(-EINVAL);
1803        }
1804
1805        stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL);
1806        if (!stack)
1807                return ERR_PTR(-ENOMEM);
1808
1809        err = -EINVAL;
1810        for (i = 0; i < numlower; i++) {
1811                err = ovl_lower_dir(lower, &stack[i], ofs, &sb->s_stack_depth);
1812                if (err)
1813                        goto out_err;
1814
1815                lower = strchr(lower, '\0') + 1;
1816        }
1817
1818        err = -EINVAL;
1819        sb->s_stack_depth++;
1820        if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
1821                pr_err("maximum fs stacking depth exceeded\n");
1822                goto out_err;
1823        }
1824
1825        err = ovl_get_layers(sb, ofs, stack, numlower, layers);
1826        if (err)
1827                goto out_err;
1828
1829        err = -ENOMEM;
1830        oe = ovl_alloc_entry(numlower);
1831        if (!oe)
1832                goto out_err;
1833
1834        for (i = 0; i < numlower; i++) {
1835                oe->lowerstack[i].dentry = dget(stack[i].dentry);
1836                oe->lowerstack[i].layer = &ofs->layers[i+1];
1837        }
1838
1839out:
1840        for (i = 0; i < numlower; i++)
1841                path_put(&stack[i]);
1842        kfree(stack);
1843
1844        return oe;
1845
1846out_err:
1847        oe = ERR_PTR(err);
1848        goto out;
1849}
1850
1851/*
1852 * Check if this layer root is a descendant of:
1853 * - another layer of this overlayfs instance
1854 * - upper/work dir of any overlayfs instance
1855 */
1856static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
1857                           struct dentry *dentry, const char *name,
1858                           bool is_lower)
1859{
1860        struct dentry *next = dentry, *parent;
1861        int err = 0;
1862
1863        if (!dentry)
1864                return 0;
1865
1866        parent = dget_parent(next);
1867
1868        /* Walk back ancestors to root (inclusive) looking for traps */
1869        while (!err && parent != next) {
1870                if (is_lower && ovl_lookup_trap_inode(sb, parent)) {
1871                        err = -ELOOP;
1872                        pr_err("overlapping %s path\n", name);
1873                } else if (ovl_is_inuse(parent)) {
1874                        err = ovl_report_in_use(ofs, name);
1875                }
1876                next = parent;
1877                parent = dget_parent(next);
1878                dput(next);
1879        }
1880
1881        dput(parent);
1882
1883        return err;
1884}
1885
1886/*
1887 * Check if any of the layers or work dirs overlap.
1888 */
1889static int ovl_check_overlapping_layers(struct super_block *sb,
1890                                        struct ovl_fs *ofs)
1891{
1892        int i, err;
1893
1894        if (ovl_upper_mnt(ofs)) {
1895                err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root,
1896                                      "upperdir", false);
1897                if (err)
1898                        return err;
1899
1900                /*
1901                 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
1902                 * this instance and covers overlapping work and index dirs,
1903                 * unless work or index dir have been moved since created inside
1904                 * workbasedir.  In that case, we already have their traps in
1905                 * inode cache and we will catch that case on lookup.
1906                 */
1907                err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir",
1908                                      false);
1909                if (err)
1910                        return err;
1911        }
1912
1913        for (i = 1; i < ofs->numlayer; i++) {
1914                err = ovl_check_layer(sb, ofs,
1915                                      ofs->layers[i].mnt->mnt_root,
1916                                      "lowerdir", true);
1917                if (err)
1918                        return err;
1919        }
1920
1921        return 0;
1922}
1923
1924static struct dentry *ovl_get_root(struct super_block *sb,
1925                                   struct dentry *upperdentry,
1926                                   struct ovl_entry *oe)
1927{
1928        struct dentry *root;
1929        struct ovl_path *lowerpath = &oe->lowerstack[0];
1930        unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
1931        int fsid = lowerpath->layer->fsid;
1932        struct ovl_inode_params oip = {
1933                .upperdentry = upperdentry,
1934                .lowerpath = lowerpath,
1935        };
1936
1937        root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
1938        if (!root)
1939                return NULL;
1940
1941        root->d_fsdata = oe;
1942
1943        if (upperdentry) {
1944                /* Root inode uses upper st_ino/i_ino */
1945                ino = d_inode(upperdentry)->i_ino;
1946                fsid = 0;
1947                ovl_dentry_set_upper_alias(root);
1948                if (ovl_is_impuredir(sb, upperdentry))
1949                        ovl_set_flag(OVL_IMPURE, d_inode(root));
1950        }
1951
1952        /* Root is always merge -> can have whiteouts */
1953        ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
1954        ovl_dentry_set_flag(OVL_E_CONNECTED, root);
1955        ovl_set_upperdata(d_inode(root));
1956        ovl_inode_init(d_inode(root), &oip, ino, fsid);
1957        ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE);
1958
1959        return root;
1960}
1961
1962static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1963{
1964        struct path upperpath = { };
1965        struct dentry *root_dentry;
1966        struct ovl_entry *oe;
1967        struct ovl_fs *ofs;
1968        struct ovl_layer *layers;
1969        struct cred *cred;
1970        char *splitlower = NULL;
1971        unsigned int numlower;
1972        int err;
1973
1974        err = -EIO;
1975        if (WARN_ON(sb->s_user_ns != current_user_ns()))
1976                goto out;
1977
1978        sb->s_d_op = &ovl_dentry_operations;
1979
1980        err = -ENOMEM;
1981        ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
1982        if (!ofs)
1983                goto out;
1984
1985        err = -ENOMEM;
1986        ofs->creator_cred = cred = prepare_creds();
1987        if (!cred)
1988                goto out_err;
1989
1990        /* Is there a reason anyone would want not to share whiteouts? */
1991        ofs->share_whiteout = true;
1992
1993        ofs->config.index = ovl_index_def;
1994        ofs->config.uuid = true;
1995        ofs->config.nfs_export = ovl_nfs_export_def;
1996        ofs->config.xino = ovl_xino_def();
1997        ofs->config.metacopy = ovl_metacopy_def;
1998        err = ovl_parse_opt((char *) data, &ofs->config);
1999        if (err)
2000                goto out_err;
2001
2002        err = -EINVAL;
2003        if (!ofs->config.lowerdir) {
2004                if (!silent)
2005                        pr_err("missing 'lowerdir'\n");
2006                goto out_err;
2007        }
2008
2009        err = -ENOMEM;
2010        splitlower = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
2011        if (!splitlower)
2012                goto out_err;
2013
2014        err = -EINVAL;
2015        numlower = ovl_split_lowerdirs(splitlower);
2016        if (numlower > OVL_MAX_STACK) {
2017                pr_err("too many lower directories, limit is %d\n",
2018                       OVL_MAX_STACK);
2019                goto out_err;
2020        }
2021
2022        err = -ENOMEM;
2023        layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
2024        if (!layers)
2025                goto out_err;
2026
2027        ofs->layers = layers;
2028        /* Layer 0 is reserved for upper even if there's no upper */
2029        ofs->numlayer = 1;
2030
2031        sb->s_stack_depth = 0;
2032        sb->s_maxbytes = MAX_LFS_FILESIZE;
2033        atomic_long_set(&ofs->last_ino, 1);
2034        /* Assume underlaying fs uses 32bit inodes unless proven otherwise */
2035        if (ofs->config.xino != OVL_XINO_OFF) {
2036                ofs->xino_mode = BITS_PER_LONG - 32;
2037                if (!ofs->xino_mode) {
2038                        pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n");
2039                        ofs->config.xino = OVL_XINO_OFF;
2040                }
2041        }
2042
2043        /* alloc/destroy_inode needed for setting up traps in inode cache */
2044        sb->s_op = &ovl_super_operations;
2045
2046        if (ofs->config.upperdir) {
2047                struct super_block *upper_sb;
2048
2049                err = -EINVAL;
2050                if (!ofs->config.workdir) {
2051                        pr_err("missing 'workdir'\n");
2052                        goto out_err;
2053                }
2054
2055                err = ovl_get_upper(sb, ofs, &layers[0], &upperpath);
2056                if (err)
2057                        goto out_err;
2058
2059                upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
2060                if (!ovl_should_sync(ofs)) {
2061                        ofs->errseq = errseq_sample(&upper_sb->s_wb_err);
2062                        if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) {
2063                                err = -EIO;
2064                                pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n");
2065                                goto out_err;
2066                        }
2067                }
2068
2069                err = ovl_get_workdir(sb, ofs, &upperpath);
2070                if (err)
2071                        goto out_err;
2072
2073                if (!ofs->workdir)
2074                        sb->s_flags |= SB_RDONLY;
2075
2076                sb->s_stack_depth = upper_sb->s_stack_depth;
2077                sb->s_time_gran = upper_sb->s_time_gran;
2078        }
2079        oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
2080        err = PTR_ERR(oe);
2081        if (IS_ERR(oe))
2082                goto out_err;
2083
2084        /* If the upper fs is nonexistent, we mark overlayfs r/o too */
2085        if (!ovl_upper_mnt(ofs))
2086                sb->s_flags |= SB_RDONLY;
2087
2088        if (!ofs->config.uuid && ofs->numfs > 1) {
2089                pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=on.\n");
2090                ofs->config.uuid = true;
2091        }
2092
2093        if (!ovl_force_readonly(ofs) && ofs->config.index) {
2094                err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
2095                if (err)
2096                        goto out_free_oe;
2097
2098                /* Force r/o mount with no index dir */
2099                if (!ofs->indexdir)
2100                        sb->s_flags |= SB_RDONLY;
2101        }
2102
2103        err = ovl_check_overlapping_layers(sb, ofs);
2104        if (err)
2105                goto out_free_oe;
2106
2107        /* Show index=off in /proc/mounts for forced r/o mount */
2108        if (!ofs->indexdir) {
2109                ofs->config.index = false;
2110                if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) {
2111                        pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
2112                        ofs->config.nfs_export = false;
2113                }
2114        }
2115
2116        if (ofs->config.metacopy && ofs->config.nfs_export) {
2117                pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
2118                ofs->config.nfs_export = false;
2119        }
2120
2121        if (ofs->config.nfs_export)
2122                sb->s_export_op = &ovl_export_operations;
2123
2124        /* Never override disk quota limits or use reserved space */
2125        cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
2126
2127        sb->s_magic = OVERLAYFS_SUPER_MAGIC;
2128        sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
2129                ovl_trusted_xattr_handlers;
2130        sb->s_fs_info = ofs;
2131        sb->s_flags |= SB_POSIXACL;
2132        sb->s_iflags |= SB_I_SKIP_SYNC;
2133
2134        err = -ENOMEM;
2135        root_dentry = ovl_get_root(sb, upperpath.dentry, oe);
2136        if (!root_dentry)
2137                goto out_free_oe;
2138
2139        mntput(upperpath.mnt);
2140        kfree(splitlower);
2141
2142        sb->s_root = root_dentry;
2143
2144        return 0;
2145
2146out_free_oe:
2147        ovl_entry_stack_free(oe);
2148        kfree(oe);
2149out_err:
2150        kfree(splitlower);
2151        path_put(&upperpath);
2152        ovl_free_fs(ofs);
2153out:
2154        return err;
2155}
2156
2157static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
2158                                const char *dev_name, void *raw_data)
2159{
2160        return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
2161}
2162
2163static struct file_system_type ovl_fs_type = {
2164        .owner          = THIS_MODULE,
2165        .name           = "overlay",
2166        .fs_flags       = FS_USERNS_MOUNT,
2167        .mount          = ovl_mount,
2168        .kill_sb        = kill_anon_super,
2169};
2170MODULE_ALIAS_FS("overlay");
2171
2172static void ovl_inode_init_once(void *foo)
2173{
2174        struct ovl_inode *oi = foo;
2175
2176        inode_init_once(&oi->vfs_inode);
2177}
2178
2179static int __init ovl_init(void)
2180{
2181        int err;
2182
2183        ovl_inode_cachep = kmem_cache_create("ovl_inode",
2184                                             sizeof(struct ovl_inode), 0,
2185                                             (SLAB_RECLAIM_ACCOUNT|
2186                                              SLAB_MEM_SPREAD|SLAB_ACCOUNT),
2187                                             ovl_inode_init_once);
2188        if (ovl_inode_cachep == NULL)
2189                return -ENOMEM;
2190
2191        err = ovl_aio_request_cache_init();
2192        if (!err) {
2193                err = register_filesystem(&ovl_fs_type);
2194                if (!err)
2195                        return 0;
2196
2197                ovl_aio_request_cache_destroy();
2198        }
2199        kmem_cache_destroy(ovl_inode_cachep);
2200
2201        return err;
2202}
2203
2204static void __exit ovl_exit(void)
2205{
2206        unregister_filesystem(&ovl_fs_type);
2207
2208        /*
2209         * Make sure all delayed rcu free inodes are flushed before we
2210         * destroy cache.
2211         */
2212        rcu_barrier();
2213        kmem_cache_destroy(ovl_inode_cachep);
2214        ovl_aio_request_cache_destroy();
2215}
2216
/* Module entry points: ovl_init() on load, ovl_exit() on unload */
module_init(ovl_init);
module_exit(ovl_exit);
2219