linux/fs/overlayfs/super.c
<<
>>
Prefs
   1/*
   2 *
   3 * Copyright (C) 2011 Novell Inc.
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms of the GNU General Public License version 2 as published by
   7 * the Free Software Foundation.
   8 */
   9
  10#include <linux/fs.h>
  11#include <linux/namei.h>
  12#include <linux/xattr.h>
  13#include <linux/mount.h>
  14#include <linux/parser.h>
  15#include <linux/module.h>
  16#include <linux/statfs.h>
  17#include <linux/seq_file.h>
  18#include <linux/magic.h>
  19#include <linux/posix_acl_xattr.h>
  20#include <linux/cred.h>
  21#include <linux/exportfs.h>
  22#include "overlayfs.h"
  23
  24MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
  25MODULE_DESCRIPTION("Overlay filesystem");
  26MODULE_LICENSE("GPL");
  27
  28
  29struct ovl_dir_cache;
  30
  31#define OVL_MAX_STACK 500
  32
  33static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
  34module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
  35MODULE_PARM_DESC(ovl_redirect_dir_def,
  36                 "Default to on or off for the redirect_dir feature");
  37
  38static bool ovl_redirect_always_follow =
  39        IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
  40module_param_named(redirect_always_follow, ovl_redirect_always_follow,
  41                   bool, 0644);
  42MODULE_PARM_DESC(ovl_redirect_always_follow,
  43                 "Follow redirects even if redirect_dir feature is turned off");
  44
  45static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
  46module_param_named(index, ovl_index_def, bool, 0644);
  47MODULE_PARM_DESC(ovl_index_def,
  48                 "Default to on or off for the inodes index feature");
  49
  50static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
  51module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
  52MODULE_PARM_DESC(ovl_nfs_export_def,
  53                 "Default to on or off for the NFS export feature");
  54
  55static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
  56module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
  57MODULE_PARM_DESC(ovl_xino_auto_def,
  58                 "Auto enable xino feature");
  59
  60static void ovl_entry_stack_free(struct ovl_entry *oe)
  61{
  62        unsigned int i;
  63
  64        for (i = 0; i < oe->numlower; i++)
  65                dput(oe->lowerstack[i].dentry);
  66}
  67
  68static void ovl_dentry_release(struct dentry *dentry)
  69{
  70        struct ovl_entry *oe = dentry->d_fsdata;
  71
  72        if (oe) {
  73                ovl_entry_stack_free(oe);
  74                kfree_rcu(oe, rcu);
  75        }
  76}
  77
  78static int ovl_check_append_only(struct inode *inode, int flag)
  79{
  80        /*
  81         * This test was moot in vfs may_open() because overlay inode does
  82         * not have the S_APPEND flag, so re-check on real upper inode
  83         */
  84        if (IS_APPEND(inode)) {
  85                if  ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
  86                        return -EPERM;
  87                if (flag & O_TRUNC)
  88                        return -EPERM;
  89        }
  90
  91        return 0;
  92}
  93
  94static struct dentry *ovl_d_real(struct dentry *dentry,
  95                                 const struct inode *inode,
  96                                 unsigned int open_flags, unsigned int flags)
  97{
  98        struct dentry *real;
  99        int err;
 100
 101        if (flags & D_REAL_UPPER)
 102                return ovl_dentry_upper(dentry);
 103
 104        if (!dentry->d_inode || !S_ISREG(dentry->d_inode->i_mode)) {
 105                if (!inode || inode == d_inode(dentry))
 106                        return dentry;
 107                goto bug;
 108        }
 109
 110        if (open_flags) {
 111                err = ovl_open_maybe_copy_up(dentry, open_flags);
 112                if (err)
 113                        return ERR_PTR(err);
 114        }
 115
 116        real = ovl_dentry_upper(dentry);
 117        if (real && (!inode || inode == d_inode(real))) {
 118                if (!inode) {
 119                        err = ovl_check_append_only(d_inode(real), open_flags);
 120                        if (err)
 121                                return ERR_PTR(err);
 122                }
 123                return real;
 124        }
 125
 126        real = ovl_dentry_lower(dentry);
 127        if (!real)
 128                goto bug;
 129
 130        /* Handle recursion */
 131        real = d_real(real, inode, open_flags, 0);
 132
 133        if (!inode || inode == d_inode(real))
 134                return real;
 135
 136bug:
 137        WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
 138             inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
 139        return dentry;
 140}
 141
 142static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
 143{
 144        struct ovl_entry *oe = dentry->d_fsdata;
 145        unsigned int i;
 146        int ret = 1;
 147
 148        for (i = 0; i < oe->numlower; i++) {
 149                struct dentry *d = oe->lowerstack[i].dentry;
 150
 151                if (d->d_flags & DCACHE_OP_REVALIDATE) {
 152                        ret = d->d_op->d_revalidate(d, flags);
 153                        if (ret < 0)
 154                                return ret;
 155                        if (!ret) {
 156                                if (!(flags & LOOKUP_RCU))
 157                                        d_invalidate(d);
 158                                return -ESTALE;
 159                        }
 160                }
 161        }
 162        return 1;
 163}
 164
 165static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
 166{
 167        struct ovl_entry *oe = dentry->d_fsdata;
 168        unsigned int i;
 169        int ret = 1;
 170
 171        for (i = 0; i < oe->numlower; i++) {
 172                struct dentry *d = oe->lowerstack[i].dentry;
 173
 174                if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) {
 175                        ret = d->d_op->d_weak_revalidate(d, flags);
 176                        if (ret <= 0)
 177                                break;
 178                }
 179        }
 180        return ret;
 181}
 182
 183static const struct dentry_operations_wrapper ovl_dentry_operations = {
 184        .ops = {
 185                .d_release = ovl_dentry_release,
 186        },
 187        .size = sizeof(struct dentry_operations_wrapper),
 188        .d_real = ovl_d_real,
 189};
 190
 191static const struct dentry_operations_wrapper ovl_reval_dentry_operations = {
 192        .ops = {
 193                .d_release = ovl_dentry_release,
 194                .d_revalidate = ovl_dentry_revalidate,
 195                .d_weak_revalidate = ovl_dentry_weak_revalidate,
 196        },
 197        .size = sizeof(struct dentry_operations_wrapper),
 198        .d_real = ovl_d_real,
 199};
 200
 201static struct kmem_cache *ovl_inode_cachep;
 202
 203static struct inode *ovl_alloc_inode(struct super_block *sb)
 204{
 205        struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
 206
 207        if (!oi)
 208                return NULL;
 209
 210        oi->cache = NULL;
 211        oi->redirect = NULL;
 212        oi->version = 0;
 213        oi->flags = 0;
 214        oi->__upperdentry = NULL;
 215        oi->lower = NULL;
 216        mutex_init(&oi->lock);
 217
 218        return &oi->vfs_inode;
 219}
 220
 221static void ovl_i_callback(struct rcu_head *head)
 222{
 223        struct inode *inode = container_of(head, struct inode, i_rcu);
 224
 225        kmem_cache_free(ovl_inode_cachep, OVL_I(inode));
 226}
 227
 228static void ovl_destroy_inode(struct inode *inode)
 229{
 230        struct ovl_inode *oi = OVL_I(inode);
 231
 232        dput(oi->__upperdentry);
 233        iput(oi->lower);
 234        kfree(oi->redirect);
 235        ovl_dir_cache_free(inode);
 236        mutex_destroy(&oi->lock);
 237
 238        call_rcu(&inode->i_rcu, ovl_i_callback);
 239}
 240
 241static void ovl_free_fs(struct ovl_fs *ofs)
 242{
 243        unsigned i;
 244
 245        dput(ofs->indexdir);
 246        dput(ofs->workdir);
 247        if (ofs->workdir_locked)
 248                ovl_inuse_unlock(ofs->workbasedir);
 249        dput(ofs->workbasedir);
 250        if (ofs->upperdir_locked)
 251                ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
 252        mntput(ofs->upper_mnt);
 253        for (i = 0; i < ofs->numlower; i++)
 254                mntput(ofs->lower_layers[i].mnt);
 255        for (i = 0; i < ofs->numlowerfs; i++)
 256                free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
 257        kfree(ofs->lower_layers);
 258        kfree(ofs->lower_fs);
 259
 260        kfree(ofs->config.lowerdir);
 261        kfree(ofs->config.upperdir);
 262        kfree(ofs->config.workdir);
 263        kfree(ofs->config.redirect_mode);
 264        if (ofs->creator_cred)
 265                put_cred(ofs->creator_cred);
 266        kfree(ofs);
 267}
 268
 269static void ovl_put_super(struct super_block *sb)
 270{
 271        struct ovl_fs *ofs = sb->s_fs_info;
 272
 273        ovl_free_fs(ofs);
 274}
 275
 276/* Sync real dirty inodes in upper filesystem (if it exists) */
 277static int ovl_sync_fs(struct super_block *sb, int wait)
 278{
 279        struct ovl_fs *ofs = sb->s_fs_info;
 280        struct super_block *upper_sb;
 281        int ret;
 282
 283        if (!ofs->upper_mnt)
 284                return 0;
 285
 286        /*
 287         * If this is a sync(2) call or an emergency sync, all the super blocks
 288         * will be iterated, including upper_sb, so no need to do anything.
 289         *
 290         * If this is a syncfs(2) call, then we do need to call
 291         * sync_filesystem() on upper_sb, but enough if we do it when being
 292         * called with wait == 1.
 293         */
 294        if (!wait)
 295                return 0;
 296
 297        upper_sb = ofs->upper_mnt->mnt_sb;
 298
 299        down_read(&upper_sb->s_umount);
 300        ret = sync_filesystem(upper_sb);
 301        up_read(&upper_sb->s_umount);
 302
 303        return ret;
 304}
 305
 306/**
 307 * ovl_statfs
 308 * @sb: The overlayfs super block
 309 * @buf: The struct kstatfs to fill in with stats
 310 *
 311 * Get the filesystem statistics.  As writes always target the upper layer
 312 * filesystem pass the statfs to the upper filesystem (if it exists)
 313 */
 314static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
 315{
 316        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 317        struct dentry *root_dentry = dentry->d_sb->s_root;
 318        struct path path;
 319        int err;
 320
 321        ovl_path_real(root_dentry, &path);
 322
 323        err = vfs_statfs(&path, buf);
 324        if (!err) {
 325                buf->f_namelen = ofs->namelen;
 326                buf->f_type = OVERLAYFS_SUPER_MAGIC;
 327        }
 328
 329        return err;
 330}
 331
 332/* Will this overlay be forced to mount/remount ro? */
 333static bool ovl_force_readonly(struct ovl_fs *ofs)
 334{
 335        return (!ofs->upper_mnt || !ofs->workdir);
 336}
 337
 338static const char *ovl_redirect_mode_def(void)
 339{
 340        return ovl_redirect_dir_def ? "on" : "off";
 341}
 342
 343enum {
 344        OVL_XINO_OFF,
 345        OVL_XINO_AUTO,
 346        OVL_XINO_ON,
 347};
 348
 349static const char * const ovl_xino_str[] = {
 350        "off",
 351        "auto",
 352        "on",
 353};
 354
 355static inline int ovl_xino_def(void)
 356{
 357        return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
 358}
 359
 360/**
 361 * ovl_show_options
 362 *
 363 * Prints the mount options for a given superblock.
 364 * Returns zero; does not fail.
 365 */
 366static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 367{
 368        struct super_block *sb = dentry->d_sb;
 369        struct ovl_fs *ofs = sb->s_fs_info;
 370
 371        seq_show_option(m, "lowerdir", ofs->config.lowerdir);
 372        if (ofs->config.upperdir) {
 373                seq_show_option(m, "upperdir", ofs->config.upperdir);
 374                seq_show_option(m, "workdir", ofs->config.workdir);
 375        }
 376        if (ofs->config.default_permissions)
 377                seq_puts(m, ",default_permissions");
 378        if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
 379                seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
 380        if (ofs->config.index != ovl_index_def)
 381                seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
 382        if (ofs->config.nfs_export != ovl_nfs_export_def)
 383                seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
 384                                                "on" : "off");
 385        if (ofs->config.xino != ovl_xino_def())
 386                seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
 387        return 0;
 388}
 389
 390static int ovl_remount(struct super_block *sb, int *flags, char *data)
 391{
 392        struct ovl_fs *ofs = sb->s_fs_info;
 393
 394        if (!(*flags & MS_RDONLY) && ovl_force_readonly(ofs))
 395                return -EROFS;
 396
 397        return 0;
 398}
 399
 400static const struct super_operations ovl_super_operations = {
 401        .alloc_inode    = ovl_alloc_inode,
 402        .destroy_inode  = ovl_destroy_inode,
 403        .drop_inode     = generic_delete_inode,
 404        .put_super      = ovl_put_super,
 405        .sync_fs        = ovl_sync_fs,
 406        .statfs         = ovl_statfs,
 407        .show_options   = ovl_show_options,
 408        .remount_fs     = ovl_remount,
 409};
 410
 411enum {
 412        OPT_LOWERDIR,
 413        OPT_UPPERDIR,
 414        OPT_WORKDIR,
 415        OPT_DEFAULT_PERMISSIONS,
 416        OPT_REDIRECT_DIR,
 417        OPT_INDEX_ON,
 418        OPT_INDEX_OFF,
 419        OPT_NFS_EXPORT_ON,
 420        OPT_NFS_EXPORT_OFF,
 421        OPT_XINO_ON,
 422        OPT_XINO_OFF,
 423        OPT_XINO_AUTO,
 424        OPT_ERR,
 425};
 426
 427static const match_table_t ovl_tokens = {
 428        {OPT_LOWERDIR,                  "lowerdir=%s"},
 429        {OPT_UPPERDIR,                  "upperdir=%s"},
 430        {OPT_WORKDIR,                   "workdir=%s"},
 431        {OPT_DEFAULT_PERMISSIONS,       "default_permissions"},
 432        {OPT_REDIRECT_DIR,              "redirect_dir=%s"},
 433        {OPT_INDEX_ON,                  "index=on"},
 434        {OPT_INDEX_OFF,                 "index=off"},
 435        {OPT_NFS_EXPORT_ON,             "nfs_export=on"},
 436        {OPT_NFS_EXPORT_OFF,            "nfs_export=off"},
 437        {OPT_XINO_ON,                   "xino=on"},
 438        {OPT_XINO_OFF,                  "xino=off"},
 439        {OPT_XINO_AUTO,                 "xino=auto"},
 440        {OPT_ERR,                       NULL}
 441};
 442
 443static char *ovl_next_opt(char **s)
 444{
 445        char *sbegin = *s;
 446        char *p;
 447
 448        if (sbegin == NULL)
 449                return NULL;
 450
 451        for (p = sbegin; *p; p++) {
 452                if (*p == '\\') {
 453                        p++;
 454                        if (!*p)
 455                                break;
 456                } else if (*p == ',') {
 457                        *p = '\0';
 458                        *s = p + 1;
 459                        return sbegin;
 460                }
 461        }
 462        *s = NULL;
 463        return sbegin;
 464}
 465
 466static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
 467{
 468        if (strcmp(mode, "on") == 0) {
 469                config->redirect_dir = true;
 470                /*
 471                 * Does not make sense to have redirect creation without
 472                 * redirect following.
 473                 */
 474                config->redirect_follow = true;
 475        } else if (strcmp(mode, "follow") == 0) {
 476                config->redirect_follow = true;
 477        } else if (strcmp(mode, "off") == 0) {
 478                if (ovl_redirect_always_follow)
 479                        config->redirect_follow = true;
 480        } else if (strcmp(mode, "nofollow") != 0) {
 481                pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
 482                       mode);
 483                return -EINVAL;
 484        }
 485
 486        return 0;
 487}
 488
 489static int ovl_parse_opt(char *opt, struct ovl_config *config)
 490{
 491        char *p;
 492
 493        config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
 494        if (!config->redirect_mode)
 495                return -ENOMEM;
 496
 497        while ((p = ovl_next_opt(&opt)) != NULL) {
 498                int token;
 499                substring_t args[MAX_OPT_ARGS];
 500
 501                if (!*p)
 502                        continue;
 503
 504                token = match_token(p, ovl_tokens, args);
 505                switch (token) {
 506                case OPT_UPPERDIR:
 507                        kfree(config->upperdir);
 508                        config->upperdir = match_strdup(&args[0]);
 509                        if (!config->upperdir)
 510                                return -ENOMEM;
 511                        break;
 512
 513                case OPT_LOWERDIR:
 514                        kfree(config->lowerdir);
 515                        config->lowerdir = match_strdup(&args[0]);
 516                        if (!config->lowerdir)
 517                                return -ENOMEM;
 518                        break;
 519
 520                case OPT_WORKDIR:
 521                        kfree(config->workdir);
 522                        config->workdir = match_strdup(&args[0]);
 523                        if (!config->workdir)
 524                                return -ENOMEM;
 525                        break;
 526
 527                case OPT_DEFAULT_PERMISSIONS:
 528                        config->default_permissions = true;
 529                        break;
 530
 531                case OPT_REDIRECT_DIR:
 532                        kfree(config->redirect_mode);
 533                        config->redirect_mode = match_strdup(&args[0]);
 534                        if (!config->redirect_mode)
 535                                return -ENOMEM;
 536                        break;
 537
 538                case OPT_INDEX_ON:
 539                        config->index = true;
 540                        break;
 541
 542                case OPT_INDEX_OFF:
 543                        config->index = false;
 544                        break;
 545
 546                case OPT_NFS_EXPORT_ON:
 547                        config->nfs_export = true;
 548                        break;
 549
 550                case OPT_NFS_EXPORT_OFF:
 551                        config->nfs_export = false;
 552                        break;
 553
 554                case OPT_XINO_ON:
 555                        config->xino = OVL_XINO_ON;
 556                        break;
 557
 558                case OPT_XINO_OFF:
 559                        config->xino = OVL_XINO_OFF;
 560                        break;
 561
 562                case OPT_XINO_AUTO:
 563                        config->xino = OVL_XINO_AUTO;
 564                        break;
 565
 566                default:
 567                        pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
 568                        return -EINVAL;
 569                }
 570        }
 571
 572        /* Workdir is useless in non-upper mount */
 573        if (!config->upperdir && config->workdir) {
 574                pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
 575                        config->workdir);
 576                kfree(config->workdir);
 577                config->workdir = NULL;
 578        }
 579
 580        return ovl_parse_redirect_mode(config, config->redirect_mode);
 581}
 582
 583#define OVL_WORKDIR_NAME "work"
 584#define OVL_INDEXDIR_NAME "index"
 585
 586static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
 587                                         const char *name, bool persist)
 588{
 589        struct inode *dir =  ofs->workbasedir->d_inode;
 590        struct vfsmount *mnt = ofs->upper_mnt;
 591        struct dentry *work;
 592        int err;
 593        bool retried = false;
 594        bool locked = false;
 595
 596        inode_lock_nested(dir, I_MUTEX_PARENT);
 597        locked = true;
 598
 599retry:
 600        work = lookup_one_len(name, ofs->workbasedir, strlen(name));
 601
 602        if (!IS_ERR(work)) {
 603                struct iattr attr = {
 604                        .ia_valid = ATTR_MODE,
 605                        .ia_mode = S_IFDIR | 0,
 606                };
 607
 608                if (work->d_inode) {
 609                        err = -EEXIST;
 610                        if (retried)
 611                                goto out_dput;
 612
 613                        if (persist)
 614                                goto out_unlock;
 615
 616                        retried = true;
 617                        ovl_workdir_cleanup(dir, mnt, work, 0);
 618                        dput(work);
 619                        goto retry;
 620                }
 621
 622                work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode));
 623                err = PTR_ERR(work);
 624                if (IS_ERR(work))
 625                        goto out_err;
 626
 627                /*
 628                 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
 629                 *
 630                 * a) success (there was a POSIX ACL xattr and was removed)
 631                 * b) -ENODATA (there was no POSIX ACL xattr)
 632                 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
 633                 *
 634                 * There are various other error values that could effectively
 635                 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
 636                 * if the xattr name is too long), but the set of filesystems
 637                 * allowed as upper are limited to "normal" ones, where checking
 638                 * for the above two errors is sufficient.
 639                 */
 640                err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
 641                if (err && err != -ENODATA && err != -EOPNOTSUPP)
 642                        goto out_dput;
 643
 644                err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
 645                if (err && err != -ENODATA && err != -EOPNOTSUPP)
 646                        goto out_dput;
 647
 648                /* Clear any inherited mode bits */
 649                inode_lock(work->d_inode);
 650                err = notify_change(work, &attr, NULL);
 651                inode_unlock(work->d_inode);
 652                if (err)
 653                        goto out_dput;
 654        } else {
 655                err = PTR_ERR(work);
 656                goto out_err;
 657        }
 658out_unlock:
 659        if (locked)
 660                inode_unlock(dir);
 661
 662        return work;
 663
 664out_dput:
 665        dput(work);
 666out_err:
 667        pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
 668                ofs->config.workdir, name, -err);
 669        work = NULL;
 670        goto out_unlock;
 671}
 672
 673static void ovl_unescape(char *s)
 674{
 675        char *d = s;
 676
 677        for (;; s++, d++) {
 678                if (*s == '\\')
 679                        s++;
 680                *d = *s;
 681                if (!*s)
 682                        break;
 683        }
 684}
 685
 686static int ovl_mount_dir_noesc(const char *name, struct path *path)
 687{
 688        int err = -EINVAL;
 689
 690        if (!*name) {
 691                pr_err("overlayfs: empty lowerdir\n");
 692                goto out;
 693        }
 694        err = kern_path(name, LOOKUP_FOLLOW, path);
 695        if (err) {
 696                pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
 697                goto out;
 698        }
 699        err = -EINVAL;
 700        if (ovl_dentry_weird(path->dentry)) {
 701                pr_err("overlayfs: filesystem on '%s' not supported\n", name);
 702                goto out_put;
 703        }
 704        if (!d_is_dir(path->dentry)) {
 705                pr_err("overlayfs: '%s' not a directory\n", name);
 706                goto out_put;
 707        }
 708        return 0;
 709
 710out_put:
 711        path_put_init(path);
 712out:
 713        return err;
 714}
 715
 716static int ovl_mount_dir(const char *name, struct path *path)
 717{
 718        int err = -ENOMEM;
 719        char *tmp = kstrdup(name, GFP_KERNEL);
 720
 721        if (tmp) {
 722                ovl_unescape(tmp);
 723                err = ovl_mount_dir_noesc(tmp, path);
 724
 725                if (!err)
 726                        if (ovl_dentry_remote(path->dentry)) {
 727                                pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
 728                                       tmp);
 729                                path_put_init(path);
 730                                err = -EINVAL;
 731                        }
 732                kfree(tmp);
 733        }
 734        return err;
 735}
 736
 737static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
 738                             const char *name)
 739{
 740        struct kstatfs statfs;
 741        int err = vfs_statfs(path, &statfs);
 742
 743        if (err)
 744                pr_err("overlayfs: statfs failed on '%s'\n", name);
 745        else
 746                ofs->namelen = max(ofs->namelen, statfs.f_namelen);
 747
 748        return err;
 749}
 750
 751static int ovl_lower_dir(const char *name, struct path *path,
 752                         struct ovl_fs *ofs, int *stack_depth, bool *remote)
 753{
 754        int fh_type;
 755        int err;
 756        const int *lower_stack_depth;
 757
 758        err = ovl_mount_dir_noesc(name, path);
 759        if (err)
 760                goto out;
 761
 762        lower_stack_depth = get_s_stack_depth(path->mnt->mnt_sb);
 763        if (!lower_stack_depth) {
 764                pr_err("overlayfs: superblock missing extension wrapper (old kernel?)\n");
 765                err = -EOPNOTSUPP;
 766                goto out_put;
 767        }
 768
 769        err = ovl_check_namelen(path, ofs, name);
 770        if (err)
 771                goto out_put;
 772
 773        *stack_depth = max(*stack_depth, *lower_stack_depth);
 774
 775        if (ovl_dentry_remote(path->dentry))
 776                *remote = true;
 777
 778        /*
 779         * The inodes index feature and NFS export need to encode and decode
 780         * file handles, so they require that all layers support them.
 781         */
 782        fh_type = ovl_can_decode_fh(path->dentry->d_sb);
 783        if ((ofs->config.nfs_export ||
 784             (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
 785                ofs->config.index = false;
 786                ofs->config.nfs_export = false;
 787                pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
 788                        name);
 789        }
 790
 791        /* Check if lower fs has 32bit inode numbers */
 792        if (fh_type != FILEID_INO32_GEN)
 793                ofs->xino_bits = 0;
 794
 795        return 0;
 796
 797out_put:
 798        path_put_init(path);
 799out:
 800        return err;
 801}
 802
 803/* Workdir should not be subdir of upperdir and vice versa */
 804static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
 805{
 806        bool ok = false;
 807
 808        if (workdir != upperdir) {
 809                ok = (lock_rename(workdir, upperdir) == NULL);
 810                unlock_rename(workdir, upperdir);
 811        }
 812        return ok;
 813}
 814
 815static unsigned int ovl_split_lowerdirs(char *str)
 816{
 817        unsigned int ctr = 1;
 818        char *s, *d;
 819
 820        for (s = d = str;; s++, d++) {
 821                if (*s == '\\') {
 822                        s++;
 823                } else if (*s == ':') {
 824                        *d = '\0';
 825                        ctr++;
 826                        continue;
 827                }
 828                *d = *s;
 829                if (!*s)
 830                        break;
 831        }
 832        return ctr;
 833}
 834
 835static int __maybe_unused
 836ovl_posix_acl_xattr_get(struct dentry *dentry,
 837                        const char *name, void *buffer, size_t size,
 838                        int handler_flags)
 839{
 840        return ovl_xattr_get(dentry, dentry->d_inode, handler_flags == ACL_TYPE_DEFAULT ? XATTR_NAME_POSIX_ACL_DEFAULT : XATTR_NAME_POSIX_ACL_ACCESS,
 841                             buffer, size);
 842}
 843
 844static int __maybe_unused
 845ovl_posix_acl_xattr_set(struct dentry *dentry,
 846                        const char *name, const void *value,
 847                        size_t size, int flags, int handler_flags)
 848{
 849        struct dentry *workdir = ovl_workdir(dentry);
 850        struct inode *inode = dentry->d_inode;
 851        struct posix_acl *acl = NULL;
 852        int err;
 853
 854        /* Check that everything is OK before copy-up */
 855        if (value) {
 856                acl = posix_acl_from_xattr(&init_user_ns, value, size);
 857                if (IS_ERR(acl))
 858                        return PTR_ERR(acl);
 859        }
 860        err = -EOPNOTSUPP;
 861        if (!IS_POSIXACL(d_inode(workdir)))
 862                goto out_acl_release;
 863        if (handler_flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
 864                err = acl ? -EACCES : 0;
 865                goto out_acl_release;
 866        }
 867        err = -EPERM;
 868        if (!inode_owner_or_capable(inode))
 869                goto out_acl_release;
 870
 871        posix_acl_release(acl);
 872
 873        /*
 874         * Check if sgid bit needs to be cleared (actual setacl operation will
 875         * be done with mounter's capabilities and so that won't do it for us).
 876         */
 877        if (unlikely(inode->i_mode & S_ISGID) &&
 878            handler_flags == ACL_TYPE_ACCESS &&
 879            !in_group_p(inode->i_gid) &&
 880            !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
 881                struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
 882
 883                err = ovl_setattr(dentry, &iattr);
 884                if (err)
 885                        return err;
 886        }
 887
 888        err = ovl_xattr_set(dentry, dentry->d_inode, handler_flags == ACL_TYPE_DEFAULT ? XATTR_NAME_POSIX_ACL_DEFAULT : XATTR_NAME_POSIX_ACL_ACCESS,
 889                             value, size, flags);
 890        if (!err)
 891                ovl_copyattr(ovl_inode_real(inode), inode);
 892
 893        return err;
 894
 895out_acl_release:
 896        posix_acl_release(acl);
 897        return err;
 898}
 899
 900static int ovl_own_xattr_get(struct dentry *dentry,
 901                             const char *name, void *buffer, size_t size,
 902                             int handler_flags)
 903{
 904        return -EOPNOTSUPP;
 905}
 906
 907static int ovl_own_xattr_set(struct dentry *dentry,
 908                             const char *name, const void *value,
 909                             size_t size, int flags, int handler_flags)
 910{
 911        return -EOPNOTSUPP;
 912}
 913
 914static int ovl_other_xattr_get(struct dentry *dentry,
 915                               const char *name, void *buffer, size_t size,
 916                               int handler_flags)
 917{
 918        return ovl_xattr_get(dentry, dentry->d_inode, name, buffer, size);
 919}
 920
 921static int ovl_other_xattr_set(struct dentry *dentry,
 922                               const char *name, const void *value,
 923                               size_t size, int flags, int handler_flags)
 924{
 925        return ovl_xattr_set(dentry, dentry->d_inode, name, value, size, flags);
 926}
 927
 928static const struct xattr_handler __maybe_unused
 929ovl_posix_acl_access_xattr_handler = {
 930        .prefix = XATTR_NAME_POSIX_ACL_ACCESS,
 931        .flags = ACL_TYPE_ACCESS,
 932        .get = ovl_posix_acl_xattr_get,
 933        .set = ovl_posix_acl_xattr_set,
 934};
 935
 936static const struct xattr_handler __maybe_unused
 937ovl_posix_acl_default_xattr_handler = {
 938        .prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
 939        .flags = ACL_TYPE_DEFAULT,
 940        .get = ovl_posix_acl_xattr_get,
 941        .set = ovl_posix_acl_xattr_set,
 942};
 943
 944static const struct xattr_handler ovl_own_xattr_handler = {
 945        .prefix = OVL_XATTR_PREFIX,
 946        .get = ovl_own_xattr_get,
 947        .set = ovl_own_xattr_set,
 948};
 949
 950static const struct xattr_handler ovl_other_xattr_handler = {
 951        .prefix = "", /* catch all */
 952        .get = ovl_other_xattr_get,
 953        .set = ovl_other_xattr_set,
 954};
 955
 956static const struct xattr_handler *ovl_xattr_handlers[] = {
 957#ifdef CONFIG_FS_POSIX_ACL
 958        &ovl_posix_acl_access_xattr_handler,
 959        &ovl_posix_acl_default_xattr_handler,
 960#endif
 961        &ovl_own_xattr_handler,
 962        &ovl_other_xattr_handler,
 963        NULL
 964};
 965
 966static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath)
 967{
 968        struct vfsmount *upper_mnt;
 969        int err;
 970
 971        err = ovl_mount_dir(ofs->config.upperdir, upperpath);
 972        if (err)
 973                goto out;
 974
 975        /* Upper fs should not be r/o */
 976        if (upperpath->mnt->mnt_sb->s_flags & MS_RDONLY) {
 977                pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
 978                err = -EINVAL;
 979                goto out;
 980        }
 981
 982        err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
 983        if (err)
 984                goto out;
 985
 986        upper_mnt = clone_private_mount(upperpath);
 987        err = PTR_ERR(upper_mnt);
 988        if (IS_ERR(upper_mnt)) {
 989                pr_err("overlayfs: failed to clone upperpath\n");
 990                goto out;
 991        }
 992
 993        /* Don't inherit atime flags */
 994        upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
 995        ofs->upper_mnt = upper_mnt;
 996
 997        err = -EBUSY;
 998        if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
 999                ofs->upperdir_locked = true;
1000        } else if (ofs->config.index) {
1001                pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
1002                goto out;
1003        } else {
1004                pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
1005        }
1006
1007        err = 0;
1008out:
1009        return err;
1010}
1011
1012static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
1013{
1014        struct vfsmount *mnt = ofs->upper_mnt;
1015        struct dentry *temp;
1016        int fh_type;
1017        int err;
1018
1019        err = mnt_want_write(mnt);
1020        if (err)
1021                return err;
1022
1023        ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
1024        if (!ofs->workdir)
1025                goto out;
1026
1027        /*
1028         * Upper should support d_type, else whiteouts are visible.  Given
1029         * workdir and upper are on same fs, we can do iterate_dir() on
1030         * workdir. This check requires successful creation of workdir in
1031         * previous step.
1032         */
1033        err = ovl_check_d_type_supported(workpath);
1034        if (err < 0)
1035                goto out;
1036
1037        /*
1038         * We allowed this configuration and don't want to break users over
1039         * kernel upgrade. So warn instead of erroring out.
1040         */
1041        if (!err)
1042                pr_warn("overlayfs: upper fs needs to support d_type.\n");
1043
1044        /* Check if upper/work fs supports O_TMPFILE */
1045        temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
1046        ofs->tmpfile = !IS_ERR(temp);
1047        if (ofs->tmpfile)
1048                dput(temp);
1049        else
1050                pr_warn("overlayfs: upper fs does not support tmpfile.\n");
1051
1052        /*
1053         * Check if upper/work fs supports trusted.overlay.* xattr
1054         */
1055        err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
1056        if (err) {
1057                ofs->noxattr = true;
1058                ofs->config.index = false;
1059                pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n");
1060                err = 0;
1061        } else {
1062                vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
1063        }
1064
1065        /* Check if upper/work fs supports file handles */
1066        fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
1067        if (ofs->config.index && !fh_type) {
1068                ofs->config.index = false;
1069                pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
1070        }
1071
1072        /* Check if upper fs has 32bit inode numbers */
1073        if (fh_type != FILEID_INO32_GEN)
1074                ofs->xino_bits = 0;
1075
1076        /* NFS export of r/w mount depends on index */
1077        if (ofs->config.nfs_export && !ofs->config.index) {
1078                pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
1079                ofs->config.nfs_export = false;
1080        }
1081
1082out:
1083        mnt_drop_write(mnt);
1084        return err;
1085}
1086
1087static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath)
1088{
1089        int err;
1090        struct path workpath = { };
1091
1092        err = ovl_mount_dir(ofs->config.workdir, &workpath);
1093        if (err)
1094                goto out;
1095
1096        err = -EINVAL;
1097        if (upperpath->mnt != workpath.mnt) {
1098                pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
1099                goto out;
1100        }
1101        if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
1102                pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
1103                goto out;
1104        }
1105
1106        ofs->workbasedir = dget(workpath.dentry);
1107
1108        err = -EBUSY;
1109        if (ovl_inuse_trylock(ofs->workbasedir)) {
1110                ofs->workdir_locked = true;
1111        } else if (ofs->config.index) {
1112                pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
1113                goto out;
1114        } else {
1115                pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
1116        }
1117
1118        err = ovl_make_workdir(ofs, &workpath);
1119        if (err)
1120                goto out;
1121
1122        err = 0;
1123out:
1124        path_put(&workpath);
1125
1126        return err;
1127}
1128
1129static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
1130                            struct path *upperpath)
1131{
1132        struct vfsmount *mnt = ofs->upper_mnt;
1133        int err;
1134
1135        err = mnt_want_write(mnt);
1136        if (err)
1137                return err;
1138
1139        /* Verify lower root is upper root origin */
1140        err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
1141                                true);
1142        if (err) {
1143                pr_err("overlayfs: failed to verify upper root origin\n");
1144                goto out;
1145        }
1146
1147        ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
1148        if (ofs->indexdir) {
1149                /*
1150                 * Verify upper root is exclusively associated with index dir.
1151                 * Older kernels stored upper fh in "trusted.overlay.origin"
1152                 * xattr. If that xattr exists, verify that it is a match to
1153                 * upper dir file handle. In any case, verify or set xattr
1154                 * "trusted.overlay.upper" to indicate that index may have
1155                 * directory entries.
1156                 */
1157                if (ovl_check_origin_xattr(ofs->indexdir)) {
1158                        err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
1159                                                upperpath->dentry, true, false);
1160                        if (err)
1161                                pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
1162                }
1163                err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
1164                if (err)
1165                        pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
1166
1167                /* Cleanup bad/stale/orphan index entries */
1168                if (!err)
1169                        err = ovl_indexdir_cleanup(ofs);
1170        }
1171        if (err || !ofs->indexdir)
1172                pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
1173
1174out:
1175        mnt_drop_write(mnt);
1176        return err;
1177}
1178
1179/* Get a unique fsid for the layer */
1180static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
1181{
1182        unsigned int i;
1183        dev_t dev;
1184        int err;
1185
1186        /* fsid 0 is reserved for upper fs even with non upper overlay */
1187        if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
1188                return 0;
1189
1190        for (i = 0; i < ofs->numlowerfs; i++) {
1191                if (ofs->lower_fs[i].sb == sb)
1192                        return i + 1;
1193        }
1194
1195        err = get_anon_bdev(&dev);
1196        if (err) {
1197                pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
1198                return err;
1199        }
1200
1201        ofs->lower_fs[ofs->numlowerfs].sb = sb;
1202        ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
1203        ofs->numlowerfs++;
1204
1205        return ofs->numlowerfs;
1206}
1207
1208static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
1209                                unsigned int numlower)
1210{
1211        int err;
1212        unsigned int i;
1213
1214        err = -ENOMEM;
1215        ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
1216                                    GFP_KERNEL);
1217        if (ofs->lower_layers == NULL)
1218                goto out;
1219
1220        ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb),
1221                                GFP_KERNEL);
1222        if (ofs->lower_fs == NULL)
1223                goto out;
1224
1225        for (i = 0; i < numlower; i++) {
1226                struct vfsmount *mnt;
1227                int fsid;
1228
1229                err = fsid = ovl_get_fsid(ofs, stack[i].mnt->mnt_sb);
1230                if (err < 0)
1231                        goto out;
1232
1233                mnt = clone_private_mount(&stack[i]);
1234                err = PTR_ERR(mnt);
1235                if (IS_ERR(mnt)) {
1236                        pr_err("overlayfs: failed to clone lowerpath\n");
1237                        goto out;
1238                }
1239
1240                /*
1241                 * Make lower layers R/O.  That way fchmod/fchown on lower file
1242                 * will fail instead of modifying lower fs.
1243                 */
1244                mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
1245
1246                ofs->lower_layers[ofs->numlower].mnt = mnt;
1247                ofs->lower_layers[ofs->numlower].idx = i + 1;
1248                ofs->lower_layers[ofs->numlower].fsid = fsid;
1249                if (fsid) {
1250                        ofs->lower_layers[ofs->numlower].fs =
1251                                &ofs->lower_fs[fsid - 1];
1252                }
1253                ofs->numlower++;
1254        }
1255
1256        /*
1257         * When all layers on same fs, overlay can use real inode numbers.
1258         * With mount option "xino=on", mounter declares that there are enough
1259         * free high bits in underlying fs to hold the unique fsid.
1260         * If overlayfs does encounter underlying inodes using the high xino
1261         * bits reserved for fsid, it emits a warning and uses the original
1262         * inode number.
1263         */
1264        if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) {
1265                ofs->xino_bits = 0;
1266                ofs->config.xino = OVL_XINO_OFF;
1267        } else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) {
1268                /*
1269                 * This is a roundup of number of bits needed for numlowerfs+1
1270                 * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for
1271                 * upper fs even with non upper overlay.
1272                 */
1273                BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
1274                ofs->xino_bits = ilog2(ofs->numlowerfs) + 1;
1275        }
1276
1277        if (ofs->xino_bits) {
1278                pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n",
1279                        ofs->xino_bits);
1280        }
1281
1282        err = 0;
1283out:
1284        return err;
1285}
1286
1287static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
1288                                            struct ovl_fs *ofs,
1289                                            int *overlay_stack_depth)
1290{
1291        int err;
1292        char *lowertmp, *lower;
1293        struct path *stack = NULL;
1294        unsigned int stacklen, numlower = 0, i;
1295        bool remote = false;
1296        struct ovl_entry *oe;
1297
1298        err = -ENOMEM;
1299        lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
1300        if (!lowertmp)
1301                goto out_err;
1302
1303        err = -EINVAL;
1304        stacklen = ovl_split_lowerdirs(lowertmp);
1305        if (stacklen > OVL_MAX_STACK) {
1306                pr_err("overlayfs: too many lower directories, limit is %d\n",
1307                       OVL_MAX_STACK);
1308                goto out_err;
1309        } else if (!ofs->config.upperdir && stacklen == 1) {
1310                pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
1311                goto out_err;
1312        } else if (!ofs->config.upperdir && ofs->config.nfs_export &&
1313                   ofs->config.redirect_follow) {
1314                pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
1315                ofs->config.nfs_export = false;
1316        }
1317
1318        err = -ENOMEM;
1319        stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
1320        if (!stack)
1321                goto out_err;
1322
1323        err = -EINVAL;
1324        lower = lowertmp;
1325        for (numlower = 0; numlower < stacklen; numlower++) {
1326                err = ovl_lower_dir(lower, &stack[numlower], ofs,
1327                                    overlay_stack_depth, &remote);
1328                if (err)
1329                        goto out_err;
1330
1331                lower = strchr(lower, '\0') + 1;
1332        }
1333
1334        err = -EINVAL;
1335        (*overlay_stack_depth)++;
1336        if (*overlay_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
1337                pr_err("overlayfs: maximum fs stacking depth exceeded\n");
1338                goto out_err;
1339        }
1340
1341        err = ovl_get_lower_layers(ofs, stack, numlower);
1342        if (err)
1343                goto out_err;
1344
1345        err = -ENOMEM;
1346        oe = ovl_alloc_entry(numlower);
1347        if (!oe)
1348                goto out_err;
1349
1350        for (i = 0; i < numlower; i++) {
1351                oe->lowerstack[i].dentry = dget(stack[i].dentry);
1352                oe->lowerstack[i].layer = &ofs->lower_layers[i];
1353        }
1354
1355        if (remote)
1356                sb->s_d_op = &ovl_reval_dentry_operations.ops;
1357        else
1358                sb->s_d_op = &ovl_dentry_operations.ops;
1359
1360out:
1361        for (i = 0; i < numlower; i++)
1362                path_put(&stack[i]);
1363        kfree(stack);
1364        kfree(lowertmp);
1365
1366        return oe;
1367
1368out_err:
1369        oe = ERR_PTR(err);
1370        goto out;
1371}
1372
1373static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1374{
1375        struct path upperpath = { };
1376        struct dentry *root_dentry;
1377        struct ovl_entry *oe;
1378        struct ovl_fs *ofs;
1379        const int *upper_stack_depth;
1380        int *overlay_stack_depth;
1381        struct cred *cred;
1382        int err;
1383
1384        err = -ENOMEM;
1385        ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
1386        if (!ofs)
1387                goto out;
1388
1389        ofs->creator_cred = cred = prepare_creds();
1390        if (!cred)
1391                goto out_err;
1392
1393        ofs->config.index = ovl_index_def;
1394        ofs->config.nfs_export = ovl_nfs_export_def;
1395        ofs->config.xino = ovl_xino_def();
1396        err = ovl_parse_opt((char *) data, &ofs->config);
1397        if (err)
1398                goto out_err;
1399
1400        err = -EINVAL;
1401        if (!ofs->config.lowerdir) {
1402                if (!silent)
1403                        pr_err("overlayfs: missing 'lowerdir'\n");
1404                goto out_err;
1405        }
1406
1407        overlay_stack_depth = get_s_stack_depth(sb);
1408        err = -EOPNOTSUPP;
1409        if (!overlay_stack_depth) {
1410                pr_err("overlayfs: superblock missing extension wrapper (old kernel?)\n");
1411                goto out_err;
1412        }
1413        *overlay_stack_depth = 0;
1414
1415        sb->s_maxbytes = MAX_LFS_FILESIZE;
1416        /* Assume underlaying fs uses 32bit inodes unless proven otherwise */
1417        if (ofs->config.xino != OVL_XINO_OFF)
1418                ofs->xino_bits = BITS_PER_LONG - 32;
1419
1420        if (ofs->config.upperdir) {
1421                if (!ofs->config.workdir) {
1422                        pr_err("overlayfs: missing 'workdir'\n");
1423                        goto out_err;
1424                }
1425
1426                err = ovl_get_upper(ofs, &upperpath);
1427                if (err)
1428                        goto out_err;
1429
1430                err = ovl_get_workdir(ofs, &upperpath);
1431                if (err)
1432                        goto out_err;
1433
1434                if (!ofs->workdir)
1435                        sb->s_flags |= MS_RDONLY;
1436
1437                upper_stack_depth = get_s_stack_depth(ofs->upper_mnt->mnt_sb);
1438                err = -EOPNOTSUPP;
1439                if (!upper_stack_depth) {
1440                        pr_err("overlayfs: superblock missing extension wrapper (old kernel?)\n");
1441                        goto out_err;
1442                }
1443                *overlay_stack_depth = *upper_stack_depth;
1444                sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
1445
1446        }
1447        oe = ovl_get_lowerstack(sb, ofs, overlay_stack_depth);
1448        err = PTR_ERR(oe);
1449        if (IS_ERR(oe))
1450                goto out_err;
1451
1452        /* If the upper fs is nonexistent, we mark overlayfs r/o too */
1453        if (!ofs->upper_mnt)
1454                sb->s_flags |= MS_RDONLY;
1455
1456        if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
1457                err = ovl_get_indexdir(ofs, oe, &upperpath);
1458                if (err)
1459                        goto out_free_oe;
1460
1461                /* Force r/o mount with no index dir */
1462                if (!ofs->indexdir) {
1463                        dput(ofs->workdir);
1464                        ofs->workdir = NULL;
1465                        sb->s_flags |= MS_RDONLY;
1466                }
1467
1468        }
1469
1470        /* Show index=off in /proc/mounts for forced r/o mount */
1471        if (!ofs->indexdir) {
1472                ofs->config.index = false;
1473                if (ofs->upper_mnt && ofs->config.nfs_export) {
1474                        pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
1475                        ofs->config.nfs_export = false;
1476                }
1477        }
1478
1479        if (ofs->config.nfs_export)
1480                sb->s_export_op = &ovl_export_operations;
1481
1482        /* Never override disk quota limits or use reserved space */
1483        cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
1484
1485        sb->s_magic = OVERLAYFS_SUPER_MAGIC;
1486        sb->s_op = &ovl_super_operations;
1487        sb->s_xattr = ovl_xattr_handlers;
1488        sb->s_fs_info = ofs;
1489        sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
1490
1491        err = -ENOMEM;
1492        root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
1493        if (!root_dentry)
1494                goto out_free_oe;
1495
1496        root_dentry->d_fsdata = oe;
1497
1498        mntput(upperpath.mnt);
1499        if (upperpath.dentry) {
1500                ovl_dentry_set_upper_alias(root_dentry);
1501                if (ovl_is_impuredir(upperpath.dentry))
1502                        ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
1503        }
1504
1505        /* Root is always merge -> can have whiteouts */
1506        ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
1507        ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
1508        ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
1509                       ovl_dentry_lower(root_dentry));
1510
1511        sb->s_root = root_dentry;
1512
1513        return 0;
1514
1515out_free_oe:
1516        ovl_entry_stack_free(oe);
1517        kfree(oe);
1518out_err:
1519        path_put(&upperpath);
1520        ovl_free_fs(ofs);
1521out:
1522        return err;
1523}
1524
1525static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
1526                                const char *dev_name, void *raw_data)
1527{
1528        static bool seen = false;
1529
1530        if (!seen) {
1531                mark_tech_preview("Overlay filesystem", THIS_MODULE);
1532                seen = true;
1533        }
1534
1535        return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
1536}
1537
1538static struct file_system_type ovl_fs_type = {
1539        .owner          = THIS_MODULE,
1540        .name           = "overlay",
1541        .mount          = ovl_mount,
1542        .kill_sb        = kill_anon_super,
1543        .fs_flags       = FS_HAS_DOPS_WRAPPER,
1544};
1545MODULE_ALIAS_FS("overlay");
1546
1547static void ovl_inode_init_once(void *foo)
1548{
1549        struct ovl_inode *oi = foo;
1550
1551        inode_init_once(&oi->vfs_inode);
1552}
1553
1554static int __init ovl_init(void)
1555{
1556        int err;
1557
1558        ovl_inode_cachep = kmem_cache_create("ovl_inode",
1559                                             sizeof(struct ovl_inode), 0,
1560                                             (SLAB_RECLAIM_ACCOUNT|
1561                                              SLAB_MEM_SPREAD),
1562                                             ovl_inode_init_once);
1563        if (ovl_inode_cachep == NULL)
1564                return -ENOMEM;
1565
1566        err = register_filesystem(&ovl_fs_type);
1567        if (err)
1568                kmem_cache_destroy(ovl_inode_cachep);
1569
1570        return err;
1571}
1572
1573static void __exit ovl_exit(void)
1574{
1575        unregister_filesystem(&ovl_fs_type);
1576
1577        /*
1578         * Make sure all delayed rcu free inodes are flushed before we
1579         * destroy cache.
1580         */
1581        rcu_barrier();
1582        kmem_cache_destroy(ovl_inode_cachep);
1583
1584}
1585
/* Module entry points: ovl_init() on load, ovl_exit() on unload. */
module_exit(ovl_exit);
module_init(ovl_init);
1588