linux/fs/overlayfs/inode.c
<<
>>
Prefs
   1/*
   2 *
   3 * Copyright (C) 2011 Novell Inc.
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms of the GNU General Public License version 2 as published by
   7 * the Free Software Foundation.
   8 */
   9
  10#include <linux/fs.h>
  11#include <linux/slab.h>
  12#include <linux/cred.h>
  13#include <linux/xattr.h>
  14#include <linux/posix_acl.h>
  15#include "overlayfs.h"
  16
  17int ovl_setattr(struct dentry *dentry, struct iattr *attr)
  18{
  19        int err;
  20        struct dentry *upperdentry;
  21        const struct cred *old_cred;
  22
  23        /*
  24         * Check for permissions before trying to copy-up.  This is redundant
  25         * since it will be rechecked later by ->setattr() on upper dentry.  But
  26         * without this, copy-up can be triggered by just about anybody.
  27         *
  28         * We don't initialize inode->size, which just means that
  29         * inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not
  30         * check for a swapfile (which this won't be anyway).
  31         */
  32        err = setattr_prepare(dentry, attr);
  33        if (err)
  34                return err;
  35
  36        err = ovl_want_write(dentry);
  37        if (err)
  38                goto out;
  39
  40        err = ovl_copy_up(dentry);
  41        if (!err) {
  42                upperdentry = ovl_dentry_upper(dentry);
  43
  44                if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
  45                        attr->ia_valid &= ~ATTR_MODE;
  46
  47                inode_lock(upperdentry->d_inode);
  48                old_cred = ovl_override_creds(dentry->d_sb);
  49                err = notify_change(upperdentry, attr, NULL);
  50                revert_creds(old_cred);
  51                if (!err)
  52                        ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
  53                inode_unlock(upperdentry->d_inode);
  54        }
  55        ovl_drop_write(dentry);
  56out:
  57        return err;
  58}
  59
  60int ovl_getattr(const struct path *path, struct kstat *stat,
  61                u32 request_mask, unsigned int flags)
  62{
  63        struct dentry *dentry = path->dentry;
  64        enum ovl_path_type type;
  65        struct path realpath;
  66        const struct cred *old_cred;
  67        bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
  68        int err;
  69
  70        type = ovl_path_real(dentry, &realpath);
  71        old_cred = ovl_override_creds(dentry->d_sb);
  72        err = vfs_getattr(&realpath, stat, request_mask, flags);
  73        if (err)
  74                goto out;
  75
  76        /*
  77         * When all layers are on the same fs, all real inode number are
  78         * unique, so we use the overlay st_dev, which is friendly to du -x.
  79         *
  80         * We also use st_ino of the copy up origin, if we know it.
  81         * This guaranties constant st_dev/st_ino across copy up.
  82         *
  83         * If filesystem supports NFS export ops, this also guaranties
  84         * persistent st_ino across mount cycle.
  85         */
  86        if (ovl_same_sb(dentry->d_sb)) {
  87                if (OVL_TYPE_ORIGIN(type)) {
  88                        struct kstat lowerstat;
  89                        u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
  90
  91                        ovl_path_lower(dentry, &realpath);
  92                        err = vfs_getattr(&realpath, &lowerstat,
  93                                          lowermask, flags);
  94                        if (err)
  95                                goto out;
  96
  97                        WARN_ON_ONCE(stat->dev != lowerstat.dev);
  98                        /*
  99                         * Lower hardlinks are broken on copy up to different
 100                         * upper files, so we cannot use the lower origin st_ino
 101                         * for those different files, even for the same fs case.
 102                         */
 103                        if (is_dir || lowerstat.nlink == 1)
 104                                stat->ino = lowerstat.ino;
 105                }
 106                stat->dev = dentry->d_sb->s_dev;
 107        } else if (is_dir) {
 108                /*
 109                 * If not all layers are on the same fs the pair {real st_ino;
 110                 * overlay st_dev} is not unique, so use the non persistent
 111                 * overlay st_ino.
 112                 *
 113                 * Always use the overlay st_dev for directories, so 'find
 114                 * -xdev' will scan the entire overlay mount and won't cross the
 115                 * overlay mount boundaries.
 116                 */
 117                stat->dev = dentry->d_sb->s_dev;
 118                stat->ino = dentry->d_inode->i_ino;
 119        }
 120
 121        /*
 122         * It's probably not worth it to count subdirs to get the
 123         * correct link count.  nlink=1 seems to pacify 'find' and
 124         * other utilities.
 125         */
 126        if (is_dir && OVL_TYPE_MERGE(type))
 127                stat->nlink = 1;
 128
 129out:
 130        revert_creds(old_cred);
 131
 132        return err;
 133}
 134
 135int ovl_permission(struct inode *inode, int mask)
 136{
 137        bool is_upper;
 138        struct inode *realinode = ovl_inode_real(inode, &is_upper);
 139        const struct cred *old_cred;
 140        int err;
 141
 142        /* Careful in RCU walk mode */
 143        if (!realinode) {
 144                WARN_ON(!(mask & MAY_NOT_BLOCK));
 145                return -ECHILD;
 146        }
 147
 148        /*
 149         * Check overlay inode with the creds of task and underlying inode
 150         * with creds of mounter
 151         */
 152        err = generic_permission(inode, mask);
 153        if (err)
 154                return err;
 155
 156        old_cred = ovl_override_creds(inode->i_sb);
 157        if (!is_upper && !special_file(realinode->i_mode) && mask & MAY_WRITE) {
 158                mask &= ~(MAY_WRITE | MAY_APPEND);
 159                /* Make sure mounter can read file for copy up later */
 160                mask |= MAY_READ;
 161        }
 162        err = inode_permission(realinode, mask);
 163        revert_creds(old_cred);
 164
 165        return err;
 166}
 167
 168static const char *ovl_get_link(struct dentry *dentry,
 169                                struct inode *inode,
 170                                struct delayed_call *done)
 171{
 172        const struct cred *old_cred;
 173        const char *p;
 174
 175        if (!dentry)
 176                return ERR_PTR(-ECHILD);
 177
 178        old_cred = ovl_override_creds(dentry->d_sb);
 179        p = vfs_get_link(ovl_dentry_real(dentry), done);
 180        revert_creds(old_cred);
 181        return p;
 182}
 183
 184bool ovl_is_private_xattr(const char *name)
 185{
 186        return strncmp(name, OVL_XATTR_PREFIX,
 187                       sizeof(OVL_XATTR_PREFIX) - 1) == 0;
 188}
 189
 190int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
 191                  size_t size, int flags)
 192{
 193        int err;
 194        struct path realpath;
 195        enum ovl_path_type type = ovl_path_real(dentry, &realpath);
 196        const struct cred *old_cred;
 197
 198        err = ovl_want_write(dentry);
 199        if (err)
 200                goto out;
 201
 202        if (!value && !OVL_TYPE_UPPER(type)) {
 203                err = vfs_getxattr(realpath.dentry, name, NULL, 0);
 204                if (err < 0)
 205                        goto out_drop_write;
 206        }
 207
 208        err = ovl_copy_up(dentry);
 209        if (err)
 210                goto out_drop_write;
 211
 212        if (!OVL_TYPE_UPPER(type))
 213                ovl_path_upper(dentry, &realpath);
 214
 215        old_cred = ovl_override_creds(dentry->d_sb);
 216        if (value)
 217                err = vfs_setxattr(realpath.dentry, name, value, size, flags);
 218        else {
 219                WARN_ON(flags != XATTR_REPLACE);
 220                err = vfs_removexattr(realpath.dentry, name);
 221        }
 222        revert_creds(old_cred);
 223
 224out_drop_write:
 225        ovl_drop_write(dentry);
 226out:
 227        return err;
 228}
 229
 230int ovl_xattr_get(struct dentry *dentry, const char *name,
 231                  void *value, size_t size)
 232{
 233        struct dentry *realdentry = ovl_dentry_real(dentry);
 234        ssize_t res;
 235        const struct cred *old_cred;
 236
 237        old_cred = ovl_override_creds(dentry->d_sb);
 238        res = vfs_getxattr(realdentry, name, value, size);
 239        revert_creds(old_cred);
 240        return res;
 241}
 242
 243static bool ovl_can_list(const char *s)
 244{
 245        /* List all non-trusted xatts */
 246        if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
 247                return true;
 248
 249        /* Never list trusted.overlay, list other trusted for superuser only */
 250        return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
 251}
 252
 253ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
 254{
 255        struct dentry *realdentry = ovl_dentry_real(dentry);
 256        ssize_t res;
 257        size_t len;
 258        char *s;
 259        const struct cred *old_cred;
 260
 261        old_cred = ovl_override_creds(dentry->d_sb);
 262        res = vfs_listxattr(realdentry, list, size);
 263        revert_creds(old_cred);
 264        if (res <= 0 || size == 0)
 265                return res;
 266
 267        /* filter out private xattrs */
 268        for (s = list, len = res; len;) {
 269                size_t slen = strnlen(s, len) + 1;
 270
 271                /* underlying fs providing us with an broken xattr list? */
 272                if (WARN_ON(slen > len))
 273                        return -EIO;
 274
 275                len -= slen;
 276                if (!ovl_can_list(s)) {
 277                        res -= slen;
 278                        memmove(s, s + slen, len);
 279                } else {
 280                        s += slen;
 281                }
 282        }
 283
 284        return res;
 285}
 286
 287struct posix_acl *ovl_get_acl(struct inode *inode, int type)
 288{
 289        struct inode *realinode = ovl_inode_real(inode, NULL);
 290        const struct cred *old_cred;
 291        struct posix_acl *acl;
 292
 293        if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
 294                return NULL;
 295
 296        old_cred = ovl_override_creds(inode->i_sb);
 297        acl = get_acl(realinode, type);
 298        revert_creds(old_cred);
 299
 300        return acl;
 301}
 302
 303static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
 304                                  struct dentry *realdentry)
 305{
 306        if (OVL_TYPE_UPPER(type))
 307                return false;
 308
 309        if (special_file(realdentry->d_inode->i_mode))
 310                return false;
 311
 312        if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
 313                return false;
 314
 315        return true;
 316}
 317
 318int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
 319{
 320        int err = 0;
 321        struct path realpath;
 322        enum ovl_path_type type;
 323
 324        type = ovl_path_real(dentry, &realpath);
 325        if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
 326                err = ovl_want_write(dentry);
 327                if (!err) {
 328                        err = ovl_copy_up_flags(dentry, file_flags);
 329                        ovl_drop_write(dentry);
 330                }
 331        }
 332
 333        return err;
 334}
 335
 336int ovl_update_time(struct inode *inode, struct timespec *ts, int flags)
 337{
 338        struct dentry *alias;
 339        struct path upperpath;
 340
 341        if (!(flags & S_ATIME))
 342                return 0;
 343
 344        alias = d_find_any_alias(inode);
 345        if (!alias)
 346                return 0;
 347
 348        ovl_path_upper(alias, &upperpath);
 349        if (upperpath.dentry) {
 350                touch_atime(&upperpath);
 351                inode->i_atime = d_inode(upperpath.dentry)->i_atime;
 352        }
 353
 354        dput(alias);
 355
 356        return 0;
 357}
 358
 359static const struct inode_operations ovl_file_inode_operations = {
 360        .setattr        = ovl_setattr,
 361        .permission     = ovl_permission,
 362        .getattr        = ovl_getattr,
 363        .listxattr      = ovl_listxattr,
 364        .get_acl        = ovl_get_acl,
 365        .update_time    = ovl_update_time,
 366};
 367
 368static const struct inode_operations ovl_symlink_inode_operations = {
 369        .setattr        = ovl_setattr,
 370        .get_link       = ovl_get_link,
 371        .getattr        = ovl_getattr,
 372        .listxattr      = ovl_listxattr,
 373        .update_time    = ovl_update_time,
 374};
 375
 376/*
 377 * It is possible to stack overlayfs instance on top of another
 378 * overlayfs instance as lower layer. We need to annonate the
 379 * stackable i_mutex locks according to stack level of the super
 380 * block instance. An overlayfs instance can never be in stack
 381 * depth 0 (there is always a real fs below it).  An overlayfs
 382 * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth].
 383 *
 384 * For example, here is a snip from /proc/lockdep_chains after
 385 * dir_iterate of nested overlayfs:
 386 *
 387 * [...] &ovl_i_mutex_dir_key[depth]   (stack_depth=2)
 388 * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
 389 * [...] &type->i_mutex_dir_key        (stack_depth=0)
 390 */
 391#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH
 392
 393static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
 394{
 395#ifdef CONFIG_LOCKDEP
 396        static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
 397        static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];
 398
 399        int depth = inode->i_sb->s_stack_depth - 1;
 400
 401        if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING))
 402                depth = 0;
 403
 404        if (S_ISDIR(inode->i_mode))
 405                lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
 406        else
 407                lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);
 408#endif
 409}
 410
 411static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
 412{
 413        inode->i_ino = get_next_ino();
 414        inode->i_mode = mode;
 415        inode->i_flags |= S_NOCMTIME;
 416#ifdef CONFIG_FS_POSIX_ACL
 417        inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
 418#endif
 419
 420        ovl_lockdep_annotate_inode_mutex_key(inode);
 421
 422        switch (mode & S_IFMT) {
 423        case S_IFREG:
 424                inode->i_op = &ovl_file_inode_operations;
 425                break;
 426
 427        case S_IFDIR:
 428                inode->i_op = &ovl_dir_inode_operations;
 429                inode->i_fop = &ovl_dir_operations;
 430                break;
 431
 432        case S_IFLNK:
 433                inode->i_op = &ovl_symlink_inode_operations;
 434                break;
 435
 436        default:
 437                inode->i_op = &ovl_file_inode_operations;
 438                init_special_inode(inode, mode, rdev);
 439                break;
 440        }
 441}
 442
 443struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
 444{
 445        struct inode *inode;
 446
 447        inode = new_inode(sb);
 448        if (inode)
 449                ovl_fill_inode(inode, mode, rdev);
 450
 451        return inode;
 452}
 453
 454static int ovl_inode_test(struct inode *inode, void *data)
 455{
 456        return ovl_inode_real(inode, NULL) == data;
 457}
 458
 459static int ovl_inode_set(struct inode *inode, void *data)
 460{
 461        inode->i_private = (void *) (((unsigned long) data) | OVL_ISUPPER_MASK);
 462        return 0;
 463}
 464
 465struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode)
 466
 467{
 468        struct inode *inode;
 469
 470        inode = iget5_locked(sb, (unsigned long) realinode,
 471                             ovl_inode_test, ovl_inode_set, realinode);
 472        if (inode && inode->i_state & I_NEW) {
 473                ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
 474                set_nlink(inode, realinode->i_nlink);
 475                unlock_new_inode(inode);
 476        }
 477
 478        return inode;
 479}
 480