linux/fs/xfs/xfs_iops.c
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_acl.h"
#include "xfs_quota.h"
#include "xfs_error.h"
#include "xfs_attr.h"
#include "xfs_trans.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_trans_space.h"
#include "xfs_pnfs.h"

#include <linux/capability.h>
#include <linux/xattr.h>
#include <linux/namei.h>
#include <linux/posix_acl.h>
#include <linux/security.h>
#include <linux/fiemap.h>
#include <linux/slab.h>

/*
 * Directories have different lock order w.r.t. mmap_sem compared to regular
 * files. This is due to readdir potentially triggering page faults on a user
 * buffer inside filldir(), and this happens with the ilock on the directory
 * held. For regular files, the lock order is the other way around - the
 * mmap_sem is taken during the page fault, and then we lock the ilock to do
 * block mapping. Hence we need a different class for the directory ilock so
 * that lockdep can tell them apart.
 */
static struct lock_class_key xfs_nondir_ilock_class;
static struct lock_class_key xfs_dir_ilock_class;

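/*
 * Callback for security_inode_init_security(): store each security xattr
 * supplied by the LSM on the new inode using ATTR_SECURE.
 */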
static int
xfs_initxattrs(
        struct inode            *inode,
        const struct xattr      *xattr_array,
        void                    *fs_info)
{
        const struct xattr      *xattr;
        struct xfs_inode        *ip = XFS_I(inode);
        int                     error = 0;

        for (xattr = xattr_array; xattr->name != NULL; xattr++) {
                error = xfs_attr_set(ip, xattr->name, xattr->value,
                                      xattr->value_len, ATTR_SECURE);
                if (error < 0)
                        break;
        }
        return error;
}

/*
 * Hook in SELinux.  This is not quite correct yet; what we really need
 * here (as we do for default ACLs) is a mechanism by which creation of
 * these attrs can be journalled at inode creation time (along with the
 * inode, of course, such that log replay can't cause these to be lost).
 */

STATIC int
xfs_init_security(
        struct inode    *inode,
        struct inode    *dir,
        const struct qstr *qstr)
{
        return security_inode_init_security(inode, dir, qstr,
                                             &xfs_initxattrs, NULL);
}

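/*
 * Translate a VFS dentry (and file mode) into the name/length/type tuple
 * used by the XFS directory code.
 */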
static void
xfs_dentry_to_name(
        struct xfs_name *namep,
        struct dentry   *dentry,
        int             mode)
{
        namep->name = dentry->d_name.name;
        namep->len = dentry->d_name.len;
        namep->type = xfs_mode_to_ftype[(mode & S_IFMT) >> S_SHIFT];
}

STATIC void
xfs_cleanup_inode(
        struct inode    *dir,
        struct inode    *inode,
        struct dentry   *dentry)
{
        struct xfs_name teardown;

        /* Oh, the horror.
         * If we can't add the ACL or we fail in
         * xfs_init_security we must back out.
         * ENOSPC can hit here, among other things.
         */
        xfs_dentry_to_name(&teardown, dentry, 0);

        xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
}

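/*
 * Common code for create, mknod, mkdir and tmpfile: allocate the inode,
 * then apply security attributes and POSIX ACLs before instantiating the
 * dentry.  On failure the half-constructed inode is torn down again.
 */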
STATIC int
xfs_generic_create(
        struct inode    *dir,
        struct dentry   *dentry,
        umode_t         mode,
        dev_t           rdev,
        bool            tmpfile)        /* unnamed file */
{
        struct inode    *inode;
        struct xfs_inode *ip = NULL;
        struct posix_acl *default_acl, *acl;
        struct xfs_name name;
        int             error;

        /*
         * Irix uses Missed'em'V split, but doesn't want to see
         * the upper 5 bits of (14bit) major.
         */
        if (S_ISCHR(mode) || S_ISBLK(mode)) {
                if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
                        return -EINVAL;
                rdev = sysv_encode_dev(rdev);
        } else {
                rdev = 0;
        }

        error = posix_acl_create(dir, &mode, &default_acl, &acl);
        if (error)
                return error;

        if (!tmpfile) {
                xfs_dentry_to_name(&name, dentry, mode);
                error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
        } else {
                error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
        }
        if (unlikely(error))
                goto out_free_acl;

        inode = VFS_I(ip);

        error = xfs_init_security(inode, dir, &dentry->d_name);
        if (unlikely(error))
                goto out_cleanup_inode;

#ifdef CONFIG_XFS_POSIX_ACL
        if (default_acl) {
                error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
                if (error)
                        goto out_cleanup_inode;
        }
        if (acl) {
                error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
                if (error)
                        goto out_cleanup_inode;
        }
#endif

        if (tmpfile)
                d_tmpfile(dentry, inode);
        else
                d_instantiate(dentry, inode);

 out_free_acl:
        if (default_acl)
                posix_acl_release(default_acl);
        if (acl)
                posix_acl_release(acl);
        return error;

 out_cleanup_inode:
        if (!tmpfile)
                xfs_cleanup_inode(dir, inode, dentry);
        iput(inode);
        goto out_free_acl;
}

STATIC int
xfs_vn_mknod(
        struct inode    *dir,
        struct dentry   *dentry,
        umode_t         mode,
        dev_t           rdev)
{
        return xfs_generic_create(dir, dentry, mode, rdev, false);
}

STATIC int
xfs_vn_create(
        struct inode    *dir,
        struct dentry   *dentry,
        umode_t         mode,
        bool            flags)
{
        return xfs_vn_mknod(dir, dentry, mode, 0);
}

STATIC int
xfs_vn_mkdir(
        struct inode    *dir,
        struct dentry   *dentry,
        umode_t         mode)
{
        return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
}

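/*
 * Look up an entry in a case-sensitive directory.  A missing entry is
 * returned as a negative dentry rather than as an error.
 */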
STATIC struct dentry *
xfs_vn_lookup(
        struct inode    *dir,
        struct dentry   *dentry,
        unsigned int flags)
{
        struct xfs_inode *cip;
        struct xfs_name name;
        int             error;

        if (dentry->d_name.len >= MAXNAMELEN)
                return ERR_PTR(-ENAMETOOLONG);

        xfs_dentry_to_name(&name, dentry, 0);
        error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
        if (unlikely(error)) {
                if (unlikely(error != -ENOENT))
                        return ERR_PTR(error);
                d_add(dentry, NULL);
                return NULL;
        }

        return d_splice_alias(VFS_I(cip), dentry);
}

STATIC struct dentry *
xfs_vn_ci_lookup(
        struct inode    *dir,
        struct dentry   *dentry,
        unsigned int flags)
{
        struct xfs_inode *ip;
        struct xfs_name xname;
        struct xfs_name ci_name;
        struct qstr     dname;
        int             error;

        if (dentry->d_name.len >= MAXNAMELEN)
                return ERR_PTR(-ENAMETOOLONG);

        xfs_dentry_to_name(&xname, dentry, 0);
        error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
        if (unlikely(error)) {
                if (unlikely(error != -ENOENT))
                        return ERR_PTR(error);
                /*
                 * call d_add(dentry, NULL) here when d_drop_negative_children
                 * is called in xfs_vn_mknod (ie. allow negative dentries
                 * with CI filesystems).
                 */
                return NULL;
        }

        /* if exact match, just splice and exit */
        if (!ci_name.name)
                return d_splice_alias(VFS_I(ip), dentry);

        /* else case-insensitive match... */
        dname.name = ci_name.name;
        dname.len = ci_name.len;
        dentry = d_add_ci(dentry, VFS_I(ip), &dname);
        kmem_free(ci_name.name);
        return dentry;
}

STATIC int
xfs_vn_link(
        struct dentry   *old_dentry,
        struct inode    *dir,
        struct dentry   *dentry)
{
        struct inode    *inode = old_dentry->d_inode;
        struct xfs_name name;
        int             error;

        xfs_dentry_to_name(&name, dentry, inode->i_mode);

        error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
        if (unlikely(error))
                return error;

        ihold(inode);
        d_instantiate(dentry, inode);
        return 0;
}

STATIC int
xfs_vn_unlink(
        struct inode    *dir,
        struct dentry   *dentry)
{
        struct xfs_name name;
        int             error;

        xfs_dentry_to_name(&name, dentry, 0);

        error = xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
        if (error)
                return error;

        /*
         * With unlink, the VFS makes the dentry "negative": no inode,
         * but still hashed. This is incompatible with case-insensitive
         * mode, so invalidate (unhash) the dentry in CI-mode.
         */
        if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
                d_invalidate(dentry);
        return 0;
}

STATIC int
xfs_vn_symlink(
        struct inode    *dir,
        struct dentry   *dentry,
        const char      *symname)
{
        struct inode    *inode;
        struct xfs_inode *cip = NULL;
        struct xfs_name name;
        int             error;
        umode_t         mode;

        mode = S_IFLNK |
                (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
        xfs_dentry_to_name(&name, dentry, mode);

        error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
        if (unlikely(error))
                goto out;

        inode = VFS_I(cip);

        error = xfs_init_security(inode, dir, &dentry->d_name);
        if (unlikely(error))
                goto out_cleanup_inode;

        d_instantiate(dentry, inode);
        return 0;

 out_cleanup_inode:
        xfs_cleanup_inode(dir, inode, dentry);
        iput(inode);
 out:
        return error;
}

STATIC int
xfs_vn_rename(
        struct inode    *odir,
        struct dentry   *odentry,
        struct inode    *ndir,
        struct dentry   *ndentry,
        unsigned int    flags)
{
        struct inode    *new_inode = ndentry->d_inode;
        int             omode = 0;
        struct xfs_name oname;
        struct xfs_name nname;

        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
                return -EINVAL;

        /* if we are exchanging files, we need to set i_mode of both files */
        if (flags & RENAME_EXCHANGE)
                omode = ndentry->d_inode->i_mode;

        xfs_dentry_to_name(&oname, odentry, omode);
        xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode);

        return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
                          XFS_I(ndir), &nname,
                          new_inode ? XFS_I(new_inode) : NULL, flags);
}

/*
 * Careful here - this function can get called recursively, so
 * we need to be very careful about how much stack we use.
 * The link buffer is kmalloc'd for this reason...
 */
STATIC void *
xfs_vn_follow_link(
        struct dentry           *dentry,
        struct nameidata        *nd)
{
        char                    *link;
        int                     error = -ENOMEM;

        link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
        if (!link)
                goto out_err;

        error = xfs_readlink(XFS_I(dentry->d_inode), link);
        if (unlikely(error))
                goto out_kfree;

        nd_set_link(nd, link);
        return NULL;

 out_kfree:
        kfree(link);
 out_err:
        nd_set_link(nd, ERR_PTR(error));
        return NULL;
}

STATIC int
xfs_vn_getattr(
        struct vfsmount         *mnt,
        struct dentry           *dentry,
        struct kstat            *stat)
{
        struct inode            *inode = dentry->d_inode;
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;

        trace_xfs_getattr(ip);

        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;

        stat->size = XFS_ISIZE(ip);
        stat->dev = inode->i_sb->s_dev;
        stat->mode = ip->i_d.di_mode;
        stat->nlink = ip->i_d.di_nlink;
        stat->uid = inode->i_uid;
        stat->gid = inode->i_gid;
        stat->ino = ip->i_ino;
        stat->atime = inode->i_atime;
        stat->mtime = inode->i_mtime;
        stat->ctime = inode->i_ctime;
        stat->blocks =
                XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);


        switch (inode->i_mode & S_IFMT) {
        case S_IFBLK:
        case S_IFCHR:
                stat->blksize = BLKDEV_IOSIZE;
                stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
                                   sysv_minor(ip->i_df.if_u2.if_rdev));
                break;
        default:
                if (XFS_IS_REALTIME_INODE(ip)) {
                        /*
                         * If the file blocks are being allocated from a
                         * realtime volume, then return the inode's realtime
                         * extent size or the realtime volume's extent size.
                         */
                        stat->blksize =
                                xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
                } else
                        stat->blksize = xfs_preferred_iosize(mp);
                stat->rdev = 0;
                break;
        }

        return 0;
}

static void
xfs_setattr_mode(
        struct xfs_inode        *ip,
        struct iattr            *iattr)
{
        struct inode            *inode = VFS_I(ip);
        umode_t                 mode = iattr->ia_mode;

        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

        ip->i_d.di_mode &= S_IFMT;
        ip->i_d.di_mode |= mode & ~S_IFMT;

        inode->i_mode &= S_IFMT;
        inode->i_mode |= mode & ~S_IFMT;
}

void
xfs_setattr_time(
        struct xfs_inode        *ip,
        struct iattr            *iattr)
{
        struct inode            *inode = VFS_I(ip);

        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

        if (iattr->ia_valid & ATTR_ATIME) {
                inode->i_atime = iattr->ia_atime;
                ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
                ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
        }
        if (iattr->ia_valid & ATTR_CTIME) {
                inode->i_ctime = iattr->ia_ctime;
                ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
                ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
        }
        if (iattr->ia_valid & ATTR_MTIME) {
                inode->i_mtime = iattr->ia_mtime;
                ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
                ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
        }
}

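/*
 * Set attributes other than the file size: ownership, mode and timestamps.
 * Quota reservations are only taken when the uid or gid actually changes.
 */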
int
xfs_setattr_nonsize(
        struct xfs_inode        *ip,
        struct iattr            *iattr,
        int                     flags)
{
        xfs_mount_t             *mp = ip->i_mount;
        struct inode            *inode = VFS_I(ip);
        int                     mask = iattr->ia_valid;
        xfs_trans_t             *tp;
        int                     error;
        kuid_t                  uid = GLOBAL_ROOT_UID, iuid = GLOBAL_ROOT_UID;
        kgid_t                  gid = GLOBAL_ROOT_GID, igid = GLOBAL_ROOT_GID;
        struct xfs_dquot        *udqp = NULL, *gdqp = NULL;
        struct xfs_dquot        *olddquot1 = NULL, *olddquot2 = NULL;

        trace_xfs_setattr(ip);

        /* If acls are being inherited, we already have this checked */
        if (!(flags & XFS_ATTR_NOACL)) {
                if (mp->m_flags & XFS_MOUNT_RDONLY)
                        return -EROFS;

                if (XFS_FORCED_SHUTDOWN(mp))
                        return -EIO;

                error = inode_change_ok(inode, iattr);
                if (error)
                        return error;
        }

        ASSERT((mask & ATTR_SIZE) == 0);

        /*
         * If disk quotas are on, we make sure that the dquots do exist on
         * disk before we start any other transactions. Trying to do this
         * later is messy. We don't care to take a readlock to look at the
         * IDs in the inode here, because we can't hold it across the
         * trans_reserve. If the IDs do change before we take the ilock,
         * we're covered because the i_*dquot fields will get updated anyway.
         */
        if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
                uint    qflags = 0;

                if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
                        uid = iattr->ia_uid;
                        qflags |= XFS_QMOPT_UQUOTA;
                } else {
                        uid = inode->i_uid;
                }
                if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
                        gid = iattr->ia_gid;
                        qflags |= XFS_QMOPT_GQUOTA;
                }  else {
                        gid = inode->i_gid;
                }

                /*
                 * We take a reference when we initialize udqp and gdqp,
                 * so it is important that we never blindly double trip on
                 * the same variable. See xfs_create() for an example.
                 */
                ASSERT(udqp == NULL);
                ASSERT(gdqp == NULL);
                error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid),
                                           xfs_kgid_to_gid(gid),
                                           xfs_get_projid(ip),
                                           qflags, &udqp, &gdqp, NULL);
                if (error)
                        return error;
        }

        tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
        if (error)
                goto out_dqrele;

        xfs_ilock(ip, XFS_ILOCK_EXCL);

        /*
         * Change file ownership.  Must be the owner or privileged.
         */
        if (mask & (ATTR_UID|ATTR_GID)) {
                /*
                 * These IDs could have changed since we last looked at them.
                 * But, we're assured that if the ownership did change
                 * while we didn't have the inode locked, inode's dquot(s)
                 * would have changed also.
                 */
                iuid = inode->i_uid;
                igid = inode->i_gid;
                gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
                uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;

                /*
                 * Do a quota reservation only if uid/gid is actually
                 * going to change.
                 */
                if (XFS_IS_QUOTA_RUNNING(mp) &&
                    ((XFS_IS_UQUOTA_ON(mp) && !uid_eq(iuid, uid)) ||
                     (XFS_IS_GQUOTA_ON(mp) && !gid_eq(igid, gid)))) {
                        ASSERT(tp);
                        error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
                                                NULL, capable(CAP_FOWNER) ?
                                                XFS_QMOPT_FORCE_RES : 0);
                        if (error)      /* out of quota */
                                goto out_trans_cancel;
                }
        }

        xfs_trans_ijoin(tp, ip, 0);

        /*
         * Change file ownership.  Must be the owner or privileged.
         */
        if (mask & (ATTR_UID|ATTR_GID)) {
                /*
                 * CAP_FSETID overrides the following restrictions:
                 *
                 * The set-user-ID and set-group-ID bits of a file will be
                 * cleared upon successful return from chown()
                 */
                if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
                    !capable(CAP_FSETID))
                        ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);

                /*
                 * Change the ownerships and register quota modifications
                 * in the transaction.
                 */
                if (!uid_eq(iuid, uid)) {
                        if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
                                ASSERT(mask & ATTR_UID);
                                ASSERT(udqp);
                                olddquot1 = xfs_qm_vop_chown(tp, ip,
                                                        &ip->i_udquot, udqp);
                        }
                        ip->i_d.di_uid = xfs_kuid_to_uid(uid);
                        inode->i_uid = uid;
                }
                if (!gid_eq(igid, gid)) {
                        if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
                                ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) ||
                                       !XFS_IS_PQUOTA_ON(mp));
                                ASSERT(mask & ATTR_GID);
                                ASSERT(gdqp);
                                olddquot2 = xfs_qm_vop_chown(tp, ip,
                                                        &ip->i_gdquot, gdqp);
                        }
                        ip->i_d.di_gid = xfs_kgid_to_gid(gid);
                        inode->i_gid = gid;
                }
        }

        if (mask & ATTR_MODE)
                xfs_setattr_mode(ip, iattr);
        if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
                xfs_setattr_time(ip, iattr);

        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

        XFS_STATS_INC(xs_ig_attrchg);

        if (mp->m_flags & XFS_MOUNT_WSYNC)
                xfs_trans_set_sync(tp);
        error = xfs_trans_commit(tp, 0);

        xfs_iunlock(ip, XFS_ILOCK_EXCL);

        /*
         * Release any dquot(s) the inode had kept before chown.
         */
        xfs_qm_dqrele(olddquot1);
        xfs_qm_dqrele(olddquot2);
        xfs_qm_dqrele(udqp);
        xfs_qm_dqrele(gdqp);

        if (error)
                return error;

        /*
         * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
         *           update.  We could avoid this with linked transactions
         *           and passing down the transaction pointer all the way
         *           to attr_set.  No previous user of the generic
         *           Posix ACL code seems to care about this issue either.
         */
        if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
                error = posix_acl_chmod(inode, inode->i_mode);
                if (error)
                        return error;
        }

        return 0;

out_trans_cancel:
        xfs_trans_cancel(tp, 0);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
out_dqrele:
        xfs_qm_dqrele(udqp);
        xfs_qm_dqrele(gdqp);
        return error;
}

/*
 * Truncate file.  Must have write permission and not be a directory.
 */
int
xfs_setattr_size(
        struct xfs_inode        *ip,
        struct iattr            *iattr)
{
        struct xfs_mount        *mp = ip->i_mount;
        struct inode            *inode = VFS_I(ip);
        xfs_off_t               oldsize, newsize;
        struct xfs_trans        *tp;
        int                     error;
        uint                    lock_flags = 0;
        uint                    commit_flags = 0;
        bool                    did_zeroing = false;

        trace_xfs_setattr(ip);

        if (mp->m_flags & XFS_MOUNT_RDONLY)
                return -EROFS;

        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;

        error = inode_change_ok(inode, iattr);
        if (error)
                return error;

        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
        ASSERT(S_ISREG(ip->i_d.di_mode));
        ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
                ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);

        oldsize = inode->i_size;
        newsize = iattr->ia_size;

        /*
         * Short circuit the truncate case for zero length files.
         */
        if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
                if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME)))
                        return 0;

                /*
                 * Use the regular setattr path to update the timestamps.
                 */
                iattr->ia_valid &= ~ATTR_SIZE;
                return xfs_setattr_nonsize(ip, iattr, 0);
        }

        /*
         * Make sure that the dquots are attached to the inode.
         */
        error = xfs_qm_dqattach(ip, 0);
        if (error)
                return error;

        /*
         * File data changes must be complete before we start the transaction to
         * modify the inode.  This needs to be done before joining the inode to
         * the transaction because the inode cannot be unlocked once it is a
         * part of the transaction.
         *
         * Start with zeroing any data block beyond EOF that we may expose on
         * file extension.
         */
        if (newsize > oldsize) {
                error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
                if (error)
                        return error;
        }

        /*
         * We are going to log the inode size change in this transaction so
         * any previous writes that are beyond the on disk EOF and the new
         * EOF that have not been written out need to be written here.  If we
         * do not write the data out, we expose ourselves to the null files
         * problem. Note that this includes any block zeroing we did above;
         * otherwise those blocks may not be zeroed after a crash.
         */
        if (newsize > ip->i_d.di_size &&
            (oldsize != ip->i_d.di_size || did_zeroing)) {
                error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
                                                      ip->i_d.di_size, newsize);
                if (error)
                        return error;
        }

        /* Now wait for all direct I/O to complete. */
        inode_dio_wait(inode);

        /*
         * Do all the page cache truncate work outside the transaction context
         * as the "lock" order is page lock->log space reservation.  i.e.
         * locking pages inside the transaction can ABBA deadlock with
         * writeback. We have to do the VFS inode size update before we truncate
         * the pagecache, however, to avoid racing with page faults beyond the
         * new EOF, as they are not serialised against truncate operations
         * except by page locks and size updates.
         *
         * Hence we are in a situation where a truncate can fail with ENOMEM
         * from xfs_trans_reserve(), but having already truncated the in-memory
         * version of the file (i.e. made user visible changes). There's not
         * much we can do about this, except to hope that the caller sees ENOMEM
         * and retries the truncate operation.
         */
        error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
        if (error)
                return error;
        truncate_setsize(inode, newsize);

        /*
         * The "we can't serialise against page faults" pain gets worse.
         *
         * If the file is mapped then we have to clean the page at the old EOF
         * when extending the file. Extending the file can expose changes to the
         * underlying page mapping (e.g. from beyond EOF to a hole or
         * unwritten), and so on the next attempt to write to that page we need
         * to remap it for write. i.e. we need .page_mkwrite() to be called.
         * Hence we need to clean the page to clean the pte and so a new write
         * fault will be triggered appropriately.
         *
         * If we do it before we change the inode size, then we can race with a
         * page fault that maps the page with exactly the same problem. If we do
         * it after we change the file size, then a new page fault can come in
         * and allocate space before we've run the rest of the truncate
         * transaction. That's kinda grotesque, but it's better than having data
         * over a hole, and so that's the lesser evil that has been chosen here.
         *
         * The real solution, however, is to have some mechanism for locking out
         * page faults while a truncate is in progress.
         */
        if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
                error = filemap_write_and_wait_range(
                                VFS_I(ip)->i_mapping,
                                round_down(oldsize, PAGE_CACHE_SIZE),
                                round_up(oldsize, PAGE_CACHE_SIZE) - 1);
                if (error)
                        return error;
        }

        tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
        if (error)
                goto out_trans_cancel;

        commit_flags = XFS_TRANS_RELEASE_LOG_RES;
        lock_flags |= XFS_ILOCK_EXCL;
        xfs_ilock(ip, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, ip, 0);
        /*
         * Only change the c/mtime if we are changing the size or we are
         * explicitly asked to change it.  This handles the semantic difference
         * between truncate() and ftruncate() as implemented in the VFS.
         *
         * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
         * special case where we need to update the times despite not having
         * these flags set.  For all other operations the VFS sets these flags
         * explicitly if it wants a timestamp update.
         */
        if (newsize != oldsize &&
            !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
                iattr->ia_ctime = iattr->ia_mtime =
                        current_fs_time(inode->i_sb);
                iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
        }

        /*
         * The first thing we do is set the size to new_size permanently on
         * disk.  This way we don't have to worry about anyone ever being able
         * to look at the data being freed even in the face of a crash.
         * What we're getting around here is the case where we free a block, it
         * is allocated to another file, it is written to, and then we crash.
         * If the new data gets written to the file but the log buffers
         * containing the free and reallocation don't, then we'd end up with
         * garbage in the blocks being freed.  As long as we make the new size
         * permanent before actually freeing any blocks it doesn't matter if
         * they get written to.
         */
        ip->i_d.di_size = newsize;
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

        if (newsize <= oldsize) {
                error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
                if (error)
                        goto out_trans_abort;

                /*
                 * Truncated "down", so we're removing references to old data
                 * here - if we delay flushing for a long time, we expose
                 * ourselves unduly to the notorious NULL files problem.  So,
                 * we mark this inode and flush it when the file is closed,
                 * and do not wait the usual (long) time for writeout.
                 */
                xfs_iflags_set(ip, XFS_ITRUNCATED);

                /* A truncate down always removes post-EOF blocks. */
                xfs_inode_clear_eofblocks_tag(ip);
        }

        if (iattr->ia_valid & ATTR_MODE)
                xfs_setattr_mode(ip, iattr);
        if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
                xfs_setattr_time(ip, iattr);

        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

        XFS_STATS_INC(xs_ig_attrchg);

        if (mp->m_flags & XFS_MOUNT_WSYNC)
                xfs_trans_set_sync(tp);

        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
out_unlock:
        if (lock_flags)
                xfs_iunlock(ip, lock_flags);
        return error;

out_trans_abort:
        commit_flags |= XFS_TRANS_ABORT;
out_trans_cancel:
        xfs_trans_cancel(tp, commit_flags);
        goto out_unlock;
}

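/*
 * VFS ->setattr entry point.  Size changes take the IOLOCK and may need to
 * break pNFS layouts before calling xfs_setattr_size(); everything else
 * goes through xfs_setattr_nonsize().
 */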
STATIC int
xfs_vn_setattr(
        struct dentry           *dentry,
        struct iattr            *iattr)
{
        struct xfs_inode        *ip = XFS_I(dentry->d_inode);
        int                     error;

        if (iattr->ia_valid & ATTR_SIZE) {
                uint            iolock = XFS_IOLOCK_EXCL;

                xfs_ilock(ip, iolock);
                error = xfs_break_layouts(dentry->d_inode, &iolock);
                if (!error)
                        error = xfs_setattr_size(ip, iattr);
                xfs_iunlock(ip, iolock);
        } else {
                error = xfs_setattr_nonsize(ip, iattr, 0);
        }

        return error;
}

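/*
 * VFS ->update_time: copy the requested timestamps into the XFS inode and
 * log the change in its own transaction.
 */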
STATIC int
xfs_vn_update_time(
        struct inode            *inode,
        struct timespec         *now,
        int                     flags)
{
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_trans        *tp;
        int                     error;

        trace_xfs_update_time(ip);

        tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
        if (error) {
                xfs_trans_cancel(tp, 0);
                return error;
        }

        xfs_ilock(ip, XFS_ILOCK_EXCL);
        if (flags & S_CTIME) {
                inode->i_ctime = *now;
                ip->i_d.di_ctime.t_sec = (__int32_t)now->tv_sec;
                ip->i_d.di_ctime.t_nsec = (__int32_t)now->tv_nsec;
        }
        if (flags & S_MTIME) {
                inode->i_mtime = *now;
                ip->i_d.di_mtime.t_sec = (__int32_t)now->tv_sec;
                ip->i_d.di_mtime.t_nsec = (__int32_t)now->tv_nsec;
        }
        if (flags & S_ATIME) {
                inode->i_atime = *now;
                ip->i_d.di_atime.t_sec = (__int32_t)now->tv_sec;
                ip->i_d.di_atime.t_nsec = (__int32_t)now->tv_nsec;
        }
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
        return xfs_trans_commit(tp, 0);
}

#define XFS_FIEMAP_FLAGS        (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)

/*
 * Call fiemap helper to fill in user data.
 * Returns positive errors to xfs_getbmap.
 */
STATIC int
xfs_fiemap_format(
        void                    **arg,
        struct getbmapx         *bmv,
        int                     *full)
{
        int                     error;
        struct fiemap_extent_info *fieinfo = *arg;
        u32                     fiemap_flags = 0;
        u64                     logical, physical, length;

        /* Do nothing for a hole */
        if (bmv->bmv_block == -1LL)
                return 0;

        logical = BBTOB(bmv->bmv_offset);
        physical = BBTOB(bmv->bmv_block);
        length = BBTOB(bmv->bmv_length);

        if (bmv->bmv_oflags & BMV_OF_PREALLOC)
                fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
        else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
                fiemap_flags |= (FIEMAP_EXTENT_DELALLOC |
                                 FIEMAP_EXTENT_UNKNOWN);
                physical = 0;   /* no block yet */
        }
        if (bmv->bmv_oflags & BMV_OF_LAST)
                fiemap_flags |= FIEMAP_EXTENT_LAST;

        error = fiemap_fill_next_extent(fieinfo, logical, physical,
                                        length, fiemap_flags);
        if (error > 0) {
                error = 0;
                *full = 1;      /* user array now full */
        }

        return error;
}

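/*
 * VFS ->fiemap: translate the request into a getbmapx query and let
 * xfs_fiemap_format() copy each extent out to the fiemap buffer.
 */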
STATIC int
xfs_vn_fiemap(
        struct inode            *inode,
        struct fiemap_extent_info *fieinfo,
        u64                     start,
        u64                     length)
{
        xfs_inode_t             *ip = XFS_I(inode);
        struct getbmapx         bm;
        int                     error;

        error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
        if (error)
                return error;

        /* Set up bmap header for xfs internal routine */
        bm.bmv_offset = BTOBBT(start);
        /* Special case for whole file */
        if (length == FIEMAP_MAX_OFFSET)
                bm.bmv_length = -1LL;
        else
                bm.bmv_length = BTOBB(start + length) - bm.bmv_offset;

        /* We add one because in getbmap world count includes the header */
        bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
                                        fieinfo->fi_extents_max + 1;
        bm.bmv_count = min_t(__s32, bm.bmv_count,
                             (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
        bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
        if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
                bm.bmv_iflags |= BMV_IF_ATTRFORK;
        if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
                bm.bmv_iflags |= BMV_IF_DELALLOC;

        error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
        if (error)
                return error;

        return 0;
}

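/* Create an unnamed (tmpfile) inode. */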
STATIC int
xfs_vn_tmpfile(
        struct inode    *dir,
        struct dentry   *dentry,
        umode_t         mode)
{
        return xfs_generic_create(dir, dentry, mode, 0, true);
}

static const struct inode_operations xfs_inode_operations = {
        .get_acl                = xfs_get_acl,
        .set_acl                = xfs_set_acl,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
        .setxattr               = generic_setxattr,
        .getxattr               = generic_getxattr,
        .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .fiemap                 = xfs_vn_fiemap,
        .update_time            = xfs_vn_update_time,
};

static const struct inode_operations xfs_dir_inode_operations = {
        .create                 = xfs_vn_create,
        .lookup                 = xfs_vn_lookup,
        .link                   = xfs_vn_link,
        .unlink                 = xfs_vn_unlink,
        .symlink                = xfs_vn_symlink,
        .mkdir                  = xfs_vn_mkdir,
        /*
         * Yes, XFS uses the same method for rmdir and unlink.
         *
         * There are some subtle differences deeper in the code,
         * but we use S_ISDIR to check for those.
         */
        .rmdir                  = xfs_vn_unlink,
        .mknod                  = xfs_vn_mknod,
        .rename2                = xfs_vn_rename,
        .get_acl                = xfs_get_acl,
        .set_acl                = xfs_set_acl,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
        .setxattr               = generic_setxattr,
        .getxattr               = generic_getxattr,
        .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .update_time            = xfs_vn_update_time,
        .tmpfile                = xfs_vn_tmpfile,
};

static const struct inode_operations xfs_dir_ci_inode_operations = {
        .create                 = xfs_vn_create,
        .lookup                 = xfs_vn_ci_lookup,
        .link                   = xfs_vn_link,
        .unlink                 = xfs_vn_unlink,
        .symlink                = xfs_vn_symlink,
        .mkdir                  = xfs_vn_mkdir,
        /*
         * Yes, XFS uses the same method for rmdir and unlink.
         *
         * There are some subtle differences deeper in the code,
         * but we use S_ISDIR to check for those.
         */
        .rmdir                  = xfs_vn_unlink,
        .mknod                  = xfs_vn_mknod,
        .rename2                = xfs_vn_rename,
        .get_acl                = xfs_get_acl,
        .set_acl                = xfs_set_acl,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
        .setxattr               = generic_setxattr,
        .getxattr               = generic_getxattr,
        .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .update_time            = xfs_vn_update_time,
        .tmpfile                = xfs_vn_tmpfile,
};

static const struct inode_operations xfs_symlink_inode_operations = {
        .readlink               = generic_readlink,
        .follow_link            = xfs_vn_follow_link,
        .put_link               = kfree_put_link,
        .getattr                = xfs_vn_getattr,
        .setattr                = xfs_vn_setattr,
        .setxattr               = generic_setxattr,
        .getxattr               = generic_getxattr,
        .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
        .update_time            = xfs_vn_update_time,
};

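/*
 * Propagate the on-disk inode flags (immutable, append-only, sync, noatime)
 * to the corresponding VFS inode flags.
 */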
STATIC void
xfs_diflags_to_iflags(
        struct inode            *inode,
        struct xfs_inode        *ip)
{
        if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
                inode->i_flags |= S_IMMUTABLE;
        else
                inode->i_flags &= ~S_IMMUTABLE;
        if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
                inode->i_flags |= S_APPEND;
        else
                inode->i_flags &= ~S_APPEND;
        if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
                inode->i_flags |= S_SYNC;
        else
                inode->i_flags &= ~S_SYNC;
        if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
                inode->i_flags |= S_NOATIME;
        else
                inode->i_flags &= ~S_NOATIME;
}

/*
 * Initialize the Linux inode, set up the operation vectors and
 * unlock the inode.
 *
 * When reading existing inodes from disk this is called directly
 * from xfs_iget, when creating a new inode it is called from
 * xfs_ialloc after setting up the inode.
 *
 * We are always called with an uninitialised linux inode here.
 * We need to initialise the necessary fields and take a reference
 * on it.
 */
void
xfs_setup_inode(
        struct xfs_inode        *ip)
{
        struct inode            *inode = &ip->i_vnode;
        gfp_t                   gfp_mask;

        inode->i_ino = ip->i_ino;
        inode->i_state = I_NEW;

        inode_sb_list_add(inode);
        /* make the inode look hashed for the writeback code */
        hlist_add_fake(&inode->i_hash);

        inode->i_mode   = ip->i_d.di_mode;
        set_nlink(inode, ip->i_d.di_nlink);
        inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
        inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);

        switch (inode->i_mode & S_IFMT) {
        case S_IFBLK:
        case S_IFCHR:
                inode->i_rdev =
                        MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
                              sysv_minor(ip->i_df.if_u2.if_rdev));
                break;
        default:
                inode->i_rdev = 0;
                break;
        }

        inode->i_generation = ip->i_d.di_gen;
        i_size_write(inode, ip->i_d.di_size);
        inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
        inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
        inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
        inode->i_mtime.tv_nsec  = ip->i_d.di_mtime.t_nsec;
        inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
        inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
        xfs_diflags_to_iflags(inode, ip);

        ip->d_ops = ip->i_mount->m_nondir_inode_ops;
        lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
        switch (inode->i_mode & S_IFMT) {
        case S_IFREG:
                inode->i_op = &xfs_inode_operations;
                inode->i_fop = &xfs_file_operations;
                inode->i_mapping->a_ops = &xfs_address_space_operations;
                break;
        case S_IFDIR:
                lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
                if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
                        inode->i_op = &xfs_dir_ci_inode_operations;
                else
                        inode->i_op = &xfs_dir_inode_operations;
                inode->i_fop = &xfs_dir_file_operations;
                ip->d_ops = ip->i_mount->m_dir_inode_ops;
                break;
        case S_IFLNK:
                inode->i_op = &xfs_symlink_inode_operations;
                if (!(ip->i_df.if_flags & XFS_IFINLINE))
                        inode->i_mapping->a_ops = &xfs_address_space_operations;
                break;
        default:
                inode->i_op = &xfs_inode_operations;
                init_special_inode(inode, inode->i_mode, inode->i_rdev);
                break;
        }

        /*
         * Ensure all page cache allocations are done from GFP_NOFS context to
         * prevent direct reclaim recursion back into the filesystem and blowing
         * stacks or deadlocking.
         */
        gfp_mask = mapping_gfp_mask(inode->i_mapping);
        mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS)));

        /*
         * If there is no attribute fork no ACL can exist on this inode,
         * and it can't have any file capabilities attached to it either.
         */
        if (!XFS_IFORK_Q(ip)) {
                inode_has_no_xattr(inode);
                cache_no_acl(inode);
        }

        xfs_iflags_clear(ip, XFS_INEW);
        barrier();

        unlock_new_inode(inode);
}