linux/fs/xfs/libxfs/xfs_inode_buf.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_mount.h"
  13#include "xfs_defer.h"
  14#include "xfs_inode.h"
  15#include "xfs_errortag.h"
  16#include "xfs_error.h"
  17#include "xfs_cksum.h"
  18#include "xfs_icache.h"
  19#include "xfs_trans.h"
  20#include "xfs_ialloc.h"
  21#include "xfs_dir2.h"
  22
  23#include <linux/iversion.h>
  24
  25/*
  26 * Check that none of the inode's in the buffer have a next
  27 * unlinked field of 0.
  28 */
  29#if defined(DEBUG)
  30void
  31xfs_inobp_check(
  32        xfs_mount_t     *mp,
  33        xfs_buf_t       *bp)
  34{
  35        int             i;
  36        int             j;
  37        xfs_dinode_t    *dip;
  38
  39        j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
  40
  41        for (i = 0; i < j; i++) {
  42                dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
  43                if (!dip->di_next_unlinked)  {
  44                        xfs_alert(mp,
  45        "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
  46                                i, (long long)bp->b_bn);
  47                }
  48        }
  49}
  50#endif
  51
  52bool
  53xfs_dinode_good_version(
  54        struct xfs_mount *mp,
  55        __u8            version)
  56{
  57        if (xfs_sb_version_hascrc(&mp->m_sb))
  58                return version == 3;
  59
  60        return version == 1 || version == 2;
  61}
  62
  63/*
  64 * If we are doing readahead on an inode buffer, we might be in log recovery
  65 * reading an inode allocation buffer that hasn't yet been replayed, and hence
  66 * has not had the inode cores stamped into it. Hence for readahead, the buffer
  67 * may be potentially invalid.
  68 *
  69 * If the readahead buffer is invalid, we need to mark it with an error and
  70 * clear the DONE status of the buffer so that a followup read will re-read it
  71 * from disk. We don't report the error otherwise to avoid warnings during log
  72 * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
  73 * because all we want to do is say readahead failed; there is no-one to report
  74 * the error to, so this will distinguish it from a non-ra verifier failure.
  75 * Changes to this readahead error behavour also need to be reflected in
  76 * xfs_dquot_buf_readahead_verify().
  77 */
  78static void
  79xfs_inode_buf_verify(
  80        struct xfs_buf  *bp,
  81        bool            readahead)
  82{
  83        struct xfs_mount *mp = bp->b_target->bt_mount;
  84        xfs_agnumber_t  agno;
  85        int             i;
  86        int             ni;
  87
  88        /*
  89         * Validate the magic number and version of every inode in the buffer
  90         */
  91        agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
  92        ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
  93        for (i = 0; i < ni; i++) {
  94                int             di_ok;
  95                xfs_dinode_t    *dip;
  96                xfs_agino_t     unlinked_ino;
  97
  98                dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
  99                unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
 100                di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
 101                        xfs_dinode_good_version(mp, dip->di_version) &&
 102                        (unlinked_ino == NULLAGINO ||
 103                         xfs_verify_agino(mp, agno, unlinked_ino));
 104                if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
 105                                                XFS_ERRTAG_ITOBP_INOTOBP))) {
 106                        if (readahead) {
 107                                bp->b_flags &= ~XBF_DONE;
 108                                xfs_buf_ioerror(bp, -EIO);
 109                                return;
 110                        }
 111
 112#ifdef DEBUG
 113                        xfs_alert(mp,
 114                                "bad inode magic/vsn daddr %lld #%d (magic=%x)",
 115                                (unsigned long long)bp->b_bn, i,
 116                                be16_to_cpu(dip->di_magic));
 117#endif
 118                        xfs_buf_verifier_error(bp, -EFSCORRUPTED,
 119                                        __func__, dip, sizeof(*dip),
 120                                        NULL);
 121                        return;
 122                }
 123        }
 124}
 125
 126
 127static void
 128xfs_inode_buf_read_verify(
 129        struct xfs_buf  *bp)
 130{
 131        xfs_inode_buf_verify(bp, false);
 132}
 133
 134static void
 135xfs_inode_buf_readahead_verify(
 136        struct xfs_buf  *bp)
 137{
 138        xfs_inode_buf_verify(bp, true);
 139}
 140
 141static void
 142xfs_inode_buf_write_verify(
 143        struct xfs_buf  *bp)
 144{
 145        xfs_inode_buf_verify(bp, false);
 146}
 147
 148const struct xfs_buf_ops xfs_inode_buf_ops = {
 149        .name = "xfs_inode",
 150        .verify_read = xfs_inode_buf_read_verify,
 151        .verify_write = xfs_inode_buf_write_verify,
 152};
 153
 154const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
 155        .name = "xxfs_inode_ra",
 156        .verify_read = xfs_inode_buf_readahead_verify,
 157        .verify_write = xfs_inode_buf_write_verify,
 158};
 159
 160
 161/*
 162 * This routine is called to map an inode to the buffer containing the on-disk
 163 * version of the inode.  It returns a pointer to the buffer containing the
 164 * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
 165 * pointer to the on-disk inode within that buffer.
 166 *
 167 * If a non-zero error is returned, then the contents of bpp and dipp are
 168 * undefined.
 169 */
 170int
 171xfs_imap_to_bp(
 172        struct xfs_mount        *mp,
 173        struct xfs_trans        *tp,
 174        struct xfs_imap         *imap,
 175        struct xfs_dinode       **dipp,
 176        struct xfs_buf          **bpp,
 177        uint                    buf_flags,
 178        uint                    iget_flags)
 179{
 180        struct xfs_buf          *bp;
 181        int                     error;
 182
 183        buf_flags |= XBF_UNMAPPED;
 184        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
 185                                   (int)imap->im_len, buf_flags, &bp,
 186                                   &xfs_inode_buf_ops);
 187        if (error) {
 188                if (error == -EAGAIN) {
 189                        ASSERT(buf_flags & XBF_TRYLOCK);
 190                        return error;
 191                }
 192                xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
 193                        __func__, error);
 194                return error;
 195        }
 196
 197        *bpp = bp;
 198        *dipp = xfs_buf_offset(bp, imap->im_boffset);
 199        return 0;
 200}
 201
 202void
 203xfs_inode_from_disk(
 204        struct xfs_inode        *ip,
 205        struct xfs_dinode       *from)
 206{
 207        struct xfs_icdinode     *to = &ip->i_d;
 208        struct inode            *inode = VFS_I(ip);
 209
 210
 211        /*
 212         * Convert v1 inodes immediately to v2 inode format as this is the
 213         * minimum inode version format we support in the rest of the code.
 214         */
 215        to->di_version = from->di_version;
 216        if (to->di_version == 1) {
 217                set_nlink(inode, be16_to_cpu(from->di_onlink));
 218                to->di_projid_lo = 0;
 219                to->di_projid_hi = 0;
 220                to->di_version = 2;
 221        } else {
 222                set_nlink(inode, be32_to_cpu(from->di_nlink));
 223                to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
 224                to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
 225        }
 226
 227        to->di_format = from->di_format;
 228        to->di_uid = be32_to_cpu(from->di_uid);
 229        to->di_gid = be32_to_cpu(from->di_gid);
 230        to->di_flushiter = be16_to_cpu(from->di_flushiter);
 231
 232        /*
 233         * Time is signed, so need to convert to signed 32 bit before
 234         * storing in inode timestamp which may be 64 bit. Otherwise
 235         * a time before epoch is converted to a time long after epoch
 236         * on 64 bit systems.
 237         */
 238        inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec);
 239        inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec);
 240        inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec);
 241        inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
 242        inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
 243        inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
 244        inode->i_generation = be32_to_cpu(from->di_gen);
 245        inode->i_mode = be16_to_cpu(from->di_mode);
 246
 247        to->di_size = be64_to_cpu(from->di_size);
 248        to->di_nblocks = be64_to_cpu(from->di_nblocks);
 249        to->di_extsize = be32_to_cpu(from->di_extsize);
 250        to->di_nextents = be32_to_cpu(from->di_nextents);
 251        to->di_anextents = be16_to_cpu(from->di_anextents);
 252        to->di_forkoff = from->di_forkoff;
 253        to->di_aformat  = from->di_aformat;
 254        to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
 255        to->di_dmstate  = be16_to_cpu(from->di_dmstate);
 256        to->di_flags    = be16_to_cpu(from->di_flags);
 257
 258        if (to->di_version == 3) {
 259                inode_set_iversion_queried(inode,
 260                                           be64_to_cpu(from->di_changecount));
 261                to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
 262                to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
 263                to->di_flags2 = be64_to_cpu(from->di_flags2);
 264                to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
 265        }
 266}
 267
 268void
 269xfs_inode_to_disk(
 270        struct xfs_inode        *ip,
 271        struct xfs_dinode       *to,
 272        xfs_lsn_t               lsn)
 273{
 274        struct xfs_icdinode     *from = &ip->i_d;
 275        struct inode            *inode = VFS_I(ip);
 276
 277        to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 278        to->di_onlink = 0;
 279
 280        to->di_version = from->di_version;
 281        to->di_format = from->di_format;
 282        to->di_uid = cpu_to_be32(from->di_uid);
 283        to->di_gid = cpu_to_be32(from->di_gid);
 284        to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
 285        to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
 286
 287        memset(to->di_pad, 0, sizeof(to->di_pad));
 288        to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec);
 289        to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
 290        to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec);
 291        to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
 292        to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec);
 293        to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
 294        to->di_nlink = cpu_to_be32(inode->i_nlink);
 295        to->di_gen = cpu_to_be32(inode->i_generation);
 296        to->di_mode = cpu_to_be16(inode->i_mode);
 297
 298        to->di_size = cpu_to_be64(from->di_size);
 299        to->di_nblocks = cpu_to_be64(from->di_nblocks);
 300        to->di_extsize = cpu_to_be32(from->di_extsize);
 301        to->di_nextents = cpu_to_be32(from->di_nextents);
 302        to->di_anextents = cpu_to_be16(from->di_anextents);
 303        to->di_forkoff = from->di_forkoff;
 304        to->di_aformat = from->di_aformat;
 305        to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
 306        to->di_dmstate = cpu_to_be16(from->di_dmstate);
 307        to->di_flags = cpu_to_be16(from->di_flags);
 308
 309        if (from->di_version == 3) {
 310                to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
 311                to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
 312                to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
 313                to->di_flags2 = cpu_to_be64(from->di_flags2);
 314                to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
 315                to->di_ino = cpu_to_be64(ip->i_ino);
 316                to->di_lsn = cpu_to_be64(lsn);
 317                memset(to->di_pad2, 0, sizeof(to->di_pad2));
 318                uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
 319                to->di_flushiter = 0;
 320        } else {
 321                to->di_flushiter = cpu_to_be16(from->di_flushiter);
 322        }
 323}
 324
 325void
 326xfs_log_dinode_to_disk(
 327        struct xfs_log_dinode   *from,
 328        struct xfs_dinode       *to)
 329{
 330        to->di_magic = cpu_to_be16(from->di_magic);
 331        to->di_mode = cpu_to_be16(from->di_mode);
 332        to->di_version = from->di_version;
 333        to->di_format = from->di_format;
 334        to->di_onlink = 0;
 335        to->di_uid = cpu_to_be32(from->di_uid);
 336        to->di_gid = cpu_to_be32(from->di_gid);
 337        to->di_nlink = cpu_to_be32(from->di_nlink);
 338        to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
 339        to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
 340        memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
 341
 342        to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
 343        to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
 344        to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
 345        to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
 346        to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
 347        to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
 348
 349        to->di_size = cpu_to_be64(from->di_size);
 350        to->di_nblocks = cpu_to_be64(from->di_nblocks);
 351        to->di_extsize = cpu_to_be32(from->di_extsize);
 352        to->di_nextents = cpu_to_be32(from->di_nextents);
 353        to->di_anextents = cpu_to_be16(from->di_anextents);
 354        to->di_forkoff = from->di_forkoff;
 355        to->di_aformat = from->di_aformat;
 356        to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
 357        to->di_dmstate = cpu_to_be16(from->di_dmstate);
 358        to->di_flags = cpu_to_be16(from->di_flags);
 359        to->di_gen = cpu_to_be32(from->di_gen);
 360
 361        if (from->di_version == 3) {
 362                to->di_changecount = cpu_to_be64(from->di_changecount);
 363                to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
 364                to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
 365                to->di_flags2 = cpu_to_be64(from->di_flags2);
 366                to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
 367                to->di_ino = cpu_to_be64(from->di_ino);
 368                to->di_lsn = cpu_to_be64(from->di_lsn);
 369                memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
 370                uuid_copy(&to->di_uuid, &from->di_uuid);
 371                to->di_flushiter = 0;
 372        } else {
 373                to->di_flushiter = cpu_to_be16(from->di_flushiter);
 374        }
 375}
 376
 377static xfs_failaddr_t
 378xfs_dinode_verify_fork(
 379        struct xfs_dinode       *dip,
 380        struct xfs_mount        *mp,
 381        int                     whichfork)
 382{
 383        uint32_t                di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
 384
 385        switch (XFS_DFORK_FORMAT(dip, whichfork)) {
 386        case XFS_DINODE_FMT_LOCAL:
 387                /*
 388                 * no local regular files yet
 389                 */
 390                if (whichfork == XFS_DATA_FORK) {
 391                        if (S_ISREG(be16_to_cpu(dip->di_mode)))
 392                                return __this_address;
 393                        if (be64_to_cpu(dip->di_size) >
 394                                        XFS_DFORK_SIZE(dip, mp, whichfork))
 395                                return __this_address;
 396                }
 397                if (di_nextents)
 398                        return __this_address;
 399                break;
 400        case XFS_DINODE_FMT_EXTENTS:
 401                if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
 402                        return __this_address;
 403                break;
 404        case XFS_DINODE_FMT_BTREE:
 405                if (whichfork == XFS_ATTR_FORK) {
 406                        if (di_nextents > MAXAEXTNUM)
 407                                return __this_address;
 408                } else if (di_nextents > MAXEXTNUM) {
 409                        return __this_address;
 410                }
 411                break;
 412        default:
 413                return __this_address;
 414        }
 415        return NULL;
 416}
 417
 418static xfs_failaddr_t
 419xfs_dinode_verify_forkoff(
 420        struct xfs_dinode       *dip,
 421        struct xfs_mount        *mp)
 422{
 423        if (!XFS_DFORK_Q(dip))
 424                return NULL;
 425
 426        switch (dip->di_format)  {
 427        case XFS_DINODE_FMT_DEV:
 428                if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
 429                        return __this_address;
 430                break;
 431        case XFS_DINODE_FMT_LOCAL:      /* fall through ... */
 432        case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
 433        case XFS_DINODE_FMT_BTREE:
 434                if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3))
 435                        return __this_address;
 436                break;
 437        default:
 438                return __this_address;
 439        }
 440        return NULL;
 441}
 442
 443xfs_failaddr_t
 444xfs_dinode_verify(
 445        struct xfs_mount        *mp,
 446        xfs_ino_t               ino,
 447        struct xfs_dinode       *dip)
 448{
 449        xfs_failaddr_t          fa;
 450        uint16_t                mode;
 451        uint16_t                flags;
 452        uint64_t                flags2;
 453        uint64_t                di_size;
 454
 455        if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
 456                return __this_address;
 457
 458        /* Verify v3 integrity information first */
 459        if (dip->di_version >= 3) {
 460                if (!xfs_sb_version_hascrc(&mp->m_sb))
 461                        return __this_address;
 462                if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
 463                                      XFS_DINODE_CRC_OFF))
 464                        return __this_address;
 465                if (be64_to_cpu(dip->di_ino) != ino)
 466                        return __this_address;
 467                if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
 468                        return __this_address;
 469        }
 470
 471        /* don't allow invalid i_size */
 472        di_size = be64_to_cpu(dip->di_size);
 473        if (di_size & (1ULL << 63))
 474                return __this_address;
 475
 476        mode = be16_to_cpu(dip->di_mode);
 477        if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
 478                return __this_address;
 479
 480        /* No zero-length symlinks/dirs. */
 481        if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
 482                return __this_address;
 483
 484        /* Fork checks carried over from xfs_iformat_fork */
 485        if (mode &&
 486            be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
 487                        be64_to_cpu(dip->di_nblocks))
 488                return __this_address;
 489
 490        if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
 491                return __this_address;
 492
 493        flags = be16_to_cpu(dip->di_flags);
 494
 495        if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
 496                return __this_address;
 497
 498        /* check for illegal values of forkoff */
 499        fa = xfs_dinode_verify_forkoff(dip, mp);
 500        if (fa)
 501                return fa;
 502
 503        /* Do we have appropriate data fork formats for the mode? */
 504        switch (mode & S_IFMT) {
 505        case S_IFIFO:
 506        case S_IFCHR:
 507        case S_IFBLK:
 508        case S_IFSOCK:
 509                if (dip->di_format != XFS_DINODE_FMT_DEV)
 510                        return __this_address;
 511                break;
 512        case S_IFREG:
 513        case S_IFLNK:
 514        case S_IFDIR:
 515                fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
 516                if (fa)
 517                        return fa;
 518                break;
 519        case 0:
 520                /* Uninitialized inode ok. */
 521                break;
 522        default:
 523                return __this_address;
 524        }
 525
 526        if (XFS_DFORK_Q(dip)) {
 527                fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
 528                if (fa)
 529                        return fa;
 530        } else {
 531                /*
 532                 * If there is no fork offset, this may be a freshly-made inode
 533                 * in a new disk cluster, in which case di_aformat is zeroed.
 534                 * Otherwise, such an inode must be in EXTENTS format; this goes
 535                 * for freed inodes as well.
 536                 */
 537                switch (dip->di_aformat) {
 538                case 0:
 539                case XFS_DINODE_FMT_EXTENTS:
 540                        break;
 541                default:
 542                        return __this_address;
 543                }
 544                if (dip->di_anextents)
 545                        return __this_address;
 546        }
 547
 548        /* extent size hint validation */
 549        fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
 550                        mode, flags);
 551        if (fa)
 552                return fa;
 553
 554        /* only version 3 or greater inodes are extensively verified here */
 555        if (dip->di_version < 3)
 556                return NULL;
 557
 558        flags2 = be64_to_cpu(dip->di_flags2);
 559
 560        /* don't allow reflink/cowextsize if we don't have reflink */
 561        if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
 562             !xfs_sb_version_hasreflink(&mp->m_sb))
 563                return __this_address;
 564
 565        /* only regular files get reflink */
 566        if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
 567                return __this_address;
 568
 569        /* don't let reflink and realtime mix */
 570        if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
 571                return __this_address;
 572
 573        /* don't let reflink and dax mix */
 574        if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX))
 575                return __this_address;
 576
 577        /* COW extent size hint validation */
 578        fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
 579                        mode, flags, flags2);
 580        if (fa)
 581                return fa;
 582
 583        return NULL;
 584}
 585
 586void
 587xfs_dinode_calc_crc(
 588        struct xfs_mount        *mp,
 589        struct xfs_dinode       *dip)
 590{
 591        uint32_t                crc;
 592
 593        if (dip->di_version < 3)
 594                return;
 595
 596        ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
 597        crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
 598                              XFS_DINODE_CRC_OFF);
 599        dip->di_crc = xfs_end_cksum(crc);
 600}
 601
 602/*
 603 * Read the disk inode attributes into the in-core inode structure.
 604 *
 605 * For version 5 superblocks, if we are initialising a new inode and we are not
 606 * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
 607 * inode core with a random generation number. If we are keeping inodes around,
 608 * we need to read the inode cluster to get the existing generation number off
 609 * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
 610 * format) then log recovery is dependent on the di_flushiter field being
 611 * initialised from the current on-disk value and hence we must also read the
 612 * inode off disk.
 613 */
 614int
 615xfs_iread(
 616        xfs_mount_t     *mp,
 617        xfs_trans_t     *tp,
 618        xfs_inode_t     *ip,
 619        uint            iget_flags)
 620{
 621        xfs_buf_t       *bp;
 622        xfs_dinode_t    *dip;
 623        xfs_failaddr_t  fa;
 624        int             error;
 625
 626        /*
 627         * Fill in the location information in the in-core inode.
 628         */
 629        error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
 630        if (error)
 631                return error;
 632
 633        /* shortcut IO on inode allocation if possible */
 634        if ((iget_flags & XFS_IGET_CREATE) &&
 635            xfs_sb_version_hascrc(&mp->m_sb) &&
 636            !(mp->m_flags & XFS_MOUNT_IKEEP)) {
 637                /* initialise the on-disk inode core */
 638                memset(&ip->i_d, 0, sizeof(ip->i_d));
 639                VFS_I(ip)->i_generation = prandom_u32();
 640                ip->i_d.di_version = 3;
 641                return 0;
 642        }
 643
 644        /*
 645         * Get pointers to the on-disk inode and the buffer containing it.
 646         */
 647        error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
 648        if (error)
 649                return error;
 650
 651        /* even unallocated inodes are verified */
 652        fa = xfs_dinode_verify(mp, ip->i_ino, dip);
 653        if (fa) {
 654                xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip,
 655                                sizeof(*dip), fa);
 656                error = -EFSCORRUPTED;
 657                goto out_brelse;
 658        }
 659
 660        /*
 661         * If the on-disk inode is already linked to a directory
 662         * entry, copy all of the inode into the in-core inode.
 663         * xfs_iformat_fork() handles copying in the inode format
 664         * specific information.
 665         * Otherwise, just get the truly permanent information.
 666         */
 667        if (dip->di_mode) {
 668                xfs_inode_from_disk(ip, dip);
 669                error = xfs_iformat_fork(ip, dip);
 670                if (error)  {
 671#ifdef DEBUG
 672                        xfs_alert(mp, "%s: xfs_iformat() returned error %d",
 673                                __func__, error);
 674#endif /* DEBUG */
 675                        goto out_brelse;
 676                }
 677        } else {
 678                /*
 679                 * Partial initialisation of the in-core inode. Just the bits
 680                 * that xfs_ialloc won't overwrite or relies on being correct.
 681                 */
 682                ip->i_d.di_version = dip->di_version;
 683                VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen);
 684                ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
 685
 686                /*
 687                 * Make sure to pull in the mode here as well in
 688                 * case the inode is released without being used.
 689                 * This ensures that xfs_inactive() will see that
 690                 * the inode is already free and not try to mess
 691                 * with the uninitialized part of it.
 692                 */
 693                VFS_I(ip)->i_mode = 0;
 694        }
 695
 696        ASSERT(ip->i_d.di_version >= 2);
 697        ip->i_delayed_blks = 0;
 698
 699        /*
 700         * Mark the buffer containing the inode as something to keep
 701         * around for a while.  This helps to keep recently accessed
 702         * meta-data in-core longer.
 703         */
 704        xfs_buf_set_ref(bp, XFS_INO_REF);
 705
 706        /*
 707         * Use xfs_trans_brelse() to release the buffer containing the on-disk
 708         * inode, because it was acquired with xfs_trans_read_buf() in
 709         * xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
 710         * brelse().  If we're within a transaction, then xfs_trans_brelse()
 711         * will only release the buffer if it is not dirty within the
 712         * transaction.  It will be OK to release the buffer in this case,
 713         * because inodes on disk are never destroyed and we will be locking the
 714         * new in-core inode before putting it in the cache where other
 715         * processes can find it.  Thus we don't have to worry about the inode
 716         * being changed just because we released the buffer.
 717         */
 718 out_brelse:
 719        xfs_trans_brelse(tp, bp);
 720        return error;
 721}
 722
 723/*
 724 * Validate di_extsize hint.
 725 *
 726 * The rules are documented at xfs_ioctl_setattr_check_extsize().
 727 * These functions must be kept in sync with each other.
 728 */
 729xfs_failaddr_t
 730xfs_inode_validate_extsize(
 731        struct xfs_mount                *mp,
 732        uint32_t                        extsize,
 733        uint16_t                        mode,
 734        uint16_t                        flags)
 735{
 736        bool                            rt_flag;
 737        bool                            hint_flag;
 738        bool                            inherit_flag;
 739        uint32_t                        extsize_bytes;
 740        uint32_t                        blocksize_bytes;
 741
 742        rt_flag = (flags & XFS_DIFLAG_REALTIME);
 743        hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
 744        inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
 745        extsize_bytes = XFS_FSB_TO_B(mp, extsize);
 746
 747        if (rt_flag)
 748                blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
 749        else
 750                blocksize_bytes = mp->m_sb.sb_blocksize;
 751
 752        if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
 753                return __this_address;
 754
 755        if (hint_flag && !S_ISREG(mode))
 756                return __this_address;
 757
 758        if (inherit_flag && !S_ISDIR(mode))
 759                return __this_address;
 760
 761        if ((hint_flag || inherit_flag) && extsize == 0)
 762                return __this_address;
 763
 764        /* free inodes get flags set to zero but extsize remains */
 765        if (mode && !(hint_flag || inherit_flag) && extsize != 0)
 766                return __this_address;
 767
 768        if (extsize_bytes % blocksize_bytes)
 769                return __this_address;
 770
 771        if (extsize > MAXEXTLEN)
 772                return __this_address;
 773
 774        if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
 775                return __this_address;
 776
 777        return NULL;
 778}
 779
 780/*
 781 * Validate di_cowextsize hint.
 782 *
 783 * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
 784 * These functions must be kept in sync with each other.
 785 */
 786xfs_failaddr_t
 787xfs_inode_validate_cowextsize(
 788        struct xfs_mount                *mp,
 789        uint32_t                        cowextsize,
 790        uint16_t                        mode,
 791        uint16_t                        flags,
 792        uint64_t                        flags2)
 793{
 794        bool                            rt_flag;
 795        bool                            hint_flag;
 796        uint32_t                        cowextsize_bytes;
 797
 798        rt_flag = (flags & XFS_DIFLAG_REALTIME);
 799        hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
 800        cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
 801
 802        if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
 803                return __this_address;
 804
 805        if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
 806                return __this_address;
 807
 808        if (hint_flag && cowextsize == 0)
 809                return __this_address;
 810
 811        /* free inodes get flags set to zero but cowextsize remains */
 812        if (mode && !hint_flag && cowextsize != 0)
 813                return __this_address;
 814
 815        if (hint_flag && rt_flag)
 816                return __this_address;
 817
 818        if (cowextsize_bytes % mp->m_sb.sb_blocksize)
 819                return __this_address;
 820
 821        if (cowextsize > MAXEXTLEN)
 822                return __this_address;
 823
 824        if (cowextsize > mp->m_sb.sb_agblocks / 2)
 825                return __this_address;
 826
 827        return NULL;
 828}
 829