linux/fs/xfs/libxfs/xfs_inode_buf.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_mount.h"
  13#include "xfs_defer.h"
  14#include "xfs_inode.h"
  15#include "xfs_errortag.h"
  16#include "xfs_error.h"
  17#include "xfs_cksum.h"
  18#include "xfs_icache.h"
  19#include "xfs_trans.h"
  20#include "xfs_ialloc.h"
  21#include "xfs_dir2.h"
  22
  23#include <linux/iversion.h>
  24
  25/*
  26 * Check that none of the inode's in the buffer have a next
  27 * unlinked field of 0.
  28 */
  29#if defined(DEBUG)
  30void
  31xfs_inobp_check(
  32        xfs_mount_t     *mp,
  33        xfs_buf_t       *bp)
  34{
  35        int             i;
  36        int             j;
  37        xfs_dinode_t    *dip;
  38
  39        j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
  40
  41        for (i = 0; i < j; i++) {
  42                dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
  43                if (!dip->di_next_unlinked)  {
  44                        xfs_alert(mp,
  45        "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
  46                                i, (long long)bp->b_bn);
  47                }
  48        }
  49}
  50#endif
  51
  52bool
  53xfs_dinode_good_version(
  54        struct xfs_mount *mp,
  55        __u8            version)
  56{
  57        if (xfs_sb_version_hascrc(&mp->m_sb))
  58                return version == 3;
  59
  60        return version == 1 || version == 2;
  61}
  62
  63/*
  64 * If we are doing readahead on an inode buffer, we might be in log recovery
  65 * reading an inode allocation buffer that hasn't yet been replayed, and hence
  66 * has not had the inode cores stamped into it. Hence for readahead, the buffer
  67 * may be potentially invalid.
  68 *
  69 * If the readahead buffer is invalid, we need to mark it with an error and
  70 * clear the DONE status of the buffer so that a followup read will re-read it
  71 * from disk. We don't report the error otherwise to avoid warnings during log
  72 * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
  73 * because all we want to do is say readahead failed; there is no-one to report
  74 * the error to, so this will distinguish it from a non-ra verifier failure.
  75 * Changes to this readahead error behavour also need to be reflected in
  76 * xfs_dquot_buf_readahead_verify().
  77 */
  78static void
  79xfs_inode_buf_verify(
  80        struct xfs_buf  *bp,
  81        bool            readahead)
  82{
  83        struct xfs_mount *mp = bp->b_target->bt_mount;
  84        xfs_agnumber_t  agno;
  85        int             i;
  86        int             ni;
  87
  88        /*
  89         * Validate the magic number and version of every inode in the buffer
  90         */
  91        agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
  92        ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
  93        for (i = 0; i < ni; i++) {
  94                int             di_ok;
  95                xfs_dinode_t    *dip;
  96                xfs_agino_t     unlinked_ino;
  97
  98                dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
  99                unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
 100                di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
 101                        xfs_dinode_good_version(mp, dip->di_version) &&
 102                        xfs_verify_agino_or_null(mp, agno, unlinked_ino);
 103                if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
 104                                                XFS_ERRTAG_ITOBP_INOTOBP))) {
 105                        if (readahead) {
 106                                bp->b_flags &= ~XBF_DONE;
 107                                xfs_buf_ioerror(bp, -EIO);
 108                                return;
 109                        }
 110
 111#ifdef DEBUG
 112                        xfs_alert(mp,
 113                                "bad inode magic/vsn daddr %lld #%d (magic=%x)",
 114                                (unsigned long long)bp->b_bn, i,
 115                                be16_to_cpu(dip->di_magic));
 116#endif
 117                        xfs_buf_verifier_error(bp, -EFSCORRUPTED,
 118                                        __func__, dip, sizeof(*dip),
 119                                        NULL);
 120                        return;
 121                }
 122        }
 123}
 124
 125
 126static void
 127xfs_inode_buf_read_verify(
 128        struct xfs_buf  *bp)
 129{
 130        xfs_inode_buf_verify(bp, false);
 131}
 132
 133static void
 134xfs_inode_buf_readahead_verify(
 135        struct xfs_buf  *bp)
 136{
 137        xfs_inode_buf_verify(bp, true);
 138}
 139
 140static void
 141xfs_inode_buf_write_verify(
 142        struct xfs_buf  *bp)
 143{
 144        xfs_inode_buf_verify(bp, false);
 145}
 146
 147const struct xfs_buf_ops xfs_inode_buf_ops = {
 148        .name = "xfs_inode",
 149        .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
 150                     cpu_to_be16(XFS_DINODE_MAGIC) },
 151        .verify_read = xfs_inode_buf_read_verify,
 152        .verify_write = xfs_inode_buf_write_verify,
 153};
 154
 155const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
 156        .name = "xfs_inode_ra",
 157        .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
 158                     cpu_to_be16(XFS_DINODE_MAGIC) },
 159        .verify_read = xfs_inode_buf_readahead_verify,
 160        .verify_write = xfs_inode_buf_write_verify,
 161};
 162
 163
 164/*
 165 * This routine is called to map an inode to the buffer containing the on-disk
 166 * version of the inode.  It returns a pointer to the buffer containing the
 167 * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
 168 * pointer to the on-disk inode within that buffer.
 169 *
 170 * If a non-zero error is returned, then the contents of bpp and dipp are
 171 * undefined.
 172 */
 173int
 174xfs_imap_to_bp(
 175        struct xfs_mount        *mp,
 176        struct xfs_trans        *tp,
 177        struct xfs_imap         *imap,
 178        struct xfs_dinode       **dipp,
 179        struct xfs_buf          **bpp,
 180        uint                    buf_flags,
 181        uint                    iget_flags)
 182{
 183        struct xfs_buf          *bp;
 184        int                     error;
 185
 186        buf_flags |= XBF_UNMAPPED;
 187        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
 188                                   (int)imap->im_len, buf_flags, &bp,
 189                                   &xfs_inode_buf_ops);
 190        if (error) {
 191                if (error == -EAGAIN) {
 192                        ASSERT(buf_flags & XBF_TRYLOCK);
 193                        return error;
 194                }
 195                xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
 196                        __func__, error);
 197                return error;
 198        }
 199
 200        *bpp = bp;
 201        *dipp = xfs_buf_offset(bp, imap->im_boffset);
 202        return 0;
 203}
 204
 205void
 206xfs_inode_from_disk(
 207        struct xfs_inode        *ip,
 208        struct xfs_dinode       *from)
 209{
 210        struct xfs_icdinode     *to = &ip->i_d;
 211        struct inode            *inode = VFS_I(ip);
 212
 213
 214        /*
 215         * Convert v1 inodes immediately to v2 inode format as this is the
 216         * minimum inode version format we support in the rest of the code.
 217         */
 218        to->di_version = from->di_version;
 219        if (to->di_version == 1) {
 220                set_nlink(inode, be16_to_cpu(from->di_onlink));
 221                to->di_projid_lo = 0;
 222                to->di_projid_hi = 0;
 223                to->di_version = 2;
 224        } else {
 225                set_nlink(inode, be32_to_cpu(from->di_nlink));
 226                to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
 227                to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
 228        }
 229
 230        to->di_format = from->di_format;
 231        to->di_uid = be32_to_cpu(from->di_uid);
 232        to->di_gid = be32_to_cpu(from->di_gid);
 233        to->di_flushiter = be16_to_cpu(from->di_flushiter);
 234
 235        /*
 236         * Time is signed, so need to convert to signed 32 bit before
 237         * storing in inode timestamp which may be 64 bit. Otherwise
 238         * a time before epoch is converted to a time long after epoch
 239         * on 64 bit systems.
 240         */
 241        inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec);
 242        inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec);
 243        inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec);
 244        inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
 245        inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
 246        inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
 247        inode->i_generation = be32_to_cpu(from->di_gen);
 248        inode->i_mode = be16_to_cpu(from->di_mode);
 249
 250        to->di_size = be64_to_cpu(from->di_size);
 251        to->di_nblocks = be64_to_cpu(from->di_nblocks);
 252        to->di_extsize = be32_to_cpu(from->di_extsize);
 253        to->di_nextents = be32_to_cpu(from->di_nextents);
 254        to->di_anextents = be16_to_cpu(from->di_anextents);
 255        to->di_forkoff = from->di_forkoff;
 256        to->di_aformat  = from->di_aformat;
 257        to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
 258        to->di_dmstate  = be16_to_cpu(from->di_dmstate);
 259        to->di_flags    = be16_to_cpu(from->di_flags);
 260
 261        if (to->di_version == 3) {
 262                inode_set_iversion_queried(inode,
 263                                           be64_to_cpu(from->di_changecount));
 264                to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
 265                to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
 266                to->di_flags2 = be64_to_cpu(from->di_flags2);
 267                to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
 268        }
 269}
 270
 271void
 272xfs_inode_to_disk(
 273        struct xfs_inode        *ip,
 274        struct xfs_dinode       *to,
 275        xfs_lsn_t               lsn)
 276{
 277        struct xfs_icdinode     *from = &ip->i_d;
 278        struct inode            *inode = VFS_I(ip);
 279
 280        to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 281        to->di_onlink = 0;
 282
 283        to->di_version = from->di_version;
 284        to->di_format = from->di_format;
 285        to->di_uid = cpu_to_be32(from->di_uid);
 286        to->di_gid = cpu_to_be32(from->di_gid);
 287        to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
 288        to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
 289
 290        memset(to->di_pad, 0, sizeof(to->di_pad));
 291        to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec);
 292        to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
 293        to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec);
 294        to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
 295        to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec);
 296        to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
 297        to->di_nlink = cpu_to_be32(inode->i_nlink);
 298        to->di_gen = cpu_to_be32(inode->i_generation);
 299        to->di_mode = cpu_to_be16(inode->i_mode);
 300
 301        to->di_size = cpu_to_be64(from->di_size);
 302        to->di_nblocks = cpu_to_be64(from->di_nblocks);
 303        to->di_extsize = cpu_to_be32(from->di_extsize);
 304        to->di_nextents = cpu_to_be32(from->di_nextents);
 305        to->di_anextents = cpu_to_be16(from->di_anextents);
 306        to->di_forkoff = from->di_forkoff;
 307        to->di_aformat = from->di_aformat;
 308        to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
 309        to->di_dmstate = cpu_to_be16(from->di_dmstate);
 310        to->di_flags = cpu_to_be16(from->di_flags);
 311
 312        if (from->di_version == 3) {
 313                to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
 314                to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
 315                to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
 316                to->di_flags2 = cpu_to_be64(from->di_flags2);
 317                to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
 318                to->di_ino = cpu_to_be64(ip->i_ino);
 319                to->di_lsn = cpu_to_be64(lsn);
 320                memset(to->di_pad2, 0, sizeof(to->di_pad2));
 321                uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
 322                to->di_flushiter = 0;
 323        } else {
 324                to->di_flushiter = cpu_to_be16(from->di_flushiter);
 325        }
 326}
 327
 328void
 329xfs_log_dinode_to_disk(
 330        struct xfs_log_dinode   *from,
 331        struct xfs_dinode       *to)
 332{
 333        to->di_magic = cpu_to_be16(from->di_magic);
 334        to->di_mode = cpu_to_be16(from->di_mode);
 335        to->di_version = from->di_version;
 336        to->di_format = from->di_format;
 337        to->di_onlink = 0;
 338        to->di_uid = cpu_to_be32(from->di_uid);
 339        to->di_gid = cpu_to_be32(from->di_gid);
 340        to->di_nlink = cpu_to_be32(from->di_nlink);
 341        to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
 342        to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
 343        memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
 344
 345        to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
 346        to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
 347        to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
 348        to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
 349        to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
 350        to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
 351
 352        to->di_size = cpu_to_be64(from->di_size);
 353        to->di_nblocks = cpu_to_be64(from->di_nblocks);
 354        to->di_extsize = cpu_to_be32(from->di_extsize);
 355        to->di_nextents = cpu_to_be32(from->di_nextents);
 356        to->di_anextents = cpu_to_be16(from->di_anextents);
 357        to->di_forkoff = from->di_forkoff;
 358        to->di_aformat = from->di_aformat;
 359        to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
 360        to->di_dmstate = cpu_to_be16(from->di_dmstate);
 361        to->di_flags = cpu_to_be16(from->di_flags);
 362        to->di_gen = cpu_to_be32(from->di_gen);
 363
 364        if (from->di_version == 3) {
 365                to->di_changecount = cpu_to_be64(from->di_changecount);
 366                to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
 367                to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
 368                to->di_flags2 = cpu_to_be64(from->di_flags2);
 369                to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
 370                to->di_ino = cpu_to_be64(from->di_ino);
 371                to->di_lsn = cpu_to_be64(from->di_lsn);
 372                memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
 373                uuid_copy(&to->di_uuid, &from->di_uuid);
 374                to->di_flushiter = 0;
 375        } else {
 376                to->di_flushiter = cpu_to_be16(from->di_flushiter);
 377        }
 378}
 379
 380static xfs_failaddr_t
 381xfs_dinode_verify_fork(
 382        struct xfs_dinode       *dip,
 383        struct xfs_mount        *mp,
 384        int                     whichfork)
 385{
 386        uint32_t                di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
 387
 388        switch (XFS_DFORK_FORMAT(dip, whichfork)) {
 389        case XFS_DINODE_FMT_LOCAL:
 390                /*
 391                 * no local regular files yet
 392                 */
 393                if (whichfork == XFS_DATA_FORK) {
 394                        if (S_ISREG(be16_to_cpu(dip->di_mode)))
 395                                return __this_address;
 396                        if (be64_to_cpu(dip->di_size) >
 397                                        XFS_DFORK_SIZE(dip, mp, whichfork))
 398                                return __this_address;
 399                }
 400                if (di_nextents)
 401                        return __this_address;
 402                break;
 403        case XFS_DINODE_FMT_EXTENTS:
 404                if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
 405                        return __this_address;
 406                break;
 407        case XFS_DINODE_FMT_BTREE:
 408                if (whichfork == XFS_ATTR_FORK) {
 409                        if (di_nextents > MAXAEXTNUM)
 410                                return __this_address;
 411                } else if (di_nextents > MAXEXTNUM) {
 412                        return __this_address;
 413                }
 414                break;
 415        default:
 416                return __this_address;
 417        }
 418        return NULL;
 419}
 420
 421static xfs_failaddr_t
 422xfs_dinode_verify_forkoff(
 423        struct xfs_dinode       *dip,
 424        struct xfs_mount        *mp)
 425{
 426        if (!XFS_DFORK_Q(dip))
 427                return NULL;
 428
 429        switch (dip->di_format)  {
 430        case XFS_DINODE_FMT_DEV:
 431                if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
 432                        return __this_address;
 433                break;
 434        case XFS_DINODE_FMT_LOCAL:      /* fall through ... */
 435        case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
 436        case XFS_DINODE_FMT_BTREE:
 437                if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3))
 438                        return __this_address;
 439                break;
 440        default:
 441                return __this_address;
 442        }
 443        return NULL;
 444}
 445
 446xfs_failaddr_t
 447xfs_dinode_verify(
 448        struct xfs_mount        *mp,
 449        xfs_ino_t               ino,
 450        struct xfs_dinode       *dip)
 451{
 452        xfs_failaddr_t          fa;
 453        uint16_t                mode;
 454        uint16_t                flags;
 455        uint64_t                flags2;
 456        uint64_t                di_size;
 457
 458        if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
 459                return __this_address;
 460
 461        /* Verify v3 integrity information first */
 462        if (dip->di_version >= 3) {
 463                if (!xfs_sb_version_hascrc(&mp->m_sb))
 464                        return __this_address;
 465                if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
 466                                      XFS_DINODE_CRC_OFF))
 467                        return __this_address;
 468                if (be64_to_cpu(dip->di_ino) != ino)
 469                        return __this_address;
 470                if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
 471                        return __this_address;
 472        }
 473
 474        /* don't allow invalid i_size */
 475        di_size = be64_to_cpu(dip->di_size);
 476        if (di_size & (1ULL << 63))
 477                return __this_address;
 478
 479        mode = be16_to_cpu(dip->di_mode);
 480        if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
 481                return __this_address;
 482
 483        /* No zero-length symlinks/dirs. */
 484        if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
 485                return __this_address;
 486
 487        /* Fork checks carried over from xfs_iformat_fork */
 488        if (mode &&
 489            be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
 490                        be64_to_cpu(dip->di_nblocks))
 491                return __this_address;
 492
 493        if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
 494                return __this_address;
 495
 496        flags = be16_to_cpu(dip->di_flags);
 497
 498        if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
 499                return __this_address;
 500
 501        /* check for illegal values of forkoff */
 502        fa = xfs_dinode_verify_forkoff(dip, mp);
 503        if (fa)
 504                return fa;
 505
 506        /* Do we have appropriate data fork formats for the mode? */
 507        switch (mode & S_IFMT) {
 508        case S_IFIFO:
 509        case S_IFCHR:
 510        case S_IFBLK:
 511        case S_IFSOCK:
 512                if (dip->di_format != XFS_DINODE_FMT_DEV)
 513                        return __this_address;
 514                break;
 515        case S_IFREG:
 516        case S_IFLNK:
 517        case S_IFDIR:
 518                fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
 519                if (fa)
 520                        return fa;
 521                break;
 522        case 0:
 523                /* Uninitialized inode ok. */
 524                break;
 525        default:
 526                return __this_address;
 527        }
 528
 529        if (XFS_DFORK_Q(dip)) {
 530                fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
 531                if (fa)
 532                        return fa;
 533        } else {
 534                /*
 535                 * If there is no fork offset, this may be a freshly-made inode
 536                 * in a new disk cluster, in which case di_aformat is zeroed.
 537                 * Otherwise, such an inode must be in EXTENTS format; this goes
 538                 * for freed inodes as well.
 539                 */
 540                switch (dip->di_aformat) {
 541                case 0:
 542                case XFS_DINODE_FMT_EXTENTS:
 543                        break;
 544                default:
 545                        return __this_address;
 546                }
 547                if (dip->di_anextents)
 548                        return __this_address;
 549        }
 550
 551        /* extent size hint validation */
 552        fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
 553                        mode, flags);
 554        if (fa)
 555                return fa;
 556
 557        /* only version 3 or greater inodes are extensively verified here */
 558        if (dip->di_version < 3)
 559                return NULL;
 560
 561        flags2 = be64_to_cpu(dip->di_flags2);
 562
 563        /* don't allow reflink/cowextsize if we don't have reflink */
 564        if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
 565             !xfs_sb_version_hasreflink(&mp->m_sb))
 566                return __this_address;
 567
 568        /* only regular files get reflink */
 569        if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
 570                return __this_address;
 571
 572        /* don't let reflink and realtime mix */
 573        if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
 574                return __this_address;
 575
 576        /* don't let reflink and dax mix */
 577        if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX))
 578                return __this_address;
 579
 580        /* COW extent size hint validation */
 581        fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
 582                        mode, flags, flags2);
 583        if (fa)
 584                return fa;
 585
 586        return NULL;
 587}
 588
 589void
 590xfs_dinode_calc_crc(
 591        struct xfs_mount        *mp,
 592        struct xfs_dinode       *dip)
 593{
 594        uint32_t                crc;
 595
 596        if (dip->di_version < 3)
 597                return;
 598
 599        ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
 600        crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
 601                              XFS_DINODE_CRC_OFF);
 602        dip->di_crc = xfs_end_cksum(crc);
 603}
 604
 605/*
 606 * Read the disk inode attributes into the in-core inode structure.
 607 *
 608 * For version 5 superblocks, if we are initialising a new inode and we are not
 609 * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
 610 * inode core with a random generation number. If we are keeping inodes around,
 611 * we need to read the inode cluster to get the existing generation number off
 612 * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
 613 * format) then log recovery is dependent on the di_flushiter field being
 614 * initialised from the current on-disk value and hence we must also read the
 615 * inode off disk.
 616 */
 617int
 618xfs_iread(
 619        xfs_mount_t     *mp,
 620        xfs_trans_t     *tp,
 621        xfs_inode_t     *ip,
 622        uint            iget_flags)
 623{
 624        xfs_buf_t       *bp;
 625        xfs_dinode_t    *dip;
 626        xfs_failaddr_t  fa;
 627        int             error;
 628
 629        /*
 630         * Fill in the location information in the in-core inode.
 631         */
 632        error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
 633        if (error)
 634                return error;
 635
 636        /* shortcut IO on inode allocation if possible */
 637        if ((iget_flags & XFS_IGET_CREATE) &&
 638            xfs_sb_version_hascrc(&mp->m_sb) &&
 639            !(mp->m_flags & XFS_MOUNT_IKEEP)) {
 640                /* initialise the on-disk inode core */
 641                memset(&ip->i_d, 0, sizeof(ip->i_d));
 642                VFS_I(ip)->i_generation = prandom_u32();
 643                ip->i_d.di_version = 3;
 644                return 0;
 645        }
 646
 647        /*
 648         * Get pointers to the on-disk inode and the buffer containing it.
 649         */
 650        error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
 651        if (error)
 652                return error;
 653
 654        /* even unallocated inodes are verified */
 655        fa = xfs_dinode_verify(mp, ip->i_ino, dip);
 656        if (fa) {
 657                xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip,
 658                                sizeof(*dip), fa);
 659                error = -EFSCORRUPTED;
 660                goto out_brelse;
 661        }
 662
 663        /*
 664         * If the on-disk inode is already linked to a directory
 665         * entry, copy all of the inode into the in-core inode.
 666         * xfs_iformat_fork() handles copying in the inode format
 667         * specific information.
 668         * Otherwise, just get the truly permanent information.
 669         */
 670        if (dip->di_mode) {
 671                xfs_inode_from_disk(ip, dip);
 672                error = xfs_iformat_fork(ip, dip);
 673                if (error)  {
 674#ifdef DEBUG
 675                        xfs_alert(mp, "%s: xfs_iformat() returned error %d",
 676                                __func__, error);
 677#endif /* DEBUG */
 678                        goto out_brelse;
 679                }
 680        } else {
 681                /*
 682                 * Partial initialisation of the in-core inode. Just the bits
 683                 * that xfs_ialloc won't overwrite or relies on being correct.
 684                 */
 685                ip->i_d.di_version = dip->di_version;
 686                VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen);
 687                ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
 688
 689                /*
 690                 * Make sure to pull in the mode here as well in
 691                 * case the inode is released without being used.
 692                 * This ensures that xfs_inactive() will see that
 693                 * the inode is already free and not try to mess
 694                 * with the uninitialized part of it.
 695                 */
 696                VFS_I(ip)->i_mode = 0;
 697        }
 698
 699        ASSERT(ip->i_d.di_version >= 2);
 700        ip->i_delayed_blks = 0;
 701
 702        /*
 703         * Mark the buffer containing the inode as something to keep
 704         * around for a while.  This helps to keep recently accessed
 705         * meta-data in-core longer.
 706         */
 707        xfs_buf_set_ref(bp, XFS_INO_REF);
 708
 709        /*
 710         * Use xfs_trans_brelse() to release the buffer containing the on-disk
 711         * inode, because it was acquired with xfs_trans_read_buf() in
 712         * xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
 713         * brelse().  If we're within a transaction, then xfs_trans_brelse()
 714         * will only release the buffer if it is not dirty within the
 715         * transaction.  It will be OK to release the buffer in this case,
 716         * because inodes on disk are never destroyed and we will be locking the
 717         * new in-core inode before putting it in the cache where other
 718         * processes can find it.  Thus we don't have to worry about the inode
 719         * being changed just because we released the buffer.
 720         */
 721 out_brelse:
 722        xfs_trans_brelse(tp, bp);
 723        return error;
 724}
 725
 726/*
 727 * Validate di_extsize hint.
 728 *
 729 * The rules are documented at xfs_ioctl_setattr_check_extsize().
 730 * These functions must be kept in sync with each other.
 731 */
 732xfs_failaddr_t
 733xfs_inode_validate_extsize(
 734        struct xfs_mount                *mp,
 735        uint32_t                        extsize,
 736        uint16_t                        mode,
 737        uint16_t                        flags)
 738{
 739        bool                            rt_flag;
 740        bool                            hint_flag;
 741        bool                            inherit_flag;
 742        uint32_t                        extsize_bytes;
 743        uint32_t                        blocksize_bytes;
 744
 745        rt_flag = (flags & XFS_DIFLAG_REALTIME);
 746        hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
 747        inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
 748        extsize_bytes = XFS_FSB_TO_B(mp, extsize);
 749
 750        if (rt_flag)
 751                blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
 752        else
 753                blocksize_bytes = mp->m_sb.sb_blocksize;
 754
 755        if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
 756                return __this_address;
 757
 758        if (hint_flag && !S_ISREG(mode))
 759                return __this_address;
 760
 761        if (inherit_flag && !S_ISDIR(mode))
 762                return __this_address;
 763
 764        if ((hint_flag || inherit_flag) && extsize == 0)
 765                return __this_address;
 766
 767        /* free inodes get flags set to zero but extsize remains */
 768        if (mode && !(hint_flag || inherit_flag) && extsize != 0)
 769                return __this_address;
 770
 771        if (extsize_bytes % blocksize_bytes)
 772                return __this_address;
 773
 774        if (extsize > MAXEXTLEN)
 775                return __this_address;
 776
 777        if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
 778                return __this_address;
 779
 780        return NULL;
 781}
 782
 783/*
 784 * Validate di_cowextsize hint.
 785 *
 786 * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
 787 * These functions must be kept in sync with each other.
 788 */
 789xfs_failaddr_t
 790xfs_inode_validate_cowextsize(
 791        struct xfs_mount                *mp,
 792        uint32_t                        cowextsize,
 793        uint16_t                        mode,
 794        uint16_t                        flags,
 795        uint64_t                        flags2)
 796{
 797        bool                            rt_flag;
 798        bool                            hint_flag;
 799        uint32_t                        cowextsize_bytes;
 800
 801        rt_flag = (flags & XFS_DIFLAG_REALTIME);
 802        hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
 803        cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
 804
 805        if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
 806                return __this_address;
 807
 808        if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
 809                return __this_address;
 810
 811        if (hint_flag && cowextsize == 0)
 812                return __this_address;
 813
 814        /* free inodes get flags set to zero but cowextsize remains */
 815        if (mode && !hint_flag && cowextsize != 0)
 816                return __this_address;
 817
 818        if (hint_flag && rt_flag)
 819                return __this_address;
 820
 821        if (cowextsize_bytes % mp->m_sb.sb_blocksize)
 822                return __this_address;
 823
 824        if (cowextsize > MAXEXTLEN)
 825                return __this_address;
 826
 827        if (cowextsize > mp->m_sb.sb_agblocks / 2)
 828                return __this_address;
 829
 830        return NULL;
 831}
 832