linux/fs/xfs/libxfs/xfs_inode_buf.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_mount.h"
  13#include "xfs_inode.h"
  14#include "xfs_errortag.h"
  15#include "xfs_error.h"
  16#include "xfs_icache.h"
  17#include "xfs_trans.h"
  18#include "xfs_ialloc.h"
  19#include "xfs_dir2.h"
  20
  21#include <linux/iversion.h>
  22
  23/*
  24 * Check that none of the inode's in the buffer have a next
  25 * unlinked field of 0.
  26 */
  27#if defined(DEBUG)
  28void
  29xfs_inobp_check(
  30        xfs_mount_t     *mp,
  31        xfs_buf_t       *bp)
  32{
  33        int             i;
  34        xfs_dinode_t    *dip;
  35
  36        for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
  37                dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
  38                if (!dip->di_next_unlinked)  {
  39                        xfs_alert(mp,
  40        "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
  41                                i, (long long)bp->b_bn);
  42                }
  43        }
  44}
  45#endif
  46
  47bool
  48xfs_dinode_good_version(
  49        struct xfs_mount *mp,
  50        __u8            version)
  51{
  52        if (xfs_sb_version_hascrc(&mp->m_sb))
  53                return version == 3;
  54
  55        return version == 1 || version == 2;
  56}
  57
  58/*
  59 * If we are doing readahead on an inode buffer, we might be in log recovery
  60 * reading an inode allocation buffer that hasn't yet been replayed, and hence
  61 * has not had the inode cores stamped into it. Hence for readahead, the buffer
  62 * may be potentially invalid.
  63 *
  64 * If the readahead buffer is invalid, we need to mark it with an error and
  65 * clear the DONE status of the buffer so that a followup read will re-read it
  66 * from disk. We don't report the error otherwise to avoid warnings during log
  67 * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
  68 * because all we want to do is say readahead failed; there is no-one to report
  69 * the error to, so this will distinguish it from a non-ra verifier failure.
  70 * Changes to this readahead error behavour also need to be reflected in
  71 * xfs_dquot_buf_readahead_verify().
  72 */
  73static void
  74xfs_inode_buf_verify(
  75        struct xfs_buf  *bp,
  76        bool            readahead)
  77{
  78        struct xfs_mount *mp = bp->b_mount;
  79        xfs_agnumber_t  agno;
  80        int             i;
  81        int             ni;
  82
  83        /*
  84         * Validate the magic number and version of every inode in the buffer
  85         */
  86        agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
  87        ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
  88        for (i = 0; i < ni; i++) {
  89                int             di_ok;
  90                xfs_dinode_t    *dip;
  91                xfs_agino_t     unlinked_ino;
  92
  93                dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
  94                unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
  95                di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
  96                        xfs_dinode_good_version(mp, dip->di_version) &&
  97                        xfs_verify_agino_or_null(mp, agno, unlinked_ino);
  98                if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
  99                                                XFS_ERRTAG_ITOBP_INOTOBP))) {
 100                        if (readahead) {
 101                                bp->b_flags &= ~XBF_DONE;
 102                                xfs_buf_ioerror(bp, -EIO);
 103                                return;
 104                        }
 105
 106#ifdef DEBUG
 107                        xfs_alert(mp,
 108                                "bad inode magic/vsn daddr %lld #%d (magic=%x)",
 109                                (unsigned long long)bp->b_bn, i,
 110                                be16_to_cpu(dip->di_magic));
 111#endif
 112                        xfs_buf_verifier_error(bp, -EFSCORRUPTED,
 113                                        __func__, dip, sizeof(*dip),
 114                                        NULL);
 115                        return;
 116                }
 117        }
 118}
 119
 120
 121static void
 122xfs_inode_buf_read_verify(
 123        struct xfs_buf  *bp)
 124{
 125        xfs_inode_buf_verify(bp, false);
 126}
 127
 128static void
 129xfs_inode_buf_readahead_verify(
 130        struct xfs_buf  *bp)
 131{
 132        xfs_inode_buf_verify(bp, true);
 133}
 134
 135static void
 136xfs_inode_buf_write_verify(
 137        struct xfs_buf  *bp)
 138{
 139        xfs_inode_buf_verify(bp, false);
 140}
 141
 142const struct xfs_buf_ops xfs_inode_buf_ops = {
 143        .name = "xfs_inode",
 144        .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
 145                     cpu_to_be16(XFS_DINODE_MAGIC) },
 146        .verify_read = xfs_inode_buf_read_verify,
 147        .verify_write = xfs_inode_buf_write_verify,
 148};
 149
 150const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
 151        .name = "xfs_inode_ra",
 152        .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
 153                     cpu_to_be16(XFS_DINODE_MAGIC) },
 154        .verify_read = xfs_inode_buf_readahead_verify,
 155        .verify_write = xfs_inode_buf_write_verify,
 156};
 157
 158
 159/*
 160 * This routine is called to map an inode to the buffer containing the on-disk
 161 * version of the inode.  It returns a pointer to the buffer containing the
 162 * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
 163 * pointer to the on-disk inode within that buffer.
 164 *
 165 * If a non-zero error is returned, then the contents of bpp and dipp are
 166 * undefined.
 167 */
 168int
 169xfs_imap_to_bp(
 170        struct xfs_mount        *mp,
 171        struct xfs_trans        *tp,
 172        struct xfs_imap         *imap,
 173        struct xfs_dinode       **dipp,
 174        struct xfs_buf          **bpp,
 175        uint                    buf_flags,
 176        uint                    iget_flags)
 177{
 178        struct xfs_buf          *bp;
 179        int                     error;
 180
 181        buf_flags |= XBF_UNMAPPED;
 182        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
 183                                   (int)imap->im_len, buf_flags, &bp,
 184                                   &xfs_inode_buf_ops);
 185        if (error) {
 186                if (error == -EAGAIN) {
 187                        ASSERT(buf_flags & XBF_TRYLOCK);
 188                        return error;
 189                }
 190                xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
 191                        __func__, error);
 192                return error;
 193        }
 194
 195        *bpp = bp;
 196        *dipp = xfs_buf_offset(bp, imap->im_boffset);
 197        return 0;
 198}
 199
 200void
 201xfs_inode_from_disk(
 202        struct xfs_inode        *ip,
 203        struct xfs_dinode       *from)
 204{
 205        struct xfs_icdinode     *to = &ip->i_d;
 206        struct inode            *inode = VFS_I(ip);
 207
 208
 209        /*
 210         * Convert v1 inodes immediately to v2 inode format as this is the
 211         * minimum inode version format we support in the rest of the code.
 212         */
 213        to->di_version = from->di_version;
 214        if (to->di_version == 1) {
 215                set_nlink(inode, be16_to_cpu(from->di_onlink));
 216                to->di_projid_lo = 0;
 217                to->di_projid_hi = 0;
 218                to->di_version = 2;
 219        } else {
 220                set_nlink(inode, be32_to_cpu(from->di_nlink));
 221                to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
 222                to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
 223        }
 224
 225        to->di_format = from->di_format;
 226        to->di_uid = be32_to_cpu(from->di_uid);
 227        to->di_gid = be32_to_cpu(from->di_gid);
 228        to->di_flushiter = be16_to_cpu(from->di_flushiter);
 229
 230        /*
 231         * Time is signed, so need to convert to signed 32 bit before
 232         * storing in inode timestamp which may be 64 bit. Otherwise
 233         * a time before epoch is converted to a time long after epoch
 234         * on 64 bit systems.
 235         */
 236        inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec);
 237        inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec);
 238        inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec);
 239        inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
 240        inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
 241        inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
 242        inode->i_generation = be32_to_cpu(from->di_gen);
 243        inode->i_mode = be16_to_cpu(from->di_mode);
 244
 245        to->di_size = be64_to_cpu(from->di_size);
 246        to->di_nblocks = be64_to_cpu(from->di_nblocks);
 247        to->di_extsize = be32_to_cpu(from->di_extsize);
 248        to->di_nextents = be32_to_cpu(from->di_nextents);
 249        to->di_anextents = be16_to_cpu(from->di_anextents);
 250        to->di_forkoff = from->di_forkoff;
 251        to->di_aformat  = from->di_aformat;
 252        to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
 253        to->di_dmstate  = be16_to_cpu(from->di_dmstate);
 254        to->di_flags    = be16_to_cpu(from->di_flags);
 255
 256        if (to->di_version == 3) {
 257                inode_set_iversion_queried(inode,
 258                                           be64_to_cpu(from->di_changecount));
 259                to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
 260                to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
 261                to->di_flags2 = be64_to_cpu(from->di_flags2);
 262                to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
 263        }
 264}
 265
 266void
 267xfs_inode_to_disk(
 268        struct xfs_inode        *ip,
 269        struct xfs_dinode       *to,
 270        xfs_lsn_t               lsn)
 271{
 272        struct xfs_icdinode     *from = &ip->i_d;
 273        struct inode            *inode = VFS_I(ip);
 274
 275        to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 276        to->di_onlink = 0;
 277
 278        to->di_version = from->di_version;
 279        to->di_format = from->di_format;
 280        to->di_uid = cpu_to_be32(from->di_uid);
 281        to->di_gid = cpu_to_be32(from->di_gid);
 282        to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
 283        to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
 284
 285        memset(to->di_pad, 0, sizeof(to->di_pad));
 286        to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec);
 287        to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
 288        to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec);
 289        to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
 290        to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec);
 291        to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
 292        to->di_nlink = cpu_to_be32(inode->i_nlink);
 293        to->di_gen = cpu_to_be32(inode->i_generation);
 294        to->di_mode = cpu_to_be16(inode->i_mode);
 295
 296        to->di_size = cpu_to_be64(from->di_size);
 297        to->di_nblocks = cpu_to_be64(from->di_nblocks);
 298        to->di_extsize = cpu_to_be32(from->di_extsize);
 299        to->di_nextents = cpu_to_be32(from->di_nextents);
 300        to->di_anextents = cpu_to_be16(from->di_anextents);
 301        to->di_forkoff = from->di_forkoff;
 302        to->di_aformat = from->di_aformat;
 303        to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
 304        to->di_dmstate = cpu_to_be16(from->di_dmstate);
 305        to->di_flags = cpu_to_be16(from->di_flags);
 306
 307        if (from->di_version == 3) {
 308                to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
 309                to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
 310                to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
 311                to->di_flags2 = cpu_to_be64(from->di_flags2);
 312                to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
 313                to->di_ino = cpu_to_be64(ip->i_ino);
 314                to->di_lsn = cpu_to_be64(lsn);
 315                memset(to->di_pad2, 0, sizeof(to->di_pad2));
 316                uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
 317                to->di_flushiter = 0;
 318        } else {
 319                to->di_flushiter = cpu_to_be16(from->di_flushiter);
 320        }
 321}
 322
 323void
 324xfs_log_dinode_to_disk(
 325        struct xfs_log_dinode   *from,
 326        struct xfs_dinode       *to)
 327{
 328        to->di_magic = cpu_to_be16(from->di_magic);
 329        to->di_mode = cpu_to_be16(from->di_mode);
 330        to->di_version = from->di_version;
 331        to->di_format = from->di_format;
 332        to->di_onlink = 0;
 333        to->di_uid = cpu_to_be32(from->di_uid);
 334        to->di_gid = cpu_to_be32(from->di_gid);
 335        to->di_nlink = cpu_to_be32(from->di_nlink);
 336        to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
 337        to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
 338        memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
 339
 340        to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
 341        to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
 342        to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
 343        to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
 344        to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
 345        to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
 346
 347        to->di_size = cpu_to_be64(from->di_size);
 348        to->di_nblocks = cpu_to_be64(from->di_nblocks);
 349        to->di_extsize = cpu_to_be32(from->di_extsize);
 350        to->di_nextents = cpu_to_be32(from->di_nextents);
 351        to->di_anextents = cpu_to_be16(from->di_anextents);
 352        to->di_forkoff = from->di_forkoff;
 353        to->di_aformat = from->di_aformat;
 354        to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
 355        to->di_dmstate = cpu_to_be16(from->di_dmstate);
 356        to->di_flags = cpu_to_be16(from->di_flags);
 357        to->di_gen = cpu_to_be32(from->di_gen);
 358
 359        if (from->di_version == 3) {
 360                to->di_changecount = cpu_to_be64(from->di_changecount);
 361                to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
 362                to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
 363                to->di_flags2 = cpu_to_be64(from->di_flags2);
 364                to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
 365                to->di_ino = cpu_to_be64(from->di_ino);
 366                to->di_lsn = cpu_to_be64(from->di_lsn);
 367                memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
 368                uuid_copy(&to->di_uuid, &from->di_uuid);
 369                to->di_flushiter = 0;
 370        } else {
 371                to->di_flushiter = cpu_to_be16(from->di_flushiter);
 372        }
 373}
 374
 375static xfs_failaddr_t
 376xfs_dinode_verify_fork(
 377        struct xfs_dinode       *dip,
 378        struct xfs_mount        *mp,
 379        int                     whichfork)
 380{
 381        uint32_t                di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
 382
 383        switch (XFS_DFORK_FORMAT(dip, whichfork)) {
 384        case XFS_DINODE_FMT_LOCAL:
 385                /*
 386                 * no local regular files yet
 387                 */
 388                if (whichfork == XFS_DATA_FORK) {
 389                        if (S_ISREG(be16_to_cpu(dip->di_mode)))
 390                                return __this_address;
 391                        if (be64_to_cpu(dip->di_size) >
 392                                        XFS_DFORK_SIZE(dip, mp, whichfork))
 393                                return __this_address;
 394                }
 395                if (di_nextents)
 396                        return __this_address;
 397                break;
 398        case XFS_DINODE_FMT_EXTENTS:
 399                if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
 400                        return __this_address;
 401                break;
 402        case XFS_DINODE_FMT_BTREE:
 403                if (whichfork == XFS_ATTR_FORK) {
 404                        if (di_nextents > MAXAEXTNUM)
 405                                return __this_address;
 406                } else if (di_nextents > MAXEXTNUM) {
 407                        return __this_address;
 408                }
 409                break;
 410        default:
 411                return __this_address;
 412        }
 413        return NULL;
 414}
 415
 416static xfs_failaddr_t
 417xfs_dinode_verify_forkoff(
 418        struct xfs_dinode       *dip,
 419        struct xfs_mount        *mp)
 420{
 421        if (!XFS_DFORK_Q(dip))
 422                return NULL;
 423
 424        switch (dip->di_format)  {
 425        case XFS_DINODE_FMT_DEV:
 426                if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
 427                        return __this_address;
 428                break;
 429        case XFS_DINODE_FMT_LOCAL:      /* fall through ... */
 430        case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
 431        case XFS_DINODE_FMT_BTREE:
 432                if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3))
 433                        return __this_address;
 434                break;
 435        default:
 436                return __this_address;
 437        }
 438        return NULL;
 439}
 440
 441xfs_failaddr_t
 442xfs_dinode_verify(
 443        struct xfs_mount        *mp,
 444        xfs_ino_t               ino,
 445        struct xfs_dinode       *dip)
 446{
 447        xfs_failaddr_t          fa;
 448        uint16_t                mode;
 449        uint16_t                flags;
 450        uint64_t                flags2;
 451        uint64_t                di_size;
 452
 453        if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
 454                return __this_address;
 455
 456        /* Verify v3 integrity information first */
 457        if (dip->di_version >= 3) {
 458                if (!xfs_sb_version_hascrc(&mp->m_sb))
 459                        return __this_address;
 460                if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
 461                                      XFS_DINODE_CRC_OFF))
 462                        return __this_address;
 463                if (be64_to_cpu(dip->di_ino) != ino)
 464                        return __this_address;
 465                if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
 466                        return __this_address;
 467        }
 468
 469        /* don't allow invalid i_size */
 470        di_size = be64_to_cpu(dip->di_size);
 471        if (di_size & (1ULL << 63))
 472                return __this_address;
 473
 474        mode = be16_to_cpu(dip->di_mode);
 475        if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
 476                return __this_address;
 477
 478        /* No zero-length symlinks/dirs. */
 479        if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
 480                return __this_address;
 481
 482        /* Fork checks carried over from xfs_iformat_fork */
 483        if (mode &&
 484            be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
 485                        be64_to_cpu(dip->di_nblocks))
 486                return __this_address;
 487
 488        if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
 489                return __this_address;
 490
 491        flags = be16_to_cpu(dip->di_flags);
 492
 493        if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
 494                return __this_address;
 495
 496        /* check for illegal values of forkoff */
 497        fa = xfs_dinode_verify_forkoff(dip, mp);
 498        if (fa)
 499                return fa;
 500
 501        /* Do we have appropriate data fork formats for the mode? */
 502        switch (mode & S_IFMT) {
 503        case S_IFIFO:
 504        case S_IFCHR:
 505        case S_IFBLK:
 506        case S_IFSOCK:
 507                if (dip->di_format != XFS_DINODE_FMT_DEV)
 508                        return __this_address;
 509                break;
 510        case S_IFREG:
 511        case S_IFLNK:
 512        case S_IFDIR:
 513                fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
 514                if (fa)
 515                        return fa;
 516                break;
 517        case 0:
 518                /* Uninitialized inode ok. */
 519                break;
 520        default:
 521                return __this_address;
 522        }
 523
 524        if (XFS_DFORK_Q(dip)) {
 525                fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
 526                if (fa)
 527                        return fa;
 528        } else {
 529                /*
 530                 * If there is no fork offset, this may be a freshly-made inode
 531                 * in a new disk cluster, in which case di_aformat is zeroed.
 532                 * Otherwise, such an inode must be in EXTENTS format; this goes
 533                 * for freed inodes as well.
 534                 */
 535                switch (dip->di_aformat) {
 536                case 0:
 537                case XFS_DINODE_FMT_EXTENTS:
 538                        break;
 539                default:
 540                        return __this_address;
 541                }
 542                if (dip->di_anextents)
 543                        return __this_address;
 544        }
 545
 546        /* extent size hint validation */
 547        fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
 548                        mode, flags);
 549        if (fa)
 550                return fa;
 551
 552        /* only version 3 or greater inodes are extensively verified here */
 553        if (dip->di_version < 3)
 554                return NULL;
 555
 556        flags2 = be64_to_cpu(dip->di_flags2);
 557
 558        /* don't allow reflink/cowextsize if we don't have reflink */
 559        if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
 560             !xfs_sb_version_hasreflink(&mp->m_sb))
 561                return __this_address;
 562
 563        /* only regular files get reflink */
 564        if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
 565                return __this_address;
 566
 567        /* don't let reflink and realtime mix */
 568        if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
 569                return __this_address;
 570
 571        /* don't let reflink and dax mix */
 572        if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX))
 573                return __this_address;
 574
 575        /* COW extent size hint validation */
 576        fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
 577                        mode, flags, flags2);
 578        if (fa)
 579                return fa;
 580
 581        return NULL;
 582}
 583
 584void
 585xfs_dinode_calc_crc(
 586        struct xfs_mount        *mp,
 587        struct xfs_dinode       *dip)
 588{
 589        uint32_t                crc;
 590
 591        if (dip->di_version < 3)
 592                return;
 593
 594        ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
 595        crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
 596                              XFS_DINODE_CRC_OFF);
 597        dip->di_crc = xfs_end_cksum(crc);
 598}
 599
 600/*
 601 * Read the disk inode attributes into the in-core inode structure.
 602 *
 603 * For version 5 superblocks, if we are initialising a new inode and we are not
 604 * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
 605 * inode core with a random generation number. If we are keeping inodes around,
 606 * we need to read the inode cluster to get the existing generation number off
 607 * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
 608 * format) then log recovery is dependent on the di_flushiter field being
 609 * initialised from the current on-disk value and hence we must also read the
 610 * inode off disk.
 611 */
 612int
 613xfs_iread(
 614        xfs_mount_t     *mp,
 615        xfs_trans_t     *tp,
 616        xfs_inode_t     *ip,
 617        uint            iget_flags)
 618{
 619        xfs_buf_t       *bp;
 620        xfs_dinode_t    *dip;
 621        xfs_failaddr_t  fa;
 622        int             error;
 623
 624        /*
 625         * Fill in the location information in the in-core inode.
 626         */
 627        error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
 628        if (error)
 629                return error;
 630
 631        /* shortcut IO on inode allocation if possible */
 632        if ((iget_flags & XFS_IGET_CREATE) &&
 633            xfs_sb_version_hascrc(&mp->m_sb) &&
 634            !(mp->m_flags & XFS_MOUNT_IKEEP)) {
 635                /* initialise the on-disk inode core */
 636                memset(&ip->i_d, 0, sizeof(ip->i_d));
 637                VFS_I(ip)->i_generation = prandom_u32();
 638                ip->i_d.di_version = 3;
 639                return 0;
 640        }
 641
 642        /*
 643         * Get pointers to the on-disk inode and the buffer containing it.
 644         */
 645        error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
 646        if (error)
 647                return error;
 648
 649        /* even unallocated inodes are verified */
 650        fa = xfs_dinode_verify(mp, ip->i_ino, dip);
 651        if (fa) {
 652                xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip,
 653                                sizeof(*dip), fa);
 654                error = -EFSCORRUPTED;
 655                goto out_brelse;
 656        }
 657
 658        /*
 659         * If the on-disk inode is already linked to a directory
 660         * entry, copy all of the inode into the in-core inode.
 661         * xfs_iformat_fork() handles copying in the inode format
 662         * specific information.
 663         * Otherwise, just get the truly permanent information.
 664         */
 665        if (dip->di_mode) {
 666                xfs_inode_from_disk(ip, dip);
 667                error = xfs_iformat_fork(ip, dip);
 668                if (error)  {
 669#ifdef DEBUG
 670                        xfs_alert(mp, "%s: xfs_iformat() returned error %d",
 671                                __func__, error);
 672#endif /* DEBUG */
 673                        goto out_brelse;
 674                }
 675        } else {
 676                /*
 677                 * Partial initialisation of the in-core inode. Just the bits
 678                 * that xfs_ialloc won't overwrite or relies on being correct.
 679                 */
 680                ip->i_d.di_version = dip->di_version;
 681                VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen);
 682                ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
 683
 684                /*
 685                 * Make sure to pull in the mode here as well in
 686                 * case the inode is released without being used.
 687                 * This ensures that xfs_inactive() will see that
 688                 * the inode is already free and not try to mess
 689                 * with the uninitialized part of it.
 690                 */
 691                VFS_I(ip)->i_mode = 0;
 692        }
 693
 694        ASSERT(ip->i_d.di_version >= 2);
 695        ip->i_delayed_blks = 0;
 696
 697        /*
 698         * Mark the buffer containing the inode as something to keep
 699         * around for a while.  This helps to keep recently accessed
 700         * meta-data in-core longer.
 701         */
 702        xfs_buf_set_ref(bp, XFS_INO_REF);
 703
 704        /*
 705         * Use xfs_trans_brelse() to release the buffer containing the on-disk
 706         * inode, because it was acquired with xfs_trans_read_buf() in
 707         * xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
 708         * brelse().  If we're within a transaction, then xfs_trans_brelse()
 709         * will only release the buffer if it is not dirty within the
 710         * transaction.  It will be OK to release the buffer in this case,
 711         * because inodes on disk are never destroyed and we will be locking the
 712         * new in-core inode before putting it in the cache where other
 713         * processes can find it.  Thus we don't have to worry about the inode
 714         * being changed just because we released the buffer.
 715         */
 716 out_brelse:
 717        xfs_trans_brelse(tp, bp);
 718        return error;
 719}
 720
 721/*
 722 * Validate di_extsize hint.
 723 *
 724 * The rules are documented at xfs_ioctl_setattr_check_extsize().
 725 * These functions must be kept in sync with each other.
 726 */
 727xfs_failaddr_t
 728xfs_inode_validate_extsize(
 729        struct xfs_mount                *mp,
 730        uint32_t                        extsize,
 731        uint16_t                        mode,
 732        uint16_t                        flags)
 733{
 734        bool                            rt_flag;
 735        bool                            hint_flag;
 736        bool                            inherit_flag;
 737        uint32_t                        extsize_bytes;
 738        uint32_t                        blocksize_bytes;
 739
 740        rt_flag = (flags & XFS_DIFLAG_REALTIME);
 741        hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
 742        inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
 743        extsize_bytes = XFS_FSB_TO_B(mp, extsize);
 744
 745        if (rt_flag)
 746                blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
 747        else
 748                blocksize_bytes = mp->m_sb.sb_blocksize;
 749
 750        if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
 751                return __this_address;
 752
 753        if (hint_flag && !S_ISREG(mode))
 754                return __this_address;
 755
 756        if (inherit_flag && !S_ISDIR(mode))
 757                return __this_address;
 758
 759        if ((hint_flag || inherit_flag) && extsize == 0)
 760                return __this_address;
 761
 762        /* free inodes get flags set to zero but extsize remains */
 763        if (mode && !(hint_flag || inherit_flag) && extsize != 0)
 764                return __this_address;
 765
 766        if (extsize_bytes % blocksize_bytes)
 767                return __this_address;
 768
 769        if (extsize > MAXEXTLEN)
 770                return __this_address;
 771
 772        if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
 773                return __this_address;
 774
 775        return NULL;
 776}
 777
 778/*
 779 * Validate di_cowextsize hint.
 780 *
 781 * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
 782 * These functions must be kept in sync with each other.
 783 */
 784xfs_failaddr_t
 785xfs_inode_validate_cowextsize(
 786        struct xfs_mount                *mp,
 787        uint32_t                        cowextsize,
 788        uint16_t                        mode,
 789        uint16_t                        flags,
 790        uint64_t                        flags2)
 791{
 792        bool                            rt_flag;
 793        bool                            hint_flag;
 794        uint32_t                        cowextsize_bytes;
 795
 796        rt_flag = (flags & XFS_DIFLAG_REALTIME);
 797        hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
 798        cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
 799
 800        if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
 801                return __this_address;
 802
 803        if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
 804                return __this_address;
 805
 806        if (hint_flag && cowextsize == 0)
 807                return __this_address;
 808
 809        /* free inodes get flags set to zero but cowextsize remains */
 810        if (mode && !hint_flag && cowextsize != 0)
 811                return __this_address;
 812
 813        if (hint_flag && rt_flag)
 814                return __this_address;
 815
 816        if (cowextsize_bytes % mp->m_sb.sb_blocksize)
 817                return __this_address;
 818
 819        if (cowextsize > MAXEXTLEN)
 820                return __this_address;
 821
 822        if (cowextsize > mp->m_sb.sb_agblocks / 2)
 823                return __this_address;
 824
 825        return NULL;
 826}
 827