linux/fs/xfs/libxfs/xfs_inode_buf.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
   4 * All Rights Reserved.
   5 */
   6#include "xfs.h"
   7#include "xfs_fs.h"
   8#include "xfs_shared.h"
   9#include "xfs_format.h"
  10#include "xfs_log_format.h"
  11#include "xfs_trans_resv.h"
  12#include "xfs_mount.h"
  13#include "xfs_inode.h"
  14#include "xfs_errortag.h"
  15#include "xfs_error.h"
  16#include "xfs_icache.h"
  17#include "xfs_trans.h"
  18#include "xfs_ialloc.h"
  19#include "xfs_dir2.h"
  20
  21#include <linux/iversion.h>
  22
  23/*
  24 * If we are doing readahead on an inode buffer, we might be in log recovery
  25 * reading an inode allocation buffer that hasn't yet been replayed, and hence
  26 * has not had the inode cores stamped into it. Hence for readahead, the buffer
  27 * may be potentially invalid.
  28 *
  29 * If the readahead buffer is invalid, we need to mark it with an error and
  30 * clear the DONE status of the buffer so that a followup read will re-read it
  31 * from disk. We don't report the error otherwise to avoid warnings during log
  32 * recovery and we don't get unnecessary panics on debug kernels. We use EIO here
  33 * because all we want to do is say readahead failed; there is no-one to report
  34 * the error to, so this will distinguish it from a non-ra verifier failure.
  35 * Changes to this readahead error behaviour also need to be reflected in
  36 * xfs_dquot_buf_readahead_verify().
  37 */
  38static void
  39xfs_inode_buf_verify(
  40        struct xfs_buf  *bp,
  41        bool            readahead)
  42{
  43        struct xfs_mount *mp = bp->b_mount;
  44        xfs_agnumber_t  agno;
  45        int             i;
  46        int             ni;
  47
  48        /*
  49         * Validate the magic number and version of every inode in the buffer
  50         */
  51        agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp));
  52        ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
  53        for (i = 0; i < ni; i++) {
  54                int             di_ok;
  55                xfs_dinode_t    *dip;
  56                xfs_agino_t     unlinked_ino;
  57
  58                dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
  59                unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
  60                di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
  61                        xfs_dinode_good_version(mp, dip->di_version) &&
  62                        xfs_verify_agino_or_null(mp, agno, unlinked_ino);
  63                if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
  64                                                XFS_ERRTAG_ITOBP_INOTOBP))) {
  65                        if (readahead) {
  66                                bp->b_flags &= ~XBF_DONE;
  67                                xfs_buf_ioerror(bp, -EIO);
  68                                return;
  69                        }
  70
  71#ifdef DEBUG
  72                        xfs_alert(mp,
  73                                "bad inode magic/vsn daddr %lld #%d (magic=%x)",
  74                                (unsigned long long)xfs_buf_daddr(bp), i,
  75                                be16_to_cpu(dip->di_magic));
  76#endif
  77                        xfs_buf_verifier_error(bp, -EFSCORRUPTED,
  78                                        __func__, dip, sizeof(*dip),
  79                                        NULL);
  80                        return;
  81                }
  82        }
  83}
  84
  85
  86static void
  87xfs_inode_buf_read_verify(
  88        struct xfs_buf  *bp)
  89{
  90        xfs_inode_buf_verify(bp, false);
  91}
  92
  93static void
  94xfs_inode_buf_readahead_verify(
  95        struct xfs_buf  *bp)
  96{
  97        xfs_inode_buf_verify(bp, true);
  98}
  99
 100static void
 101xfs_inode_buf_write_verify(
 102        struct xfs_buf  *bp)
 103{
 104        xfs_inode_buf_verify(bp, false);
 105}
 106
 107const struct xfs_buf_ops xfs_inode_buf_ops = {
 108        .name = "xfs_inode",
 109        .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
 110                     cpu_to_be16(XFS_DINODE_MAGIC) },
 111        .verify_read = xfs_inode_buf_read_verify,
 112        .verify_write = xfs_inode_buf_write_verify,
 113};
 114
 115const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
 116        .name = "xfs_inode_ra",
 117        .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
 118                     cpu_to_be16(XFS_DINODE_MAGIC) },
 119        .verify_read = xfs_inode_buf_readahead_verify,
 120        .verify_write = xfs_inode_buf_write_verify,
 121};
 122
 123
 124/*
 125 * This routine is called to map an inode to the buffer containing the on-disk
 126 * version of the inode.  It returns a pointer to the buffer containing the
 127 * on-disk inode in the bpp parameter.
 128 */
 129int
 130xfs_imap_to_bp(
 131        struct xfs_mount        *mp,
 132        struct xfs_trans        *tp,
 133        struct xfs_imap         *imap,
 134        struct xfs_buf          **bpp)
 135{
 136        return xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
 137                                   imap->im_len, XBF_UNMAPPED, bpp,
 138                                   &xfs_inode_buf_ops);
 139}
 140
 141static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts)
 142{
 143        struct timespec64       tv;
 144        uint32_t                n;
 145
 146        tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n));
 147        tv.tv_nsec = n;
 148
 149        return tv;
 150}
 151
 152/* Convert an ondisk timestamp to an incore timestamp. */
 153struct timespec64
 154xfs_inode_from_disk_ts(
 155        struct xfs_dinode               *dip,
 156        const xfs_timestamp_t           ts)
 157{
 158        struct timespec64               tv;
 159        struct xfs_legacy_timestamp     *lts;
 160
 161        if (xfs_dinode_has_bigtime(dip))
 162                return xfs_inode_decode_bigtime(be64_to_cpu(ts));
 163
 164        lts = (struct xfs_legacy_timestamp *)&ts;
 165        tv.tv_sec = (int)be32_to_cpu(lts->t_sec);
 166        tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec);
 167
 168        return tv;
 169}
 170
 171int
 172xfs_inode_from_disk(
 173        struct xfs_inode        *ip,
 174        struct xfs_dinode       *from)
 175{
 176        struct inode            *inode = VFS_I(ip);
 177        int                     error;
 178        xfs_failaddr_t          fa;
 179
 180        ASSERT(ip->i_cowfp == NULL);
 181        ASSERT(ip->i_afp == NULL);
 182
 183        fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from);
 184        if (fa) {
 185                xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from,
 186                                sizeof(*from), fa);
 187                return -EFSCORRUPTED;
 188        }
 189
 190        /*
 191         * First get the permanent information that is needed to allocate an
 192         * inode. If the inode is unused, mode is zero and we shouldn't mess
 193         * with the uninitialized part of it.
 194         */
 195        if (!xfs_has_v3inodes(ip->i_mount))
 196                ip->i_flushiter = be16_to_cpu(from->di_flushiter);
 197        inode->i_generation = be32_to_cpu(from->di_gen);
 198        inode->i_mode = be16_to_cpu(from->di_mode);
 199        if (!inode->i_mode)
 200                return 0;
 201
 202        /*
 203         * Convert v1 inodes immediately to v2 inode format as this is the
 204         * minimum inode version format we support in the rest of the code.
 205         * They will also be unconditionally written back to disk as v2 inodes.
 206         */
 207        if (unlikely(from->di_version == 1)) {
 208                set_nlink(inode, be16_to_cpu(from->di_onlink));
 209                ip->i_projid = 0;
 210        } else {
 211                set_nlink(inode, be32_to_cpu(from->di_nlink));
 212                ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 |
 213                                        be16_to_cpu(from->di_projid_lo);
 214        }
 215
 216        i_uid_write(inode, be32_to_cpu(from->di_uid));
 217        i_gid_write(inode, be32_to_cpu(from->di_gid));
 218
 219        /*
 220         * Time is signed, so need to convert to signed 32 bit before
 221         * storing in inode timestamp which may be 64 bit. Otherwise
 222         * a time before epoch is converted to a time long after epoch
 223         * on 64 bit systems.
 224         */
 225        inode->i_atime = xfs_inode_from_disk_ts(from, from->di_atime);
 226        inode->i_mtime = xfs_inode_from_disk_ts(from, from->di_mtime);
 227        inode->i_ctime = xfs_inode_from_disk_ts(from, from->di_ctime);
 228
 229        ip->i_disk_size = be64_to_cpu(from->di_size);
 230        ip->i_nblocks = be64_to_cpu(from->di_nblocks);
 231        ip->i_extsize = be32_to_cpu(from->di_extsize);
 232        ip->i_forkoff = from->di_forkoff;
 233        ip->i_diflags   = be16_to_cpu(from->di_flags);
 234
 235        if (from->di_dmevmask || from->di_dmstate)
 236                xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS);
 237
 238        if (xfs_has_v3inodes(ip->i_mount)) {
 239                inode_set_iversion_queried(inode,
 240                                           be64_to_cpu(from->di_changecount));
 241                ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime);
 242                ip->i_diflags2 = be64_to_cpu(from->di_flags2);
 243                ip->i_cowextsize = be32_to_cpu(from->di_cowextsize);
 244        }
 245
 246        error = xfs_iformat_data_fork(ip, from);
 247        if (error)
 248                return error;
 249        if (from->di_forkoff) {
 250                error = xfs_iformat_attr_fork(ip, from);
 251                if (error)
 252                        goto out_destroy_data_fork;
 253        }
 254        if (xfs_is_reflink_inode(ip))
 255                xfs_ifork_init_cow(ip);
 256        return 0;
 257
 258out_destroy_data_fork:
 259        xfs_idestroy_fork(&ip->i_df);
 260        return error;
 261}
 262
 263/* Convert an incore timestamp to an ondisk timestamp. */
 264static inline xfs_timestamp_t
 265xfs_inode_to_disk_ts(
 266        struct xfs_inode                *ip,
 267        const struct timespec64         tv)
 268{
 269        struct xfs_legacy_timestamp     *lts;
 270        xfs_timestamp_t                 ts;
 271
 272        if (xfs_inode_has_bigtime(ip))
 273                return cpu_to_be64(xfs_inode_encode_bigtime(tv));
 274
 275        lts = (struct xfs_legacy_timestamp *)&ts;
 276        lts->t_sec = cpu_to_be32(tv.tv_sec);
 277        lts->t_nsec = cpu_to_be32(tv.tv_nsec);
 278
 279        return ts;
 280}
 281
 282void
 283xfs_inode_to_disk(
 284        struct xfs_inode        *ip,
 285        struct xfs_dinode       *to,
 286        xfs_lsn_t               lsn)
 287{
 288        struct inode            *inode = VFS_I(ip);
 289
 290        to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 291        to->di_onlink = 0;
 292
 293        to->di_format = xfs_ifork_format(&ip->i_df);
 294        to->di_uid = cpu_to_be32(i_uid_read(inode));
 295        to->di_gid = cpu_to_be32(i_gid_read(inode));
 296        to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
 297        to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);
 298
 299        memset(to->di_pad, 0, sizeof(to->di_pad));
 300        to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
 301        to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
 302        to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
 303        to->di_nlink = cpu_to_be32(inode->i_nlink);
 304        to->di_gen = cpu_to_be32(inode->i_generation);
 305        to->di_mode = cpu_to_be16(inode->i_mode);
 306
 307        to->di_size = cpu_to_be64(ip->i_disk_size);
 308        to->di_nblocks = cpu_to_be64(ip->i_nblocks);
 309        to->di_extsize = cpu_to_be32(ip->i_extsize);
 310        to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
 311        to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
 312        to->di_forkoff = ip->i_forkoff;
 313        to->di_aformat = xfs_ifork_format(ip->i_afp);
 314        to->di_flags = cpu_to_be16(ip->i_diflags);
 315
 316        if (xfs_has_v3inodes(ip->i_mount)) {
 317                to->di_version = 3;
 318                to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
 319                to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime);
 320                to->di_flags2 = cpu_to_be64(ip->i_diflags2);
 321                to->di_cowextsize = cpu_to_be32(ip->i_cowextsize);
 322                to->di_ino = cpu_to_be64(ip->i_ino);
 323                to->di_lsn = cpu_to_be64(lsn);
 324                memset(to->di_pad2, 0, sizeof(to->di_pad2));
 325                uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
 326                to->di_flushiter = 0;
 327        } else {
 328                to->di_version = 2;
 329                to->di_flushiter = cpu_to_be16(ip->i_flushiter);
 330        }
 331}
 332
 333static xfs_failaddr_t
 334xfs_dinode_verify_fork(
 335        struct xfs_dinode       *dip,
 336        struct xfs_mount        *mp,
 337        int                     whichfork)
 338{
 339        uint32_t                di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
 340
 341        switch (XFS_DFORK_FORMAT(dip, whichfork)) {
 342        case XFS_DINODE_FMT_LOCAL:
 343                /*
 344                 * no local regular files yet
 345                 */
 346                if (whichfork == XFS_DATA_FORK) {
 347                        if (S_ISREG(be16_to_cpu(dip->di_mode)))
 348                                return __this_address;
 349                        if (be64_to_cpu(dip->di_size) >
 350                                        XFS_DFORK_SIZE(dip, mp, whichfork))
 351                                return __this_address;
 352                }
 353                if (di_nextents)
 354                        return __this_address;
 355                break;
 356        case XFS_DINODE_FMT_EXTENTS:
 357                if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
 358                        return __this_address;
 359                break;
 360        case XFS_DINODE_FMT_BTREE:
 361                if (whichfork == XFS_ATTR_FORK) {
 362                        if (di_nextents > MAXAEXTNUM)
 363                                return __this_address;
 364                } else if (di_nextents > MAXEXTNUM) {
 365                        return __this_address;
 366                }
 367                break;
 368        default:
 369                return __this_address;
 370        }
 371        return NULL;
 372}
 373
 374static xfs_failaddr_t
 375xfs_dinode_verify_forkoff(
 376        struct xfs_dinode       *dip,
 377        struct xfs_mount        *mp)
 378{
 379        if (!dip->di_forkoff)
 380                return NULL;
 381
 382        switch (dip->di_format)  {
 383        case XFS_DINODE_FMT_DEV:
 384                if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
 385                        return __this_address;
 386                break;
 387        case XFS_DINODE_FMT_LOCAL:      /* fall through ... */
 388        case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
 389        case XFS_DINODE_FMT_BTREE:
 390                if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3))
 391                        return __this_address;
 392                break;
 393        default:
 394                return __this_address;
 395        }
 396        return NULL;
 397}
 398
 399xfs_failaddr_t
 400xfs_dinode_verify(
 401        struct xfs_mount        *mp,
 402        xfs_ino_t               ino,
 403        struct xfs_dinode       *dip)
 404{
 405        xfs_failaddr_t          fa;
 406        uint16_t                mode;
 407        uint16_t                flags;
 408        uint64_t                flags2;
 409        uint64_t                di_size;
 410
 411        if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
 412                return __this_address;
 413
 414        /* Verify v3 integrity information first */
 415        if (dip->di_version >= 3) {
 416                if (!xfs_has_v3inodes(mp))
 417                        return __this_address;
 418                if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
 419                                      XFS_DINODE_CRC_OFF))
 420                        return __this_address;
 421                if (be64_to_cpu(dip->di_ino) != ino)
 422                        return __this_address;
 423                if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
 424                        return __this_address;
 425        }
 426
 427        /* don't allow invalid i_size */
 428        di_size = be64_to_cpu(dip->di_size);
 429        if (di_size & (1ULL << 63))
 430                return __this_address;
 431
 432        mode = be16_to_cpu(dip->di_mode);
 433        if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
 434                return __this_address;
 435
 436        /* No zero-length symlinks/dirs. */
 437        if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
 438                return __this_address;
 439
 440        /* Fork checks carried over from xfs_iformat_fork */
 441        if (mode &&
 442            be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
 443                        be64_to_cpu(dip->di_nblocks))
 444                return __this_address;
 445
 446        if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
 447                return __this_address;
 448
 449        flags = be16_to_cpu(dip->di_flags);
 450
 451        if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
 452                return __this_address;
 453
 454        /* check for illegal values of forkoff */
 455        fa = xfs_dinode_verify_forkoff(dip, mp);
 456        if (fa)
 457                return fa;
 458
 459        /* Do we have appropriate data fork formats for the mode? */
 460        switch (mode & S_IFMT) {
 461        case S_IFIFO:
 462        case S_IFCHR:
 463        case S_IFBLK:
 464        case S_IFSOCK:
 465                if (dip->di_format != XFS_DINODE_FMT_DEV)
 466                        return __this_address;
 467                break;
 468        case S_IFREG:
 469        case S_IFLNK:
 470        case S_IFDIR:
 471                fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
 472                if (fa)
 473                        return fa;
 474                break;
 475        case 0:
 476                /* Uninitialized inode ok. */
 477                break;
 478        default:
 479                return __this_address;
 480        }
 481
 482        if (dip->di_forkoff) {
 483                fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
 484                if (fa)
 485                        return fa;
 486        } else {
 487                /*
 488                 * If there is no fork offset, this may be a freshly-made inode
 489                 * in a new disk cluster, in which case di_aformat is zeroed.
 490                 * Otherwise, such an inode must be in EXTENTS format; this goes
 491                 * for freed inodes as well.
 492                 */
 493                switch (dip->di_aformat) {
 494                case 0:
 495                case XFS_DINODE_FMT_EXTENTS:
 496                        break;
 497                default:
 498                        return __this_address;
 499                }
 500                if (dip->di_anextents)
 501                        return __this_address;
 502        }
 503
 504        /* extent size hint validation */
 505        fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
 506                        mode, flags);
 507        if (fa)
 508                return fa;
 509
 510        /* only version 3 or greater inodes are extensively verified here */
 511        if (dip->di_version < 3)
 512                return NULL;
 513
 514        flags2 = be64_to_cpu(dip->di_flags2);
 515
 516        /* don't allow reflink/cowextsize if we don't have reflink */
 517        if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
 518             !xfs_has_reflink(mp))
 519                return __this_address;
 520
 521        /* only regular files get reflink */
 522        if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
 523                return __this_address;
 524
 525        /* don't let reflink and realtime mix */
 526        if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
 527                return __this_address;
 528
 529        /* COW extent size hint validation */
 530        fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
 531                        mode, flags, flags2);
 532        if (fa)
 533                return fa;
 534
 535        /* bigtime iflag can only happen on bigtime filesystems */
 536        if (xfs_dinode_has_bigtime(dip) &&
 537            !xfs_has_bigtime(mp))
 538                return __this_address;
 539
 540        return NULL;
 541}
 542
 543void
 544xfs_dinode_calc_crc(
 545        struct xfs_mount        *mp,
 546        struct xfs_dinode       *dip)
 547{
 548        uint32_t                crc;
 549
 550        if (dip->di_version < 3)
 551                return;
 552
 553        ASSERT(xfs_has_crc(mp));
 554        crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
 555                              XFS_DINODE_CRC_OFF);
 556        dip->di_crc = xfs_end_cksum(crc);
 557}
 558
 559/*
 560 * Validate di_extsize hint.
 561 *
 562 * 1. Extent size hint is only valid for directories and regular files.
 563 * 2. FS_XFLAG_EXTSIZE is only valid for regular files.
 564 * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
 565 * 4. Hint cannot be larger than MAXTEXTLEN.
 566 * 5. Can be changed on directories at any time.
 567 * 6. Hint value of 0 turns off hints, clears inode flags.
 568 * 7. Extent size must be a multiple of the appropriate block size.
 569 *    For realtime files, this is the rt extent size.
 570 * 8. For non-realtime files, the extent size hint must be limited
 571 *    to half the AG size to avoid alignment extending the extent beyond the
 572 *    limits of the AG.
 573 */
 574xfs_failaddr_t
 575xfs_inode_validate_extsize(
 576        struct xfs_mount                *mp,
 577        uint32_t                        extsize,
 578        uint16_t                        mode,
 579        uint16_t                        flags)
 580{
 581        bool                            rt_flag;
 582        bool                            hint_flag;
 583        bool                            inherit_flag;
 584        uint32_t                        extsize_bytes;
 585        uint32_t                        blocksize_bytes;
 586
 587        rt_flag = (flags & XFS_DIFLAG_REALTIME);
 588        hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
 589        inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
 590        extsize_bytes = XFS_FSB_TO_B(mp, extsize);
 591
 592        /*
 593         * This comment describes a historic gap in this verifier function.
 594         *
 595         * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this
 596         * function has never checked that the extent size hint is an integer
 597         * multiple of the realtime extent size.  Since we allow users to set
 598         * this combination  on non-rt filesystems /and/ to change the rt
 599         * extent size when adding a rt device to a filesystem, the net effect
 600         * is that users can configure a filesystem anticipating one rt
 601         * geometry and change their minds later.  Directories do not use the
 602         * extent size hint, so this is harmless for them.
 603         *
 604         * If a directory with a misaligned extent size hint is allowed to
 605         * propagate that hint into a new regular realtime file, the result
 606         * is that the inode cluster buffer verifier will trigger a corruption
 607         * shutdown the next time it is run, because the verifier has always
 608         * enforced the alignment rule for regular files.
 609         *
 610         * Because we allow administrators to set a new rt extent size when
 611         * adding a rt section, we cannot add a check to this verifier because
 612         * that will result a new source of directory corruption errors when
 613         * reading an existing filesystem.  Instead, we rely on callers to
 614         * decide when alignment checks are appropriate, and fix things up as
 615         * needed.
 616         */
 617
 618        if (rt_flag)
 619                blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
 620        else
 621                blocksize_bytes = mp->m_sb.sb_blocksize;
 622
 623        if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
 624                return __this_address;
 625
 626        if (hint_flag && !S_ISREG(mode))
 627                return __this_address;
 628
 629        if (inherit_flag && !S_ISDIR(mode))
 630                return __this_address;
 631
 632        if ((hint_flag || inherit_flag) && extsize == 0)
 633                return __this_address;
 634
 635        /* free inodes get flags set to zero but extsize remains */
 636        if (mode && !(hint_flag || inherit_flag) && extsize != 0)
 637                return __this_address;
 638
 639        if (extsize_bytes % blocksize_bytes)
 640                return __this_address;
 641
 642        if (extsize > MAXEXTLEN)
 643                return __this_address;
 644
 645        if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
 646                return __this_address;
 647
 648        return NULL;
 649}
 650
 651/*
 652 * Validate di_cowextsize hint.
 653 *
 654 * 1. CoW extent size hint can only be set if reflink is enabled on the fs.
 655 *    The inode does not have to have any shared blocks, but it must be a v3.
 656 * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
 657 *    for a directory, the hint is propagated to new files.
 658 * 3. Can be changed on files & directories at any time.
 659 * 4. Hint value of 0 turns off hints, clears inode flags.
 660 * 5. Extent size must be a multiple of the appropriate block size.
 661 * 6. The extent size hint must be limited to half the AG size to avoid
 662 *    alignment extending the extent beyond the limits of the AG.
 663 */
 664xfs_failaddr_t
 665xfs_inode_validate_cowextsize(
 666        struct xfs_mount                *mp,
 667        uint32_t                        cowextsize,
 668        uint16_t                        mode,
 669        uint16_t                        flags,
 670        uint64_t                        flags2)
 671{
 672        bool                            rt_flag;
 673        bool                            hint_flag;
 674        uint32_t                        cowextsize_bytes;
 675
 676        rt_flag = (flags & XFS_DIFLAG_REALTIME);
 677        hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
 678        cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
 679
 680        if (hint_flag && !xfs_has_reflink(mp))
 681                return __this_address;
 682
 683        if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
 684                return __this_address;
 685
 686        if (hint_flag && cowextsize == 0)
 687                return __this_address;
 688
 689        /* free inodes get flags set to zero but cowextsize remains */
 690        if (mode && !hint_flag && cowextsize != 0)
 691                return __this_address;
 692
 693        if (hint_flag && rt_flag)
 694                return __this_address;
 695
 696        if (cowextsize_bytes % mp->m_sb.sb_blocksize)
 697                return __this_address;
 698
 699        if (cowextsize > MAXEXTLEN)
 700                return __this_address;
 701
 702        if (cowextsize > mp->m_sb.sb_agblocks / 2)
 703                return __this_address;
 704
 705        return NULL;
 706}
 707