linux/fs/xfs/libxfs/xfs_attr_remote.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   4 * Copyright (c) 2013 Red Hat, Inc.
   5 * All Rights Reserved.
   6 */
   7#include "xfs.h"
   8#include "xfs_fs.h"
   9#include "xfs_shared.h"
  10#include "xfs_format.h"
  11#include "xfs_log_format.h"
  12#include "xfs_trans_resv.h"
  13#include "xfs_bit.h"
  14#include "xfs_mount.h"
  15#include "xfs_defer.h"
  16#include "xfs_da_format.h"
  17#include "xfs_da_btree.h"
  18#include "xfs_inode.h"
  19#include "xfs_trans.h"
  20#include "xfs_bmap.h"
  21#include "xfs_attr.h"
  22#include "xfs_trace.h"
  23#include "xfs_error.h"
  24
  25#define ATTR_RMTVALUE_MAPSIZE   1       /* # of map entries at once */
  26
  27/*
  28 * Each contiguous block has a header, so it is not just a simple attribute
  29 * length to FSB conversion.
  30 */
  31int
  32xfs_attr3_rmt_blocks(
  33        struct xfs_mount *mp,
  34        int             attrlen)
  35{
  36        if (xfs_sb_version_hascrc(&mp->m_sb)) {
  37                int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
  38                return (attrlen + buflen - 1) / buflen;
  39        }
  40        return XFS_B_TO_FSB(mp, attrlen);
  41}
  42
  43/*
  44 * Checking of the remote attribute header is split into two parts. The verifier
  45 * does CRC, location and bounds checking, the unpacking function checks the
  46 * attribute parameters and owner.
  47 */
  48static xfs_failaddr_t
  49xfs_attr3_rmt_hdr_ok(
  50        void                    *ptr,
  51        xfs_ino_t               ino,
  52        uint32_t                offset,
  53        uint32_t                size,
  54        xfs_daddr_t             bno)
  55{
  56        struct xfs_attr3_rmt_hdr *rmt = ptr;
  57
  58        if (bno != be64_to_cpu(rmt->rm_blkno))
  59                return __this_address;
  60        if (offset != be32_to_cpu(rmt->rm_offset))
  61                return __this_address;
  62        if (size != be32_to_cpu(rmt->rm_bytes))
  63                return __this_address;
  64        if (ino != be64_to_cpu(rmt->rm_owner))
  65                return __this_address;
  66
  67        /* ok */
  68        return NULL;
  69}
  70
  71static xfs_failaddr_t
  72xfs_attr3_rmt_verify(
  73        struct xfs_mount        *mp,
  74        struct xfs_buf          *bp,
  75        void                    *ptr,
  76        int                     fsbsize,
  77        xfs_daddr_t             bno)
  78{
  79        struct xfs_attr3_rmt_hdr *rmt = ptr;
  80
  81        if (!xfs_sb_version_hascrc(&mp->m_sb))
  82                return __this_address;
  83        if (!xfs_verify_magic(bp, rmt->rm_magic))
  84                return __this_address;
  85        if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid))
  86                return __this_address;
  87        if (be64_to_cpu(rmt->rm_blkno) != bno)
  88                return __this_address;
  89        if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
  90                return __this_address;
  91        if (be32_to_cpu(rmt->rm_offset) +
  92                                be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX)
  93                return __this_address;
  94        if (rmt->rm_owner == 0)
  95                return __this_address;
  96
  97        return NULL;
  98}
  99
 100static int
 101__xfs_attr3_rmt_read_verify(
 102        struct xfs_buf  *bp,
 103        bool            check_crc,
 104        xfs_failaddr_t  *failaddr)
 105{
 106        struct xfs_mount *mp = bp->b_mount;
 107        char            *ptr;
 108        int             len;
 109        xfs_daddr_t     bno;
 110        int             blksize = mp->m_attr_geo->blksize;
 111
 112        /* no verification of non-crc buffers */
 113        if (!xfs_sb_version_hascrc(&mp->m_sb))
 114                return 0;
 115
 116        ptr = bp->b_addr;
 117        bno = bp->b_bn;
 118        len = BBTOB(bp->b_length);
 119        ASSERT(len >= blksize);
 120
 121        while (len > 0) {
 122                if (check_crc &&
 123                    !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
 124                        *failaddr = __this_address;
 125                        return -EFSBADCRC;
 126                }
 127                *failaddr = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno);
 128                if (*failaddr)
 129                        return -EFSCORRUPTED;
 130                len -= blksize;
 131                ptr += blksize;
 132                bno += BTOBB(blksize);
 133        }
 134
 135        if (len != 0) {
 136                *failaddr = __this_address;
 137                return -EFSCORRUPTED;
 138        }
 139
 140        return 0;
 141}
 142
 143static void
 144xfs_attr3_rmt_read_verify(
 145        struct xfs_buf  *bp)
 146{
 147        xfs_failaddr_t  fa;
 148        int             error;
 149
 150        error = __xfs_attr3_rmt_read_verify(bp, true, &fa);
 151        if (error)
 152                xfs_verifier_error(bp, error, fa);
 153}
 154
 155static xfs_failaddr_t
 156xfs_attr3_rmt_verify_struct(
 157        struct xfs_buf  *bp)
 158{
 159        xfs_failaddr_t  fa;
 160        int             error;
 161
 162        error = __xfs_attr3_rmt_read_verify(bp, false, &fa);
 163        return error ? fa : NULL;
 164}
 165
 166static void
 167xfs_attr3_rmt_write_verify(
 168        struct xfs_buf  *bp)
 169{
 170        struct xfs_mount *mp = bp->b_mount;
 171        xfs_failaddr_t  fa;
 172        int             blksize = mp->m_attr_geo->blksize;
 173        char            *ptr;
 174        int             len;
 175        xfs_daddr_t     bno;
 176
 177        /* no verification of non-crc buffers */
 178        if (!xfs_sb_version_hascrc(&mp->m_sb))
 179                return;
 180
 181        ptr = bp->b_addr;
 182        bno = bp->b_bn;
 183        len = BBTOB(bp->b_length);
 184        ASSERT(len >= blksize);
 185
 186        while (len > 0) {
 187                struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
 188
 189                fa = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno);
 190                if (fa) {
 191                        xfs_verifier_error(bp, -EFSCORRUPTED, fa);
 192                        return;
 193                }
 194
 195                /*
 196                 * Ensure we aren't writing bogus LSNs to disk. See
 197                 * xfs_attr3_rmt_hdr_set() for the explanation.
 198                 */
 199                if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
 200                        xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
 201                        return;
 202                }
 203                xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
 204
 205                len -= blksize;
 206                ptr += blksize;
 207                bno += BTOBB(blksize);
 208        }
 209
 210        if (len != 0)
 211                xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
 212}
 213
 214const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
 215        .name = "xfs_attr3_rmt",
 216        .magic = { 0, cpu_to_be32(XFS_ATTR3_RMT_MAGIC) },
 217        .verify_read = xfs_attr3_rmt_read_verify,
 218        .verify_write = xfs_attr3_rmt_write_verify,
 219        .verify_struct = xfs_attr3_rmt_verify_struct,
 220};
 221
 222STATIC int
 223xfs_attr3_rmt_hdr_set(
 224        struct xfs_mount        *mp,
 225        void                    *ptr,
 226        xfs_ino_t               ino,
 227        uint32_t                offset,
 228        uint32_t                size,
 229        xfs_daddr_t             bno)
 230{
 231        struct xfs_attr3_rmt_hdr *rmt = ptr;
 232
 233        if (!xfs_sb_version_hascrc(&mp->m_sb))
 234                return 0;
 235
 236        rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
 237        rmt->rm_offset = cpu_to_be32(offset);
 238        rmt->rm_bytes = cpu_to_be32(size);
 239        uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid);
 240        rmt->rm_owner = cpu_to_be64(ino);
 241        rmt->rm_blkno = cpu_to_be64(bno);
 242
 243        /*
 244         * Remote attribute blocks are written synchronously, so we don't
 245         * have an LSN that we can stamp in them that makes any sense to log
 246         * recovery. To ensure that log recovery handles overwrites of these
 247         * blocks sanely (i.e. once they've been freed and reallocated as some
 248         * other type of metadata) we need to ensure that the LSN has a value
 249         * that tells log recovery to ignore the LSN and overwrite the buffer
 250         * with whatever is in it's log. To do this, we use the magic
 251         * NULLCOMMITLSN to indicate that the LSN is invalid.
 252         */
 253        rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN);
 254
 255        return sizeof(struct xfs_attr3_rmt_hdr);
 256}
 257
/*
 * Helper functions to copy attribute data in and out of the on-disk extents.
 */
 261STATIC int
 262xfs_attr_rmtval_copyout(
 263        struct xfs_mount *mp,
 264        struct xfs_buf  *bp,
 265        xfs_ino_t       ino,
 266        int             *offset,
 267        int             *valuelen,
 268        uint8_t         **dst)
 269{
 270        char            *src = bp->b_addr;
 271        xfs_daddr_t     bno = bp->b_bn;
 272        int             len = BBTOB(bp->b_length);
 273        int             blksize = mp->m_attr_geo->blksize;
 274
 275        ASSERT(len >= blksize);
 276
 277        while (len > 0 && *valuelen > 0) {
 278                int hdr_size = 0;
 279                int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
 280
 281                byte_cnt = min(*valuelen, byte_cnt);
 282
 283                if (xfs_sb_version_hascrc(&mp->m_sb)) {
 284                        if (xfs_attr3_rmt_hdr_ok(src, ino, *offset,
 285                                                  byte_cnt, bno)) {
 286                                xfs_alert(mp,
 287"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
 288                                        bno, *offset, byte_cnt, ino);
 289                                return -EFSCORRUPTED;
 290                        }
 291                        hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
 292                }
 293
 294                memcpy(*dst, src + hdr_size, byte_cnt);
 295
 296                /* roll buffer forwards */
 297                len -= blksize;
 298                src += blksize;
 299                bno += BTOBB(blksize);
 300
 301                /* roll attribute data forwards */
 302                *valuelen -= byte_cnt;
 303                *dst += byte_cnt;
 304                *offset += byte_cnt;
 305        }
 306        return 0;
 307}
 308
 309STATIC void
 310xfs_attr_rmtval_copyin(
 311        struct xfs_mount *mp,
 312        struct xfs_buf  *bp,
 313        xfs_ino_t       ino,
 314        int             *offset,
 315        int             *valuelen,
 316        uint8_t         **src)
 317{
 318        char            *dst = bp->b_addr;
 319        xfs_daddr_t     bno = bp->b_bn;
 320        int             len = BBTOB(bp->b_length);
 321        int             blksize = mp->m_attr_geo->blksize;
 322
 323        ASSERT(len >= blksize);
 324
 325        while (len > 0 && *valuelen > 0) {
 326                int hdr_size;
 327                int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
 328
 329                byte_cnt = min(*valuelen, byte_cnt);
 330                hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
 331                                                 byte_cnt, bno);
 332
 333                memcpy(dst + hdr_size, *src, byte_cnt);
 334
 335                /*
 336                 * If this is the last block, zero the remainder of it.
 337                 * Check that we are actually the last block, too.
 338                 */
 339                if (byte_cnt + hdr_size < blksize) {
 340                        ASSERT(*valuelen - byte_cnt == 0);
 341                        ASSERT(len == blksize);
 342                        memset(dst + hdr_size + byte_cnt, 0,
 343                                        blksize - hdr_size - byte_cnt);
 344                }
 345
 346                /* roll buffer forwards */
 347                len -= blksize;
 348                dst += blksize;
 349                bno += BTOBB(blksize);
 350
 351                /* roll attribute data forwards */
 352                *valuelen -= byte_cnt;
 353                *src += byte_cnt;
 354                *offset += byte_cnt;
 355        }
 356}
 357
 358/*
 359 * Read the value associated with an attribute from the out-of-line buffer
 360 * that we stored it in.
 361 */
 362int
 363xfs_attr_rmtval_get(
 364        struct xfs_da_args      *args)
 365{
 366        struct xfs_bmbt_irec    map[ATTR_RMTVALUE_MAPSIZE];
 367        struct xfs_mount        *mp = args->dp->i_mount;
 368        struct xfs_buf          *bp;
 369        xfs_dablk_t             lblkno = args->rmtblkno;
 370        uint8_t                 *dst = args->value;
 371        int                     valuelen;
 372        int                     nmap;
 373        int                     error;
 374        int                     blkcnt = args->rmtblkcnt;
 375        int                     i;
 376        int                     offset = 0;
 377
 378        trace_xfs_attr_rmtval_get(args);
 379
 380        ASSERT(!(args->flags & ATTR_KERNOVAL));
 381        ASSERT(args->rmtvaluelen == args->valuelen);
 382
 383        valuelen = args->rmtvaluelen;
 384        while (valuelen > 0) {
 385                nmap = ATTR_RMTVALUE_MAPSIZE;
 386                error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
 387                                       blkcnt, map, &nmap,
 388                                       XFS_BMAPI_ATTRFORK);
 389                if (error)
 390                        return error;
 391                ASSERT(nmap >= 1);
 392
 393                for (i = 0; (i < nmap) && (valuelen > 0); i++) {
 394                        xfs_daddr_t     dblkno;
 395                        int             dblkcnt;
 396
 397                        ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
 398                               (map[i].br_startblock != HOLESTARTBLOCK));
 399                        dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
 400                        dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
 401                        error = xfs_trans_read_buf(mp, args->trans,
 402                                                   mp->m_ddev_targp,
 403                                                   dblkno, dblkcnt, 0, &bp,
 404                                                   &xfs_attr3_rmt_buf_ops);
 405                        if (error)
 406                                return error;
 407
 408                        error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
 409                                                        &offset, &valuelen,
 410                                                        &dst);
 411                        xfs_trans_brelse(args->trans, bp);
 412                        if (error)
 413                                return error;
 414
 415                        /* roll attribute extent map forwards */
 416                        lblkno += map[i].br_blockcount;
 417                        blkcnt -= map[i].br_blockcount;
 418                }
 419        }
 420        ASSERT(valuelen == 0);
 421        return 0;
 422}
 423
 424/*
 425 * Write the value associated with an attribute into the out-of-line buffer
 426 * that we have defined for it.
 427 */
 428int
 429xfs_attr_rmtval_set(
 430        struct xfs_da_args      *args)
 431{
 432        struct xfs_inode        *dp = args->dp;
 433        struct xfs_mount        *mp = dp->i_mount;
 434        struct xfs_bmbt_irec    map;
 435        xfs_dablk_t             lblkno;
 436        xfs_fileoff_t           lfileoff = 0;
 437        uint8_t                 *src = args->value;
 438        int                     blkcnt;
 439        int                     valuelen;
 440        int                     nmap;
 441        int                     error;
 442        int                     offset = 0;
 443
 444        trace_xfs_attr_rmtval_set(args);
 445
 446        /*
 447         * Find a "hole" in the attribute address space large enough for
 448         * us to drop the new attribute's value into. Because CRC enable
 449         * attributes have headers, we can't just do a straight byte to FSB
 450         * conversion and have to take the header space into account.
 451         */
 452        blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
 453        error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
 454                                                   XFS_ATTR_FORK);
 455        if (error)
 456                return error;
 457
 458        args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
 459        args->rmtblkcnt = blkcnt;
 460
 461        /*
 462         * Roll through the "value", allocating blocks on disk as required.
 463         */
 464        while (blkcnt > 0) {
 465                /*
 466                 * Allocate a single extent, up to the size of the value.
 467                 *
 468                 * Note that we have to consider this a data allocation as we
 469                 * write the remote attribute without logging the contents.
 470                 * Hence we must ensure that we aren't using blocks that are on
 471                 * the busy list so that we don't overwrite blocks which have
 472                 * recently been freed but their transactions are not yet
 473                 * committed to disk. If we overwrite the contents of a busy
 474                 * extent and then crash then the block may not contain the
 475                 * correct metadata after log recovery occurs.
 476                 */
 477                nmap = 1;
 478                error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
 479                                  blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map,
 480                                  &nmap);
 481                if (error)
 482                        return error;
 483                error = xfs_defer_finish(&args->trans);
 484                if (error)
 485                        return error;
 486
 487                ASSERT(nmap == 1);
 488                ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
 489                       (map.br_startblock != HOLESTARTBLOCK));
 490                lblkno += map.br_blockcount;
 491                blkcnt -= map.br_blockcount;
 492
 493                /*
 494                 * Start the next trans in the chain.
 495                 */
 496                error = xfs_trans_roll_inode(&args->trans, dp);
 497                if (error)
 498                        return error;
 499        }
 500
 501        /*
 502         * Roll through the "value", copying the attribute value to the
 503         * already-allocated blocks.  Blocks are written synchronously
 504         * so that we can know they are all on disk before we turn off
 505         * the INCOMPLETE flag.
 506         */
 507        lblkno = args->rmtblkno;
 508        blkcnt = args->rmtblkcnt;
 509        valuelen = args->rmtvaluelen;
 510        while (valuelen > 0) {
 511                struct xfs_buf  *bp;
 512                xfs_daddr_t     dblkno;
 513                int             dblkcnt;
 514
 515                ASSERT(blkcnt > 0);
 516
 517                nmap = 1;
 518                error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
 519                                       blkcnt, &map, &nmap,
 520                                       XFS_BMAPI_ATTRFORK);
 521                if (error)
 522                        return error;
 523                ASSERT(nmap == 1);
 524                ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
 525                       (map.br_startblock != HOLESTARTBLOCK));
 526
 527                dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
 528                dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
 529
 530                bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt);
 531                if (!bp)
 532                        return -ENOMEM;
 533                bp->b_ops = &xfs_attr3_rmt_buf_ops;
 534
 535                xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
 536                                       &valuelen, &src);
 537
 538                error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
 539                xfs_buf_relse(bp);
 540                if (error)
 541                        return error;
 542
 543
 544                /* roll attribute extent map forwards */
 545                lblkno += map.br_blockcount;
 546                blkcnt -= map.br_blockcount;
 547        }
 548        ASSERT(valuelen == 0);
 549        return 0;
 550}
 551
 552/*
 553 * Remove the value associated with an attribute by deleting the
 554 * out-of-line buffer that it is stored on.
 555 */
 556int
 557xfs_attr_rmtval_remove(
 558        struct xfs_da_args      *args)
 559{
 560        struct xfs_mount        *mp = args->dp->i_mount;
 561        xfs_dablk_t             lblkno;
 562        int                     blkcnt;
 563        int                     error;
 564        int                     done;
 565
 566        trace_xfs_attr_rmtval_remove(args);
 567
 568        /*
 569         * Roll through the "value", invalidating the attribute value's blocks.
 570         */
 571        lblkno = args->rmtblkno;
 572        blkcnt = args->rmtblkcnt;
 573        while (blkcnt > 0) {
 574                struct xfs_bmbt_irec    map;
 575                struct xfs_buf          *bp;
 576                xfs_daddr_t             dblkno;
 577                int                     dblkcnt;
 578                int                     nmap;
 579
 580                /*
 581                 * Try to remember where we decided to put the value.
 582                 */
 583                nmap = 1;
 584                error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
 585                                       blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
 586                if (error)
 587                        return error;
 588                ASSERT(nmap == 1);
 589                ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
 590                       (map.br_startblock != HOLESTARTBLOCK));
 591
 592                dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
 593                dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
 594
 595                /*
 596                 * If the "remote" value is in the cache, remove it.
 597                 */
 598                bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
 599                if (bp) {
 600                        xfs_buf_stale(bp);
 601                        xfs_buf_relse(bp);
 602                        bp = NULL;
 603                }
 604
 605                lblkno += map.br_blockcount;
 606                blkcnt -= map.br_blockcount;
 607        }
 608
 609        /*
 610         * Keep de-allocating extents until the remote-value region is gone.
 611         */
 612        lblkno = args->rmtblkno;
 613        blkcnt = args->rmtblkcnt;
 614        done = 0;
 615        while (!done) {
 616                error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
 617                                    XFS_BMAPI_ATTRFORK, 1, &done);
 618                if (error)
 619                        return error;
 620                error = xfs_defer_finish(&args->trans);
 621                if (error)
 622                        return error;
 623
 624                /*
 625                 * Close out trans and start the next one in the chain.
 626                 */
 627                error = xfs_trans_roll_inode(&args->trans, args->dp);
 628                if (error)
 629                        return error;
 630        }
 631        return 0;
 632}
 633