linux/fs/btrfs/verity.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2
   3#include <linux/init.h>
   4#include <linux/fs.h>
   5#include <linux/slab.h>
   6#include <linux/rwsem.h>
   7#include <linux/xattr.h>
   8#include <linux/security.h>
   9#include <linux/posix_acl_xattr.h>
  10#include <linux/iversion.h>
  11#include <linux/fsverity.h>
  12#include <linux/sched/mm.h>
  13#include "ctree.h"
  14#include "btrfs_inode.h"
  15#include "transaction.h"
  16#include "disk-io.h"
  17#include "locking.h"
  18
  19/*
  20 * Implementation of the interface defined in struct fsverity_operations.
  21 *
  22 * The main question is how and where to store the verity descriptor and the
  23 * Merkle tree. We store both in dedicated btree items in the filesystem tree,
  24 * together with the rest of the inode metadata. This means we'll need to do
  25 * extra work to encrypt them once encryption is supported in btrfs, but btrfs
  26 * has a lot of careful code around i_size and it seems better to make a new key
  27 * type than try and adjust all of our expectations for i_size.
  28 *
  29 * Note that this differs from the implementation in ext4 and f2fs, where
  30 * this data is stored as if it were in the file, but past EOF. However, btrfs
  31 * does not have a widespread mechanism for caching opaque metadata pages, so we
  32 * do pretend that the Merkle tree pages themselves are past EOF for the
  33 * purposes of caching them (as opposed to creating a virtual inode).
  34 *
  35 * fs verity items are stored under two different key types on disk.
  36 * The descriptor items:
  37 * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ]
  38 *
  39 * At offset 0, we store a btrfs_verity_descriptor_item which tracks the
  40 * size of the descriptor item and some extra data for encryption.
  41 * Starting at offset 1, these hold the generic fs verity descriptor.
  42 * The latter are opaque to btrfs, we just read and write them as a blob for
  43 * the higher level verity code.  The most common descriptor size is 256 bytes.
  44 *
  45 * The merkle tree items:
  46 * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ]
  47 *
  48 * These also start at offset 0, and correspond to the merkle tree bytes.
  49 * So when fsverity asks for page 0 of the merkle tree, we pull up one page
  50 * starting at offset 0 for this key type.  These are also opaque to btrfs,
  51 * we're blindly storing whatever fsverity sends down.
  52 *
  53 * Another important consideration is the fact that the Merkle tree data scales
  54 * linearly with the size of the file (with 4K pages/blocks and SHA-256, it's
  55 * ~1/127th the size) so for large files, writing the tree can be a lengthy
  56 * operation. For that reason, we guard the whole enable verity operation
  57 * (between begin_enable_verity and end_enable_verity) with an orphan item.
  58 * Again, because the data can be pretty large, it's quite possible that we
  59 * could run out of space writing it, so we try our best to handle errors by
  60 * stopping and rolling back rather than aborting the victim transaction.
  61 */
  62
  63#define MERKLE_START_ALIGN                      65536
  64
  65/*
  66 * Compute the logical file offset where we cache the Merkle tree.
  67 *
  68 * @inode:  inode of the verity file
  69 *
  70 * For the purposes of caching the Merkle tree pages, as required by
  71 * fs-verity, it is convenient to do size computations in terms of a file
  72 * offset, rather than in terms of page indices.
  73 *
  74 * Use 64K to be sure it's past the last page in the file, even with 64K pages.
  75 * That rounding operation itself can overflow loff_t, so we do it in u64 and
  76 * check.
  77 *
  78 * Returns the file offset on success, negative error code on failure.
  79 */
  80static loff_t merkle_file_pos(const struct inode *inode)
  81{
  82        u64 sz = inode->i_size;
  83        u64 rounded = round_up(sz, MERKLE_START_ALIGN);
  84
  85        if (rounded > inode->i_sb->s_maxbytes)
  86                return -EFBIG;
  87
  88        return rounded;
  89}
  90
  91/*
  92 * Drop all the items for this inode with this key_type.
  93 *
  94 * @inode:     inode to drop items for
  95 * @key_type:  type of items to drop (BTRFS_VERITY_DESC_ITEM or
  96 *             BTRFS_VERITY_MERKLE_ITEM)
  97 *
  98 * Before doing a verity enable we cleanup any existing verity items.
  99 * This is also used to clean up if a verity enable failed half way through.
 100 *
 101 * Returns number of dropped items on success, negative error code on failure.
 102 */
 103static int drop_verity_items(struct btrfs_inode *inode, u8 key_type)
 104{
 105        struct btrfs_trans_handle *trans;
 106        struct btrfs_root *root = inode->root;
 107        struct btrfs_path *path;
 108        struct btrfs_key key;
 109        int count = 0;
 110        int ret;
 111
 112        path = btrfs_alloc_path();
 113        if (!path)
 114                return -ENOMEM;
 115
 116        while (1) {
 117                /* 1 for the item being dropped */
 118                trans = btrfs_start_transaction(root, 1);
 119                if (IS_ERR(trans)) {
 120                        ret = PTR_ERR(trans);
 121                        goto out;
 122                }
 123
 124                /*
 125                 * Walk backwards through all the items until we find one that
 126                 * isn't from our key type or objectid
 127                 */
 128                key.objectid = btrfs_ino(inode);
 129                key.type = key_type;
 130                key.offset = (u64)-1;
 131
 132                ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 133                if (ret > 0) {
 134                        ret = 0;
 135                        /* No more keys of this type, we're done */
 136                        if (path->slots[0] == 0)
 137                                break;
 138                        path->slots[0]--;
 139                } else if (ret < 0) {
 140                        btrfs_end_transaction(trans);
 141                        goto out;
 142                }
 143
 144                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 145
 146                /* No more keys of this type, we're done */
 147                if (key.objectid != btrfs_ino(inode) || key.type != key_type)
 148                        break;
 149
 150                /*
 151                 * This shouldn't be a performance sensitive function because
 152                 * it's not used as part of truncate.  If it ever becomes
 153                 * perf sensitive, change this to walk forward and bulk delete
 154                 * items
 155                 */
 156                ret = btrfs_del_items(trans, root, path, path->slots[0], 1);
 157                if (ret) {
 158                        btrfs_end_transaction(trans);
 159                        goto out;
 160                }
 161                count++;
 162                btrfs_release_path(path);
 163                btrfs_end_transaction(trans);
 164        }
 165        ret = count;
 166        btrfs_end_transaction(trans);
 167out:
 168        btrfs_free_path(path);
 169        return ret;
 170}
 171
 172/*
 173 * Drop all verity items
 174 *
 175 * @inode:  inode to drop verity items for
 176 *
 177 * In most contexts where we are dropping verity items, we want to do it for all
 178 * the types of verity items, not a particular one.
 179 *
 180 * Returns: 0 on success, negative error code on failure.
 181 */
 182int btrfs_drop_verity_items(struct btrfs_inode *inode)
 183{
 184        int ret;
 185
 186        ret = drop_verity_items(inode, BTRFS_VERITY_DESC_ITEM_KEY);
 187        if (ret < 0)
 188                return ret;
 189        ret = drop_verity_items(inode, BTRFS_VERITY_MERKLE_ITEM_KEY);
 190        if (ret < 0)
 191                return ret;
 192
 193        return 0;
 194}
 195
 196/*
 197 * Insert and write inode items with a given key type and offset.
 198 *
 199 * @inode:     inode to insert for
 200 * @key_type:  key type to insert
 201 * @offset:    item offset to insert at
 202 * @src:       source data to write
 203 * @len:       length of source data to write
 204 *
 205 * Write len bytes from src into items of up to 2K length.
 206 * The inserted items will have key (ino, key_type, offset + off) where off is
 207 * consecutively increasing from 0 up to the last item ending at offset + len.
 208 *
 209 * Returns 0 on success and a negative error code on failure.
 210 */
 211static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
 212                           const char *src, u64 len)
 213{
 214        struct btrfs_trans_handle *trans;
 215        struct btrfs_path *path;
 216        struct btrfs_root *root = inode->root;
 217        struct extent_buffer *leaf;
 218        struct btrfs_key key;
 219        unsigned long copy_bytes;
 220        unsigned long src_offset = 0;
 221        void *data;
 222        int ret = 0;
 223
 224        path = btrfs_alloc_path();
 225        if (!path)
 226                return -ENOMEM;
 227
 228        while (len > 0) {
 229                /* 1 for the new item being inserted */
 230                trans = btrfs_start_transaction(root, 1);
 231                if (IS_ERR(trans)) {
 232                        ret = PTR_ERR(trans);
 233                        break;
 234                }
 235
 236                key.objectid = btrfs_ino(inode);
 237                key.type = key_type;
 238                key.offset = offset;
 239
 240                /*
 241                 * Insert 2K at a time mostly to be friendly for smaller leaf
 242                 * size filesystems
 243                 */
 244                copy_bytes = min_t(u64, len, 2048);
 245
 246                ret = btrfs_insert_empty_item(trans, root, path, &key, copy_bytes);
 247                if (ret) {
 248                        btrfs_end_transaction(trans);
 249                        break;
 250                }
 251
 252                leaf = path->nodes[0];
 253
 254                data = btrfs_item_ptr(leaf, path->slots[0], void);
 255                write_extent_buffer(leaf, src + src_offset,
 256                                    (unsigned long)data, copy_bytes);
 257                offset += copy_bytes;
 258                src_offset += copy_bytes;
 259                len -= copy_bytes;
 260
 261                btrfs_release_path(path);
 262                btrfs_end_transaction(trans);
 263        }
 264
 265        btrfs_free_path(path);
 266        return ret;
 267}
 268
 269/*
 270 * Read inode items of the given key type and offset from the btree.
 271 *
 272 * @inode:      inode to read items of
 273 * @key_type:   key type to read
 274 * @offset:     item offset to read from
 275 * @dest:       Buffer to read into. This parameter has slightly tricky
 276 *              semantics.  If it is NULL, the function will not do any copying
 277 *              and will just return the size of all the items up to len bytes.
 278 *              If dest_page is passed, then the function will kmap_local the
 279 *              page and ignore dest, but it must still be non-NULL to avoid the
 280 *              counting-only behavior.
 281 * @len:        length in bytes to read
 282 * @dest_page:  copy into this page instead of the dest buffer
 283 *
 284 * Helper function to read items from the btree.  This returns the number of
 285 * bytes read or < 0 for errors.  We can return short reads if the items don't
 286 * exist on disk or aren't big enough to fill the desired length.  Supports
 287 * reading into a provided buffer (dest) or into the page cache
 288 *
 289 * Returns number of bytes read or a negative error code on failure.
 290 */
 291static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
 292                          char *dest, u64 len, struct page *dest_page)
 293{
 294        struct btrfs_path *path;
 295        struct btrfs_root *root = inode->root;
 296        struct extent_buffer *leaf;
 297        struct btrfs_key key;
 298        u64 item_end;
 299        u64 copy_end;
 300        int copied = 0;
 301        u32 copy_offset;
 302        unsigned long copy_bytes;
 303        unsigned long dest_offset = 0;
 304        void *data;
 305        char *kaddr = dest;
 306        int ret;
 307
 308        path = btrfs_alloc_path();
 309        if (!path)
 310                return -ENOMEM;
 311
 312        if (dest_page)
 313                path->reada = READA_FORWARD;
 314
 315        key.objectid = btrfs_ino(inode);
 316        key.type = key_type;
 317        key.offset = offset;
 318
 319        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 320        if (ret < 0) {
 321                goto out;
 322        } else if (ret > 0) {
 323                ret = 0;
 324                if (path->slots[0] == 0)
 325                        goto out;
 326                path->slots[0]--;
 327        }
 328
 329        while (len > 0) {
 330                leaf = path->nodes[0];
 331                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 332
 333                if (key.objectid != btrfs_ino(inode) || key.type != key_type)
 334                        break;
 335
 336                item_end = btrfs_item_size_nr(leaf, path->slots[0]) + key.offset;
 337
 338                if (copied > 0) {
 339                        /*
 340                         * Once we've copied something, we want all of the items
 341                         * to be sequential
 342                         */
 343                        if (key.offset != offset)
 344                                break;
 345                } else {
 346                        /*
 347                         * Our initial offset might be in the middle of an
 348                         * item.  Make sure it all makes sense.
 349                         */
 350                        if (key.offset > offset)
 351                                break;
 352                        if (item_end <= offset)
 353                                break;
 354                }
 355
 356                /* desc = NULL to just sum all the item lengths */
 357                if (!dest)
 358                        copy_end = item_end;
 359                else
 360                        copy_end = min(offset + len, item_end);
 361
 362                /* Number of bytes in this item we want to copy */
 363                copy_bytes = copy_end - offset;
 364
 365                /* Offset from the start of item for copying */
 366                copy_offset = offset - key.offset;
 367
 368                if (dest) {
 369                        if (dest_page)
 370                                kaddr = kmap_local_page(dest_page);
 371
 372                        data = btrfs_item_ptr(leaf, path->slots[0], void);
 373                        read_extent_buffer(leaf, kaddr + dest_offset,
 374                                           (unsigned long)data + copy_offset,
 375                                           copy_bytes);
 376
 377                        if (dest_page)
 378                                kunmap_local(kaddr);
 379                }
 380
 381                offset += copy_bytes;
 382                dest_offset += copy_bytes;
 383                len -= copy_bytes;
 384                copied += copy_bytes;
 385
 386                path->slots[0]++;
 387                if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
 388                        /*
 389                         * We've reached the last slot in this leaf and we need
 390                         * to go to the next leaf.
 391                         */
 392                        ret = btrfs_next_leaf(root, path);
 393                        if (ret < 0) {
 394                                break;
 395                        } else if (ret > 0) {
 396                                ret = 0;
 397                                break;
 398                        }
 399                }
 400        }
 401out:
 402        btrfs_free_path(path);
 403        if (!ret)
 404                ret = copied;
 405        return ret;
 406}
 407
 408/*
 409 * Delete an fsverity orphan
 410 *
 411 * @trans:  transaction to do the delete in
 412 * @inode:  inode to orphan
 413 *
 414 * Capture verity orphan specific logic that is repeated in the couple places
 415 * we delete verity orphans. Specifically, handling ENOENT and ignoring inodes
 416 * with 0 links.
 417 *
 418 * Returns zero on success or a negative error code on failure.
 419 */
 420static int del_orphan(struct btrfs_trans_handle *trans, struct btrfs_inode *inode)
 421{
 422        struct btrfs_root *root = inode->root;
 423        int ret;
 424
 425        /*
 426         * If the inode has no links, it is either already unlinked, or was
 427         * created with O_TMPFILE. In either case, it should have an orphan from
 428         * that other operation. Rather than reference count the orphans, we
 429         * simply ignore them here, because we only invoke the verity path in
 430         * the orphan logic when i_nlink is 1.
 431         */
 432        if (!inode->vfs_inode.i_nlink)
 433                return 0;
 434
 435        ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
 436        if (ret == -ENOENT)
 437                ret = 0;
 438        return ret;
 439}
 440
 441/*
 442 * Rollback in-progress verity if we encounter an error.
 443 *
 444 * @inode:  inode verity had an error for
 445 *
 446 * We try to handle recoverable errors while enabling verity by rolling it back
 447 * and just failing the operation, rather than having an fs level error no
 448 * matter what. However, any error in rollback is unrecoverable.
 449 *
 450 * Returns 0 on success, negative error code on failure.
 451 */
 452static int rollback_verity(struct btrfs_inode *inode)
 453{
 454        struct btrfs_trans_handle *trans = NULL;
 455        struct btrfs_root *root = inode->root;
 456        int ret;
 457
 458        ASSERT(inode_is_locked(&inode->vfs_inode));
 459        truncate_inode_pages(inode->vfs_inode.i_mapping, inode->vfs_inode.i_size);
 460        clear_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
 461        ret = btrfs_drop_verity_items(inode);
 462        if (ret) {
 463                btrfs_handle_fs_error(root->fs_info, ret,
 464                                "failed to drop verity items in rollback %llu",
 465                                (u64)inode->vfs_inode.i_ino);
 466                goto out;
 467        }
 468
 469        /*
 470         * 1 for updating the inode flag
 471         * 1 for deleting the orphan
 472         */
 473        trans = btrfs_start_transaction(root, 2);
 474        if (IS_ERR(trans)) {
 475                ret = PTR_ERR(trans);
 476                trans = NULL;
 477                btrfs_handle_fs_error(root->fs_info, ret,
 478                        "failed to start transaction in verity rollback %llu",
 479                        (u64)inode->vfs_inode.i_ino);
 480                goto out;
 481        }
 482        inode->ro_flags &= ~BTRFS_INODE_RO_VERITY;
 483        btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode);
 484        ret = btrfs_update_inode(trans, root, inode);
 485        if (ret) {
 486                btrfs_abort_transaction(trans, ret);
 487                goto out;
 488        }
 489        ret = del_orphan(trans, inode);
 490        if (ret) {
 491                btrfs_abort_transaction(trans, ret);
 492                goto out;
 493        }
 494out:
 495        if (trans)
 496                btrfs_end_transaction(trans);
 497        return ret;
 498}
 499
 500/*
 501 * Finalize making the file a valid verity file
 502 *
 503 * @inode:      inode to be marked as verity
 504 * @desc:       contents of the verity descriptor to write (not NULL)
 505 * @desc_size:  size of the verity descriptor
 506 *
 507 * Do the actual work of finalizing verity after successfully writing the Merkle
 508 * tree:
 509 *
 510 * - write out the descriptor items
 511 * - mark the inode with the verity flag
 512 * - delete the orphan item
 513 * - mark the ro compat bit
 514 * - clear the in progress bit
 515 *
 516 * Returns 0 on success, negative error code on failure.
 517 */
 518static int finish_verity(struct btrfs_inode *inode, const void *desc,
 519                         size_t desc_size)
 520{
 521        struct btrfs_trans_handle *trans = NULL;
 522        struct btrfs_root *root = inode->root;
 523        struct btrfs_verity_descriptor_item item;
 524        int ret;
 525
 526        /* Write out the descriptor item */
 527        memset(&item, 0, sizeof(item));
 528        btrfs_set_stack_verity_descriptor_size(&item, desc_size);
 529        ret = write_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, 0,
 530                              (const char *)&item, sizeof(item));
 531        if (ret)
 532                goto out;
 533
 534        /* Write out the descriptor itself */
 535        ret = write_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, 1,
 536                              desc, desc_size);
 537        if (ret)
 538                goto out;
 539
 540        /*
 541         * 1 for updating the inode flag
 542         * 1 for deleting the orphan
 543         */
 544        trans = btrfs_start_transaction(root, 2);
 545        if (IS_ERR(trans)) {
 546                ret = PTR_ERR(trans);
 547                goto out;
 548        }
 549        inode->ro_flags |= BTRFS_INODE_RO_VERITY;
 550        btrfs_sync_inode_flags_to_i_flags(&inode->vfs_inode);
 551        ret = btrfs_update_inode(trans, root, inode);
 552        if (ret)
 553                goto end_trans;
 554        ret = del_orphan(trans, inode);
 555        if (ret)
 556                goto end_trans;
 557        clear_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
 558        btrfs_set_fs_compat_ro(root->fs_info, VERITY);
 559end_trans:
 560        btrfs_end_transaction(trans);
 561out:
 562        return ret;
 563
 564}
 565
 566/*
 567 * fsverity op that begins enabling verity.
 568 *
 569 * @filp:  file to enable verity on
 570 *
 571 * Begin enabling fsverity for the file. We drop any existing verity items, add
 572 * an orphan and set the in progress bit.
 573 *
 574 * Returns 0 on success, negative error code on failure.
 575 */
 576static int btrfs_begin_enable_verity(struct file *filp)
 577{
 578        struct btrfs_inode *inode = BTRFS_I(file_inode(filp));
 579        struct btrfs_root *root = inode->root;
 580        struct btrfs_trans_handle *trans;
 581        int ret;
 582
 583        ASSERT(inode_is_locked(file_inode(filp)));
 584
 585        if (test_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags))
 586                return -EBUSY;
 587
 588        /*
 589         * This should almost never do anything, but theoretically, it's
 590         * possible that we failed to enable verity on a file, then were
 591         * interrupted or failed while rolling back, failed to cleanup the
 592         * orphan, and finally attempt to enable verity again.
 593         */
 594        ret = btrfs_drop_verity_items(inode);
 595        if (ret)
 596                return ret;
 597
 598        /* 1 for the orphan item */
 599        trans = btrfs_start_transaction(root, 1);
 600        if (IS_ERR(trans))
 601                return PTR_ERR(trans);
 602
 603        ret = btrfs_orphan_add(trans, inode);
 604        if (!ret)
 605                set_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags);
 606        btrfs_end_transaction(trans);
 607
 608        return 0;
 609}
 610
 611/*
 612 * fsverity op that ends enabling verity.
 613 *
 614 * @filp:              file we are finishing enabling verity on
 615 * @desc:              verity descriptor to write out (NULL in error conditions)
 616 * @desc_size:         size of the verity descriptor (variable with signatures)
 617 * @merkle_tree_size:  size of the merkle tree in bytes
 618 *
 619 * If desc is null, then VFS is signaling an error occurred during verity
 620 * enable, and we should try to rollback. Otherwise, attempt to finish verity.
 621 *
 622 * Returns 0 on success, negative error code on error.
 623 */
 624static int btrfs_end_enable_verity(struct file *filp, const void *desc,
 625                                   size_t desc_size, u64 merkle_tree_size)
 626{
 627        struct btrfs_inode *inode = BTRFS_I(file_inode(filp));
 628        int ret = 0;
 629        int rollback_ret;
 630
 631        ASSERT(inode_is_locked(file_inode(filp)));
 632
 633        if (desc == NULL)
 634                goto rollback;
 635
 636        ret = finish_verity(inode, desc, desc_size);
 637        if (ret)
 638                goto rollback;
 639        return ret;
 640
 641rollback:
 642        rollback_ret = rollback_verity(inode);
 643        if (rollback_ret)
 644                btrfs_err(inode->root->fs_info,
 645                          "failed to rollback verity items: %d", rollback_ret);
 646        return ret;
 647}
 648
 649/*
 650 * fsverity op that gets the struct fsverity_descriptor.
 651 *
 652 * @inode:     inode to get the descriptor of
 653 * @buf:       output buffer for the descriptor contents
 654 * @buf_size:  size of the output buffer. 0 to query the size
 655 *
 656 * fsverity does a two pass setup for reading the descriptor, in the first pass
 657 * it calls with buf_size = 0 to query the size of the descriptor, and then in
 658 * the second pass it actually reads the descriptor off disk.
 659 *
 660 * Returns the size on success or a negative error code on failure.
 661 */
 662static int btrfs_get_verity_descriptor(struct inode *inode, void *buf,
 663                                       size_t buf_size)
 664{
 665        u64 true_size;
 666        int ret = 0;
 667        struct btrfs_verity_descriptor_item item;
 668
 669        memset(&item, 0, sizeof(item));
 670        ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_DESC_ITEM_KEY, 0,
 671                             (char *)&item, sizeof(item), NULL);
 672        if (ret < 0)
 673                return ret;
 674
 675        if (item.reserved[0] != 0 || item.reserved[1] != 0)
 676                return -EUCLEAN;
 677
 678        true_size = btrfs_stack_verity_descriptor_size(&item);
 679        if (true_size > INT_MAX)
 680                return -EUCLEAN;
 681
 682        if (buf_size == 0)
 683                return true_size;
 684        if (buf_size < true_size)
 685                return -ERANGE;
 686
 687        ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_DESC_ITEM_KEY, 1,
 688                             buf, buf_size, NULL);
 689        if (ret < 0)
 690                return ret;
 691        if (ret != true_size)
 692                return -EIO;
 693
 694        return true_size;
 695}
 696
 697/*
 698 * fsverity op that reads and caches a merkle tree page.
 699 *
 700 * @inode:         inode to read a merkle tree page for
 701 * @index:         page index relative to the start of the merkle tree
 702 * @num_ra_pages:  number of pages to readahead. Optional, we ignore it
 703 *
 704 * The Merkle tree is stored in the filesystem btree, but its pages are cached
 705 * with a logical position past EOF in the inode's mapping.
 706 *
 707 * Returns the page we read, or an ERR_PTR on error.
 708 */
 709static struct page *btrfs_read_merkle_tree_page(struct inode *inode,
 710                                                pgoff_t index,
 711                                                unsigned long num_ra_pages)
 712{
 713        struct page *page;
 714        u64 off = (u64)index << PAGE_SHIFT;
 715        loff_t merkle_pos = merkle_file_pos(inode);
 716        int ret;
 717
 718        if (merkle_pos < 0)
 719                return ERR_PTR(merkle_pos);
 720        if (merkle_pos > inode->i_sb->s_maxbytes - off - PAGE_SIZE)
 721                return ERR_PTR(-EFBIG);
 722        index += merkle_pos >> PAGE_SHIFT;
 723again:
 724        page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
 725        if (page) {
 726                if (PageUptodate(page))
 727                        return page;
 728
 729                lock_page(page);
 730                /*
 731                 * We only insert uptodate pages, so !Uptodate has to be
 732                 * an error
 733                 */
 734                if (!PageUptodate(page)) {
 735                        unlock_page(page);
 736                        put_page(page);
 737                        return ERR_PTR(-EIO);
 738                }
 739                unlock_page(page);
 740                return page;
 741        }
 742
 743        page = __page_cache_alloc(mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS));
 744        if (!page)
 745                return ERR_PTR(-ENOMEM);
 746
 747        /*
 748         * Merkle item keys are indexed from byte 0 in the merkle tree.
 749         * They have the form:
 750         *
 751         * [ inode objectid, BTRFS_MERKLE_ITEM_KEY, offset in bytes ]
 752         */
 753        ret = read_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY, off,
 754                             page_address(page), PAGE_SIZE, page);
 755        if (ret < 0) {
 756                put_page(page);
 757                return ERR_PTR(ret);
 758        }
 759        if (ret < PAGE_SIZE)
 760                memzero_page(page, ret, PAGE_SIZE - ret);
 761
 762        SetPageUptodate(page);
 763        ret = add_to_page_cache_lru(page, inode->i_mapping, index, GFP_NOFS);
 764
 765        if (!ret) {
 766                /* Inserted and ready for fsverity */
 767                unlock_page(page);
 768        } else {
 769                put_page(page);
 770                /* Did someone race us into inserting this page? */
 771                if (ret == -EEXIST)
 772                        goto again;
 773                page = ERR_PTR(ret);
 774        }
 775        return page;
 776}
 777
 778/*
 779 * fsverity op that writes a Merkle tree block into the btree.
 780 *
 781 * @inode:          inode to write a Merkle tree block for
 782 * @buf:            Merkle tree data block to write
 783 * @index:          index of the block in the Merkle tree
 784 * @log_blocksize:  log base 2 of the Merkle tree block size
 785 *
 786 * Note that the block size could be different from the page size, so it is not
 787 * safe to assume that index is a page index.
 788 *
 789 * Returns 0 on success or negative error code on failure
 790 */
 791static int btrfs_write_merkle_tree_block(struct inode *inode, const void *buf,
 792                                        u64 index, int log_blocksize)
 793{
 794        u64 off = index << log_blocksize;
 795        u64 len = 1ULL << log_blocksize;
 796        loff_t merkle_pos = merkle_file_pos(inode);
 797
 798        if (merkle_pos < 0)
 799                return merkle_pos;
 800        if (merkle_pos > inode->i_sb->s_maxbytes - off - len)
 801                return -EFBIG;
 802
 803        return write_key_bytes(BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY,
 804                               off, buf, len);
 805}
 806
 807const struct fsverity_operations btrfs_verityops = {
 808        .begin_enable_verity     = btrfs_begin_enable_verity,
 809        .end_enable_verity       = btrfs_end_enable_verity,
 810        .get_verity_descriptor   = btrfs_get_verity_descriptor,
 811        .read_merkle_tree_page   = btrfs_read_merkle_tree_page,
 812        .write_merkle_tree_block = btrfs_write_merkle_tree_block,
 813};
 814