linux/fs/ext4/verity.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * fs/ext4/verity.c: fs-verity support for ext4
   4 *
   5 * Copyright 2019 Google LLC
   6 */
   7
   8/*
   9 * Implementation of fsverity_operations for ext4.
  10 *
  11 * ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past
  12 * the end of the file, starting at the first 64K boundary beyond i_size.  This
  13 * approach works because (a) verity files are readonly, and (b) pages fully
  14 * beyond i_size aren't visible to userspace but can be read/written internally
  15 * by ext4 with only some relatively small changes to ext4.  This approach
  16 * avoids having to depend on the EA_INODE feature and on rearchitecturing
  17 * ext4's xattr support to support paging multi-gigabyte xattrs into memory, and
  18 * to support encrypting xattrs.  Note that the verity metadata *must* be
  19 * encrypted when the file is, since it contains hashes of the plaintext data.
  20 *
  21 * Using a 64K boundary rather than a 4K one keeps things ready for
  22 * architectures with 64K pages, and it doesn't necessarily waste space on-disk
  23 * since there can be a hole between i_size and the start of the Merkle tree.
  24 */
  25
  26#include <linux/quotaops.h>
  27
  28#include "ext4.h"
  29#include "ext4_extents.h"
  30#include "ext4_jbd2.h"
  31
  32static inline loff_t ext4_verity_metadata_pos(const struct inode *inode)
  33{
  34        return round_up(inode->i_size, 65536);
  35}
  36
  37/*
  38 * Read some verity metadata from the inode.  __vfs_read() can't be used because
  39 * we need to read beyond i_size.
  40 */
  41static int pagecache_read(struct inode *inode, void *buf, size_t count,
  42                          loff_t pos)
  43{
  44        while (count) {
  45                size_t n = min_t(size_t, count,
  46                                 PAGE_SIZE - offset_in_page(pos));
  47                struct page *page;
  48
  49                page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT,
  50                                         NULL);
  51                if (IS_ERR(page))
  52                        return PTR_ERR(page);
  53
  54                memcpy_from_page(buf, page, offset_in_page(pos), n);
  55
  56                put_page(page);
  57
  58                buf += n;
  59                pos += n;
  60                count -= n;
  61        }
  62        return 0;
  63}
  64
  65/*
  66 * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY.
  67 * kernel_write() can't be used because the file descriptor is readonly.
  68 */
  69static int pagecache_write(struct inode *inode, const void *buf, size_t count,
  70                           loff_t pos)
  71{
  72        if (pos + count > inode->i_sb->s_maxbytes)
  73                return -EFBIG;
  74
  75        while (count) {
  76                size_t n = min_t(size_t, count,
  77                                 PAGE_SIZE - offset_in_page(pos));
  78                struct page *page;
  79                void *fsdata;
  80                int res;
  81
  82                res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
  83                                            &page, &fsdata);
  84                if (res)
  85                        return res;
  86
  87                memcpy_to_page(page, offset_in_page(pos), buf, n);
  88
  89                res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n,
  90                                          page, fsdata);
  91                if (res < 0)
  92                        return res;
  93                if (res != n)
  94                        return -EIO;
  95
  96                buf += n;
  97                pos += n;
  98                count -= n;
  99        }
 100        return 0;
 101}
 102
 103static int ext4_begin_enable_verity(struct file *filp)
 104{
 105        struct inode *inode = file_inode(filp);
 106        const int credits = 2; /* superblock and inode for ext4_orphan_add() */
 107        handle_t *handle;
 108        int err;
 109
 110        if (IS_DAX(inode) || ext4_test_inode_flag(inode, EXT4_INODE_DAX))
 111                return -EINVAL;
 112
 113        if (ext4_verity_in_progress(inode))
 114                return -EBUSY;
 115
 116        /*
 117         * Since the file was opened readonly, we have to initialize the jbd
 118         * inode and quotas here and not rely on ->open() doing it.  This must
 119         * be done before evicting the inline data.
 120         */
 121
 122        err = ext4_inode_attach_jinode(inode);
 123        if (err)
 124                return err;
 125
 126        err = dquot_initialize(inode);
 127        if (err)
 128                return err;
 129
 130        err = ext4_convert_inline_data(inode);
 131        if (err)
 132                return err;
 133
 134        if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
 135                ext4_warning_inode(inode,
 136                                   "verity is only allowed on extent-based files");
 137                return -EOPNOTSUPP;
 138        }
 139
 140        /*
 141         * ext4 uses the last allocated block to find the verity descriptor, so
 142         * we must remove any other blocks past EOF which might confuse things.
 143         */
 144        err = ext4_truncate(inode);
 145        if (err)
 146                return err;
 147
 148        handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
 149        if (IS_ERR(handle))
 150                return PTR_ERR(handle);
 151
 152        err = ext4_orphan_add(handle, inode);
 153        if (err == 0)
 154                ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
 155
 156        ext4_journal_stop(handle);
 157        return err;
 158}
 159
 160/*
 161 * ext4 stores the verity descriptor beginning on the next filesystem block
 162 * boundary after the Merkle tree.  Then, the descriptor size is stored in the
 163 * last 4 bytes of the last allocated filesystem block --- which is either the
 164 * block in which the descriptor ends, or the next block after that if there
 165 * weren't at least 4 bytes remaining.
 166 *
 167 * We can't simply store the descriptor in an xattr because it *must* be
 168 * encrypted when ext4 encryption is used, but ext4 encryption doesn't encrypt
 169 * xattrs.  Also, if the descriptor includes a large signature blob it may be
 170 * too large to store in an xattr without the EA_INODE feature.
 171 */
 172static int ext4_write_verity_descriptor(struct inode *inode, const void *desc,
 173                                        size_t desc_size, u64 merkle_tree_size)
 174{
 175        const u64 desc_pos = round_up(ext4_verity_metadata_pos(inode) +
 176                                      merkle_tree_size, i_blocksize(inode));
 177        const u64 desc_end = desc_pos + desc_size;
 178        const __le32 desc_size_disk = cpu_to_le32(desc_size);
 179        const u64 desc_size_pos = round_up(desc_end + sizeof(desc_size_disk),
 180                                           i_blocksize(inode)) -
 181                                  sizeof(desc_size_disk);
 182        int err;
 183
 184        err = pagecache_write(inode, desc, desc_size, desc_pos);
 185        if (err)
 186                return err;
 187
 188        return pagecache_write(inode, &desc_size_disk, sizeof(desc_size_disk),
 189                               desc_size_pos);
 190}
 191
 192static int ext4_end_enable_verity(struct file *filp, const void *desc,
 193                                  size_t desc_size, u64 merkle_tree_size)
 194{
 195        struct inode *inode = file_inode(filp);
 196        const int credits = 2; /* superblock and inode for ext4_orphan_del() */
 197        handle_t *handle;
 198        struct ext4_iloc iloc;
 199        int err = 0;
 200
 201        /*
 202         * If an error already occurred (which fs/verity/ signals by passing
 203         * desc == NULL), then only clean-up is needed.
 204         */
 205        if (desc == NULL)
 206                goto cleanup;
 207
 208        /* Append the verity descriptor. */
 209        err = ext4_write_verity_descriptor(inode, desc, desc_size,
 210                                           merkle_tree_size);
 211        if (err)
 212                goto cleanup;
 213
 214        /*
 215         * Write all pages (both data and verity metadata).  Note that this must
 216         * happen before clearing EXT4_STATE_VERITY_IN_PROGRESS; otherwise pages
 217         * beyond i_size won't be written properly.  For crash consistency, this
 218         * also must happen before the verity inode flag gets persisted.
 219         */
 220        err = filemap_write_and_wait(inode->i_mapping);
 221        if (err)
 222                goto cleanup;
 223
 224        /*
 225         * Finally, set the verity inode flag and remove the inode from the
 226         * orphan list (in a single transaction).
 227         */
 228
 229        handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
 230        if (IS_ERR(handle)) {
 231                err = PTR_ERR(handle);
 232                goto cleanup;
 233        }
 234
 235        err = ext4_orphan_del(handle, inode);
 236        if (err)
 237                goto stop_and_cleanup;
 238
 239        err = ext4_reserve_inode_write(handle, inode, &iloc);
 240        if (err)
 241                goto stop_and_cleanup;
 242
 243        ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
 244        ext4_set_inode_flags(inode, false);
 245        err = ext4_mark_iloc_dirty(handle, inode, &iloc);
 246        if (err)
 247                goto stop_and_cleanup;
 248
 249        ext4_journal_stop(handle);
 250
 251        ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
 252        return 0;
 253
 254stop_and_cleanup:
 255        ext4_journal_stop(handle);
 256cleanup:
 257        /*
 258         * Verity failed to be enabled, so clean up by truncating any verity
 259         * metadata that was written beyond i_size (both from cache and from
 260         * disk), removing the inode from the orphan list (if it wasn't done
 261         * already), and clearing EXT4_STATE_VERITY_IN_PROGRESS.
 262         */
 263        truncate_inode_pages(inode->i_mapping, inode->i_size);
 264        ext4_truncate(inode);
 265        ext4_orphan_del(NULL, inode);
 266        ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
 267        return err;
 268}
 269
 270static int ext4_get_verity_descriptor_location(struct inode *inode,
 271                                               size_t *desc_size_ret,
 272                                               u64 *desc_pos_ret)
 273{
 274        struct ext4_ext_path *path;
 275        struct ext4_extent *last_extent;
 276        u32 end_lblk;
 277        u64 desc_size_pos;
 278        __le32 desc_size_disk;
 279        u32 desc_size;
 280        u64 desc_pos;
 281        int err;
 282
 283        /*
 284         * Descriptor size is in last 4 bytes of last allocated block.
 285         * See ext4_write_verity_descriptor().
 286         */
 287
 288        if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
 289                EXT4_ERROR_INODE(inode, "verity file doesn't use extents");
 290                return -EFSCORRUPTED;
 291        }
 292
 293        path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
 294        if (IS_ERR(path))
 295                return PTR_ERR(path);
 296
 297        last_extent = path[path->p_depth].p_ext;
 298        if (!last_extent) {
 299                EXT4_ERROR_INODE(inode, "verity file has no extents");
 300                ext4_ext_drop_refs(path);
 301                kfree(path);
 302                return -EFSCORRUPTED;
 303        }
 304
 305        end_lblk = le32_to_cpu(last_extent->ee_block) +
 306                   ext4_ext_get_actual_len(last_extent);
 307        desc_size_pos = (u64)end_lblk << inode->i_blkbits;
 308        ext4_ext_drop_refs(path);
 309        kfree(path);
 310
 311        if (desc_size_pos < sizeof(desc_size_disk))
 312                goto bad;
 313        desc_size_pos -= sizeof(desc_size_disk);
 314
 315        err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk),
 316                             desc_size_pos);
 317        if (err)
 318                return err;
 319        desc_size = le32_to_cpu(desc_size_disk);
 320
 321        /*
 322         * The descriptor is stored just before the desc_size_disk, but starting
 323         * on a filesystem block boundary.
 324         */
 325
 326        if (desc_size > INT_MAX || desc_size > desc_size_pos)
 327                goto bad;
 328
 329        desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode));
 330        if (desc_pos < ext4_verity_metadata_pos(inode))
 331                goto bad;
 332
 333        *desc_size_ret = desc_size;
 334        *desc_pos_ret = desc_pos;
 335        return 0;
 336
 337bad:
 338        EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor");
 339        return -EFSCORRUPTED;
 340}
 341
 342static int ext4_get_verity_descriptor(struct inode *inode, void *buf,
 343                                      size_t buf_size)
 344{
 345        size_t desc_size = 0;
 346        u64 desc_pos = 0;
 347        int err;
 348
 349        err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos);
 350        if (err)
 351                return err;
 352
 353        if (buf_size) {
 354                if (desc_size > buf_size)
 355                        return -ERANGE;
 356                err = pagecache_read(inode, buf, desc_size, desc_pos);
 357                if (err)
 358                        return err;
 359        }
 360        return desc_size;
 361}
 362
 363static struct page *ext4_read_merkle_tree_page(struct inode *inode,
 364                                               pgoff_t index,
 365                                               unsigned long num_ra_pages)
 366{
 367        DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
 368        struct page *page;
 369
 370        index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
 371
 372        page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
 373        if (!page || !PageUptodate(page)) {
 374                if (page)
 375                        put_page(page);
 376                else if (num_ra_pages > 1)
 377                        page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
 378                page = read_mapping_page(inode->i_mapping, index, NULL);
 379        }
 380        return page;
 381}
 382
 383static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf,
 384                                        u64 index, int log_blocksize)
 385{
 386        loff_t pos = ext4_verity_metadata_pos(inode) + (index << log_blocksize);
 387
 388        return pagecache_write(inode, buf, 1 << log_blocksize, pos);
 389}
 390
 391const struct fsverity_operations ext4_verityops = {
 392        .begin_enable_verity    = ext4_begin_enable_verity,
 393        .end_enable_verity      = ext4_end_enable_verity,
 394        .get_verity_descriptor  = ext4_get_verity_descriptor,
 395        .read_merkle_tree_page  = ext4_read_merkle_tree_page,
 396        .write_merkle_tree_block = ext4_write_merkle_tree_block,
 397};
 398