// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/block_validity.c
 *
 * Copyright (C) 2009
 * Theodore Ts'o (tytso@mit.edu)
 *
 * Track which blocks in the filesystem are metadata blocks that
 * should never be used as data blocks by files or directories.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include "ext4.h"

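/*
 * One node of the system-zone rbtree: each entry covers the physical
 * block range [start_blk, start_blk + count).  The tree is kept sorted
 * by start_blk, and ranges never overlap.
 */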
struct ext4_system_zone {
        struct rb_node  node;
        ext4_fsblk_t    start_blk;
        unsigned int    count;
};

static struct kmem_cache *ext4_system_zone_cachep;

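/*
 * Create the slab cache for system-zone entries; called once from the
 * module init path (hence __init).
 */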
int __init ext4_init_system_zone(void)
{
        ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
        if (ext4_system_zone_cachep == NULL)
                return -ENOMEM;
        return 0;
}

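/*
 * Tear the cache down on module unload.  The rcu_barrier() waits for
 * any pending ext4_destroy_system_zone() callbacks, which free entries
 * from this cache, to finish before the cache itself is destroyed.
 */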
void ext4_exit_system_zone(void)
{
        rcu_barrier();
        kmem_cache_destroy(ext4_system_zone_cachep);
}

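/*
 * Return 1 if entry1 ends exactly where entry2 begins, i.e. the two
 * ranges form one contiguous zone and can be coalesced.
 */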
static inline int can_merge(struct ext4_system_zone *entry1,
                     struct ext4_system_zone *entry2)
{
        if ((entry1->start_blk + entry1->count) == entry2->start_blk)
                return 1;
        return 0;
}

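/*
 * Free every entry in the tree.  The postorder iteration is safe here
 * because the tree is being discarded and is never rebalanced while we
 * walk it.
 */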
static void release_system_zone(struct ext4_system_blocks *system_blks)
{
        struct ext4_system_zone *entry, *n;

        rbtree_postorder_for_each_entry_safe(entry, n,
                                &system_blks->root, node)
                kmem_cache_free(ext4_system_zone_cachep, entry);
}

/*
 * Mark a range of blocks as belonging to the "system zone" --- that
 * is, filesystem metadata blocks which should never be used by
 * inodes.
 */
static int add_system_zone(struct ext4_system_blocks *system_blks,
                           ext4_fsblk_t start_blk,
                           unsigned int count)
{
        struct ext4_system_zone *new_entry = NULL, *entry;
        struct rb_node **n = &system_blks->root.rb_node, *node;
        struct rb_node *parent = NULL, *new_node = NULL;

        while (*n) {
                parent = *n;
                entry = rb_entry(parent, struct ext4_system_zone, node);
                if (start_blk < entry->start_blk)
                        n = &(*n)->rb_left;
                else if (start_blk >= (entry->start_blk + entry->count))
                        n = &(*n)->rb_right;
                else {
                        if (start_blk + count > (entry->start_blk +
                                                 entry->count))
                                entry->count = (start_blk + count -
                                                entry->start_blk);
                        new_node = *n;
                        new_entry = rb_entry(new_node, struct ext4_system_zone,
                                             node);
                        break;
                }
        }

        if (!new_entry) {
                new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
                                             GFP_KERNEL);
                if (!new_entry)
                        return -ENOMEM;
                new_entry->start_blk = start_blk;
                new_entry->count = count;
                new_node = &new_entry->node;

                rb_link_node(new_node, parent, n);
                rb_insert_color(new_node, &system_blks->root);
        }

        /* Can we merge to the left? */
        node = rb_prev(new_node);
        if (node) {
                entry = rb_entry(node, struct ext4_system_zone, node);
                if (can_merge(entry, new_entry)) {
                        new_entry->start_blk = entry->start_blk;
                        new_entry->count += entry->count;
                        rb_erase(node, &system_blks->root);
                        kmem_cache_free(ext4_system_zone_cachep, entry);
                }
        }

        /* Can we merge to the right? */
        node = rb_next(new_node);
        if (node) {
                entry = rb_entry(node, struct ext4_system_zone, node);
                if (can_merge(new_entry, entry)) {
                        new_entry->count += entry->count;
                        rb_erase(node, &system_blks->root);
                        kmem_cache_free(ext4_system_zone_cachep, entry);
                }
        }
        return 0;
}

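/*
 * Dump the full set of system zones to the kernel log; only used when
 * the filesystem is mounted with the debug mount option.
 */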
static void debug_print_tree(struct ext4_sb_info *sbi)
{
        struct rb_node *node;
        struct ext4_system_zone *entry;
        struct ext4_system_blocks *system_blks;
        int first = 1;

        printk(KERN_INFO "System zones: ");
        rcu_read_lock();
        system_blks = rcu_dereference(sbi->system_blks);
        node = rb_first(&system_blks->root);
        while (node) {
                entry = rb_entry(node, struct ext4_system_zone, node);
                printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ",
                       entry->start_blk, entry->start_blk + entry->count - 1);
                first = 0;
                node = rb_next(node);
        }
        rcu_read_unlock();
        printk(KERN_CONT "\n");
}

/*
 * Returns 1 if the passed-in block region [start_blk,
 * start_blk + count) is valid; 0 if any part of the block region
 * overlaps with filesystem metadata blocks.
 */
static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi,
                                     struct ext4_system_blocks *system_blks,
                                     ext4_fsblk_t start_blk,
                                     unsigned int count)
{
        struct ext4_system_zone *entry;
        struct rb_node *n;

        if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
            (start_blk + count < start_blk) ||
            (start_blk + count > ext4_blocks_count(sbi->s_es))) {
                sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
                return 0;
        }

        if (system_blks == NULL)
                return 1;

        n = system_blks->root.rb_node;
        while (n) {
                entry = rb_entry(n, struct ext4_system_zone, node);
                if (start_blk + count - 1 < entry->start_blk)
                        n = n->rb_left;
                else if (start_blk >= (entry->start_blk + entry->count))
                        n = n->rb_right;
                else {
                        sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
                        return 0;
                }
        }
        return 1;
}

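/*
 * Walk every block mapped by a reserved inode (in practice the journal
 * inode) and add those blocks to the system zone so they are never
 * handed out as data blocks.  Fails with -EFSCORRUPTED if the inode's
 * blocks already overlap existing system zones.
 */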
static int ext4_protect_reserved_inode(struct super_block *sb,
                                       struct ext4_system_blocks *system_blks,
                                       u32 ino)
{
        struct inode *inode;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_map_blocks map;
        u32 i = 0, num;
        int err = 0, n;

        if ((ino < EXT4_ROOT_INO) ||
            (ino > le32_to_cpu(sbi->s_es->s_inodes_count)))
                return -EINVAL;
        inode = ext4_iget(sb, ino, EXT4_IGET_SPECIAL);
        if (IS_ERR(inode))
                return PTR_ERR(inode);
        num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
        while (i < num) {
                map.m_lblk = i;
                map.m_len = num - i;
                n = ext4_map_blocks(NULL, inode, &map, 0);
                if (n < 0) {
                        err = n;
                        break;
                }
                if (n == 0) {
                        i++;
                } else {
                        if (!ext4_data_block_valid_rcu(sbi, system_blks,
                                                map.m_pblk, n)) {
                                ext4_error(sb, "blocks %llu-%llu from inode %u "
                                           "overlap system zone", map.m_pblk,
                                           map.m_pblk + map.m_len - 1, ino);
                                err = -EFSCORRUPTED;
                                break;
                        }
                        err = add_system_zone(system_blks, map.m_pblk, n);
                        if (err < 0)
                                break;
                        i += n;
                }
        }
        iput(inode);
        return err;
}

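/*
 * RCU callback: by the time this runs no reader can still hold a
 * reference to the old tree, so it is safe to free it.
 */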
static void ext4_destroy_system_zone(struct rcu_head *rcu)
{
        struct ext4_system_blocks *system_blks;

        system_blks = container_of(rcu, struct ext4_system_blocks, rcu);
        release_system_zone(system_blks);
        kfree(system_blks);
}

/*
 * Build the system zone rbtree which is used for block validity
 * checking.
 *
 * The update of the system_blks pointer in this function is protected
 * by the sb->s_umount semaphore.  However, we have to be careful as we
 * can be racing with ext4_data_block_valid() calls reading the
 * system_blks rbtree protected only by RCU.  That's why we first build
 * the rbtree and then swap it in place.
 */
int ext4_setup_system_zone(struct super_block *sb)
{
        ext4_group_t ngroups = ext4_get_groups_count(sb);
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_system_blocks *system_blks;
        struct ext4_group_desc *gdp;
        ext4_group_t i;
        int flex_size = ext4_flex_bg_size(sbi);
        int ret;

        if (!test_opt(sb, BLOCK_VALIDITY)) {
                if (sbi->system_blks)
                        ext4_release_system_zone(sb);
                return 0;
        }
        if (sbi->system_blks)
                return 0;

        system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
        if (!system_blks)
                return -ENOMEM;

        for (i = 0; i < ngroups; i++) {
                cond_resched();
                if (ext4_bg_has_super(sb, i) &&
                    ((i < 5) || ((i % flex_size) == 0))) {
                        ret = add_system_zone(system_blks,
                                        ext4_group_first_block_no(sb, i),
                                        ext4_bg_num_gdb(sb, i) + 1);
                        if (ret)
                                goto err;
                }
                gdp = ext4_get_group_desc(sb, i, NULL);
                ret = add_system_zone(system_blks,
                                ext4_block_bitmap(sb, gdp), 1);
                if (ret)
                        goto err;
                ret = add_system_zone(system_blks,
                                ext4_inode_bitmap(sb, gdp), 1);
                if (ret)
                        goto err;
                ret = add_system_zone(system_blks,
                                ext4_inode_table(sb, gdp),
                                sbi->s_itb_per_group);
                if (ret)
                        goto err;
        }
        if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) {
                ret = ext4_protect_reserved_inode(sb, system_blks,
                                le32_to_cpu(sbi->s_es->s_journal_inum));
                if (ret)
                        goto err;
        }

        /*
         * The system blocks rbtree is now complete; publish it with a
         * single rcu_assign_pointer() so that concurrent
         * ext4_data_block_valid() readers only ever see a fully built
         * tree.
         */
        rcu_assign_pointer(sbi->system_blks, system_blks);

        if (test_opt(sb, DEBUG))
                debug_print_tree(sbi);
        return 0;
err:
        release_system_zone(system_blks);
        kfree(system_blks);
        return ret;
}

/*
 * Called when the filesystem is unmounted or when remounting it with
 * noblock_validity specified.
 *
 * The update of the system_blks pointer in this function is protected
 * by the sb->s_umount semaphore.  However, we have to be careful as we
 * can be racing with ext4_data_block_valid() calls reading the
 * system_blks rbtree protected only by RCU.  So we first clear the
 * system_blks pointer and then free the rbtree only after the RCU
 * grace period expires.
 */
void ext4_release_system_zone(struct super_block *sb)
{
        struct ext4_system_blocks *system_blks;

        system_blks = rcu_dereference_protected(EXT4_SB(sb)->system_blks,
                                        lockdep_is_held(&sb->s_umount));
        rcu_assign_pointer(EXT4_SB(sb)->system_blks, NULL);

        if (system_blks)
                call_rcu(&system_blks->rcu, ext4_destroy_system_zone);
}

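/*
 * Check that a block range does not overlay filesystem metadata.  The
 * RCU read lock pins the system_blks tree so it cannot be freed
 * underneath us by a concurrent remount.
 */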
int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
                          unsigned int count)
{
        struct ext4_system_blocks *system_blks;
        int ret;

        /*
         * Lock the system zone to prevent it from being released
         * concurrently by a remount that inverts the current
         * "[no]block_validity" mount option.
         */
        rcu_read_lock();
        system_blks = rcu_dereference(sbi->system_blks);
        ret = ext4_data_block_valid_rcu(sbi, system_blks, start_blk,
                                        count);
        rcu_read_unlock();
        return ret;
}
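
/*
 * Illustrative call site (a sketch, not code from this file): mapping
 * paths are expected to validate a physical range before trusting it,
 * along the lines of:
 *
 *      if (!ext4_data_block_valid(sbi, map->m_pblk, map->m_len))
 *              return -EFSCORRUPTED;
 */
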
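/*
 * Validate an array of indirect block references from an inode: every
 * nonzero entry must point to a valid data block.  The journal inode
 * is exempt, since its blocks deliberately live inside the system zone.
 */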
int ext4_check_blockref(const char *function, unsigned int line,
                        struct inode *inode, __le32 *p, unsigned int max)
{
        struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
        __le32 *bref = p;
        unsigned int blk;

        if (ext4_has_feature_journal(inode->i_sb) &&
            (inode->i_ino ==
             le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
                return 0;

        while (bref < p+max) {
                blk = le32_to_cpu(*bref++);
                if (blk &&
                    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                    blk, 1))) {
                        es->s_last_error_block = cpu_to_le64(blk);
                        ext4_error_inode(inode, function, line, blk,
                                         "invalid block");
                        return -EFSCORRUPTED;
                }
        }
        return 0;
}