linux/fs/ext4/block_validity.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  linux/fs/ext4/block_validity.c
   4 *
   5 * Copyright (C) 2009
   6 * Theodore Ts'o (tytso@mit.edu)
   7 *
   8 * Track which blocks in the filesystem are metadata blocks that
   9 * should never be used as data blocks by files or directories.
  10 */
  11
  12#include <linux/time.h>
  13#include <linux/fs.h>
  14#include <linux/namei.h>
  15#include <linux/quotaops.h>
  16#include <linux/buffer_head.h>
  17#include <linux/swap.h>
  18#include <linux/pagemap.h>
  19#include <linux/blkdev.h>
  20#include <linux/slab.h>
  21#include "ext4.h"
  22
  23struct ext4_system_zone {
  24        struct rb_node  node;
  25        ext4_fsblk_t    start_blk;
  26        unsigned int    count;
  27};
  28
  29static struct kmem_cache *ext4_system_zone_cachep;
  30
  31int __init ext4_init_system_zone(void)
  32{
  33        ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
  34        if (ext4_system_zone_cachep == NULL)
  35                return -ENOMEM;
  36        return 0;
  37}
  38
  39void ext4_exit_system_zone(void)
  40{
  41        rcu_barrier();
  42        kmem_cache_destroy(ext4_system_zone_cachep);
  43}
  44
  45static inline int can_merge(struct ext4_system_zone *entry1,
  46                     struct ext4_system_zone *entry2)
  47{
  48        if ((entry1->start_blk + entry1->count) == entry2->start_blk)
  49                return 1;
  50        return 0;
  51}
  52
  53static void release_system_zone(struct ext4_system_blocks *system_blks)
  54{
  55        struct ext4_system_zone *entry, *n;
  56
  57        rbtree_postorder_for_each_entry_safe(entry, n,
  58                                &system_blks->root, node)
  59                kmem_cache_free(ext4_system_zone_cachep, entry);
  60}
  61
  62/*
  63 * Mark a range of blocks as belonging to the "system zone" --- that
  64 * is, filesystem metadata blocks which should never be used by
  65 * inodes.
  66 */
  67static int add_system_zone(struct ext4_system_blocks *system_blks,
  68                           ext4_fsblk_t start_blk,
  69                           unsigned int count)
  70{
  71        struct ext4_system_zone *new_entry = NULL, *entry;
  72        struct rb_node **n = &system_blks->root.rb_node, *node;
  73        struct rb_node *parent = NULL, *new_node = NULL;
  74
  75        while (*n) {
  76                parent = *n;
  77                entry = rb_entry(parent, struct ext4_system_zone, node);
  78                if (start_blk < entry->start_blk)
  79                        n = &(*n)->rb_left;
  80                else if (start_blk >= (entry->start_blk + entry->count))
  81                        n = &(*n)->rb_right;
  82                else {
  83                        if (start_blk + count > (entry->start_blk +
  84                                                 entry->count))
  85                                entry->count = (start_blk + count -
  86                                                entry->start_blk);
  87                        new_node = *n;
  88                        new_entry = rb_entry(new_node, struct ext4_system_zone,
  89                                             node);
  90                        break;
  91                }
  92        }
  93
  94        if (!new_entry) {
  95                new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
  96                                             GFP_KERNEL);
  97                if (!new_entry)
  98                        return -ENOMEM;
  99                new_entry->start_blk = start_blk;
 100                new_entry->count = count;
 101                new_node = &new_entry->node;
 102
 103                rb_link_node(new_node, parent, n);
 104                rb_insert_color(new_node, &system_blks->root);
 105        }
 106
 107        /* Can we merge to the left? */
 108        node = rb_prev(new_node);
 109        if (node) {
 110                entry = rb_entry(node, struct ext4_system_zone, node);
 111                if (can_merge(entry, new_entry)) {
 112                        new_entry->start_blk = entry->start_blk;
 113                        new_entry->count += entry->count;
 114                        rb_erase(node, &system_blks->root);
 115                        kmem_cache_free(ext4_system_zone_cachep, entry);
 116                }
 117        }
 118
 119        /* Can we merge to the right? */
 120        node = rb_next(new_node);
 121        if (node) {
 122                entry = rb_entry(node, struct ext4_system_zone, node);
 123                if (can_merge(new_entry, entry)) {
 124                        new_entry->count += entry->count;
 125                        rb_erase(node, &system_blks->root);
 126                        kmem_cache_free(ext4_system_zone_cachep, entry);
 127                }
 128        }
 129        return 0;
 130}
 131
 132static void debug_print_tree(struct ext4_sb_info *sbi)
 133{
 134        struct rb_node *node;
 135        struct ext4_system_zone *entry;
 136        struct ext4_system_blocks *system_blks;
 137        int first = 1;
 138
 139        printk(KERN_INFO "System zones: ");
 140        rcu_read_lock();
 141        system_blks = rcu_dereference(sbi->system_blks);
 142        node = rb_first(&system_blks->root);
 143        while (node) {
 144                entry = rb_entry(node, struct ext4_system_zone, node);
 145                printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ",
 146                       entry->start_blk, entry->start_blk + entry->count - 1);
 147                first = 0;
 148                node = rb_next(node);
 149        }
 150        rcu_read_unlock();
 151        printk(KERN_CONT "\n");
 152}
 153
 154/*
 155 * Returns 1 if the passed-in block region (start_blk,
 156 * start_blk+count) is valid; 0 if some part of the block region
 157 * overlaps with filesystem metadata blocks.
 158 */
 159static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi,
 160                                     struct ext4_system_blocks *system_blks,
 161                                     ext4_fsblk_t start_blk,
 162                                     unsigned int count)
 163{
 164        struct ext4_system_zone *entry;
 165        struct rb_node *n;
 166
 167        if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
 168            (start_blk + count < start_blk) ||
 169            (start_blk + count > ext4_blocks_count(sbi->s_es)))
 170                return 0;
 171
 172        if (system_blks == NULL)
 173                return 1;
 174
 175        n = system_blks->root.rb_node;
 176        while (n) {
 177                entry = rb_entry(n, struct ext4_system_zone, node);
 178                if (start_blk + count - 1 < entry->start_blk)
 179                        n = n->rb_left;
 180                else if (start_blk >= (entry->start_blk + entry->count))
 181                        n = n->rb_right;
 182                else
 183                        return 0;
 184        }
 185        return 1;
 186}
 187
 188static int ext4_protect_reserved_inode(struct super_block *sb,
 189                                       struct ext4_system_blocks *system_blks,
 190                                       u32 ino)
 191{
 192        struct inode *inode;
 193        struct ext4_sb_info *sbi = EXT4_SB(sb);
 194        struct ext4_map_blocks map;
 195        u32 i = 0, num;
 196        int err = 0, n;
 197
 198        if ((ino < EXT4_ROOT_INO) ||
 199            (ino > le32_to_cpu(sbi->s_es->s_inodes_count)))
 200                return -EINVAL;
 201        inode = ext4_iget(sb, ino, EXT4_IGET_SPECIAL);
 202        if (IS_ERR(inode))
 203                return PTR_ERR(inode);
 204        num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
 205        while (i < num) {
 206                cond_resched();
 207                map.m_lblk = i;
 208                map.m_len = num - i;
 209                n = ext4_map_blocks(NULL, inode, &map, 0);
 210                if (n < 0) {
 211                        err = n;
 212                        break;
 213                }
 214                if (n == 0) {
 215                        i++;
 216                } else {
 217                        if (!ext4_data_block_valid_rcu(sbi, system_blks,
 218                                                map.m_pblk, n)) {
 219                                err = -EFSCORRUPTED;
 220                                __ext4_error(sb, __func__, __LINE__, -err,
 221                                             map.m_pblk, "blocks %llu-%llu "
 222                                             "from inode %u overlap system zone",
 223                                             map.m_pblk,
 224                                             map.m_pblk + map.m_len - 1, ino);
 225                                break;
 226                        }
 227                        err = add_system_zone(system_blks, map.m_pblk, n);
 228                        if (err < 0)
 229                                break;
 230                        i += n;
 231                }
 232        }
 233        iput(inode);
 234        return err;
 235}
 236
 237static void ext4_destroy_system_zone(struct rcu_head *rcu)
 238{
 239        struct ext4_system_blocks *system_blks;
 240
 241        system_blks = container_of(rcu, struct ext4_system_blocks, rcu);
 242        release_system_zone(system_blks);
 243        kfree(system_blks);
 244}
 245
 246/*
 247 * Build system zone rbtree which is used for block validity checking.
 248 *
 249 * The update of system_blks pointer in this function is protected by
 250 * sb->s_umount semaphore. However we have to be careful as we can be
 251 * racing with ext4_data_block_valid() calls reading system_blks rbtree
 252 * protected only by RCU. That's why we first build the rbtree and then
 253 * swap it in place.
 254 */
 255int ext4_setup_system_zone(struct super_block *sb)
 256{
 257        ext4_group_t ngroups = ext4_get_groups_count(sb);
 258        struct ext4_sb_info *sbi = EXT4_SB(sb);
 259        struct ext4_system_blocks *system_blks;
 260        struct ext4_group_desc *gdp;
 261        ext4_group_t i;
 262        int flex_size = ext4_flex_bg_size(sbi);
 263        int ret;
 264
 265        if (!test_opt(sb, BLOCK_VALIDITY)) {
 266                if (sbi->system_blks)
 267                        ext4_release_system_zone(sb);
 268                return 0;
 269        }
 270        if (sbi->system_blks)
 271                return 0;
 272
 273        system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
 274        if (!system_blks)
 275                return -ENOMEM;
 276
 277        for (i=0; i < ngroups; i++) {
 278                cond_resched();
 279                if (ext4_bg_has_super(sb, i) &&
 280                    ((i < 5) || ((i % flex_size) == 0)))
 281                        add_system_zone(system_blks,
 282                                        ext4_group_first_block_no(sb, i),
 283                                        ext4_bg_num_gdb(sb, i) + 1);
 284                gdp = ext4_get_group_desc(sb, i, NULL);
 285                ret = add_system_zone(system_blks,
 286                                ext4_block_bitmap(sb, gdp), 1);
 287                if (ret)
 288                        goto err;
 289                ret = add_system_zone(system_blks,
 290                                ext4_inode_bitmap(sb, gdp), 1);
 291                if (ret)
 292                        goto err;
 293                ret = add_system_zone(system_blks,
 294                                ext4_inode_table(sb, gdp),
 295                                sbi->s_itb_per_group);
 296                if (ret)
 297                        goto err;
 298        }
 299        if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) {
 300                ret = ext4_protect_reserved_inode(sb, system_blks,
 301                                le32_to_cpu(sbi->s_es->s_journal_inum));
 302                if (ret)
 303                        goto err;
 304        }
 305
 306        /*
 307         * System blks rbtree complete, announce it once to prevent racing
 308         * with ext4_data_block_valid() accessing the rbtree at the same
 309         * time.
 310         */
 311        rcu_assign_pointer(sbi->system_blks, system_blks);
 312
 313        if (test_opt(sb, DEBUG))
 314                debug_print_tree(sbi);
 315        return 0;
 316err:
 317        release_system_zone(system_blks);
 318        kfree(system_blks);
 319        return ret;
 320}
 321
 322/*
 323 * Called when the filesystem is unmounted or when remounting it with
 324 * noblock_validity specified.
 325 *
 326 * The update of system_blks pointer in this function is protected by
 327 * sb->s_umount semaphore. However we have to be careful as we can be
 328 * racing with ext4_data_block_valid() calls reading system_blks rbtree
 329 * protected only by RCU. So we first clear the system_blks pointer and
 330 * then free the rbtree only after RCU grace period expires.
 331 */
 332void ext4_release_system_zone(struct super_block *sb)
 333{
 334        struct ext4_system_blocks *system_blks;
 335
 336        system_blks = rcu_dereference_protected(EXT4_SB(sb)->system_blks,
 337                                        lockdep_is_held(&sb->s_umount));
 338        rcu_assign_pointer(EXT4_SB(sb)->system_blks, NULL);
 339
 340        if (system_blks)
 341                call_rcu(&system_blks->rcu, ext4_destroy_system_zone);
 342}
 343
 344int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
 345                          unsigned int count)
 346{
 347        struct ext4_system_blocks *system_blks;
 348        int ret;
 349
 350        /*
 351         * Lock the system zone to prevent it being released concurrently
 352         * when doing a remount which inverse current "[no]block_validity"
 353         * mount option.
 354         */
 355        rcu_read_lock();
 356        system_blks = rcu_dereference(sbi->system_blks);
 357        ret = ext4_data_block_valid_rcu(sbi, system_blks, start_blk,
 358                                        count);
 359        rcu_read_unlock();
 360        return ret;
 361}
 362
 363int ext4_check_blockref(const char *function, unsigned int line,
 364                        struct inode *inode, __le32 *p, unsigned int max)
 365{
 366        __le32 *bref = p;
 367        unsigned int blk;
 368
 369        if (ext4_has_feature_journal(inode->i_sb) &&
 370            (inode->i_ino ==
 371             le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
 372                return 0;
 373
 374        while (bref < p+max) {
 375                blk = le32_to_cpu(*bref++);
 376                if (blk &&
 377                    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
 378                                                    blk, 1))) {
 379                        ext4_error_inode(inode, function, line, blk,
 380                                         "invalid block");
 381                        return -EFSCORRUPTED;
 382                }
 383        }
 384        return 0;
 385}
 386
 387