linux/fs/ext4/migrate.c
<<
>>
Prefs
   1/*
   2 * Copyright IBM Corporation, 2007
   3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms of version 2.1 of the GNU Lesser General Public License
   7 * as published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful, but
  10 * WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12 *
  13 */
  14
  15#include <linux/slab.h>
  16#include "ext4_jbd2.h"
  17#include "ext4_extents.h"
  18
  19/*
  20 * The contiguous blocks details which can be
  21 * represented by a single extent
  22 */
  23struct migrate_struct {
  24        ext4_lblk_t first_block, last_block, curr_block;
  25        ext4_fsblk_t first_pblock, last_pblock;
  26};
  27
  28static int finish_range(handle_t *handle, struct inode *inode,
  29                                struct migrate_struct *lb)
  30
  31{
  32        int retval = 0, needed;
  33        struct ext4_extent newext;
  34        struct ext4_ext_path *path;
  35        if (lb->first_pblock == 0)
  36                return 0;
  37
  38        /* Add the extent to temp inode*/
  39        newext.ee_block = cpu_to_le32(lb->first_block);
  40        newext.ee_len   = cpu_to_le16(lb->last_block - lb->first_block + 1);
  41        ext4_ext_store_pblock(&newext, lb->first_pblock);
  42        path = ext4_ext_find_extent(inode, lb->first_block, NULL);
  43
  44        if (IS_ERR(path)) {
  45                retval = PTR_ERR(path);
  46                path = NULL;
  47                goto err_out;
  48        }
  49
  50        /*
  51         * Calculate the credit needed to inserting this extent
  52         * Since we are doing this in loop we may accumalate extra
  53         * credit. But below we try to not accumalate too much
  54         * of them by restarting the journal.
  55         */
  56        needed = ext4_ext_calc_credits_for_single_extent(inode,
  57                    lb->last_block - lb->first_block + 1, path);
  58
  59        /*
  60         * Make sure the credit we accumalated is not really high
  61         */
  62        if (needed && ext4_handle_has_enough_credits(handle,
  63                                                EXT4_RESERVE_TRANS_BLOCKS)) {
  64                retval = ext4_journal_restart(handle, needed);
  65                if (retval)
  66                        goto err_out;
  67        } else if (needed) {
  68                retval = ext4_journal_extend(handle, needed);
  69                if (retval) {
  70                        /*
  71                         * IF not able to extend the journal restart the journal
  72                         */
  73                        retval = ext4_journal_restart(handle, needed);
  74                        if (retval)
  75                                goto err_out;
  76                }
  77        }
  78        retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
  79err_out:
  80        if (path) {
  81                ext4_ext_drop_refs(path);
  82                kfree(path);
  83        }
  84        lb->first_pblock = 0;
  85        return retval;
  86}
  87
  88static int update_extent_range(handle_t *handle, struct inode *inode,
  89                               ext4_fsblk_t pblock, struct migrate_struct *lb)
  90{
  91        int retval;
  92        /*
  93         * See if we can add on to the existing range (if it exists)
  94         */
  95        if (lb->first_pblock &&
  96                (lb->last_pblock+1 == pblock) &&
  97                (lb->last_block+1 == lb->curr_block)) {
  98                lb->last_pblock = pblock;
  99                lb->last_block = lb->curr_block;
 100                lb->curr_block++;
 101                return 0;
 102        }
 103        /*
 104         * Start a new range.
 105         */
 106        retval = finish_range(handle, inode, lb);
 107        lb->first_pblock = lb->last_pblock = pblock;
 108        lb->first_block = lb->last_block = lb->curr_block;
 109        lb->curr_block++;
 110        return retval;
 111}
 112
 113static int update_ind_extent_range(handle_t *handle, struct inode *inode,
 114                                   ext4_fsblk_t pblock,
 115                                   struct migrate_struct *lb)
 116{
 117        struct buffer_head *bh;
 118        __le32 *i_data;
 119        int i, retval = 0;
 120        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 121
 122        bh = sb_bread(inode->i_sb, pblock);
 123        if (!bh)
 124                return -EIO;
 125
 126        i_data = (__le32 *)bh->b_data;
 127        for (i = 0; i < max_entries; i++) {
 128                if (i_data[i]) {
 129                        retval = update_extent_range(handle, inode,
 130                                                le32_to_cpu(i_data[i]), lb);
 131                        if (retval)
 132                                break;
 133                } else {
 134                        lb->curr_block++;
 135                }
 136        }
 137        put_bh(bh);
 138        return retval;
 139
 140}
 141
 142static int update_dind_extent_range(handle_t *handle, struct inode *inode,
 143                                    ext4_fsblk_t pblock,
 144                                    struct migrate_struct *lb)
 145{
 146        struct buffer_head *bh;
 147        __le32 *i_data;
 148        int i, retval = 0;
 149        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 150
 151        bh = sb_bread(inode->i_sb, pblock);
 152        if (!bh)
 153                return -EIO;
 154
 155        i_data = (__le32 *)bh->b_data;
 156        for (i = 0; i < max_entries; i++) {
 157                if (i_data[i]) {
 158                        retval = update_ind_extent_range(handle, inode,
 159                                                le32_to_cpu(i_data[i]), lb);
 160                        if (retval)
 161                                break;
 162                } else {
 163                        /* Only update the file block number */
 164                        lb->curr_block += max_entries;
 165                }
 166        }
 167        put_bh(bh);
 168        return retval;
 169
 170}
 171
 172static int update_tind_extent_range(handle_t *handle, struct inode *inode,
 173                                    ext4_fsblk_t pblock,
 174                                    struct migrate_struct *lb)
 175{
 176        struct buffer_head *bh;
 177        __le32 *i_data;
 178        int i, retval = 0;
 179        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 180
 181        bh = sb_bread(inode->i_sb, pblock);
 182        if (!bh)
 183                return -EIO;
 184
 185        i_data = (__le32 *)bh->b_data;
 186        for (i = 0; i < max_entries; i++) {
 187                if (i_data[i]) {
 188                        retval = update_dind_extent_range(handle, inode,
 189                                                le32_to_cpu(i_data[i]), lb);
 190                        if (retval)
 191                                break;
 192                } else {
 193                        /* Only update the file block number */
 194                        lb->curr_block += max_entries * max_entries;
 195                }
 196        }
 197        put_bh(bh);
 198        return retval;
 199
 200}
 201
 202static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
 203{
 204        int retval = 0, needed;
 205
 206        if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
 207                return 0;
 208        /*
 209         * We are freeing a blocks. During this we touch
 210         * superblock, group descriptor and block bitmap.
 211         * So allocate a credit of 3. We may update
 212         * quota (user and group).
 213         */
 214        needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
 215
 216        if (ext4_journal_extend(handle, needed) != 0)
 217                retval = ext4_journal_restart(handle, needed);
 218
 219        return retval;
 220}
 221
 222static int free_dind_blocks(handle_t *handle,
 223                                struct inode *inode, __le32 i_data)
 224{
 225        int i;
 226        __le32 *tmp_idata;
 227        struct buffer_head *bh;
 228        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 229
 230        bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
 231        if (!bh)
 232                return -EIO;
 233
 234        tmp_idata = (__le32 *)bh->b_data;
 235        for (i = 0; i < max_entries; i++) {
 236                if (tmp_idata[i]) {
 237                        extend_credit_for_blkdel(handle, inode);
 238                        ext4_free_blocks(handle, inode, NULL,
 239                                         le32_to_cpu(tmp_idata[i]), 1,
 240                                         EXT4_FREE_BLOCKS_METADATA |
 241                                         EXT4_FREE_BLOCKS_FORGET);
 242                }
 243        }
 244        put_bh(bh);
 245        extend_credit_for_blkdel(handle, inode);
 246        ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
 247                         EXT4_FREE_BLOCKS_METADATA |
 248                         EXT4_FREE_BLOCKS_FORGET);
 249        return 0;
 250}
 251
 252static int free_tind_blocks(handle_t *handle,
 253                                struct inode *inode, __le32 i_data)
 254{
 255        int i, retval = 0;
 256        __le32 *tmp_idata;
 257        struct buffer_head *bh;
 258        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 259
 260        bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
 261        if (!bh)
 262                return -EIO;
 263
 264        tmp_idata = (__le32 *)bh->b_data;
 265        for (i = 0; i < max_entries; i++) {
 266                if (tmp_idata[i]) {
 267                        retval = free_dind_blocks(handle,
 268                                        inode, tmp_idata[i]);
 269                        if (retval) {
 270                                put_bh(bh);
 271                                return retval;
 272                        }
 273                }
 274        }
 275        put_bh(bh);
 276        extend_credit_for_blkdel(handle, inode);
 277        ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
 278                         EXT4_FREE_BLOCKS_METADATA |
 279                         EXT4_FREE_BLOCKS_FORGET);
 280        return 0;
 281}
 282
 283static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
 284{
 285        int retval;
 286
 287        /* ei->i_data[EXT4_IND_BLOCK] */
 288        if (i_data[0]) {
 289                extend_credit_for_blkdel(handle, inode);
 290                ext4_free_blocks(handle, inode, NULL,
 291                                le32_to_cpu(i_data[0]), 1,
 292                                 EXT4_FREE_BLOCKS_METADATA |
 293                                 EXT4_FREE_BLOCKS_FORGET);
 294        }
 295
 296        /* ei->i_data[EXT4_DIND_BLOCK] */
 297        if (i_data[1]) {
 298                retval = free_dind_blocks(handle, inode, i_data[1]);
 299                if (retval)
 300                        return retval;
 301        }
 302
 303        /* ei->i_data[EXT4_TIND_BLOCK] */
 304        if (i_data[2]) {
 305                retval = free_tind_blocks(handle, inode, i_data[2]);
 306                if (retval)
 307                        return retval;
 308        }
 309        return 0;
 310}
 311
 312static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
 313                                                struct inode *tmp_inode)
 314{
 315        int retval;
 316        __le32  i_data[3];
 317        struct ext4_inode_info *ei = EXT4_I(inode);
 318        struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
 319
 320        /*
 321         * One credit accounted for writing the
 322         * i_data field of the original inode
 323         */
 324        retval = ext4_journal_extend(handle, 1);
 325        if (retval) {
 326                retval = ext4_journal_restart(handle, 1);
 327                if (retval)
 328                        goto err_out;
 329        }
 330
 331        i_data[0] = ei->i_data[EXT4_IND_BLOCK];
 332        i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
 333        i_data[2] = ei->i_data[EXT4_TIND_BLOCK];
 334
 335        down_write(&EXT4_I(inode)->i_data_sem);
 336        /*
 337         * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation
 338         * happened after we started the migrate. We need to
 339         * fail the migrate
 340         */
 341        if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) {
 342                retval = -EAGAIN;
 343                up_write(&EXT4_I(inode)->i_data_sem);
 344                goto err_out;
 345        } else
 346                ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
 347        /*
 348         * We have the extent map build with the tmp inode.
 349         * Now copy the i_data across
 350         */
 351        ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
 352        memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
 353
 354        /*
 355         * Update i_blocks with the new blocks that got
 356         * allocated while adding extents for extent index
 357         * blocks.
 358         *
 359         * While converting to extents we need not
 360         * update the orignal inode i_blocks for extent blocks
 361         * via quota APIs. The quota update happened via tmp_inode already.
 362         */
 363        spin_lock(&inode->i_lock);
 364        inode->i_blocks += tmp_inode->i_blocks;
 365        spin_unlock(&inode->i_lock);
 366        up_write(&EXT4_I(inode)->i_data_sem);
 367
 368        /*
 369         * We mark the inode dirty after, because we decrement the
 370         * i_blocks when freeing the indirect meta-data blocks
 371         */
 372        retval = free_ind_block(handle, inode, i_data);
 373        ext4_mark_inode_dirty(handle, inode);
 374
 375err_out:
 376        return retval;
 377}
 378
 379static int free_ext_idx(handle_t *handle, struct inode *inode,
 380                                        struct ext4_extent_idx *ix)
 381{
 382        int i, retval = 0;
 383        ext4_fsblk_t block;
 384        struct buffer_head *bh;
 385        struct ext4_extent_header *eh;
 386
 387        block = ext4_idx_pblock(ix);
 388        bh = sb_bread(inode->i_sb, block);
 389        if (!bh)
 390                return -EIO;
 391
 392        eh = (struct ext4_extent_header *)bh->b_data;
 393        if (eh->eh_depth != 0) {
 394                ix = EXT_FIRST_INDEX(eh);
 395                for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
 396                        retval = free_ext_idx(handle, inode, ix);
 397                        if (retval)
 398                                break;
 399                }
 400        }
 401        put_bh(bh);
 402        extend_credit_for_blkdel(handle, inode);
 403        ext4_free_blocks(handle, inode, NULL, block, 1,
 404                         EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
 405        return retval;
 406}
 407
 408/*
 409 * Free the extent meta data blocks only
 410 */
 411static int free_ext_block(handle_t *handle, struct inode *inode)
 412{
 413        int i, retval = 0;
 414        struct ext4_inode_info *ei = EXT4_I(inode);
 415        struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
 416        struct ext4_extent_idx *ix;
 417        if (eh->eh_depth == 0)
 418                /*
 419                 * No extra blocks allocated for extent meta data
 420                 */
 421                return 0;
 422        ix = EXT_FIRST_INDEX(eh);
 423        for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
 424                retval = free_ext_idx(handle, inode, ix);
 425                if (retval)
 426                        return retval;
 427        }
 428        return retval;
 429
 430}
 431
 432int ext4_ext_migrate(struct inode *inode)
 433{
 434        handle_t *handle;
 435        int retval = 0, i;
 436        __le32 *i_data;
 437        struct ext4_inode_info *ei;
 438        struct inode *tmp_inode = NULL;
 439        struct migrate_struct lb;
 440        unsigned long max_entries;
 441        __u32 goal;
 442        uid_t owner[2];
 443
 444        /*
 445         * If the filesystem does not support extents, or the inode
 446         * already is extent-based, error out.
 447         */
 448        if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
 449                                       EXT4_FEATURE_INCOMPAT_EXTENTS) ||
 450            (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 451                return -EINVAL;
 452
 453        if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
 454                /*
 455                 * don't migrate fast symlink
 456                 */
 457                return retval;
 458
 459        handle = ext4_journal_start(inode,
 460                                        EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
 461                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
 462                                        EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
 463                                        + 1);
 464        if (IS_ERR(handle)) {
 465                retval = PTR_ERR(handle);
 466                return retval;
 467        }
 468        goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
 469                EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
 470        owner[0] = i_uid_read(inode);
 471        owner[1] = i_gid_read(inode);
 472        tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
 473                                   S_IFREG, NULL, goal, owner);
 474        if (IS_ERR(tmp_inode)) {
 475                retval = PTR_ERR(tmp_inode);
 476                ext4_journal_stop(handle);
 477                return retval;
 478        }
 479        i_size_write(tmp_inode, i_size_read(inode));
 480        /*
 481         * Set the i_nlink to zero so it will be deleted later
 482         * when we drop inode reference.
 483         */
 484        clear_nlink(tmp_inode);
 485
 486        ext4_ext_tree_init(handle, tmp_inode);
 487        ext4_orphan_add(handle, tmp_inode);
 488        ext4_journal_stop(handle);
 489
 490        /*
 491         * start with one credit accounted for
 492         * superblock modification.
 493         *
 494         * For the tmp_inode we already have committed the
 495         * trascation that created the inode. Later as and
 496         * when we add extents we extent the journal
 497         */
 498        /*
 499         * Even though we take i_mutex we can still cause block
 500         * allocation via mmap write to holes. If we have allocated
 501         * new blocks we fail migrate.  New block allocation will
 502         * clear EXT4_STATE_EXT_MIGRATE flag.  The flag is updated
 503         * with i_data_sem held to prevent racing with block
 504         * allocation.
 505         */
 506        down_read((&EXT4_I(inode)->i_data_sem));
 507        ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
 508        up_read((&EXT4_I(inode)->i_data_sem));
 509
 510        handle = ext4_journal_start(inode, 1);
 511        if (IS_ERR(handle)) {
 512                /*
 513                 * It is impossible to update on-disk structures without
 514                 * a handle, so just rollback in-core changes and live other
 515                 * work to orphan_list_cleanup()
 516                 */
 517                ext4_orphan_del(NULL, tmp_inode);
 518                retval = PTR_ERR(handle);
 519                goto out;
 520        }
 521
 522        ei = EXT4_I(inode);
 523        i_data = ei->i_data;
 524        memset(&lb, 0, sizeof(lb));
 525
 526        /* 32 bit block address 4 bytes */
 527        max_entries = inode->i_sb->s_blocksize >> 2;
 528        for (i = 0; i < EXT4_NDIR_BLOCKS; i++) {
 529                if (i_data[i]) {
 530                        retval = update_extent_range(handle, tmp_inode,
 531                                                le32_to_cpu(i_data[i]), &lb);
 532                        if (retval)
 533                                goto err_out;
 534                } else
 535                        lb.curr_block++;
 536        }
 537        if (i_data[EXT4_IND_BLOCK]) {
 538                retval = update_ind_extent_range(handle, tmp_inode,
 539                                le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
 540                        if (retval)
 541                                goto err_out;
 542        } else
 543                lb.curr_block += max_entries;
 544        if (i_data[EXT4_DIND_BLOCK]) {
 545                retval = update_dind_extent_range(handle, tmp_inode,
 546                                le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
 547                        if (retval)
 548                                goto err_out;
 549        } else
 550                lb.curr_block += max_entries * max_entries;
 551        if (i_data[EXT4_TIND_BLOCK]) {
 552                retval = update_tind_extent_range(handle, tmp_inode,
 553                                le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
 554                        if (retval)
 555                                goto err_out;
 556        }
 557        /*
 558         * Build the last extent
 559         */
 560        retval = finish_range(handle, tmp_inode, &lb);
 561err_out:
 562        if (retval)
 563                /*
 564                 * Failure case delete the extent information with the
 565                 * tmp_inode
 566                 */
 567                free_ext_block(handle, tmp_inode);
 568        else {
 569                retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
 570                if (retval)
 571                        /*
 572                         * if we fail to swap inode data free the extent
 573                         * details of the tmp inode
 574                         */
 575                        free_ext_block(handle, tmp_inode);
 576        }
 577
 578        /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
 579        if (ext4_journal_extend(handle, 1) != 0)
 580                ext4_journal_restart(handle, 1);
 581
 582        /*
 583         * Mark the tmp_inode as of size zero
 584         */
 585        i_size_write(tmp_inode, 0);
 586
 587        /*
 588         * set the  i_blocks count to zero
 589         * so that the ext4_delete_inode does the
 590         * right job
 591         *
 592         * We don't need to take the i_lock because
 593         * the inode is not visible to user space.
 594         */
 595        tmp_inode->i_blocks = 0;
 596
 597        /* Reset the extent details */
 598        ext4_ext_tree_init(handle, tmp_inode);
 599        ext4_journal_stop(handle);
 600out:
 601        unlock_new_inode(tmp_inode);
 602        iput(tmp_inode);
 603
 604        return retval;
 605}
 606