linux/fs/ext4/migrate.c
/*
 * Copyright IBM Corporation, 2007
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/module.h>
#include <linux/slab.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"

/*
 * Details of a contiguous run of blocks that can be
 * represented by a single extent.
 */
struct list_blocks_struct {
	ext4_lblk_t first_block, last_block;
	ext4_fsblk_t first_pblock, last_pblock;
};

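/*
 * Insert the extent described by @lb (the run accumulated so far) into
 * the extent tree of @inode, extending or restarting the journal as
 * needed, and reset @lb so that a new run can be started.
 */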
static int finish_range(handle_t *handle, struct inode *inode,
				struct list_blocks_struct *lb)
{
	int retval = 0, needed;
	struct ext4_extent newext;
	struct ext4_ext_path *path;
	if (lb->first_pblock == 0)
		return 0;

	/* Add the extent to the temp inode */
	newext.ee_block = cpu_to_le32(lb->first_block);
	newext.ee_len   = cpu_to_le16(lb->last_block - lb->first_block + 1);
	ext4_ext_store_pblock(&newext, lb->first_pblock);
	path = ext4_ext_find_extent(inode, lb->first_block, NULL);

	if (IS_ERR(path)) {
		retval = PTR_ERR(path);
		path = NULL;
		goto err_out;
	}

	/*
	 * Calculate the credits needed to insert this extent.
	 * Since we are doing this in a loop we may accumulate extra
	 * credits, but below we try not to accumulate too many
	 * of them by restarting the journal.
	 */
	needed = ext4_ext_calc_credits_for_single_extent(inode,
		    lb->last_block - lb->first_block + 1, path);

	/*
	 * Make sure the credits we have accumulated are not too high
	 */
	if (needed && ext4_handle_has_enough_credits(handle,
						EXT4_RESERVE_TRANS_BLOCKS)) {
		retval = ext4_journal_restart(handle, needed);
		if (retval)
			goto err_out;
	} else if (needed) {
		retval = ext4_journal_extend(handle, needed);
		if (retval) {
			/*
			 * If we cannot extend the journal, restart it
			 */
			retval = ext4_journal_restart(handle, needed);
			if (retval)
				goto err_out;
		}
	}
	retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
err_out:
	if (path) {
		ext4_ext_drop_refs(path);
		kfree(path);
	}
	lb->first_pblock = 0;
	return retval;
}

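/*
 * Add (@pblock, @blk_num) to the run tracked in @lb if it is contiguous
 * with it; otherwise flush the current run via finish_range() and start
 * a new run with this block.
 */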
static int update_extent_range(handle_t *handle, struct inode *inode,
				ext4_fsblk_t pblock, ext4_lblk_t blk_num,
				struct list_blocks_struct *lb)
{
	int retval;
	/*
	 * See if we can add on to the existing range (if it exists)
	 */
	if (lb->first_pblock &&
		(lb->last_pblock+1 == pblock) &&
		(lb->last_block+1 == blk_num)) {
		lb->last_pblock = pblock;
		lb->last_block = blk_num;
		return 0;
	}
	/*
	 * Start a new range.
	 */
	retval = finish_range(handle, inode, lb);
	lb->first_pblock = lb->last_pblock = pblock;
	lb->first_block = lb->last_block = blk_num;

	return retval;
}

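/*
 * Walk a single indirect block at @pblock and feed every mapped block
 * into update_extent_range(), advancing *blk_nump past the file blocks
 * covered by this indirect block.
 */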
static int update_ind_extent_range(handle_t *handle, struct inode *inode,
				   ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
				   struct list_blocks_struct *lb)
{
	struct buffer_head *bh;
	__le32 *i_data;
	int i, retval = 0;
	ext4_lblk_t blk_count = *blk_nump;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	if (!pblock) {
		/* Only update the file block number */
		*blk_nump += max_entries;
		return 0;
	}

	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;

	i_data = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++, blk_count++) {
		if (i_data[i]) {
			retval = update_extent_range(handle, inode,
						le32_to_cpu(i_data[i]),
						blk_count, lb);
			if (retval)
				break;
		}
	}

	/* Update the file block number */
	*blk_nump = blk_count;
	put_bh(bh);
	return retval;
}

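/*
 * Walk a double indirect block: each non-zero entry is an indirect
 * block handed to update_ind_extent_range(); holes only advance the
 * logical block count.
 */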
static int update_dind_extent_range(handle_t *handle, struct inode *inode,
				    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
				    struct list_blocks_struct *lb)
{
	struct buffer_head *bh;
	__le32 *i_data;
	int i, retval = 0;
	ext4_lblk_t blk_count = *blk_nump;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	if (!pblock) {
		/* Only update the file block number */
		*blk_nump += max_entries * max_entries;
		return 0;
	}
	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;

	i_data = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (i_data[i]) {
			retval = update_ind_extent_range(handle, inode,
						le32_to_cpu(i_data[i]),
						&blk_count, lb);
			if (retval)
				break;
		} else {
			/* Only update the file block number */
			blk_count += max_entries;
		}
	}

	/* Update the file block number */
	*blk_nump = blk_count;
	put_bh(bh);
	return retval;
}

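/*
 * Walk a triple indirect block: each non-zero entry is a double
 * indirect block handed to update_dind_extent_range(); holes only
 * advance the logical block count.
 */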
static int update_tind_extent_range(handle_t *handle, struct inode *inode,
				    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
				    struct list_blocks_struct *lb)
{
	struct buffer_head *bh;
	__le32 *i_data;
	int i, retval = 0;
	ext4_lblk_t blk_count = *blk_nump;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	if (!pblock) {
		/* Only update the file block number */
		*blk_nump += max_entries * max_entries * max_entries;
		return 0;
	}
	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;

	i_data = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (i_data[i]) {
			retval = update_dind_extent_range(handle, inode,
						le32_to_cpu(i_data[i]),
						&blk_count, lb);
			if (retval)
				break;
		} else
			/* Only update the file block number */
			blk_count += max_entries * max_entries;
	}
	/* Update the file block number */
	*blk_nump = blk_count;
	put_bh(bh);
	return retval;
}

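/*
 * Make sure the handle has enough credits to free one block: extend the
 * current transaction if possible, otherwise restart it.
 */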
static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
{
	int retval = 0, needed;

	if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
		return 0;
	/*
	 * We are freeing blocks. During this we touch the
	 * superblock, group descriptor and block bitmap,
	 * so allow for 3 credits. We may also update
	 * quota (user and group).
	 */
	needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);

	if (ext4_journal_extend(handle, needed) != 0)
		retval = ext4_journal_restart(handle, needed);

	return retval;
}

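/*
 * Free the indirect blocks referenced by the double indirect block
 * @i_data, and then the double indirect block itself.  Only the old
 * metadata blocks are freed; the data blocks are now referenced by the
 * extent tree.
 */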
static int free_dind_blocks(handle_t *handle,
				struct inode *inode, __le32 i_data)
{
	int i;
	__le32 *tmp_idata;
	struct buffer_head *bh;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
	if (!bh)
		return -EIO;

	tmp_idata = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (tmp_idata[i]) {
			extend_credit_for_blkdel(handle, inode);
			ext4_free_blocks(handle, inode, 0,
					 le32_to_cpu(tmp_idata[i]), 1,
					 EXT4_FREE_BLOCKS_METADATA |
					 EXT4_FREE_BLOCKS_FORGET);
		}
	}
	put_bh(bh);
	extend_credit_for_blkdel(handle, inode);
	ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
			 EXT4_FREE_BLOCKS_METADATA |
			 EXT4_FREE_BLOCKS_FORGET);
	return 0;
}

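/*
 * Free the metadata blocks below the triple indirect block @i_data
 * (each entry is a double indirect block), and then the triple indirect
 * block itself.
 */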
static int free_tind_blocks(handle_t *handle,
				struct inode *inode, __le32 i_data)
{
	int i, retval = 0;
	__le32 *tmp_idata;
	struct buffer_head *bh;
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;

	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
	if (!bh)
		return -EIO;

	tmp_idata = (__le32 *)bh->b_data;
	for (i = 0; i < max_entries; i++) {
		if (tmp_idata[i]) {
			retval = free_dind_blocks(handle,
					inode, tmp_idata[i]);
			if (retval) {
				put_bh(bh);
				return retval;
			}
		}
	}
	put_bh(bh);
	extend_credit_for_blkdel(handle, inode);
	ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
			 EXT4_FREE_BLOCKS_METADATA |
			 EXT4_FREE_BLOCKS_FORGET);
	return 0;
}

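/*
 * Free the indirect, double indirect and triple indirect metadata
 * blocks recorded in @i_data, which is a copy of the original inode's
 * i_data[EXT4_IND_BLOCK..EXT4_TIND_BLOCK].
 */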
static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
{
	int retval;

	/* ei->i_data[EXT4_IND_BLOCK] */
	if (i_data[0]) {
		extend_credit_for_blkdel(handle, inode);
		ext4_free_blocks(handle, inode, 0,
				 le32_to_cpu(i_data[0]), 1,
				 EXT4_FREE_BLOCKS_METADATA |
				 EXT4_FREE_BLOCKS_FORGET);
	}

	/* ei->i_data[EXT4_DIND_BLOCK] */
	if (i_data[1]) {
		retval = free_dind_blocks(handle, inode, i_data[1]);
		if (retval)
			return retval;
	}

	/* ei->i_data[EXT4_TIND_BLOCK] */
	if (i_data[2]) {
		retval = free_tind_blocks(handle, inode, i_data[2]);
		if (retval)
			return retval;
	}
	return 0;
}

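/*
 * Copy the extent tree built in @tmp_inode into the original @inode,
 * after checking that no block allocation raced with the migration,
 * and then free the now unused indirect metadata blocks.
 */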
static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
						struct inode *tmp_inode)
{
	int retval;
	__le32	i_data[3];
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);

	/*
	 * One credit accounted for writing the
	 * i_data field of the original inode
	 */
	retval = ext4_journal_extend(handle, 1);
	if (retval) {
		retval = ext4_journal_restart(handle, 1);
		if (retval)
			goto err_out;
	}

	i_data[0] = ei->i_data[EXT4_IND_BLOCK];
	i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
	i_data[2] = ei->i_data[EXT4_TIND_BLOCK];

	down_write(&EXT4_I(inode)->i_data_sem);
	/*
	 * If EXT4_STATE_EXT_MIGRATE is cleared, a block allocation
	 * happened after we started the migration. We need to
	 * fail the migration.
	 */
	if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) {
		retval = -EAGAIN;
		up_write(&EXT4_I(inode)->i_data_sem);
		goto err_out;
	} else
		ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
	/*
	 * We have the extent map built with the tmp inode.
	 * Now copy the i_data across.
	 */
	ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
	memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));

	/*
	 * Update i_blocks with the new blocks that got
	 * allocated while adding extents for extent index
	 * blocks.
	 *
	 * While converting to extents we need not update the
	 * original inode's i_blocks for the extent blocks via the
	 * quota APIs. The quota update already happened via tmp_inode.
	 */
	spin_lock(&inode->i_lock);
	inode->i_blocks += tmp_inode->i_blocks;
	spin_unlock(&inode->i_lock);
	up_write(&EXT4_I(inode)->i_data_sem);

	/*
	 * We mark the inode dirty after the free, because we decrement
	 * i_blocks when freeing the indirect metadata blocks.
	 */
	retval = free_ind_block(handle, inode, i_data);
	ext4_mark_inode_dirty(handle, inode);

err_out:
	return retval;
}

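/*
 * Recursively free the extent index block referenced by @ix and any
 * index blocks below it in the tmp inode's extent tree.
 */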
static int free_ext_idx(handle_t *handle, struct inode *inode,
					struct ext4_extent_idx *ix)
{
	int i, retval = 0;
	ext4_fsblk_t block;
	struct buffer_head *bh;
	struct ext4_extent_header *eh;

	block = ext4_idx_pblock(ix);
	bh = sb_bread(inode->i_sb, block);
	if (!bh)
		return -EIO;

	eh = (struct ext4_extent_header *)bh->b_data;
	if (eh->eh_depth != 0) {
		ix = EXT_FIRST_INDEX(eh);
		for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
			retval = free_ext_idx(handle, inode, ix);
			if (retval)
				break;
		}
	}
	put_bh(bh);
	extend_credit_for_blkdel(handle, inode);
	ext4_free_blocks(handle, inode, 0, block, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
	return retval;
}

/*
 * Free only the extent metadata blocks of the inode's extent tree.
 */
static int free_ext_block(handle_t *handle, struct inode *inode)
{
	int i, retval = 0;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
	struct ext4_extent_idx *ix;

	if (eh->eh_depth == 0)
		/*
		 * No extra blocks allocated for extent metadata
		 */
		return 0;
	ix = EXT_FIRST_INDEX(eh);
	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
		retval = free_ext_idx(handle, inode, ix);
		if (retval)
			return retval;
	}
	return retval;
}

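/*
 * Convert an indirect-block mapped inode to use extents: build the
 * extent tree in a temporary inode, swap the new mapping into the
 * original inode and free the old indirect metadata blocks.
 */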
int ext4_ext_migrate(struct inode *inode)
{
	handle_t *handle;
	int retval = 0, i;
	__le32 *i_data;
	ext4_lblk_t blk_count = 0;
	struct ext4_inode_info *ei;
	struct inode *tmp_inode = NULL;
	struct list_blocks_struct lb;
	unsigned long max_entries;
	__u32 goal;

	/*
	 * If the filesystem does not support extents, or the inode
	 * is already extent-based, error out.
	 */
	if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
				       EXT4_FEATURE_INCOMPAT_EXTENTS) ||
	    (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		return -EINVAL;

	if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
		/*
		 * Don't migrate fast symlinks
		 */
		return retval;

	handle = ext4_journal_start(inode,
					EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
					EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
					+ 1);
	if (IS_ERR(handle)) {
		retval = PTR_ERR(handle);
		return retval;
	}
	goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
		EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
	tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
				   S_IFREG, NULL, goal);
	if (IS_ERR(tmp_inode)) {
		retval = -ENOMEM;
		ext4_journal_stop(handle);
		return retval;
	}
	i_size_write(tmp_inode, i_size_read(inode));
	/*
	 * Set i_nlink to zero so the inode will be deleted later
	 * when we drop the inode reference.
	 */
	tmp_inode->i_nlink = 0;

	ext4_ext_tree_init(handle, tmp_inode);
	ext4_orphan_add(handle, tmp_inode);
	ext4_journal_stop(handle);

	/*
	 * Start with one credit accounted for the
	 * superblock modification.
	 *
	 * For the tmp_inode we have already committed the
	 * transaction that created the inode. Later, as and
	 * when we add extents, we extend the journal.
	 */
	/*
	 * Even though we take i_mutex we can still cause block
	 * allocation via mmap writes to holes. If we have allocated
	 * new blocks we fail the migration.  New block allocation will
	 * clear the EXT4_STATE_EXT_MIGRATE flag.  The flag is updated
	 * with i_data_sem held to prevent racing with block
	 * allocation.
	 */
	down_read((&EXT4_I(inode)->i_data_sem));
	ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
	up_read((&EXT4_I(inode)->i_data_sem));

	handle = ext4_journal_start(inode, 1);
	if (IS_ERR(handle)) {
		/*
		 * It is impossible to update on-disk structures without
		 * a handle, so just roll back the in-core changes and leave
		 * the rest of the work to orphan_list_cleanup().
		 */
		ext4_orphan_del(NULL, tmp_inode);
		retval = PTR_ERR(handle);
		goto out;
	}

	ei = EXT4_I(inode);
	i_data = ei->i_data;
	memset(&lb, 0, sizeof(lb));

	/* 32 bit block addresses, 4 bytes each */
	max_entries = inode->i_sb->s_blocksize >> 2;
	for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
		if (i_data[i]) {
			retval = update_extent_range(handle, tmp_inode,
						le32_to_cpu(i_data[i]),
						blk_count, &lb);
			if (retval)
				goto err_out;
		}
	}
	if (i_data[EXT4_IND_BLOCK]) {
		retval = update_ind_extent_range(handle, tmp_inode,
					le32_to_cpu(i_data[EXT4_IND_BLOCK]),
					&blk_count, &lb);
		if (retval)
			goto err_out;
	} else
		blk_count += max_entries;
	if (i_data[EXT4_DIND_BLOCK]) {
		retval = update_dind_extent_range(handle, tmp_inode,
					le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
					&blk_count, &lb);
		if (retval)
			goto err_out;
	} else
		blk_count += max_entries * max_entries;
	if (i_data[EXT4_TIND_BLOCK]) {
		retval = update_tind_extent_range(handle, tmp_inode,
					le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
					&blk_count, &lb);
		if (retval)
			goto err_out;
	}
	/*
	 * Build the last extent
	 */
	retval = finish_range(handle, tmp_inode, &lb);
err_out:
	if (retval)
		/*
		 * On failure, delete the extent information built in
		 * the tmp_inode
		 */
		free_ext_block(handle, tmp_inode);
	else {
		retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
		if (retval)
			/*
			 * If we fail to swap the inode data, free the
			 * extent details of the tmp inode
			 */
			free_ext_block(handle, tmp_inode);
	}

	/* We mark the tmp_inode dirty via ext4_ext_tree_init. */
	if (ext4_journal_extend(handle, 1) != 0)
		ext4_journal_restart(handle, 1);

	/*
	 * Mark the tmp_inode as of size zero
	 */
	i_size_write(tmp_inode, 0);

	/*
	 * Set the i_blocks count to zero
	 * so that ext4_delete_inode() does the
	 * right job.
	 *
	 * We don't need to take the i_lock because
	 * the inode is not visible to user space.
	 */
	tmp_inode->i_blocks = 0;

	/* Reset the extent details */
	ext4_ext_tree_init(handle, tmp_inode);
	ext4_journal_stop(handle);
out:
	unlock_new_inode(tmp_inode);
	iput(tmp_inode);

	return retval;
}