linux/fs/ext4/migrate.c
<<
>>
Prefs
   1/*
   2 * Copyright IBM Corporation, 2007
   3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms of version 2.1 of the GNU Lesser General Public License
   7 * as published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful, but
  10 * WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12 *
  13 */
  14
  15#include <linux/module.h>
  16#include "ext4_jbd2.h"
  17#include "ext4_extents.h"
  18
  19/*
  20 * The contiguous blocks details which can be
  21 * represented by a single extent
  22 */
  23struct list_blocks_struct {
  24        ext4_lblk_t first_block, last_block;
  25        ext4_fsblk_t first_pblock, last_pblock;
  26};
  27
  28static int finish_range(handle_t *handle, struct inode *inode,
  29                                struct list_blocks_struct *lb)
  30
  31{
  32        int retval = 0, needed;
  33        struct ext4_extent newext;
  34        struct ext4_ext_path *path;
  35        if (lb->first_pblock == 0)
  36                return 0;
  37
  38        /* Add the extent to temp inode*/
  39        newext.ee_block = cpu_to_le32(lb->first_block);
  40        newext.ee_len   = cpu_to_le16(lb->last_block - lb->first_block + 1);
  41        ext4_ext_store_pblock(&newext, lb->first_pblock);
  42        path = ext4_ext_find_extent(inode, lb->first_block, NULL);
  43
  44        if (IS_ERR(path)) {
  45                retval = PTR_ERR(path);
  46                path = NULL;
  47                goto err_out;
  48        }
  49
  50        /*
  51         * Calculate the credit needed to inserting this extent
  52         * Since we are doing this in loop we may accumalate extra
  53         * credit. But below we try to not accumalate too much
  54         * of them by restarting the journal.
  55         */
  56        needed = ext4_ext_calc_credits_for_single_extent(inode,
  57                    lb->last_block - lb->first_block + 1, path);
  58
  59        /*
  60         * Make sure the credit we accumalated is not really high
  61         */
  62        if (needed && ext4_handle_has_enough_credits(handle,
  63                                                EXT4_RESERVE_TRANS_BLOCKS)) {
  64                retval = ext4_journal_restart(handle, needed);
  65                if (retval)
  66                        goto err_out;
  67        } else if (needed) {
  68                retval = ext4_journal_extend(handle, needed);
  69                if (retval) {
  70                        /*
  71                         * IF not able to extend the journal restart the journal
  72                         */
  73                        retval = ext4_journal_restart(handle, needed);
  74                        if (retval)
  75                                goto err_out;
  76                }
  77        }
  78        retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
  79err_out:
  80        if (path) {
  81                ext4_ext_drop_refs(path);
  82                kfree(path);
  83        }
  84        lb->first_pblock = 0;
  85        return retval;
  86}
  87
  88static int update_extent_range(handle_t *handle, struct inode *inode,
  89                                ext4_fsblk_t pblock, ext4_lblk_t blk_num,
  90                                struct list_blocks_struct *lb)
  91{
  92        int retval;
  93        /*
  94         * See if we can add on to the existing range (if it exists)
  95         */
  96        if (lb->first_pblock &&
  97                (lb->last_pblock+1 == pblock) &&
  98                (lb->last_block+1 == blk_num)) {
  99                lb->last_pblock = pblock;
 100                lb->last_block = blk_num;
 101                return 0;
 102        }
 103        /*
 104         * Start a new range.
 105         */
 106        retval = finish_range(handle, inode, lb);
 107        lb->first_pblock = lb->last_pblock = pblock;
 108        lb->first_block = lb->last_block = blk_num;
 109
 110        return retval;
 111}
 112
 113static int update_ind_extent_range(handle_t *handle, struct inode *inode,
 114                                   ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
 115                                   struct list_blocks_struct *lb)
 116{
 117        struct buffer_head *bh;
 118        __le32 *i_data;
 119        int i, retval = 0;
 120        ext4_lblk_t blk_count = *blk_nump;
 121        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 122
 123        if (!pblock) {
 124                /* Only update the file block number */
 125                *blk_nump += max_entries;
 126                return 0;
 127        }
 128
 129        bh = sb_bread(inode->i_sb, pblock);
 130        if (!bh)
 131                return -EIO;
 132
 133        i_data = (__le32 *)bh->b_data;
 134        for (i = 0; i < max_entries; i++, blk_count++) {
 135                if (i_data[i]) {
 136                        retval = update_extent_range(handle, inode,
 137                                                le32_to_cpu(i_data[i]),
 138                                                blk_count, lb);
 139                        if (retval)
 140                                break;
 141                }
 142        }
 143
 144        /* Update the file block number */
 145        *blk_nump = blk_count;
 146        put_bh(bh);
 147        return retval;
 148
 149}
 150
 151static int update_dind_extent_range(handle_t *handle, struct inode *inode,
 152                                    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
 153                                    struct list_blocks_struct *lb)
 154{
 155        struct buffer_head *bh;
 156        __le32 *i_data;
 157        int i, retval = 0;
 158        ext4_lblk_t blk_count = *blk_nump;
 159        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 160
 161        if (!pblock) {
 162                /* Only update the file block number */
 163                *blk_nump += max_entries * max_entries;
 164                return 0;
 165        }
 166        bh = sb_bread(inode->i_sb, pblock);
 167        if (!bh)
 168                return -EIO;
 169
 170        i_data = (__le32 *)bh->b_data;
 171        for (i = 0; i < max_entries; i++) {
 172                if (i_data[i]) {
 173                        retval = update_ind_extent_range(handle, inode,
 174                                                le32_to_cpu(i_data[i]),
 175                                                &blk_count, lb);
 176                        if (retval)
 177                                break;
 178                } else {
 179                        /* Only update the file block number */
 180                        blk_count += max_entries;
 181                }
 182        }
 183
 184        /* Update the file block number */
 185        *blk_nump = blk_count;
 186        put_bh(bh);
 187        return retval;
 188
 189}
 190
 191static int update_tind_extent_range(handle_t *handle, struct inode *inode,
 192                                     ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
 193                                     struct list_blocks_struct *lb)
 194{
 195        struct buffer_head *bh;
 196        __le32 *i_data;
 197        int i, retval = 0;
 198        ext4_lblk_t blk_count = *blk_nump;
 199        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 200
 201        if (!pblock) {
 202                /* Only update the file block number */
 203                *blk_nump += max_entries * max_entries * max_entries;
 204                return 0;
 205        }
 206        bh = sb_bread(inode->i_sb, pblock);
 207        if (!bh)
 208                return -EIO;
 209
 210        i_data = (__le32 *)bh->b_data;
 211        for (i = 0; i < max_entries; i++) {
 212                if (i_data[i]) {
 213                        retval = update_dind_extent_range(handle, inode,
 214                                                le32_to_cpu(i_data[i]),
 215                                                &blk_count, lb);
 216                        if (retval)
 217                                break;
 218                } else
 219                        /* Only update the file block number */
 220                        blk_count += max_entries * max_entries;
 221        }
 222        /* Update the file block number */
 223        *blk_nump = blk_count;
 224        put_bh(bh);
 225        return retval;
 226
 227}
 228
 229static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
 230{
 231        int retval = 0, needed;
 232
 233        if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
 234                return 0;
 235        /*
 236         * We are freeing a blocks. During this we touch
 237         * superblock, group descriptor and block bitmap.
 238         * So allocate a credit of 3. We may update
 239         * quota (user and group).
 240         */
 241        needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
 242
 243        if (ext4_journal_extend(handle, needed) != 0)
 244                retval = ext4_journal_restart(handle, needed);
 245
 246        return retval;
 247}
 248
 249static int free_dind_blocks(handle_t *handle,
 250                                struct inode *inode, __le32 i_data)
 251{
 252        int i;
 253        __le32 *tmp_idata;
 254        struct buffer_head *bh;
 255        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 256
 257        bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
 258        if (!bh)
 259                return -EIO;
 260
 261        tmp_idata = (__le32 *)bh->b_data;
 262        for (i = 0; i < max_entries; i++) {
 263                if (tmp_idata[i]) {
 264                        extend_credit_for_blkdel(handle, inode);
 265                        ext4_free_blocks(handle, inode,
 266                                        le32_to_cpu(tmp_idata[i]), 1, 1);
 267                }
 268        }
 269        put_bh(bh);
 270        extend_credit_for_blkdel(handle, inode);
 271        ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
 272        return 0;
 273}
 274
 275static int free_tind_blocks(handle_t *handle,
 276                                struct inode *inode, __le32 i_data)
 277{
 278        int i, retval = 0;
 279        __le32 *tmp_idata;
 280        struct buffer_head *bh;
 281        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 282
 283        bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
 284        if (!bh)
 285                return -EIO;
 286
 287        tmp_idata = (__le32 *)bh->b_data;
 288        for (i = 0; i < max_entries; i++) {
 289                if (tmp_idata[i]) {
 290                        retval = free_dind_blocks(handle,
 291                                        inode, tmp_idata[i]);
 292                        if (retval) {
 293                                put_bh(bh);
 294                                return retval;
 295                        }
 296                }
 297        }
 298        put_bh(bh);
 299        extend_credit_for_blkdel(handle, inode);
 300        ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
 301        return 0;
 302}
 303
 304static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
 305{
 306        int retval;
 307
 308        /* ei->i_data[EXT4_IND_BLOCK] */
 309        if (i_data[0]) {
 310                extend_credit_for_blkdel(handle, inode);
 311                ext4_free_blocks(handle, inode,
 312                                le32_to_cpu(i_data[0]), 1, 1);
 313        }
 314
 315        /* ei->i_data[EXT4_DIND_BLOCK] */
 316        if (i_data[1]) {
 317                retval = free_dind_blocks(handle, inode, i_data[1]);
 318                if (retval)
 319                        return retval;
 320        }
 321
 322        /* ei->i_data[EXT4_TIND_BLOCK] */
 323        if (i_data[2]) {
 324                retval = free_tind_blocks(handle, inode, i_data[2]);
 325                if (retval)
 326                        return retval;
 327        }
 328        return 0;
 329}
 330
 331static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
 332                                                struct inode *tmp_inode)
 333{
 334        int retval;
 335        __le32  i_data[3];
 336        struct ext4_inode_info *ei = EXT4_I(inode);
 337        struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
 338
 339        /*
 340         * One credit accounted for writing the
 341         * i_data field of the original inode
 342         */
 343        retval = ext4_journal_extend(handle, 1);
 344        if (retval) {
 345                retval = ext4_journal_restart(handle, 1);
 346                if (retval)
 347                        goto err_out;
 348        }
 349
 350        i_data[0] = ei->i_data[EXT4_IND_BLOCK];
 351        i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
 352        i_data[2] = ei->i_data[EXT4_TIND_BLOCK];
 353
 354        down_write(&EXT4_I(inode)->i_data_sem);
 355        /*
 356         * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation
 357         * happened after we started the migrate. We need to
 358         * fail the migrate
 359         */
 360        if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) {
 361                retval = -EAGAIN;
 362                up_write(&EXT4_I(inode)->i_data_sem);
 363                goto err_out;
 364        } else
 365                EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
 366        /*
 367         * We have the extent map build with the tmp inode.
 368         * Now copy the i_data across
 369         */
 370        ei->i_flags |= EXT4_EXTENTS_FL;
 371        memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
 372
 373        /*
 374         * Update i_blocks with the new blocks that got
 375         * allocated while adding extents for extent index
 376         * blocks.
 377         *
 378         * While converting to extents we need not
 379         * update the orignal inode i_blocks for extent blocks
 380         * via quota APIs. The quota update happened via tmp_inode already.
 381         */
 382        spin_lock(&inode->i_lock);
 383        inode->i_blocks += tmp_inode->i_blocks;
 384        spin_unlock(&inode->i_lock);
 385        up_write(&EXT4_I(inode)->i_data_sem);
 386
 387        /*
 388         * We mark the inode dirty after, because we decrement the
 389         * i_blocks when freeing the indirect meta-data blocks
 390         */
 391        retval = free_ind_block(handle, inode, i_data);
 392        ext4_mark_inode_dirty(handle, inode);
 393
 394err_out:
 395        return retval;
 396}
 397
 398static int free_ext_idx(handle_t *handle, struct inode *inode,
 399                                        struct ext4_extent_idx *ix)
 400{
 401        int i, retval = 0;
 402        ext4_fsblk_t block;
 403        struct buffer_head *bh;
 404        struct ext4_extent_header *eh;
 405
 406        block = idx_pblock(ix);
 407        bh = sb_bread(inode->i_sb, block);
 408        if (!bh)
 409                return -EIO;
 410
 411        eh = (struct ext4_extent_header *)bh->b_data;
 412        if (eh->eh_depth != 0) {
 413                ix = EXT_FIRST_INDEX(eh);
 414                for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
 415                        retval = free_ext_idx(handle, inode, ix);
 416                        if (retval)
 417                                break;
 418                }
 419        }
 420        put_bh(bh);
 421        extend_credit_for_blkdel(handle, inode);
 422        ext4_free_blocks(handle, inode, block, 1, 1);
 423        return retval;
 424}
 425
 426/*
 427 * Free the extent meta data blocks only
 428 */
 429static int free_ext_block(handle_t *handle, struct inode *inode)
 430{
 431        int i, retval = 0;
 432        struct ext4_inode_info *ei = EXT4_I(inode);
 433        struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
 434        struct ext4_extent_idx *ix;
 435        if (eh->eh_depth == 0)
 436                /*
 437                 * No extra blocks allocated for extent meta data
 438                 */
 439                return 0;
 440        ix = EXT_FIRST_INDEX(eh);
 441        for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
 442                retval = free_ext_idx(handle, inode, ix);
 443                if (retval)
 444                        return retval;
 445        }
 446        return retval;
 447
 448}
 449
 450int ext4_ext_migrate(struct inode *inode)
 451{
 452        handle_t *handle;
 453        int retval = 0, i;
 454        __le32 *i_data;
 455        ext4_lblk_t blk_count = 0;
 456        struct ext4_inode_info *ei;
 457        struct inode *tmp_inode = NULL;
 458        struct list_blocks_struct lb;
 459        unsigned long max_entries;
 460        __u32 goal;
 461
 462        /*
 463         * If the filesystem does not support extents, or the inode
 464         * already is extent-based, error out.
 465         */
 466        if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
 467                                       EXT4_FEATURE_INCOMPAT_EXTENTS) ||
 468            (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
 469                return -EINVAL;
 470
 471        if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
 472                /*
 473                 * don't migrate fast symlink
 474                 */
 475                return retval;
 476
 477        handle = ext4_journal_start(inode,
 478                                        EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
 479                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
 480                                        2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
 481                                        + 1);
 482        if (IS_ERR(handle)) {
 483                retval = PTR_ERR(handle);
 484                return retval;
 485        }
 486        goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
 487                EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
 488        tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
 489                                   S_IFREG, 0, goal);
 490        if (IS_ERR(tmp_inode)) {
 491                retval = -ENOMEM;
 492                ext4_journal_stop(handle);
 493                return retval;
 494        }
 495        i_size_write(tmp_inode, i_size_read(inode));
 496        /*
 497         * We don't want the inode to be reclaimed
 498         * if we got interrupted in between. We have
 499         * this tmp inode carrying reference to the
 500         * data blocks of the original file. We set
 501         * the i_nlink to zero at the last stage after
 502         * switching the original file to extent format
 503         */
 504        tmp_inode->i_nlink = 1;
 505
 506        ext4_ext_tree_init(handle, tmp_inode);
 507        ext4_orphan_add(handle, tmp_inode);
 508        ext4_journal_stop(handle);
 509
 510        /*
 511         * start with one credit accounted for
 512         * superblock modification.
 513         *
 514         * For the tmp_inode we already have commited the
 515         * trascation that created the inode. Later as and
 516         * when we add extents we extent the journal
 517         */
 518        /*
 519         * Even though we take i_mutex we can still cause block
 520         * allocation via mmap write to holes. If we have allocated
 521         * new blocks we fail migrate.  New block allocation will
 522         * clear EXT4_STATE_EXT_MIGRATE flag.  The flag is updated
 523         * with i_data_sem held to prevent racing with block
 524         * allocation.
 525         */
 526        down_read((&EXT4_I(inode)->i_data_sem));
 527        EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE;
 528        up_read((&EXT4_I(inode)->i_data_sem));
 529
 530        handle = ext4_journal_start(inode, 1);
 531
 532        ei = EXT4_I(inode);
 533        i_data = ei->i_data;
 534        memset(&lb, 0, sizeof(lb));
 535
 536        /* 32 bit block address 4 bytes */
 537        max_entries = inode->i_sb->s_blocksize >> 2;
 538        for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
 539                if (i_data[i]) {
 540                        retval = update_extent_range(handle, tmp_inode,
 541                                                le32_to_cpu(i_data[i]),
 542                                                blk_count, &lb);
 543                        if (retval)
 544                                goto err_out;
 545                }
 546        }
 547        if (i_data[EXT4_IND_BLOCK]) {
 548                retval = update_ind_extent_range(handle, tmp_inode,
 549                                        le32_to_cpu(i_data[EXT4_IND_BLOCK]),
 550                                        &blk_count, &lb);
 551                        if (retval)
 552                                goto err_out;
 553        } else
 554                blk_count +=  max_entries;
 555        if (i_data[EXT4_DIND_BLOCK]) {
 556                retval = update_dind_extent_range(handle, tmp_inode,
 557                                        le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
 558                                        &blk_count, &lb);
 559                        if (retval)
 560                                goto err_out;
 561        } else
 562                blk_count += max_entries * max_entries;
 563        if (i_data[EXT4_TIND_BLOCK]) {
 564                retval = update_tind_extent_range(handle, tmp_inode,
 565                                        le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
 566                                        &blk_count, &lb);
 567                        if (retval)
 568                                goto err_out;
 569        }
 570        /*
 571         * Build the last extent
 572         */
 573        retval = finish_range(handle, tmp_inode, &lb);
 574err_out:
 575        if (retval)
 576                /*
 577                 * Failure case delete the extent information with the
 578                 * tmp_inode
 579                 */
 580                free_ext_block(handle, tmp_inode);
 581        else {
 582                retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
 583                if (retval)
 584                        /*
 585                         * if we fail to swap inode data free the extent
 586                         * details of the tmp inode
 587                         */
 588                        free_ext_block(handle, tmp_inode);
 589        }
 590
 591        /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
 592        if (ext4_journal_extend(handle, 1) != 0)
 593                ext4_journal_restart(handle, 1);
 594
 595        /*
 596         * Mark the tmp_inode as of size zero
 597         */
 598        i_size_write(tmp_inode, 0);
 599
 600        /*
 601         * set the  i_blocks count to zero
 602         * so that the ext4_delete_inode does the
 603         * right job
 604         *
 605         * We don't need to take the i_lock because
 606         * the inode is not visible to user space.
 607         */
 608        tmp_inode->i_blocks = 0;
 609
 610        /* Reset the extent details */
 611        ext4_ext_tree_init(handle, tmp_inode);
 612
 613        /*
 614         * Set the i_nlink to zero so that
 615         * generic_drop_inode really deletes the
 616         * inode
 617         */
 618        tmp_inode->i_nlink = 0;
 619
 620        ext4_journal_stop(handle);
 621        unlock_new_inode(tmp_inode);
 622        iput(tmp_inode);
 623
 624        return retval;
 625}
 626