linux/fs/ext4/move_extent.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2008,2009 NEC Software Tohoku, Ltd.
   3 * Written by Takashi Sato <t-sato@yk.jp.nec.com>
   4 *            Akira Fujita <a-fujita@rs.jp.nec.com>
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of version 2.1 of the GNU Lesser General Public License
   8 * as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful,
  11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 * GNU General Public License for more details.
  14 */
  15
  16#include <linux/fs.h>
  17#include <linux/quotaops.h>
  18#include <linux/slab.h>
  19#include "ext4_jbd2.h"
  20#include "ext4_extents.h"
  21#include "ext4.h"
  22
  23/**
  24 * get_ext_path - Find an extent path for designated logical block number.
  25 *
  26 * @inode:      an inode which is searched
  27 * @lblock:     logical block number to find an extent path
  28 * @path:       pointer to an extent path pointer (for output)
  29 *
  30 * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value
  31 * on failure.
  32 */
  33static inline int
  34get_ext_path(struct inode *inode, ext4_lblk_t lblock,
  35                struct ext4_ext_path **path)
  36{
  37        int ret = 0;
  38
  39        *path = ext4_ext_find_extent(inode, lblock, *path);
  40        if (IS_ERR(*path)) {
  41                ret = PTR_ERR(*path);
  42                *path = NULL;
  43        } else if ((*path)[ext_depth(inode)].p_ext == NULL)
  44                ret = -ENODATA;
  45
  46        return ret;
  47}
  48
  49/**
  50 * copy_extent_status - Copy the extent's initialization status
  51 *
  52 * @src:        an extent for getting initialize status
  53 * @dest:       an extent to be set the status
  54 */
  55static void
  56copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
  57{
  58        if (ext4_ext_is_uninitialized(src))
  59                ext4_ext_mark_uninitialized(dest);
  60        else
  61                dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
  62}
  63
  64/**
  65 * mext_next_extent - Advance an extent path to the following extent
  66 *
  67 * @inode:      inode whose extent tree is walked; its superblock is used to
     *              read index and leaf blocks
  68 * @path:       extent path positioned on the current extent; updated in
     *              place so that its leaf entry (->p_ext) refers to the next one
  69 * @extent:     output, set to the next extent, or to NULL when the leaf that
     *              was reached turns out to be empty
  70 *
  71 * If the current leaf still holds an extent after the current one, only the
  72 * leaf entry of @path is advanced. Otherwise the path is walked upwards to
  73 * the closest level that has a following index entry; that index is advanced
  74 * and every level below it is re-read with sb_bread() and positioned on its
  75 * first entry.
     *
     * Return 0 on success, 1 if @path already refers to the last extent,
     * -EIO if a block could not be read, or -ENODATA if the leaf that was
     * reached contains no extents.
  76 */
  77static int
  78mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
  79                      struct ext4_extent **extent)
  80{
  81        struct ext4_extent_header *eh;
  82        int ppos, leaf_ppos = path->p_depth;
  83
  84        ppos = leaf_ppos;
  85        if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
  86                /* Same leaf block: step to the extent right after the current one */
  87                *extent = ++path[ppos].p_ext;
  88                path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
  89                return 0;
  90        }
  91
            /*
             * The leaf is exhausted. Climb towards the root until a level with
             * an unvisited index entry is found.
             */
  92        while (--ppos >= 0) {
  93                if (EXT_LAST_INDEX(path[ppos].p_hdr) >
  94                    path[ppos].p_idx) {
  95                        int cur_ppos = ppos;
  96
  97                        /* Advance this index and load the block it points to */
  98                        path[ppos].p_idx++;
  99                        path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
 100                        if (path[ppos+1].p_bh)
 101                                brelse(path[ppos+1].p_bh);
 102                        path[ppos+1].p_bh =
 103                                sb_bread(inode->i_sb, path[ppos].p_block);
 104                        if (!path[ppos+1].p_bh)
 105                                return -EIO;
 106                        path[ppos+1].p_hdr =
 107                                ext_block_hdr(path[ppos+1].p_bh);
 108
 109                        /*
                             * Intermediate index blocks between the advanced
                             * level and the leaf: start each at its first
                             * index and load its child block.
                             */
 110                        while (++cur_ppos < leaf_ppos) {
 111                                path[cur_ppos].p_idx =
 112                                        EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
 113                                path[cur_ppos].p_block =
 114                                        ext4_idx_pblock(path[cur_ppos].p_idx);
 115                                if (path[cur_ppos+1].p_bh)
 116                                        brelse(path[cur_ppos+1].p_bh);
 117                                path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
 118                                        path[cur_ppos].p_block);
 119                                if (!path[cur_ppos+1].p_bh)
 120                                        return -EIO;
 121                                path[cur_ppos+1].p_hdr =
 122                                        ext_block_hdr(path[cur_ppos+1].p_bh);
 123                        }
 124
                            /* Clear the outputs first so an empty leaf leaves them NULL */
 125                        path[leaf_ppos].p_ext = *extent = NULL;
 126
 127                        eh = path[leaf_ppos].p_hdr;
 128                        if (le16_to_cpu(eh->eh_entries) == 0)
 129                                /* empty leaf is found */
 130                                return -ENODATA;
 131
 132                        /* New leaf block: position on its first extent */
 133                        path[leaf_ppos].p_ext = *extent =
 134                                EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
 135                        path[leaf_ppos].p_block =
 136                                        ext4_ext_pblock(path[leaf_ppos].p_ext);
 137                        return 0;
 138                }
 139        }
 140        /* No level had a further index entry: @path is on the last extent */
 141        return 1;
 142}
 143
 144/**
 145 * mext_check_null_inode - NULL check for two inodes
 146 *
 147 * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
 148 */
 149static int
 150mext_check_null_inode(struct inode *inode1, struct inode *inode2,
 151                      const char *function, unsigned int line)
 152{
 153        int ret = 0;
 154
 155        if (inode1 == NULL) {
 156                __ext4_error(inode2->i_sb, function, line,
 157                        "Both inodes should not be NULL: "
 158                        "inode1 NULL inode2 %lu", inode2->i_ino);
 159                ret = -EIO;
 160        } else if (inode2 == NULL) {
 161                __ext4_error(inode1->i_sb, function, line,
 162                        "Both inodes should not be NULL: "
 163                        "inode1 %lu inode2 NULL", inode1->i_ino);
 164                ret = -EIO;
 165        }
 166        return ret;
 167}
 168
 169/**
 170 * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
 171 *
 172 * @orig_inode:         original inode structure
 173 * @donor_inode:        donor inode structure
 174 * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
 175 * i_ino order.
 176 */
 177static void
 178double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
 179{
 180        struct inode *first = orig_inode, *second = donor_inode;
 181
 182        /*
 183         * Use the inode number to provide the stable locking order instead
 184         * of its address, because the C language doesn't guarantee you can
 185         * compare pointers that don't come from the same array.
 186         */
 187        if (donor_inode->i_ino < orig_inode->i_ino) {
 188                first = donor_inode;
 189                second = orig_inode;
 190        }
 191
 192        down_write(&EXT4_I(first)->i_data_sem);
 193        down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
 194}
 195
 196/**
 197 * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
 198 *
 199 * @orig_inode:         original inode structure to be released its lock first
 200 * @donor_inode:        donor inode structure to be released its lock second
 201 * Release write lock of i_data_sem of two inodes (orig and donor).
 202 */
 203static void
 204double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
 205{
 206        up_write(&EXT4_I(orig_inode)->i_data_sem);
 207        up_write(&EXT4_I(donor_inode)->i_data_sem);
 208}
 209
 210/**
 211 * mext_insert_across_blocks - Insert extents across leaf block
 212 *
 213 * @handle:             journal handle
 214 * @orig_inode:         original inode
 215 * @o_start:            first original extent to be changed
 216 * @o_end:              last original extent to be changed
 217 * @start_ext:          first new extent to be inserted
 218 * @new_ext:            middle of new extent to be inserted
 219 * @end_ext:            last new extent to be inserted
 220 *
 221 * Allocate a new leaf block and insert extents into it. Return 0 on success,
 222 * or a negative error value on failure.
 223 */
 224static int
 225mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
 226                struct ext4_extent *o_start, struct ext4_extent *o_end,
 227                struct ext4_extent *start_ext, struct ext4_extent *new_ext,
 228                struct ext4_extent *end_ext)
 229{
 230        struct ext4_ext_path *orig_path = NULL;
 231        ext4_lblk_t eblock = 0;
 232        int new_flag = 0;
 233        int end_flag = 0;
 234        int err = 0;
 235
 236        if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
 237                if (o_start == o_end) {
 238
 239                        /*       start_ext   new_ext    end_ext
 240                         * donor |---------|-----------|--------|
 241                         * orig  |------------------------------|
 242                         */
 243                        end_flag = 1;
 244                } else {
 245
 246                        /*       start_ext   new_ext   end_ext
 247                         * donor |---------|----------|---------|
 248                         * orig  |---------------|--------------|
 249                         */
 250                        o_end->ee_block = end_ext->ee_block;
 251                        o_end->ee_len = end_ext->ee_len;
 252                        ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
 253                }
 254
 255                o_start->ee_len = start_ext->ee_len;
 256                eblock = le32_to_cpu(start_ext->ee_block);
 257                new_flag = 1;
 258
 259        } else if (start_ext->ee_len && new_ext->ee_len &&
 260                   !end_ext->ee_len && o_start == o_end) {
 261
 262                /*       start_ext      new_ext
 263                 * donor |--------------|---------------|
 264                 * orig  |------------------------------|
 265                 */
 266                o_start->ee_len = start_ext->ee_len;
 267                eblock = le32_to_cpu(start_ext->ee_block);
 268                new_flag = 1;
 269
 270        } else if (!start_ext->ee_len && new_ext->ee_len &&
 271                   end_ext->ee_len && o_start == o_end) {
 272
 273                /*        new_ext       end_ext
 274                 * donor |--------------|---------------|
 275                 * orig  |------------------------------|
 276                 */
 277                o_end->ee_block = end_ext->ee_block;
 278                o_end->ee_len = end_ext->ee_len;
 279                ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
 280
 281                /*
 282                 * Set 0 to the extent block if new_ext was
 283                 * the first block.
 284                 */
 285                if (new_ext->ee_block)
 286                        eblock = le32_to_cpu(new_ext->ee_block);
 287
 288                new_flag = 1;
 289        } else {
 290                ext4_debug("ext4 move extent: Unexpected insert case\n");
 291                return -EIO;
 292        }
 293
 294        if (new_flag) {
 295                err = get_ext_path(orig_inode, eblock, &orig_path);
 296                if (err)
 297                        goto out;
 298
 299                if (ext4_ext_insert_extent(handle, orig_inode,
 300                                        orig_path, new_ext, 0))
 301                        goto out;
 302        }
 303
 304        if (end_flag) {
 305                err = get_ext_path(orig_inode,
 306                                le32_to_cpu(end_ext->ee_block) - 1, &orig_path);
 307                if (err)
 308                        goto out;
 309
 310                if (ext4_ext_insert_extent(handle, orig_inode,
 311                                           orig_path, end_ext, 0))
 312                        goto out;
 313        }
 314out:
 315        if (orig_path) {
 316                ext4_ext_drop_refs(orig_path);
 317                kfree(orig_path);
 318        }
 319
 320        return err;
 321
 322}
 323
 324/**
 325 * mext_insert_inside_block - Insert new extent to the extent block
 326 *
 327 * @o_start:            first original extent to be moved
 328 * @o_end:              last original extent to be moved
 329 * @start_ext:          first new extent to be inserted
 330 * @new_ext:            middle of new extent to be inserted
 331 * @end_ext:            last new extent to be inserted
 332 * @eh:                 extent header of target leaf block
 333 * @range_to_move:      used to decide how to insert extent
 334 *
 335 * Insert extents into the leaf block. The extent (@o_start) is overwritten
 336 * by inserted extents.
 337 */
 338static void
 339mext_insert_inside_block(struct ext4_extent *o_start,
 340                              struct ext4_extent *o_end,
 341                              struct ext4_extent *start_ext,
 342                              struct ext4_extent *new_ext,
 343                              struct ext4_extent *end_ext,
 344                              struct ext4_extent_header *eh,
 345                              int range_to_move)
 346{
 347        int i = 0;
 348        unsigned long len;
 349
 350        /* Move the existing extents */
 351        if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
 352                len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
 353                        (unsigned long)(o_end + 1);
 354                memmove(o_end + 1 + range_to_move, o_end + 1, len);
 355        }
 356
 357        /* Insert start entry */
 358        if (start_ext->ee_len)
 359                o_start[i++].ee_len = start_ext->ee_len;
 360
 361        /* Insert new entry */
 362        if (new_ext->ee_len) {
 363                o_start[i] = *new_ext;
 364                ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
 365        }
 366
 367        /* Insert end entry */
 368        if (end_ext->ee_len)
 369                o_start[i] = *end_ext;
 370
 371        /* Increment the total entries counter on the extent block */
 372        le16_add_cpu(&eh->eh_entries, range_to_move);
 373}
 374
 375/**
 376 * mext_insert_extents - Insert new extent
 377 *
 378 * @handle:     journal handle
 379 * @orig_inode: original inode
 380 * @orig_path:  path indicates first extent to be changed
 381 * @o_start:    first original extent to be changed
 382 * @o_end:      last original extent to be changed
 383 * @start_ext:  first new extent to be inserted
 384 * @new_ext:    middle of new extent to be inserted
 385 * @end_ext:    last new extent to be inserted
 386 *
 387 * Call the function to insert extents. If we cannot add more extents into
 388 * the leaf block, we call mext_insert_across_blocks() to create a
 389 * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
 390 * on success, or a negative error value on failure.
 391 */
 392static int
 393mext_insert_extents(handle_t *handle, struct inode *orig_inode,
 394                         struct ext4_ext_path *orig_path,
 395                         struct ext4_extent *o_start,
 396                         struct ext4_extent *o_end,
 397                         struct ext4_extent *start_ext,
 398                         struct ext4_extent *new_ext,
 399                         struct ext4_extent *end_ext)
 400{
 401        struct  ext4_extent_header *eh;
 402        unsigned long need_slots, slots_range;
 403        int     range_to_move, depth, ret;
 404
 405        /*
 406         * The extents need to be inserted
 407         * start_extent + new_extent + end_extent.
 408         */
 409        need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
 410                (new_ext->ee_len ? 1 : 0);
 411
 412        /* The number of slots between start and end */
 413        slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
 414                / sizeof(struct ext4_extent);
 415
 416        /* Range to move the end of extent */
 417        range_to_move = need_slots - slots_range;
 418        depth = orig_path->p_depth;
 419        orig_path += depth;
 420        eh = orig_path->p_hdr;
 421
 422        if (depth) {
 423                /* Register to journal */
 424                ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
 425                if (ret)
 426                        return ret;
 427        }
 428
 429        /* Expansion */
 430        if (range_to_move > 0 &&
 431                (range_to_move > le16_to_cpu(eh->eh_max)
 432                        - le16_to_cpu(eh->eh_entries))) {
 433
 434                ret = mext_insert_across_blocks(handle, orig_inode, o_start,
 435                                        o_end, start_ext, new_ext, end_ext);
 436                if (ret < 0)
 437                        return ret;
 438        } else
 439                mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
 440                                                end_ext, eh, range_to_move);
 441
 442        if (depth) {
 443                ret = ext4_handle_dirty_metadata(handle, orig_inode,
 444                                                 orig_path->p_bh);
 445                if (ret)
 446                        return ret;
 447        } else {
 448                ret = ext4_mark_inode_dirty(handle, orig_inode);
 449                if (ret < 0)
 450                        return ret;
 451        }
 452
 453        return 0;
 454}
 455
 456/**
 457 * mext_leaf_block - Move one leaf extent block into the inode.
 458 *
 459 * @handle:             journal handle
 460 * @orig_inode:         original inode
 461 * @orig_path:          path indicates first extent to be changed
 462 * @dext:               donor extent; supplies the physical block and the
     *                      length of the extent to insert
 463 * @from:               start offset on the target file; only read here and
     *                      used as the logical start of the inserted extent
 464 *
 465 * In order to insert extents into the leaf block, we must divide the extent
 466 * in the leaf block into three extents. The one is located to be inserted
 467 * extents, and the others are located around it.
 468 *
 469 * Therefore, this function creates structures to save extents of the leaf
 470 * block, and inserts extents by calling mext_insert_extents() with
 471 * created extents. Return 0 on success, or a negative error value on failure.
     * -EIO is returned, after reporting through EXT4_ERROR_INODE(), when the new
     * extent would end beyond the original extent it is supposed to replace.
 472 */
 473static int
 474mext_leaf_block(handle_t *handle, struct inode *orig_inode,
 475                     struct ext4_ext_path *orig_path, struct ext4_extent *dext,
 476                     ext4_lblk_t *from)
 477{
 478        struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
 479        struct ext4_extent new_ext, start_ext, end_ext;
 480        ext4_lblk_t new_ext_end;
 481        int oext_alen, new_ext_alen, end_ext_alen;
 482        int depth = ext_depth(orig_inode);
 483        int ret;
 484
            /* start_ext and end_ext stay empty (length 0) unless a case below fills them */
 485        start_ext.ee_block = end_ext.ee_block = 0;
 486        o_start = o_end = oext = orig_path[depth].p_ext;
 487        oext_alen = ext4_ext_get_actual_len(oext);
 488        start_ext.ee_len = end_ext.ee_len = 0;
 489
            /* new_ext: logical start from *from, physical block and length from dext */
 490        new_ext.ee_block = cpu_to_le32(*from);
 491        ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
 492        new_ext.ee_len = dext->ee_len;
 493        new_ext_alen = ext4_ext_get_actual_len(&new_ext);
 494        new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
 495
 496        /*
 497         * Case: new_ext starts strictly inside oext, so the head of oext
             * survives as start_ext
 498         * oext      |--------|
 499         * new_ext      |--|
 500         * start_ext |--|
 501         */
 502        if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
 503                le32_to_cpu(new_ext.ee_block) <
 504                le32_to_cpu(oext->ee_block) + oext_alen) {
 505                start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
 506                                               le32_to_cpu(oext->ee_block));
 507                start_ext.ee_block = oext->ee_block;
 508                copy_extent_status(oext, &start_ext);
 509        } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
 510                prev_ext = oext - 1;
 511                /*
 512                 * We can merge new_ext into previous extent,
 513                 * if these are contiguous and same extent type.
                     * In that case the change starts at prev_ext, start_ext
                     * carries the combined length and new_ext is emptied.
                     * NOTE(review): start_ext.ee_block is taken from oext,
                     * not from prev_ext - confirm this is intended.
 514                 */
 515                if (ext4_can_extents_be_merged(orig_inode, prev_ext,
 516                                               &new_ext)) {
 517                        o_start = prev_ext;
 518                        start_ext.ee_len = cpu_to_le16(
 519                                ext4_ext_get_actual_len(prev_ext) +
 520                                new_ext_alen);
 521                        start_ext.ee_block = oext->ee_block;
 522                        copy_extent_status(prev_ext, &start_ext);
 523                        new_ext.ee_len = 0;
 524                }
 525        }
 526
 527        /*
 528         * Sanity check: new_ext must not end beyond the last block of oext
 529         * oext      |-----------|
 530         * new_ext       |-------|
 531         */
 532        if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
 533                EXT4_ERROR_INODE(orig_inode,
 534                        "new_ext_end(%u) should be less than or equal to "
 535                        "oext->ee_block(%u) + oext_alen(%d) - 1",
 536                        new_ext_end, le32_to_cpu(oext->ee_block),
 537                        oext_alen);
 538                ret = -EIO;
 539                goto out;
 540        }
 541
 542        /*
 543         * Case: new_ext ends before the end of oext, so the tail of oext
             * survives as end_ext
 544         * oext    |---------------|
 545         * new_ext |-----------|
 546         * end_ext             |---|
 547         */
 548        if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
 549                new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
 550                end_ext.ee_len =
 551                        cpu_to_le16(le32_to_cpu(oext->ee_block) +
 552                        oext_alen - 1 - new_ext_end);
 553                copy_extent_status(oext, &end_ext);
 554                end_ext_alen = ext4_ext_get_actual_len(&end_ext);
                    /* The tail keeps the last end_ext_alen blocks of the original extent */
 555                ext4_ext_store_pblock(&end_ext,
 556                        (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
 557                end_ext.ee_block =
 558                        cpu_to_le32(le32_to_cpu(o_end->ee_block) +
 559                        oext_alen - end_ext_alen);
 560        }
 561
 562        ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
 563                                o_end, &start_ext, &new_ext, &end_ext);
 564out:
 565        return ret;
 566}
 567
 568/**
 569 * mext_calc_swap_extents - Calculate extents for extent swapping.
 570 *
 571 * @tmp_dext:           the extent that will belong to the original inode
 572 * @tmp_oext:           the extent that will belong to the donor inode
 573 * @orig_off:           block offset of original inode
 574 * @donor_off:          block offset of donor inode
 575 * @max_count:          the maximum length of extents
 576 *
 577 * Return 0 on success, or a negative error value on failure.
 578 */
 579static int
 580mext_calc_swap_extents(struct ext4_extent *tmp_dext,
 581                              struct ext4_extent *tmp_oext,
 582                              ext4_lblk_t orig_off, ext4_lblk_t donor_off,
 583                              ext4_lblk_t max_count)
 584{
 585        ext4_lblk_t diff, orig_diff;
 586        struct ext4_extent dext_old, oext_old;
 587
 588        BUG_ON(orig_off != donor_off);
 589
 590        /* original and donor extents have to cover the same block offset */
 591        if (orig_off < le32_to_cpu(tmp_oext->ee_block) ||
 592            le32_to_cpu(tmp_oext->ee_block) +
 593                        ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off)
 594                return -ENODATA;
 595
 596        if (orig_off < le32_to_cpu(tmp_dext->ee_block) ||
 597            le32_to_cpu(tmp_dext->ee_block) +
 598                        ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off)
 599                return -ENODATA;
 600
 601        dext_old = *tmp_dext;
 602        oext_old = *tmp_oext;
 603
 604        /* When tmp_dext is too large, pick up the target range. */
 605        diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
 606
 607        ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
 608        tmp_dext->ee_block =
 609                        cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
 610        tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
 611
 612        if (max_count < ext4_ext_get_actual_len(tmp_dext))
 613                tmp_dext->ee_len = cpu_to_le16(max_count);
 614
 615        orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
 616        ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
 617
 618        /* Adjust extent length if donor extent is larger than orig */
 619        if (ext4_ext_get_actual_len(tmp_dext) >
 620            ext4_ext_get_actual_len(tmp_oext) - orig_diff)
 621                tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
 622                                                orig_diff);
 623
 624        tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
 625
 626        copy_extent_status(&oext_old, tmp_dext);
 627        copy_extent_status(&dext_old, tmp_oext);
 628
 629        return 0;
 630}
 631
 632/**
 633 * mext_replace_branches - Replace original extents with new extents
 634 *
 635 * @handle:             journal handle
 636 * @orig_inode:         original inode
 637 * @donor_inode:        donor inode
 638 * @from:               block offset of orig_inode; the same offset is used
     *                      as the starting offset in donor_inode
 639 * @count:              block count to be replaced
 640 * @err:                pointer to save return value; set to 0 or to a
     *                      negative error value
 641 *
 642 * Replace original inode extents and donor inode extents page by page.
 643 * We implement this replacement in the following three steps:
 644 * 1. Save the block information of original and donor inodes into
 645 *    dummy extents.
 646 * 2. Change the block information of original inode to point at the
 647 *    donor inode blocks.
 648 * 3. Change the block information of donor inode to point at the saved
 649 *    original inode blocks in the dummy extents.
 650 *
     * The i_data_sem of both inodes is held for writing for the whole
     * operation, and the extent caches of both inodes are invalidated before
     * the locks are released.
     *
 651 * Return replaced block count. On failure this is the number of blocks
     * that had been replaced before the error, and the error is in *@err.
 652 */
 653static int
 654mext_replace_branches(handle_t *handle, struct inode *orig_inode,
 655                           struct inode *donor_inode, ext4_lblk_t from,
 656                           ext4_lblk_t count, int *err)
 657{
 658        struct ext4_ext_path *orig_path = NULL;
 659        struct ext4_ext_path *donor_path = NULL;
 660        struct ext4_extent *oext, *dext;
 661        struct ext4_extent tmp_dext, tmp_oext;
 662        ext4_lblk_t orig_off = from, donor_off = from;
 663        int depth;
 664        int replaced_count = 0;
 665        int dext_alen;
 666
 667        /* Protect extent trees against block allocations via delalloc */
 668        double_down_write_data_sem(orig_inode, donor_inode);
 669
 670        /* Get the original extent for the block "orig_off" */
 671        *err = get_ext_path(orig_inode, orig_off, &orig_path);
 672        if (*err)
 673                goto out;
 674
 675        /* Get the donor extent for the head */
 676        *err = get_ext_path(donor_inode, donor_off, &donor_path);
 677        if (*err)
 678                goto out;
            /* Work on copies so the on-disk extents are only changed by mext_leaf_block() */
 679        depth = ext_depth(orig_inode);
 680        oext = orig_path[depth].p_ext;
 681        tmp_oext = *oext;
 682
 683        depth = ext_depth(donor_inode);
 684        dext = donor_path[depth].p_ext;
 685        tmp_dext = *dext;
 686
 687        *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
 688                                      donor_off, count);
 689        if (*err)
 690                goto out;
 691
 692        /* Loop for the donor extents */
 693        while (1) {
 694                /*
                     * The extent for donor must be found.
                     * NOTE(review): dext has already been dereferenced to
                     * fill tmp_dext by the time this check runs.
                     */
 695                if (!dext) {
 696                        EXT4_ERROR_INODE(donor_inode,
 697                                   "The extent for donor must be found");
 698                        *err = -EIO;
 699                        goto out;
 700                } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
 701                        EXT4_ERROR_INODE(donor_inode,
 702                                "Donor offset(%u) and the first block of donor "
 703                                "extent(%u) should be equal",
 704                                donor_off,
 705                                le32_to_cpu(tmp_dext.ee_block));
 706                        *err = -EIO;
 707                        goto out;
 708                }
 709
 710                /* Set donor extent to orig extent */
 711                *err = mext_leaf_block(handle, orig_inode,
 712                                           orig_path, &tmp_dext, &orig_off);
 713                if (*err)
 714                        goto out;
 715
 716                /* Set orig extent to donor extent */
 717                *err = mext_leaf_block(handle, donor_inode,
 718                                           donor_path, &tmp_oext, &donor_off);
 719                if (*err)
 720                        goto out;
 721
                    /* Both files advance by the length that was just swapped */
 722                dext_alen = ext4_ext_get_actual_len(&tmp_dext);
 723                replaced_count += dext_alen;
 724                donor_off += dext_alen;
 725                orig_off += dext_alen;
 726
 727                /* Already moved the expected blocks */
 728                if (replaced_count >= count)
 729                        break;
 730
                    /* Release the old path's buffers, then look up the next original extent */
 731                if (orig_path)
 732                        ext4_ext_drop_refs(orig_path);
 733                *err = get_ext_path(orig_inode, orig_off, &orig_path);
 734                if (*err)
 735                        goto out;
 736                depth = ext_depth(orig_inode);
 737                oext = orig_path[depth].p_ext;
 738                tmp_oext = *oext;
 739
                    /* Same for the donor side */
 740                if (donor_path)
 741                        ext4_ext_drop_refs(donor_path);
 742                *err = get_ext_path(donor_inode, donor_off, &donor_path);
 743                if (*err)
 744                        goto out;
 745                depth = ext_depth(donor_inode);
 746                dext = donor_path[depth].p_ext;
 747                tmp_dext = *dext;
 748
                    /* Only the blocks still missing may be swapped in the next round */
 749                *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
 750                                           donor_off, count - replaced_count);
 751                if (*err)
 752                        goto out;
 753        }
 754
 755out:
            /* Common exit: free both paths, invalidate caches, unlock */
 756        if (orig_path) {
 757                ext4_ext_drop_refs(orig_path);
 758                kfree(orig_path);
 759        }
 760        if (donor_path) {
 761                ext4_ext_drop_refs(donor_path);
 762                kfree(donor_path);
 763        }
 764
 765        ext4_ext_invalidate_cache(orig_inode);
 766        ext4_ext_invalidate_cache(donor_inode);
 767
 768        double_up_write_data_sem(orig_inode, donor_inode);
 769
 770        return replaced_count;
 771}
 772
 773/**
 774 * move_extent_per_page - Move extent data per page
 775 *
 776 * @o_filp:                     file structure of original file
 777 * @donor_inode:                donor inode
 778 * @orig_page_offset:           page index on original file
 779 * @data_offset_in_page:        block index where data swapping starts
 780 * @block_len_in_page:          the number of blocks to be swapped
 781 * @uninit:                     orig extent is uninitialized or not
 782 * @err:                        pointer to save return value
 783 *
 784 * Save the data in original inode blocks and replace original inode extents
 785 * with donor inode extents by calling mext_replace_branches().
 786 * Finally, write out the saved data in new original inode blocks. Return
 787 * replaced block count.
     *
     * The whole operation runs under one journal handle started here. If the
     * handle cannot be started, 0 is returned and the error is stored in @err.
     * When @uninit is set, only the extents are swapped and no page is touched.
     * Otherwise the page is obtained through ->write_begin, the extents are
     * swapped, the page's buffers are re-mapped with ext4_get_block() and the
     * data is committed through ->write_end.
     *
     * An error reported by mext_replace_branches() on the non-uninit path is
     * kept in a local and overrides whatever else was stored in @err on exit.
 788 */
 789static int
 790move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
 791                  pgoff_t orig_page_offset, int data_offset_in_page,
 792                  int block_len_in_page, int uninit, int *err)
 793{
 794        struct inode *orig_inode = o_filp->f_dentry->d_inode;
 795        struct address_space *mapping = orig_inode->i_mapping;
 796        struct buffer_head *bh;
 797        struct page *page = NULL;
 798        const struct address_space_operations *a_ops = mapping->a_ops;
 799        handle_t *handle;
 800        ext4_lblk_t orig_blk_offset;
            /* Initial value is never read: offs is recomputed before first use */
 801        long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
 802        unsigned long blocksize = orig_inode->i_sb->s_blocksize;
 803        unsigned int w_flags = 0;
 804        unsigned int tmp_data_size, data_size, replaced_size;
 805        void *fsdata;
 806        int i, jblocks;
            /* Error from mext_replace_branches() on the page-backed path */
 807        int err2 = 0;
 808        int replaced_count = 0;
 809        int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
 810
 811        /*
 812         * It needs twice the amount of ordinary journal buffers because
 813         * inode and donor_inode may change each different metadata blocks.
 814         */
 815        jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
 816        handle = ext4_journal_start(orig_inode, jblocks);
 817        if (IS_ERR(handle)) {
 818                *err = PTR_ERR(handle);
 819                return 0;
 820        }
 821
 822        if (segment_eq(get_fs(), KERNEL_DS))
 823                w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
 824
            /* First logical block of the original file to be swapped */
 825        orig_blk_offset = orig_page_offset * blocks_per_page +
 826                data_offset_in_page;
 827
 828        /*
 829         * If orig extent is uninitialized one,
 830         * it's not necessary to force the page into memory
 831         * and then force it to be written out again.
 832         * Just swap data blocks between orig and donor.
 833         */
 834        if (uninit) {
 835                replaced_count = mext_replace_branches(handle, orig_inode,
 836                                                donor_inode, orig_blk_offset,
 837                                                block_len_in_page, err);
 838                goto out2;
 839        }
 840
            /* Byte offset in the original file where the swap starts */
 841        offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
 842
 843        /* Calculate data_size */
 844        if ((orig_blk_offset + block_len_in_page - 1) ==
 845            ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
 846                /* Replace the last block */
 847                tmp_data_size = orig_inode->i_size & (blocksize - 1);
 848                /*
 849                 * If tmp_data_size equals zero, i_size is a multiple of
 850                 * blocksize. So we set the full block size.
 851                 */
 852                if (tmp_data_size == 0)
 853                        tmp_data_size = blocksize;
 854
 855                data_size = tmp_data_size +
 856                        ((block_len_in_page - 1) << orig_inode->i_blkbits);
 857        } else
 858                data_size = block_len_in_page << orig_inode->i_blkbits;
 859
            /* Number of bytes reported as copied to ->write_end */
 860        replaced_size = data_size;
 861
 862        *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
 863                                 &page, &fsdata);
 864        if (unlikely(*err < 0))
 865                goto out;
 866
            /*
             * Bring the page contents in if they are not already valid.
             * The return value of ->readpage is not checked here; the page
             * is re-locked afterwards.
             */
 867        if (!PageUptodate(page)) {
 868                mapping->a_ops->readpage(o_filp, page);
 869                lock_page(page);
 870        }
 871
 872        /*
 873         * try_to_release_page() doesn't call releasepage in writeback mode.
 874         * We should care about the order of writing to the same file
 875         * by multiple move extent processes.
 876         * It needs to call wait_on_page_writeback() to wait for the
 877         * writeback of the page.
 878         */
 879        if (PageWriteback(page))
 880                wait_on_page_writeback(page);
 881
 882        /* Release old bh and drop refs */
 883        try_to_release_page(page, 0);
 884
 885        replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
 886                                        orig_blk_offset, block_len_in_page,
 887                                        &err2);
            /*
             * On a partial replacement keep going, but only for the blocks
             * that were actually swapped; with nothing swapped, bail out.
             */
 888        if (err2) {
 889                if (replaced_count) {
 890                        block_len_in_page = replaced_count;
 891                        replaced_size =
 892                                block_len_in_page << orig_inode->i_blkbits;
 893                } else
 894                        goto out;
 895        }
 896
 897        if (!page_has_buffers(page))
 898                create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
 899
            /* Walk to the buffer head of the first swapped block in the page */
 900        bh = page_buffers(page);
 901        for (i = 0; i < data_offset_in_page; i++)
 902                bh = bh->b_this_page;
 903
            /* Look up the mapping of each swapped block (create == 0) */
 904        for (i = 0; i < block_len_in_page; i++) {
 905                *err = ext4_get_block(orig_inode,
 906                                (sector_t)(orig_blk_offset + i), bh, 0);
 907                if (*err < 0)
 908                        goto out;
 909
 910                if (bh->b_this_page != NULL)
 911                        bh = bh->b_this_page;
 912        }
 913
 914        *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
 915                               page, fsdata);
            /* The page has been handed to ->write_end; skip the cleanup below */
 916        page = NULL;
 917
 918out:
            /* Only reached with a page when ->write_end was never called */
 919        if (unlikely(page)) {
 920                if (PageLocked(page))
 921                        unlock_page(page);
 922                page_cache_release(page);
                    /*
                     * NOTE(review): the handle is stopped here and again at
                     * out2. Presumably this first call balances a nested
                     * handle reference taken by ->write_begin that
                     * ->write_end would otherwise have dropped - confirm
                     * against the write_begin implementation before changing.
                     */
 923                ext4_journal_stop(handle);
 924        }
 925out2:
 926        ext4_journal_stop(handle);
 927
            /* A replace-branches error takes precedence over later results */
 928        if (err2)
 929                *err = err2;
 930
 931        return replaced_count;
 932}
 933
 934/**
 935 * mext_check_arguments - Check whether move extent can be done
 936 *
 937 * @orig_inode:         original inode
 938 * @donor_inode:        donor inode
 939 * @orig_start:         logical start offset in block for orig
 940 * @donor_start:        logical start offset in block for donor
 941 * @len:                the number of blocks to be moved
 942 *
 943 * Check the arguments of ext4_move_extents() whether the files can be
 944 * exchanged with each other.
 945 * Return 0 on success, or a negative error value on failure.
 946 */
 947static int
 948mext_check_arguments(struct inode *orig_inode,
 949                     struct inode *donor_inode, __u64 orig_start,
 950                     __u64 donor_start, __u64 *len)
 951{
 952        ext4_lblk_t orig_blocks, donor_blocks;
 953        unsigned int blkbits = orig_inode->i_blkbits;
 954        unsigned int blocksize = 1 << blkbits;
 955
 956        if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
 957                ext4_debug("ext4 move extent: suid or sgid is set"
 958                           " to donor file [ino:orig %lu, donor %lu]\n",
 959                           orig_inode->i_ino, donor_inode->i_ino);
 960                return -EINVAL;
 961        }
 962
 963        if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
 964                return -EPERM;
 965
 966        /* Ext4 move extent does not support swapfile */
 967        if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
 968                ext4_debug("ext4 move extent: The argument files should "
 969                        "not be swapfile [ino:orig %lu, donor %lu]\n",
 970                        orig_inode->i_ino, donor_inode->i_ino);
 971                return -EINVAL;
 972        }
 973
 974        /* Files should be in the same ext4 FS */
 975        if (orig_inode->i_sb != donor_inode->i_sb) {
 976                ext4_debug("ext4 move extent: The argument files "
 977                        "should be in same FS [ino:orig %lu, donor %lu]\n",
 978                        orig_inode->i_ino, donor_inode->i_ino);
 979                return -EINVAL;
 980        }
 981
 982        /* Ext4 move extent supports only extent based file */
 983        if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
 984                ext4_debug("ext4 move extent: orig file is not extents "
 985                        "based file [ino:orig %lu]\n", orig_inode->i_ino);
 986                return -EOPNOTSUPP;
 987        } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
 988                ext4_debug("ext4 move extent: donor file is not extents "
 989                        "based file [ino:donor %lu]\n", donor_inode->i_ino);
 990                return -EOPNOTSUPP;
 991        }
 992
 993        if ((!orig_inode->i_size) || (!donor_inode->i_size)) {
 994                ext4_debug("ext4 move extent: File size is 0 byte\n");
 995                return -EINVAL;
 996        }
 997
 998        /* Start offset should be same */
 999        if (orig_start != donor_start) {
1000                ext4_debug("ext4 move extent: orig and donor's start "
1001                        "offset are not same [ino:orig %lu, donor %lu]\n",
1002                        orig_inode->i_ino, donor_inode->i_ino);
1003                return -EINVAL;
1004        }
1005
1006        if ((orig_start > EXT_MAX_BLOCK) ||
1007            (donor_start > EXT_MAX_BLOCK) ||
1008            (*len > EXT_MAX_BLOCK) ||
1009            (orig_start + *len > EXT_MAX_BLOCK))  {
1010                ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
1011                        "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK,
1012                        orig_inode->i_ino, donor_inode->i_ino);
1013                return -EINVAL;
1014        }
1015
1016        if (orig_inode->i_size > donor_inode->i_size) {
1017                donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits;
1018                /* TODO: eliminate this artificial restriction */
1019                if (orig_start >= donor_blocks) {
1020                        ext4_debug("ext4 move extent: orig start offset "
1021                        "[%llu] should be less than donor file blocks "
1022                        "[%u] [ino:orig %lu, donor %lu]\n",
1023                        orig_start, donor_blocks,
1024                        orig_inode->i_ino, donor_inode->i_ino);
1025                        return -EINVAL;
1026                }
1027
1028                /* TODO: eliminate this artificial restriction */
1029                if (orig_start + *len > donor_blocks) {
1030                        ext4_debug("ext4 move extent: End offset [%llu] should "
1031                                "be less than donor file blocks [%u]."
1032                                "So adjust length from %llu to %llu "
1033                                "[ino:orig %lu, donor %lu]\n",
1034                                orig_start + *len, donor_blocks,
1035                                *len, donor_blocks - orig_start,
1036                                orig_inode->i_ino, donor_inode->i_ino);
1037                        *len = donor_blocks - orig_start;
1038                }
1039        } else {
1040                orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits;
1041                if (orig_start >= orig_blocks) {
1042                        ext4_debug("ext4 move extent: start offset [%llu] "
1043                                "should be less than original file blocks "
1044                                "[%u] [ino:orig %lu, donor %lu]\n",
1045                                 orig_start, orig_blocks,
1046                                orig_inode->i_ino, donor_inode->i_ino);
1047                        return -EINVAL;
1048                }
1049
1050                if (orig_start + *len > orig_blocks) {
1051                        ext4_debug("ext4 move extent: Adjust length "
1052                                "from %llu to %llu. Because it should be "
1053                                "less than original file blocks "
1054                                "[ino:orig %lu, donor %lu]\n",
1055                                *len, orig_blocks - orig_start,
1056                                orig_inode->i_ino, donor_inode->i_ino);
1057                        *len = orig_blocks - orig_start;
1058                }
1059        }
1060
1061        if (!*len) {
1062                ext4_debug("ext4 move extent: len should not be 0 "
1063                        "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1064                        donor_inode->i_ino);
1065                return -EINVAL;
1066        }
1067
1068        return 0;
1069}
1070
1071/**
1072 * mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
1073 *
1074 * @inode1:     the inode structure
1075 * @inode2:     the inode structure
1076 *
1077 * Lock two inodes' i_mutex by i_ino order.
1078 * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
1079 */
1080static int
1081mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
1082{
1083        int ret = 0;
1084
1085        BUG_ON(inode1 == NULL && inode2 == NULL);
1086
1087        ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
1088        if (ret < 0)
1089                goto out;
1090
1091        if (inode1 == inode2) {
1092                mutex_lock(&inode1->i_mutex);
1093                goto out;
1094        }
1095
1096        if (inode1->i_ino < inode2->i_ino) {
1097                mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
1098                mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
1099        } else {
1100                mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
1101                mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
1102        }
1103
1104out:
1105        return ret;
1106}
1107
1108/**
1109 * mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
1110 *
1111 * @inode1:     the inode that is released first
1112 * @inode2:     the inode that is released second
1113 *
1114 * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
1115 */
1116
1117static int
1118mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
1119{
1120        int ret = 0;
1121
1122        BUG_ON(inode1 == NULL && inode2 == NULL);
1123
1124        ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
1125        if (ret < 0)
1126                goto out;
1127
1128        if (inode1)
1129                mutex_unlock(&inode1->i_mutex);
1130
1131        if (inode2 && inode2 != inode1)
1132                mutex_unlock(&inode2->i_mutex);
1133
1134out:
1135        return ret;
1136}
1137
1138/**
1139 * ext4_move_extents - Exchange the specified range of a file
1140 *
1141 * @o_filp:             file structure of the original file
1142 * @d_filp:             file structure of the donor file
1143 * @orig_start:         start offset in block for orig
1144 * @donor_start:        start offset in block for donor
1145 * @len:                the number of blocks to be moved
1146 * @moved_len:          moved block length
1147 *
1148 * This function returns 0 and moved block length is set in moved_len
1149 * on success, otherwise returns an error value.
     * The count in @moved_len is only ever added to here, never reset
     * (NOTE(review): presumably the caller zeroes it beforehand - confirm),
     * and it is also updated for blocks moved before a failure.
1150 *
1151 * Note: ext4_move_extents() proceeds in the following order.
1152 * 1:ext4_move_extents() calculates the last block number of moving extent
1153 *   function by the start block number (orig_start) and the number of blocks
1154 *   to be moved (len) specified as arguments.
1155 *   If the {orig, donor}_start points to a hole, the extent's start offset
1156 *   pointed to by ext_cur (current extent), holecheck_path, orig_path is set
1157 *   to the extent following the hole.
1158 * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent
1159 *   or the ext_cur exceeds the block_end which is last logical block number.
1160 * 3:To get the length of the contiguous area, call mext_next_extent()
1161 *   specified with the ext_cur (initial value is holecheck_path) repeatedly,
1162 *   until a non-contiguous extent is found, the start logical block number
1163 *   exceeds the block_end or the extent points to the last extent.
1164 * 4:Exchange the original inode data with donor inode data
1165 *   from orig_page_offset to seq_end_page.
1166 *   The start indexes of data are specified as arguments.
1167 *   That of the original inode is orig_page_offset,
1168 *   and the donor inode is also orig_page_offset
1169 *   (To easily handle blocksize != pagesize case, the offset for the
1170 *   donor inode is block unit).
1171 * 5:Update holecheck_path and orig_path to point to the next extent to be
1172 *   processed, then return to step 2.
1173 * 6:Release holecheck_path, orig_path and set the len to moved_len
1174 *   which shows the number of moved blocks.
1175 *   The moved_len is useful for the command to calculate the file offset
1176 *   for starting next move extent ioctl.
1177 * 7:Return 0 on success, or a negative error value on failure.
1178 */
1179int
1180ext4_move_extents(struct file *o_filp, struct file *d_filp,
1181                 __u64 orig_start, __u64 donor_start, __u64 len,
1182                 __u64 *moved_len)
1183{
1184        struct inode *orig_inode = o_filp->f_dentry->d_inode;
1185        struct inode *donor_inode = d_filp->f_dentry->d_inode;
1186        struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
1187        struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
1188        ext4_lblk_t block_start = orig_start;
1189        ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
1190        ext4_lblk_t rest_blocks;
1191        pgoff_t orig_page_offset = 0, seq_end_page;
1192        int ret1, ret2, depth, last_extent = 0;
1193        int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
1194        int data_offset_in_page;
1195        int block_len_in_page;
1196        int uninit;
1197
1198        /* orig and donor should be different file */
1199        if (orig_inode->i_ino == donor_inode->i_ino) {
1200                ext4_debug("ext4 move extent: The argument files should not "
1201                        "be same file [ino:orig %lu, donor %lu]\n",
1202                        orig_inode->i_ino, donor_inode->i_ino);
1203                return -EINVAL;
1204        }
1205
1206        /* Regular file check */
1207        if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
1208                ext4_debug("ext4 move extent: The argument files should be "
1209                        "regular file [ino:orig %lu, donor %lu]\n",
1210                        orig_inode->i_ino, donor_inode->i_ino);
1211                return -EINVAL;
1212        }
1213
1214        /* Protect orig and donor inodes against a truncate */
1215        ret1 = mext_inode_double_lock(orig_inode, donor_inode);
1216        if (ret1 < 0)
1217                return ret1;
1218
1219        /* Protect extent tree against block allocations via delalloc */
1220        double_down_write_data_sem(orig_inode, donor_inode);
1221        /* Check the filesystem environment whether move_extent can be done */
            /* Note: len is passed by address and may be shortened by the check */
1222        ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
1223                                    donor_start, &len);
1224        if (ret1)
1225                goto out;
1226
            /* Clamp the request to the last block of the original file */
1227        file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
1228        block_end = block_start + len - 1;
1229        if (file_end < block_end)
1230                len -= block_end - file_end;
1231
1232        ret1 = get_ext_path(orig_inode, block_start, &orig_path);
1233        if (ret1)
1234                goto out;
1235
1236        /* Get path structure to check the hole */
1237        ret1 = get_ext_path(orig_inode, block_start, &holecheck_path);
1238        if (ret1)
1239                goto out;
1240
1241        depth = ext_depth(orig_inode);
1242        ext_cur = holecheck_path[depth].p_ext;
1243
1244        /*
1245         * Get proper starting location of block replacement if block_start was
1246         * within the hole.
1247         */
1248        if (le32_to_cpu(ext_cur->ee_block) +
1249                ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
1250                /*
1251                 * The hole exists between extents or the tail of
1252                 * original file.
1253                 */
1254                last_extent = mext_next_extent(orig_inode,
1255                                        holecheck_path, &ext_cur);
1256                if (last_extent < 0) {
1257                        ret1 = last_extent;
1258                        goto out;
1259                }
                    /* Advance orig_path in step; the extent itself is unused */
1260                last_extent = mext_next_extent(orig_inode, orig_path,
1261                                                        &ext_dummy);
1262                if (last_extent < 0) {
1263                        ret1 = last_extent;
1264                        goto out;
1265                }
1266                seq_start = le32_to_cpu(ext_cur->ee_block);
1267        } else if (le32_to_cpu(ext_cur->ee_block) > block_start)
1268                /* The hole exists at the beginning of original file. */
1269                seq_start = le32_to_cpu(ext_cur->ee_block);
1270        else
1271                seq_start = block_start;
1272
1273        /* No blocks within the specified range. */
1274        if (le32_to_cpu(ext_cur->ee_block) > block_end) {
1275                ext4_debug("ext4 move extent: The specified range of file "
1276                                                        "may be the hole\n");
1277                ret1 = -EINVAL;
1278                goto out;
1279        }
1280
1281        /* Adjust start blocks */
            /* i.e. the part of ext_cur that overlaps [block_start, block_end] */
1282        add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
1283                         ext4_ext_get_actual_len(ext_cur), block_end + 1) -
1284                     max(le32_to_cpu(ext_cur->ee_block), block_start);
1285
1286        while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
1287                seq_blocks += add_blocks;
1288
1289                /* Adjust tail blocks */
1290                if (seq_start + seq_blocks - 1 > block_end)
1291                        seq_blocks = block_end - seq_start + 1;
1292
1293                ext_prev = ext_cur;
1294                last_extent = mext_next_extent(orig_inode, holecheck_path,
1295                                                &ext_cur);
1296                if (last_extent < 0) {
1297                        ret1 = last_extent;
1298                        break;
1299                }
1300                add_blocks = ext4_ext_get_actual_len(ext_cur);
1301
1302                /*
1303                 * Extend the length of contiguous block (seq_blocks)
1304                 * if extents are contiguous.
1305                 */
1306                if (ext4_can_extents_be_merged(orig_inode,
1307                                               ext_prev, ext_cur) &&
1308                    block_end >= le32_to_cpu(ext_cur->ee_block) &&
1309                    !last_extent)
1310                        continue;
1311
1312                /* Is the original extent uninitialized */
1313                uninit = ext4_ext_is_uninitialized(ext_prev);
1314
1315                data_offset_in_page = seq_start % blocks_per_page;
1316
1317                /*
1318                 * Calculate data blocks count that should be swapped
1319                 * at the first page.
1320                 */
1321                if (data_offset_in_page + seq_blocks > blocks_per_page) {
1322                        /* Swapped blocks are across pages */
1323                        block_len_in_page =
1324                                        blocks_per_page - data_offset_in_page;
1325                } else {
1326                        /* Swapped blocks are in a page */
1327                        block_len_in_page = seq_blocks;
1328                }
1329
                    /* Page range covered by the current contiguous sequence */
1330                orig_page_offset = seq_start >>
1331                                (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
1332                seq_end_page = (seq_start + seq_blocks - 1) >>
1333                                (PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
                    /* From here on seq_start refers to the next sequence */
1334                seq_start = le32_to_cpu(ext_cur->ee_block);
1335                rest_blocks = seq_blocks;
1336
1337                /*
1338                 * Up semaphore to avoid following problems:
1339                 * a. transaction deadlock among ext4_journal_start,
1340                 *    ->write_begin via pagefault, and jbd2_journal_commit
1341                 * b. racing with ->readpage, ->write_begin, and ext4_get_block
1342                 *    in move_extent_per_page
1343                 */
1344                double_up_write_data_sem(orig_inode, donor_inode);
1345
1346                while (orig_page_offset <= seq_end_page) {
1347
1348                        /* Swap original branches with new branches */
1349                        block_len_in_page = move_extent_per_page(
1350                                                o_filp, donor_inode,
1351                                                orig_page_offset,
1352                                                data_offset_in_page,
1353                                                block_len_in_page, uninit,
1354                                                &ret1);
1355
1356                        /* Count how many blocks we have exchanged */
1357                        *moved_len += block_len_in_page;
1358                        if (ret1 < 0)
1359                                break;
1360                        if (*moved_len > len) {
1361                                EXT4_ERROR_INODE(orig_inode,
1362                                        "We replaced blocks too much! "
1363                                        "sum of replaced: %llu requested: %llu",
1364                                        *moved_len, len);
1365                                ret1 = -EIO;
1366                                break;
1367                        }
1368
                            /* Later pages of the sequence start at block 0 */
1369                        orig_page_offset++;
1370                        data_offset_in_page = 0;
1371                        rest_blocks -= block_len_in_page;
1372                        if (rest_blocks > blocks_per_page)
1373                                block_len_in_page = blocks_per_page;
1374                        else
1375                                block_len_in_page = rest_blocks;
1376                }
1377
                    /* Re-take the semaphores before touching the extent tree */
1378                double_down_write_data_sem(orig_inode, donor_inode);
1379                if (ret1 < 0)
1380                        break;
1381
1382                /* Decrease buffer counter */
1383                if (holecheck_path)
1384                        ext4_ext_drop_refs(holecheck_path);
1385                ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path);
1386                if (ret1)
1387                        break;
1388                depth = holecheck_path->p_depth;
1389
1390                /* Decrease buffer counter */
1391                if (orig_path)
1392                        ext4_ext_drop_refs(orig_path);
1393                ret1 = get_ext_path(orig_inode, seq_start, &orig_path);
1394                if (ret1)
1395                        break;
1396
                    /* Restart accumulation from the freshly looked-up extent */
1397                ext_cur = holecheck_path[depth].p_ext;
1398                add_blocks = ext4_ext_get_actual_len(ext_cur);
1399                seq_blocks = 0;
1400
1401        }
1402out:
            /* Drop preallocations of both inodes once any block has moved */
1403        if (*moved_len) {
1404                ext4_discard_preallocations(orig_inode);
1405                ext4_discard_preallocations(donor_inode);
1406        }
1407
1408        if (orig_path) {
1409                ext4_ext_drop_refs(orig_path);
1410                kfree(orig_path);
1411        }
1412        if (holecheck_path) {
1413                ext4_ext_drop_refs(holecheck_path);
1414                kfree(holecheck_path);
1415        }
1416        double_up_write_data_sem(orig_inode, donor_inode);
1417        ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
1418
            /* An error from the move itself wins over an unlock error */
1419        if (ret1)
1420                return ret1;
1421        else if (ret2)
1422                return ret2;
1423
1424        return 0;
1425}
1426