linux/fs/btrfs/relocation.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2009 Oracle.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16 * Boston, MA 021110-1307, USA.
  17 */
  18
  19#include <linux/sched.h>
  20#include <linux/pagemap.h>
  21#include <linux/writeback.h>
  22#include <linux/blkdev.h>
  23#include <linux/rbtree.h>
  24#include <linux/slab.h>
  25#include "ctree.h"
  26#include "disk-io.h"
  27#include "transaction.h"
  28#include "volumes.h"
  29#include "locking.h"
  30#include "btrfs_inode.h"
  31#include "async-thread.h"
  32#include "free-space-cache.h"
  33
  34/*
  35 * backref_node, mapping_node and tree_block start with this
  36 */
  37struct tree_entry {
  38        struct rb_node rb_node;
  39        u64 bytenr;
  40};
  41
  42/*
  43 * present a tree block in the backref cache
  44 */
  45struct backref_node {
  46        struct rb_node rb_node;
  47        u64 bytenr;
  48
  49        u64 new_bytenr;
  50        /* objectid of tree block owner, can be not uptodate */
  51        u64 owner;
  52        /* link to pending, changed or detached list */
  53        struct list_head list;
  54        /* list of upper level blocks reference this block */
  55        struct list_head upper;
  56        /* list of child blocks in the cache */
  57        struct list_head lower;
  58        /* NULL if this node is not tree root */
  59        struct btrfs_root *root;
  60        /* extent buffer got by COW the block */
  61        struct extent_buffer *eb;
  62        /* level of tree block */
  63        unsigned int level:8;
  64        /* is the block in non-reference counted tree */
  65        unsigned int cowonly:1;
  66        /* 1 if no child node in the cache */
  67        unsigned int lowest:1;
  68        /* is the extent buffer locked */
  69        unsigned int locked:1;
  70        /* has the block been processed */
  71        unsigned int processed:1;
  72        /* have backrefs of this block been checked */
  73        unsigned int checked:1;
  74        /*
  75         * 1 if corresponding block has been cowed but some upper
  76         * level block pointers may not point to the new location
  77         */
  78        unsigned int pending:1;
  79        /*
  80         * 1 if the backref node isn't connected to any other
  81         * backref node.
  82         */
  83        unsigned int detached:1;
  84};
  85
  86/*
  87 * present a block pointer in the backref cache
  88 */
  89struct backref_edge {
  90        struct list_head list[2];
  91        struct backref_node *node[2];
  92};
  93
  94#define LOWER   0
  95#define UPPER   1
  96
  97struct backref_cache {
  98        /* red black tree of all backref nodes in the cache */
  99        struct rb_root rb_root;
 100        /* for passing backref nodes to btrfs_reloc_cow_block */
 101        struct backref_node *path[BTRFS_MAX_LEVEL];
 102        /*
 103         * list of blocks that have been cowed but some block
 104         * pointers in upper level blocks may not reflect the
 105         * new location
 106         */
 107        struct list_head pending[BTRFS_MAX_LEVEL];
 108        /* list of backref nodes with no child node */
 109        struct list_head leaves;
 110        /* list of blocks that have been cowed in current transaction */
 111        struct list_head changed;
 112        /* list of detached backref node. */
 113        struct list_head detached;
 114
 115        u64 last_trans;
 116
 117        int nr_nodes;
 118        int nr_edges;
 119};
 120
 121/*
 122 * map address of tree root to tree
 123 */
 124struct mapping_node {
 125        struct rb_node rb_node;
 126        u64 bytenr;
 127        void *data;
 128};
 129
 130struct mapping_tree {
 131        struct rb_root rb_root;
 132        spinlock_t lock;
 133};
 134
 135/*
 136 * present a tree block to process
 137 */
 138struct tree_block {
 139        struct rb_node rb_node;
 140        u64 bytenr;
 141        struct btrfs_key key;
 142        unsigned int level:8;
 143        unsigned int key_ready:1;
 144};
 145
 146#define MAX_EXTENTS 128
 147
 148struct file_extent_cluster {
 149        u64 start;
 150        u64 end;
 151        u64 boundary[MAX_EXTENTS];
 152        unsigned int nr;
 153};
 154
 155struct reloc_control {
 156        /* block group to relocate */
 157        struct btrfs_block_group_cache *block_group;
 158        /* extent tree */
 159        struct btrfs_root *extent_root;
 160        /* inode for moving data */
 161        struct inode *data_inode;
 162
 163        struct btrfs_block_rsv *block_rsv;
 164
 165        struct backref_cache backref_cache;
 166
 167        struct file_extent_cluster cluster;
 168        /* tree blocks have been processed */
 169        struct extent_io_tree processed_blocks;
 170        /* map start of tree root to corresponding reloc tree */
 171        struct mapping_tree reloc_root_tree;
 172        /* list of reloc trees */
 173        struct list_head reloc_roots;
 174        /* size of metadata reservation for merging reloc trees */
 175        u64 merging_rsv_size;
 176        /* size of relocated tree nodes */
 177        u64 nodes_relocated;
 178
 179        u64 search_start;
 180        u64 extents_found;
 181
 182        unsigned int stage:8;
 183        unsigned int create_reloc_tree:1;
 184        unsigned int merge_reloc_tree:1;
 185        unsigned int found_file_extent:1;
 186        unsigned int commit_transaction:1;
 187};
 188
 189/* stages of data relocation */
 190#define MOVE_DATA_EXTENTS       0
 191#define UPDATE_DATA_PTRS        1
 192
 193static void remove_backref_node(struct backref_cache *cache,
 194                                struct backref_node *node);
 195static void __mark_block_processed(struct reloc_control *rc,
 196                                   struct backref_node *node);
 197
 198static void mapping_tree_init(struct mapping_tree *tree)
 199{
 200        tree->rb_root = RB_ROOT;
 201        spin_lock_init(&tree->lock);
 202}
 203
 204static void backref_cache_init(struct backref_cache *cache)
 205{
 206        int i;
 207        cache->rb_root = RB_ROOT;
 208        for (i = 0; i < BTRFS_MAX_LEVEL; i++)
 209                INIT_LIST_HEAD(&cache->pending[i]);
 210        INIT_LIST_HEAD(&cache->changed);
 211        INIT_LIST_HEAD(&cache->detached);
 212        INIT_LIST_HEAD(&cache->leaves);
 213}
 214
 215static void backref_cache_cleanup(struct backref_cache *cache)
 216{
 217        struct backref_node *node;
 218        int i;
 219
 220        while (!list_empty(&cache->detached)) {
 221                node = list_entry(cache->detached.next,
 222                                  struct backref_node, list);
 223                remove_backref_node(cache, node);
 224        }
 225
 226        while (!list_empty(&cache->leaves)) {
 227                node = list_entry(cache->leaves.next,
 228                                  struct backref_node, lower);
 229                remove_backref_node(cache, node);
 230        }
 231
 232        cache->last_trans = 0;
 233
 234        for (i = 0; i < BTRFS_MAX_LEVEL; i++)
 235                BUG_ON(!list_empty(&cache->pending[i]));
 236        BUG_ON(!list_empty(&cache->changed));
 237        BUG_ON(!list_empty(&cache->detached));
 238        BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
 239        BUG_ON(cache->nr_nodes);
 240        BUG_ON(cache->nr_edges);
 241}
 242
 243static struct backref_node *alloc_backref_node(struct backref_cache *cache)
 244{
 245        struct backref_node *node;
 246
 247        node = kzalloc(sizeof(*node), GFP_NOFS);
 248        if (node) {
 249                INIT_LIST_HEAD(&node->list);
 250                INIT_LIST_HEAD(&node->upper);
 251                INIT_LIST_HEAD(&node->lower);
 252                RB_CLEAR_NODE(&node->rb_node);
 253                cache->nr_nodes++;
 254        }
 255        return node;
 256}
 257
 258static void free_backref_node(struct backref_cache *cache,
 259                              struct backref_node *node)
 260{
 261        if (node) {
 262                cache->nr_nodes--;
 263                kfree(node);
 264        }
 265}
 266
 267static struct backref_edge *alloc_backref_edge(struct backref_cache *cache)
 268{
 269        struct backref_edge *edge;
 270
 271        edge = kzalloc(sizeof(*edge), GFP_NOFS);
 272        if (edge)
 273                cache->nr_edges++;
 274        return edge;
 275}
 276
 277static void free_backref_edge(struct backref_cache *cache,
 278                              struct backref_edge *edge)
 279{
 280        if (edge) {
 281                cache->nr_edges--;
 282                kfree(edge);
 283        }
 284}
 285
 286static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
 287                                   struct rb_node *node)
 288{
 289        struct rb_node **p = &root->rb_node;
 290        struct rb_node *parent = NULL;
 291        struct tree_entry *entry;
 292
 293        while (*p) {
 294                parent = *p;
 295                entry = rb_entry(parent, struct tree_entry, rb_node);
 296
 297                if (bytenr < entry->bytenr)
 298                        p = &(*p)->rb_left;
 299                else if (bytenr > entry->bytenr)
 300                        p = &(*p)->rb_right;
 301                else
 302                        return parent;
 303        }
 304
 305        rb_link_node(node, parent, p);
 306        rb_insert_color(node, root);
 307        return NULL;
 308}
 309
 310static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
 311{
 312        struct rb_node *n = root->rb_node;
 313        struct tree_entry *entry;
 314
 315        while (n) {
 316                entry = rb_entry(n, struct tree_entry, rb_node);
 317
 318                if (bytenr < entry->bytenr)
 319                        n = n->rb_left;
 320                else if (bytenr > entry->bytenr)
 321                        n = n->rb_right;
 322                else
 323                        return n;
 324        }
 325        return NULL;
 326}
 327
 328/*
 329 * walk up backref nodes until reach node presents tree root
 330 */
 331static struct backref_node *walk_up_backref(struct backref_node *node,
 332                                            struct backref_edge *edges[],
 333                                            int *index)
 334{
 335        struct backref_edge *edge;
 336        int idx = *index;
 337
 338        while (!list_empty(&node->upper)) {
 339                edge = list_entry(node->upper.next,
 340                                  struct backref_edge, list[LOWER]);
 341                edges[idx++] = edge;
 342                node = edge->node[UPPER];
 343        }
 344        BUG_ON(node->detached);
 345        *index = idx;
 346        return node;
 347}
 348
 349/*
 350 * walk down backref nodes to find start of next reference path
 351 */
 352static struct backref_node *walk_down_backref(struct backref_edge *edges[],
 353                                              int *index)
 354{
 355        struct backref_edge *edge;
 356        struct backref_node *lower;
 357        int idx = *index;
 358
 359        while (idx > 0) {
 360                edge = edges[idx - 1];
 361                lower = edge->node[LOWER];
 362                if (list_is_last(&edge->list[LOWER], &lower->upper)) {
 363                        idx--;
 364                        continue;
 365                }
 366                edge = list_entry(edge->list[LOWER].next,
 367                                  struct backref_edge, list[LOWER]);
 368                edges[idx - 1] = edge;
 369                *index = idx;
 370                return edge->node[UPPER];
 371        }
 372        *index = 0;
 373        return NULL;
 374}
 375
 376static void unlock_node_buffer(struct backref_node *node)
 377{
 378        if (node->locked) {
 379                btrfs_tree_unlock(node->eb);
 380                node->locked = 0;
 381        }
 382}
 383
 384static void drop_node_buffer(struct backref_node *node)
 385{
 386        if (node->eb) {
 387                unlock_node_buffer(node);
 388                free_extent_buffer(node->eb);
 389                node->eb = NULL;
 390        }
 391}
 392
 393static void drop_backref_node(struct backref_cache *tree,
 394                              struct backref_node *node)
 395{
 396        BUG_ON(!list_empty(&node->upper));
 397
 398        drop_node_buffer(node);
 399        list_del(&node->list);
 400        list_del(&node->lower);
 401        if (!RB_EMPTY_NODE(&node->rb_node))
 402                rb_erase(&node->rb_node, &tree->rb_root);
 403        free_backref_node(tree, node);
 404}
 405
 406/*
 407 * remove a backref node from the backref cache
 408 */
 409static void remove_backref_node(struct backref_cache *cache,
 410                                struct backref_node *node)
 411{
 412        struct backref_node *upper;
 413        struct backref_edge *edge;
 414
 415        if (!node)
 416                return;
 417
 418        BUG_ON(!node->lowest && !node->detached);
 419        while (!list_empty(&node->upper)) {
 420                edge = list_entry(node->upper.next, struct backref_edge,
 421                                  list[LOWER]);
 422                upper = edge->node[UPPER];
 423                list_del(&edge->list[LOWER]);
 424                list_del(&edge->list[UPPER]);
 425                free_backref_edge(cache, edge);
 426
 427                if (RB_EMPTY_NODE(&upper->rb_node)) {
 428                        BUG_ON(!list_empty(&node->upper));
 429                        drop_backref_node(cache, node);
 430                        node = upper;
 431                        node->lowest = 1;
 432                        continue;
 433                }
 434                /*
 435                 * add the node to leaf node list if no other
 436                 * child block cached.
 437                 */
 438                if (list_empty(&upper->lower)) {
 439                        list_add_tail(&upper->lower, &cache->leaves);
 440                        upper->lowest = 1;
 441                }
 442        }
 443
 444        drop_backref_node(cache, node);
 445}
 446
 447static void update_backref_node(struct backref_cache *cache,
 448                                struct backref_node *node, u64 bytenr)
 449{
 450        struct rb_node *rb_node;
 451        rb_erase(&node->rb_node, &cache->rb_root);
 452        node->bytenr = bytenr;
 453        rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
 454        BUG_ON(rb_node);
 455}
 456
 457/*
 458 * update backref cache after a transaction commit
 459 */
 460static int update_backref_cache(struct btrfs_trans_handle *trans,
 461                                struct backref_cache *cache)
 462{
 463        struct backref_node *node;
 464        int level = 0;
 465
 466        if (cache->last_trans == 0) {
 467                cache->last_trans = trans->transid;
 468                return 0;
 469        }
 470
 471        if (cache->last_trans == trans->transid)
 472                return 0;
 473
 474        /*
 475         * detached nodes are used to avoid unnecessary backref
 476         * lookup. transaction commit changes the extent tree.
 477         * so the detached nodes are no longer useful.
 478         */
 479        while (!list_empty(&cache->detached)) {
 480                node = list_entry(cache->detached.next,
 481                                  struct backref_node, list);
 482                remove_backref_node(cache, node);
 483        }
 484
 485        while (!list_empty(&cache->changed)) {
 486                node = list_entry(cache->changed.next,
 487                                  struct backref_node, list);
 488                list_del_init(&node->list);
 489                BUG_ON(node->pending);
 490                update_backref_node(cache, node, node->new_bytenr);
 491        }
 492
 493        /*
 494         * some nodes can be left in the pending list if there were
 495         * errors during processing the pending nodes.
 496         */
 497        for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
 498                list_for_each_entry(node, &cache->pending[level], list) {
 499                        BUG_ON(!node->pending);
 500                        if (node->bytenr == node->new_bytenr)
 501                                continue;
 502                        update_backref_node(cache, node, node->new_bytenr);
 503                }
 504        }
 505
 506        cache->last_trans = 0;
 507        return 1;
 508}
 509
 510static int should_ignore_root(struct btrfs_root *root)
 511{
 512        struct btrfs_root *reloc_root;
 513
 514        if (!root->ref_cows)
 515                return 0;
 516
 517        reloc_root = root->reloc_root;
 518        if (!reloc_root)
 519                return 0;
 520
 521        if (btrfs_root_last_snapshot(&reloc_root->root_item) ==
 522            root->fs_info->running_transaction->transid - 1)
 523                return 0;
 524        /*
 525         * if there is reloc tree and it was created in previous
 526         * transaction backref lookup can find the reloc tree,
 527         * so backref node for the fs tree root is useless for
 528         * relocation.
 529         */
 530        return 1;
 531}
 532
 533/*
 534 * find reloc tree by address of tree root
 535 */
 536static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
 537                                          u64 bytenr)
 538{
 539        struct rb_node *rb_node;
 540        struct mapping_node *node;
 541        struct btrfs_root *root = NULL;
 542
 543        spin_lock(&rc->reloc_root_tree.lock);
 544        rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
 545        if (rb_node) {
 546                node = rb_entry(rb_node, struct mapping_node, rb_node);
 547                root = (struct btrfs_root *)node->data;
 548        }
 549        spin_unlock(&rc->reloc_root_tree.lock);
 550        return root;
 551}
 552
 553static int is_cowonly_root(u64 root_objectid)
 554{
 555        if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
 556            root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
 557            root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
 558            root_objectid == BTRFS_DEV_TREE_OBJECTID ||
 559            root_objectid == BTRFS_TREE_LOG_OBJECTID ||
 560            root_objectid == BTRFS_CSUM_TREE_OBJECTID)
 561                return 1;
 562        return 0;
 563}
 564
 565static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
 566                                        u64 root_objectid)
 567{
 568        struct btrfs_key key;
 569
 570        key.objectid = root_objectid;
 571        key.type = BTRFS_ROOT_ITEM_KEY;
 572        if (is_cowonly_root(root_objectid))
 573                key.offset = 0;
 574        else
 575                key.offset = (u64)-1;
 576
 577        return btrfs_read_fs_root_no_name(fs_info, &key);
 578}
 579
 580#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
 581static noinline_for_stack
 582struct btrfs_root *find_tree_root(struct reloc_control *rc,
 583                                  struct extent_buffer *leaf,
 584                                  struct btrfs_extent_ref_v0 *ref0)
 585{
 586        struct btrfs_root *root;
 587        u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
 588        u64 generation = btrfs_ref_generation_v0(leaf, ref0);
 589
 590        BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
 591
 592        root = read_fs_root(rc->extent_root->fs_info, root_objectid);
 593        BUG_ON(IS_ERR(root));
 594
 595        if (root->ref_cows &&
 596            generation != btrfs_root_generation(&root->root_item))
 597                return NULL;
 598
 599        return root;
 600}
 601#endif
 602
 603static noinline_for_stack
 604int find_inline_backref(struct extent_buffer *leaf, int slot,
 605                        unsigned long *ptr, unsigned long *end)
 606{
 607        struct btrfs_extent_item *ei;
 608        struct btrfs_tree_block_info *bi;
 609        u32 item_size;
 610
 611        item_size = btrfs_item_size_nr(leaf, slot);
 612#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
 613        if (item_size < sizeof(*ei)) {
 614                WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
 615                return 1;
 616        }
 617#endif
 618        ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
 619        WARN_ON(!(btrfs_extent_flags(leaf, ei) &
 620                  BTRFS_EXTENT_FLAG_TREE_BLOCK));
 621
 622        if (item_size <= sizeof(*ei) + sizeof(*bi)) {
 623                WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
 624                return 1;
 625        }
 626
 627        bi = (struct btrfs_tree_block_info *)(ei + 1);
 628        *ptr = (unsigned long)(bi + 1);
 629        *end = (unsigned long)ei + item_size;
 630        return 0;
 631}
 632
 633/*
 634 * build backref tree for a given tree block. root of the backref tree
 635 * corresponds the tree block, leaves of the backref tree correspond
 636 * roots of b-trees that reference the tree block.
 637 *
 638 * the basic idea of this function is check backrefs of a given block
 639 * to find upper level blocks that refernece the block, and then check
 640 * bakcrefs of these upper level blocks recursively. the recursion stop
 641 * when tree root is reached or backrefs for the block is cached.
 642 *
 643 * NOTE: if we find backrefs for a block are cached, we know backrefs
 644 * for all upper level blocks that directly/indirectly reference the
 645 * block are also cached.
 646 */
 647static noinline_for_stack
 648struct backref_node *build_backref_tree(struct reloc_control *rc,
 649                                        struct btrfs_key *node_key,
 650                                        int level, u64 bytenr)
 651{
 652        struct backref_cache *cache = &rc->backref_cache;
 653        struct btrfs_path *path1;
 654        struct btrfs_path *path2;
 655        struct extent_buffer *eb;
 656        struct btrfs_root *root;
 657        struct backref_node *cur;
 658        struct backref_node *upper;
 659        struct backref_node *lower;
 660        struct backref_node *node = NULL;
 661        struct backref_node *exist = NULL;
 662        struct backref_edge *edge;
 663        struct rb_node *rb_node;
 664        struct btrfs_key key;
 665        unsigned long end;
 666        unsigned long ptr;
 667        LIST_HEAD(list);
 668        LIST_HEAD(useless);
 669        int cowonly;
 670        int ret;
 671        int err = 0;
 672
 673        path1 = btrfs_alloc_path();
 674        path2 = btrfs_alloc_path();
 675        if (!path1 || !path2) {
 676                err = -ENOMEM;
 677                goto out;
 678        }
 679
 680        node = alloc_backref_node(cache);
 681        if (!node) {
 682                err = -ENOMEM;
 683                goto out;
 684        }
 685
 686        node->bytenr = bytenr;
 687        node->level = level;
 688        node->lowest = 1;
 689        cur = node;
 690again:
 691        end = 0;
 692        ptr = 0;
 693        key.objectid = cur->bytenr;
 694        key.type = BTRFS_EXTENT_ITEM_KEY;
 695        key.offset = (u64)-1;
 696
 697        path1->search_commit_root = 1;
 698        path1->skip_locking = 1;
 699        ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
 700                                0, 0);
 701        if (ret < 0) {
 702                err = ret;
 703                goto out;
 704        }
 705        BUG_ON(!ret || !path1->slots[0]);
 706
 707        path1->slots[0]--;
 708
 709        WARN_ON(cur->checked);
 710        if (!list_empty(&cur->upper)) {
 711                /*
 712                 * the backref was added previously when processsing
 713                 * backref of type BTRFS_TREE_BLOCK_REF_KEY
 714                 */
 715                BUG_ON(!list_is_singular(&cur->upper));
 716                edge = list_entry(cur->upper.next, struct backref_edge,
 717                                  list[LOWER]);
 718                BUG_ON(!list_empty(&edge->list[UPPER]));
 719                exist = edge->node[UPPER];
 720                /*
 721                 * add the upper level block to pending list if we need
 722                 * check its backrefs
 723                 */
 724                if (!exist->checked)
 725                        list_add_tail(&edge->list[UPPER], &list);
 726        } else {
 727                exist = NULL;
 728        }
 729
 730        while (1) {
 731                cond_resched();
 732                eb = path1->nodes[0];
 733
 734                if (ptr >= end) {
 735                        if (path1->slots[0] >= btrfs_header_nritems(eb)) {
 736                                ret = btrfs_next_leaf(rc->extent_root, path1);
 737                                if (ret < 0) {
 738                                        err = ret;
 739                                        goto out;
 740                                }
 741                                if (ret > 0)
 742                                        break;
 743                                eb = path1->nodes[0];
 744                        }
 745
 746                        btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
 747                        if (key.objectid != cur->bytenr) {
 748                                WARN_ON(exist);
 749                                break;
 750                        }
 751
 752                        if (key.type == BTRFS_EXTENT_ITEM_KEY) {
 753                                ret = find_inline_backref(eb, path1->slots[0],
 754                                                          &ptr, &end);
 755                                if (ret)
 756                                        goto next;
 757                        }
 758                }
 759
 760                if (ptr < end) {
 761                        /* update key for inline back ref */
 762                        struct btrfs_extent_inline_ref *iref;
 763                        iref = (struct btrfs_extent_inline_ref *)ptr;
 764                        key.type = btrfs_extent_inline_ref_type(eb, iref);
 765                        key.offset = btrfs_extent_inline_ref_offset(eb, iref);
 766                        WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
 767                                key.type != BTRFS_SHARED_BLOCK_REF_KEY);
 768                }
 769
 770                if (exist &&
 771                    ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
 772                      exist->owner == key.offset) ||
 773                     (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
 774                      exist->bytenr == key.offset))) {
 775                        exist = NULL;
 776                        goto next;
 777                }
 778
 779#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
 780                if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
 781                    key.type == BTRFS_EXTENT_REF_V0_KEY) {
 782                        if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
 783                                struct btrfs_extent_ref_v0 *ref0;
 784                                ref0 = btrfs_item_ptr(eb, path1->slots[0],
 785                                                struct btrfs_extent_ref_v0);
 786                                if (key.objectid == key.offset) {
 787                                        root = find_tree_root(rc, eb, ref0);
 788                                        if (root && !should_ignore_root(root))
 789                                                cur->root = root;
 790                                        else
 791                                                list_add(&cur->list, &useless);
 792                                        break;
 793                                }
 794                                if (is_cowonly_root(btrfs_ref_root_v0(eb,
 795                                                                      ref0)))
 796                                        cur->cowonly = 1;
 797                        }
 798#else
 799                BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
 800                if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
 801#endif
 802                        if (key.objectid == key.offset) {
 803                                /*
 804                                 * only root blocks of reloc trees use
 805                                 * backref of this type.
 806                                 */
 807                                root = find_reloc_root(rc, cur->bytenr);
 808                                BUG_ON(!root);
 809                                cur->root = root;
 810                                break;
 811                        }
 812
 813                        edge = alloc_backref_edge(cache);
 814                        if (!edge) {
 815                                err = -ENOMEM;
 816                                goto out;
 817                        }
 818                        rb_node = tree_search(&cache->rb_root, key.offset);
 819                        if (!rb_node) {
 820                                upper = alloc_backref_node(cache);
 821                                if (!upper) {
 822                                        free_backref_edge(cache, edge);
 823                                        err = -ENOMEM;
 824                                        goto out;
 825                                }
 826                                upper->bytenr = key.offset;
 827                                upper->level = cur->level + 1;
 828                                /*
 829                                 *  backrefs for the upper level block isn't
 830                                 *  cached, add the block to pending list
 831                                 */
 832                                list_add_tail(&edge->list[UPPER], &list);
 833                        } else {
 834                                upper = rb_entry(rb_node, struct backref_node,
 835                                                 rb_node);
 836                                BUG_ON(!upper->checked);
 837                                INIT_LIST_HEAD(&edge->list[UPPER]);
 838                        }
 839                        list_add_tail(&edge->list[LOWER], &cur->upper);
 840                        edge->node[LOWER] = cur;
 841                        edge->node[UPPER] = upper;
 842
 843                        goto next;
 844                } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
 845                        goto next;
 846                }
 847
 848                /* key.type == BTRFS_TREE_BLOCK_REF_KEY */
 849                root = read_fs_root(rc->extent_root->fs_info, key.offset);
 850                if (IS_ERR(root)) {
 851                        err = PTR_ERR(root);
 852                        goto out;
 853                }
 854
 855                if (!root->ref_cows)
 856                        cur->cowonly = 1;
 857
 858                if (btrfs_root_level(&root->root_item) == cur->level) {
 859                        /* tree root */
 860                        BUG_ON(btrfs_root_bytenr(&root->root_item) !=
 861                               cur->bytenr);
 862                        if (should_ignore_root(root))
 863                                list_add(&cur->list, &useless);
 864                        else
 865                                cur->root = root;
 866                        break;
 867                }
 868
 869                level = cur->level + 1;
 870
 871                /*
 872                 * searching the tree to find upper level blocks
 873                 * reference the block.
 874                 */
 875                path2->search_commit_root = 1;
 876                path2->skip_locking = 1;
 877                path2->lowest_level = level;
 878                ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
 879                path2->lowest_level = 0;
 880                if (ret < 0) {
 881                        err = ret;
 882                        goto out;
 883                }
 884                if (ret > 0 && path2->slots[level] > 0)
 885                        path2->slots[level]--;
 886
 887                eb = path2->nodes[level];
 888                WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
 889                        cur->bytenr);
 890
 891                lower = cur;
 892                for (; level < BTRFS_MAX_LEVEL; level++) {
 893                        if (!path2->nodes[level]) {
 894                                BUG_ON(btrfs_root_bytenr(&root->root_item) !=
 895                                       lower->bytenr);
 896                                if (should_ignore_root(root))
 897                                        list_add(&lower->list, &useless);
 898                                else
 899                                        lower->root = root;
 900                                break;
 901                        }
 902
 903                        edge = alloc_backref_edge(cache);
 904                        if (!edge) {
 905                                err = -ENOMEM;
 906                                goto out;
 907                        }
 908
 909                        eb = path2->nodes[level];
 910                        rb_node = tree_search(&cache->rb_root, eb->start);
 911                        if (!rb_node) {
 912                                upper = alloc_backref_node(cache);
 913                                if (!upper) {
 914                                        free_backref_edge(cache, edge);
 915                                        err = -ENOMEM;
 916                                        goto out;
 917                                }
 918                                upper->bytenr = eb->start;
 919                                upper->owner = btrfs_header_owner(eb);
 920                                upper->level = lower->level + 1;
 921                                if (!root->ref_cows)
 922                                        upper->cowonly = 1;
 923
 924                                /*
 925                                 * if we know the block isn't shared
 926                                 * we can void checking its backrefs.
 927                                 */
 928                                if (btrfs_block_can_be_shared(root, eb))
 929                                        upper->checked = 0;
 930                                else
 931                                        upper->checked = 1;
 932
 933                                /*
 934                                 * add the block to pending list if we
 935                                 * need check its backrefs. only block
 936                                 * at 'cur->level + 1' is added to the
 937                                 * tail of pending list. this guarantees
 938                                 * we check backrefs from lower level
 939                                 * blocks to upper level blocks.
 940                                 */
 941                                if (!upper->checked &&
 942                                    level == cur->level + 1) {
 943                                        list_add_tail(&edge->list[UPPER],
 944                                                      &list);
 945                                } else
 946                                        INIT_LIST_HEAD(&edge->list[UPPER]);
 947                        } else {
 948                                upper = rb_entry(rb_node, struct backref_node,
 949                                                 rb_node);
 950                                BUG_ON(!upper->checked);
 951                                INIT_LIST_HEAD(&edge->list[UPPER]);
 952                                if (!upper->owner)
 953                                        upper->owner = btrfs_header_owner(eb);
 954                        }
 955                        list_add_tail(&edge->list[LOWER], &lower->upper);
 956                        edge->node[LOWER] = lower;
 957                        edge->node[UPPER] = upper;
 958
 959                        if (rb_node)
 960                                break;
 961                        lower = upper;
 962                        upper = NULL;
 963                }
 964                btrfs_release_path(root, path2);
 965next:
 966                if (ptr < end) {
 967                        ptr += btrfs_extent_inline_ref_size(key.type);
 968                        if (ptr >= end) {
 969                                WARN_ON(ptr > end);
 970                                ptr = 0;
 971                                end = 0;
 972                        }
 973                }
 974                if (ptr >= end)
 975                        path1->slots[0]++;
 976        }
 977        btrfs_release_path(rc->extent_root, path1);
 978
 979        cur->checked = 1;
 980        WARN_ON(exist);
 981
 982        /* the pending list isn't empty, take the first block to process */
 983        if (!list_empty(&list)) {
 984                edge = list_entry(list.next, struct backref_edge, list[UPPER]);
 985                list_del_init(&edge->list[UPPER]);
 986                cur = edge->node[UPPER];
 987                goto again;
 988        }
 989
 990        /*
 991         * everything goes well, connect backref nodes and insert backref nodes
 992         * into the cache.
 993         */
 994        BUG_ON(!node->checked);
 995        cowonly = node->cowonly;
 996        if (!cowonly) {
 997                rb_node = tree_insert(&cache->rb_root, node->bytenr,
 998                                      &node->rb_node);
 999                BUG_ON(rb_node);
1000                list_add_tail(&node->lower, &cache->leaves);
1001        }
1002
1003        list_for_each_entry(edge, &node->upper, list[LOWER])
1004                list_add_tail(&edge->list[UPPER], &list);
1005
1006        while (!list_empty(&list)) {
1007                edge = list_entry(list.next, struct backref_edge, list[UPPER]);
1008                list_del_init(&edge->list[UPPER]);
1009                upper = edge->node[UPPER];
1010                if (upper->detached) {
1011                        list_del(&edge->list[LOWER]);
1012                        lower = edge->node[LOWER];
1013                        free_backref_edge(cache, edge);
1014                        if (list_empty(&lower->upper))
1015                                list_add(&lower->list, &useless);
1016                        continue;
1017                }
1018
1019                if (!RB_EMPTY_NODE(&upper->rb_node)) {
1020                        if (upper->lowest) {
1021                                list_del_init(&upper->lower);
1022                                upper->lowest = 0;
1023                        }
1024
1025                        list_add_tail(&edge->list[UPPER], &upper->lower);
1026                        continue;
1027                }
1028
1029                BUG_ON(!upper->checked);
1030                BUG_ON(cowonly != upper->cowonly);
1031                if (!cowonly) {
1032                        rb_node = tree_insert(&cache->rb_root, upper->bytenr,
1033                                              &upper->rb_node);
1034                        BUG_ON(rb_node);
1035                }
1036
1037                list_add_tail(&edge->list[UPPER], &upper->lower);
1038
1039                list_for_each_entry(edge, &upper->upper, list[LOWER])
1040                        list_add_tail(&edge->list[UPPER], &list);
1041        }
1042        /*
1043         * process useless backref nodes. backref nodes for tree leaves
1044         * are deleted from the cache. backref nodes for upper level
1045         * tree blocks are left in the cache to avoid unnecessary backref
1046         * lookup.
1047         */
1048        while (!list_empty(&useless)) {
1049                upper = list_entry(useless.next, struct backref_node, list);
1050                list_del_init(&upper->list);
1051                BUG_ON(!list_empty(&upper->upper));
1052                if (upper == node)
1053                        node = NULL;
1054                if (upper->lowest) {
1055                        list_del_init(&upper->lower);
1056                        upper->lowest = 0;
1057                }
1058                while (!list_empty(&upper->lower)) {
1059                        edge = list_entry(upper->lower.next,
1060                                          struct backref_edge, list[UPPER]);
1061                        list_del(&edge->list[UPPER]);
1062                        list_del(&edge->list[LOWER]);
1063                        lower = edge->node[LOWER];
1064                        free_backref_edge(cache, edge);
1065
1066                        if (list_empty(&lower->upper))
1067                                list_add(&lower->list, &useless);
1068                }
1069                __mark_block_processed(rc, upper);
1070                if (upper->level > 0) {
1071                        list_add(&upper->list, &cache->detached);
1072                        upper->detached = 1;
1073                } else {
1074                        rb_erase(&upper->rb_node, &cache->rb_root);
1075                        free_backref_node(cache, upper);
1076                }
1077        }
1078out:
1079        btrfs_free_path(path1);
1080        btrfs_free_path(path2);
1081        if (err) {
1082                while (!list_empty(&useless)) {
1083                        lower = list_entry(useless.next,
1084                                           struct backref_node, upper);
1085                        list_del_init(&lower->upper);
1086                }
1087                upper = node;
1088                INIT_LIST_HEAD(&list);
1089                while (upper) {
1090                        if (RB_EMPTY_NODE(&upper->rb_node)) {
1091                                list_splice_tail(&upper->upper, &list);
1092                                free_backref_node(cache, upper);
1093                        }
1094
1095                        if (list_empty(&list))
1096                                break;
1097
1098                        edge = list_entry(list.next, struct backref_edge,
1099                                          list[LOWER]);
1100                        list_del(&edge->list[LOWER]);
1101                        upper = edge->node[UPPER];
1102                        free_backref_edge(cache, edge);
1103                }
1104                return ERR_PTR(err);
1105        }
1106        BUG_ON(node && node->detached);
1107        return node;
1108}
1109
1110/*
1111 * helper to add backref node for the newly created snapshot.
1112 * the backref node is created by cloning backref node that
1113 * corresponds to root of source tree
1114 */
1115static int clone_backref_node(struct btrfs_trans_handle *trans,
1116                              struct reloc_control *rc,
1117                              struct btrfs_root *src,
1118                              struct btrfs_root *dest)
1119{
1120        struct btrfs_root *reloc_root = src->reloc_root;
1121        struct backref_cache *cache = &rc->backref_cache;
1122        struct backref_node *node = NULL;
1123        struct backref_node *new_node;
1124        struct backref_edge *edge;
1125        struct backref_edge *new_edge;
1126        struct rb_node *rb_node;
1127
1128        if (cache->last_trans > 0)
1129                update_backref_cache(trans, cache);
1130
1131        rb_node = tree_search(&cache->rb_root, src->commit_root->start);
1132        if (rb_node) {
1133                node = rb_entry(rb_node, struct backref_node, rb_node);
1134                if (node->detached)
1135                        node = NULL;
1136                else
1137                        BUG_ON(node->new_bytenr != reloc_root->node->start);
1138        }
1139
1140        if (!node) {
1141                rb_node = tree_search(&cache->rb_root,
1142                                      reloc_root->commit_root->start);
1143                if (rb_node) {
1144                        node = rb_entry(rb_node, struct backref_node,
1145                                        rb_node);
1146                        BUG_ON(node->detached);
1147                }
1148        }
1149
1150        if (!node)
1151                return 0;
1152
1153        new_node = alloc_backref_node(cache);
1154        if (!new_node)
1155                return -ENOMEM;
1156
1157        new_node->bytenr = dest->node->start;
1158        new_node->level = node->level;
1159        new_node->lowest = node->lowest;
1160        new_node->checked = 1;
1161        new_node->root = dest;
1162
1163        if (!node->lowest) {
1164                list_for_each_entry(edge, &node->lower, list[UPPER]) {
1165                        new_edge = alloc_backref_edge(cache);
1166                        if (!new_edge)
1167                                goto fail;
1168
1169                        new_edge->node[UPPER] = new_node;
1170                        new_edge->node[LOWER] = edge->node[LOWER];
1171                        list_add_tail(&new_edge->list[UPPER],
1172                                      &new_node->lower);
1173                }
1174        }
1175
1176        rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
1177                              &new_node->rb_node);
1178        BUG_ON(rb_node);
1179
1180        if (!new_node->lowest) {
1181                list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
1182                        list_add_tail(&new_edge->list[LOWER],
1183                                      &new_edge->node[LOWER]->upper);
1184                }
1185        }
1186        return 0;
1187fail:
1188        while (!list_empty(&new_node->lower)) {
1189                new_edge = list_entry(new_node->lower.next,
1190                                      struct backref_edge, list[UPPER]);
1191                list_del(&new_edge->list[UPPER]);
1192                free_backref_edge(cache, new_edge);
1193        }
1194        free_backref_node(cache, new_node);
1195        return -ENOMEM;
1196}
1197
1198/*
1199 * helper to add 'address of tree root -> reloc tree' mapping
1200 */
1201static int __add_reloc_root(struct btrfs_root *root)
1202{
1203        struct rb_node *rb_node;
1204        struct mapping_node *node;
1205        struct reloc_control *rc = root->fs_info->reloc_ctl;
1206
1207        node = kmalloc(sizeof(*node), GFP_NOFS);
1208        BUG_ON(!node);
1209
1210        node->bytenr = root->node->start;
1211        node->data = root;
1212
1213        spin_lock(&rc->reloc_root_tree.lock);
1214        rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1215                              node->bytenr, &node->rb_node);
1216        spin_unlock(&rc->reloc_root_tree.lock);
1217        BUG_ON(rb_node);
1218
1219        list_add_tail(&root->root_list, &rc->reloc_roots);
1220        return 0;
1221}
1222
1223/*
1224 * helper to update/delete the 'address of tree root -> reloc tree'
1225 * mapping
1226 */
1227static int __update_reloc_root(struct btrfs_root *root, int del)
1228{
1229        struct rb_node *rb_node;
1230        struct mapping_node *node = NULL;
1231        struct reloc_control *rc = root->fs_info->reloc_ctl;
1232
1233        spin_lock(&rc->reloc_root_tree.lock);
1234        rb_node = tree_search(&rc->reloc_root_tree.rb_root,
1235                              root->commit_root->start);
1236        if (rb_node) {
1237                node = rb_entry(rb_node, struct mapping_node, rb_node);
1238                rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
1239        }
1240        spin_unlock(&rc->reloc_root_tree.lock);
1241
1242        BUG_ON((struct btrfs_root *)node->data != root);
1243
1244        if (!del) {
1245                spin_lock(&rc->reloc_root_tree.lock);
1246                node->bytenr = root->node->start;
1247                rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1248                                      node->bytenr, &node->rb_node);
1249                spin_unlock(&rc->reloc_root_tree.lock);
1250                BUG_ON(rb_node);
1251        } else {
1252                list_del_init(&root->root_list);
1253                kfree(node);
1254        }
1255        return 0;
1256}
1257
1258static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
1259                                        struct btrfs_root *root, u64 objectid)
1260{
1261        struct btrfs_root *reloc_root;
1262        struct extent_buffer *eb;
1263        struct btrfs_root_item *root_item;
1264        struct btrfs_key root_key;
1265        int ret;
1266
1267        root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
1268        BUG_ON(!root_item);
1269
1270        root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
1271        root_key.type = BTRFS_ROOT_ITEM_KEY;
1272        root_key.offset = objectid;
1273
1274        if (root->root_key.objectid == objectid) {
1275                /* called by btrfs_init_reloc_root */
1276                ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
1277                                      BTRFS_TREE_RELOC_OBJECTID);
1278                BUG_ON(ret);
1279
1280                btrfs_set_root_last_snapshot(&root->root_item,
1281                                             trans->transid - 1);
1282        } else {
1283                /*
1284                 * called by btrfs_reloc_post_snapshot_hook.
1285                 * the source tree is a reloc tree, all tree blocks
1286                 * modified after it was created have RELOC flag
1287                 * set in their headers. so it's OK to not update
1288                 * the 'last_snapshot'.
1289                 */
1290                ret = btrfs_copy_root(trans, root, root->node, &eb,
1291                                      BTRFS_TREE_RELOC_OBJECTID);
1292                BUG_ON(ret);
1293        }
1294
1295        memcpy(root_item, &root->root_item, sizeof(*root_item));
1296        btrfs_set_root_bytenr(root_item, eb->start);
1297        btrfs_set_root_level(root_item, btrfs_header_level(eb));
1298        btrfs_set_root_generation(root_item, trans->transid);
1299
1300        if (root->root_key.objectid == objectid) {
1301                btrfs_set_root_refs(root_item, 0);
1302                memset(&root_item->drop_progress, 0,
1303                       sizeof(struct btrfs_disk_key));
1304                root_item->drop_level = 0;
1305        }
1306
1307        btrfs_tree_unlock(eb);
1308        free_extent_buffer(eb);
1309
1310        ret = btrfs_insert_root(trans, root->fs_info->tree_root,
1311                                &root_key, root_item);
1312        BUG_ON(ret);
1313        kfree(root_item);
1314
1315        reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
1316                                                 &root_key);
1317        BUG_ON(IS_ERR(reloc_root));
1318        reloc_root->last_trans = trans->transid;
1319        return reloc_root;
1320}
1321
1322/*
1323 * create reloc tree for a given fs tree. reloc tree is just a
1324 * snapshot of the fs tree with special root objectid.
1325 */
1326int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
1327                          struct btrfs_root *root)
1328{
1329        struct btrfs_root *reloc_root;
1330        struct reloc_control *rc = root->fs_info->reloc_ctl;
1331        int clear_rsv = 0;
1332
1333        if (root->reloc_root) {
1334                reloc_root = root->reloc_root;
1335                reloc_root->last_trans = trans->transid;
1336                return 0;
1337        }
1338
1339        if (!rc || !rc->create_reloc_tree ||
1340            root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1341                return 0;
1342
1343        if (!trans->block_rsv) {
1344                trans->block_rsv = rc->block_rsv;
1345                clear_rsv = 1;
1346        }
1347        reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
1348        if (clear_rsv)
1349                trans->block_rsv = NULL;
1350
1351        __add_reloc_root(reloc_root);
1352        root->reloc_root = reloc_root;
1353        return 0;
1354}
1355
1356/*
1357 * update root item of reloc tree
1358 */
1359int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
1360                            struct btrfs_root *root)
1361{
1362        struct btrfs_root *reloc_root;
1363        struct btrfs_root_item *root_item;
1364        int del = 0;
1365        int ret;
1366
1367        if (!root->reloc_root)
1368                return 0;
1369
1370        reloc_root = root->reloc_root;
1371        root_item = &reloc_root->root_item;
1372
1373        if (root->fs_info->reloc_ctl->merge_reloc_tree &&
1374            btrfs_root_refs(root_item) == 0) {
1375                root->reloc_root = NULL;
1376                del = 1;
1377        }
1378
1379        __update_reloc_root(reloc_root, del);
1380
1381        if (reloc_root->commit_root != reloc_root->node) {
1382                btrfs_set_root_node(root_item, reloc_root->node);
1383                free_extent_buffer(reloc_root->commit_root);
1384                reloc_root->commit_root = btrfs_root_node(reloc_root);
1385        }
1386
1387        ret = btrfs_update_root(trans, root->fs_info->tree_root,
1388                                &reloc_root->root_key, root_item);
1389        BUG_ON(ret);
1390        return 0;
1391}
1392
1393/*
1394 * helper to find first cached inode with inode number >= objectid
1395 * in a subvolume
1396 */
1397static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
1398{
1399        struct rb_node *node;
1400        struct rb_node *prev;
1401        struct btrfs_inode *entry;
1402        struct inode *inode;
1403
1404        spin_lock(&root->inode_lock);
1405again:
1406        node = root->inode_tree.rb_node;
1407        prev = NULL;
1408        while (node) {
1409                prev = node;
1410                entry = rb_entry(node, struct btrfs_inode, rb_node);
1411
1412                if (objectid < entry->vfs_inode.i_ino)
1413                        node = node->rb_left;
1414                else if (objectid > entry->vfs_inode.i_ino)
1415                        node = node->rb_right;
1416                else
1417                        break;
1418        }
1419        if (!node) {
1420                while (prev) {
1421                        entry = rb_entry(prev, struct btrfs_inode, rb_node);
1422                        if (objectid <= entry->vfs_inode.i_ino) {
1423                                node = prev;
1424                                break;
1425                        }
1426                        prev = rb_next(prev);
1427                }
1428        }
1429        while (node) {
1430                entry = rb_entry(node, struct btrfs_inode, rb_node);
1431                inode = igrab(&entry->vfs_inode);
1432                if (inode) {
1433                        spin_unlock(&root->inode_lock);
1434                        return inode;
1435                }
1436
1437                objectid = entry->vfs_inode.i_ino + 1;
1438                if (cond_resched_lock(&root->inode_lock))
1439                        goto again;
1440
1441                node = rb_next(node);
1442        }
1443        spin_unlock(&root->inode_lock);
1444        return NULL;
1445}
1446
1447static int in_block_group(u64 bytenr,
1448                          struct btrfs_block_group_cache *block_group)
1449{
1450        if (bytenr >= block_group->key.objectid &&
1451            bytenr < block_group->key.objectid + block_group->key.offset)
1452                return 1;
1453        return 0;
1454}
1455
1456/*
1457 * get new location of data
1458 */
1459static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1460                            u64 bytenr, u64 num_bytes)
1461{
1462        struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
1463        struct btrfs_path *path;
1464        struct btrfs_file_extent_item *fi;
1465        struct extent_buffer *leaf;
1466        int ret;
1467
1468        path = btrfs_alloc_path();
1469        if (!path)
1470                return -ENOMEM;
1471
1472        bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1473        ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
1474                                       bytenr, 0);
1475        if (ret < 0)
1476                goto out;
1477        if (ret > 0) {
1478                ret = -ENOENT;
1479                goto out;
1480        }
1481
1482        leaf = path->nodes[0];
1483        fi = btrfs_item_ptr(leaf, path->slots[0],
1484                            struct btrfs_file_extent_item);
1485
1486        BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
1487               btrfs_file_extent_compression(leaf, fi) ||
1488               btrfs_file_extent_encryption(leaf, fi) ||
1489               btrfs_file_extent_other_encoding(leaf, fi));
1490
1491        if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1492                ret = 1;
1493                goto out;
1494        }
1495
1496        *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1497        ret = 0;
1498out:
1499        btrfs_free_path(path);
1500        return ret;
1501}
1502
1503/*
1504 * update file extent items in the tree leaf to point to
1505 * the new locations.
1506 */
1507static noinline_for_stack
1508int replace_file_extents(struct btrfs_trans_handle *trans,
1509                         struct reloc_control *rc,
1510                         struct btrfs_root *root,
1511                         struct extent_buffer *leaf)
1512{
1513        struct btrfs_key key;
1514        struct btrfs_file_extent_item *fi;
1515        struct inode *inode = NULL;
1516        u64 parent;
1517        u64 bytenr;
1518        u64 new_bytenr = 0;
1519        u64 num_bytes;
1520        u64 end;
1521        u32 nritems;
1522        u32 i;
1523        int ret;
1524        int first = 1;
1525        int dirty = 0;
1526
1527        if (rc->stage != UPDATE_DATA_PTRS)
1528                return 0;
1529
1530        /* reloc trees always use full backref */
1531        if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1532                parent = leaf->start;
1533        else
1534                parent = 0;
1535
1536        nritems = btrfs_header_nritems(leaf);
1537        for (i = 0; i < nritems; i++) {
1538                cond_resched();
1539                btrfs_item_key_to_cpu(leaf, &key, i);
1540                if (key.type != BTRFS_EXTENT_DATA_KEY)
1541                        continue;
1542                fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1543                if (btrfs_file_extent_type(leaf, fi) ==
1544                    BTRFS_FILE_EXTENT_INLINE)
1545                        continue;
1546                bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1547                num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1548                if (bytenr == 0)
1549                        continue;
1550                if (!in_block_group(bytenr, rc->block_group))
1551                        continue;
1552
1553                /*
1554                 * if we are modifying block in fs tree, wait for readpage
1555                 * to complete and drop the extent cache
1556                 */
1557                if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1558                        if (first) {
1559                                inode = find_next_inode(root, key.objectid);
1560                                first = 0;
1561                        } else if (inode && inode->i_ino < key.objectid) {
1562                                btrfs_add_delayed_iput(inode);
1563                                inode = find_next_inode(root, key.objectid);
1564                        }
1565                        if (inode && inode->i_ino == key.objectid) {
1566                                end = key.offset +
1567                                      btrfs_file_extent_num_bytes(leaf, fi);
1568                                WARN_ON(!IS_ALIGNED(key.offset,
1569                                                    root->sectorsize));
1570                                WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1571                                end--;
1572                                ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1573                                                      key.offset, end,
1574                                                      GFP_NOFS);
1575                                if (!ret)
1576                                        continue;
1577
1578                                btrfs_drop_extent_cache(inode, key.offset, end,
1579                                                        1);
1580                                unlock_extent(&BTRFS_I(inode)->io_tree,
1581                                              key.offset, end, GFP_NOFS);
1582                        }
1583                }
1584
1585                ret = get_new_location(rc->data_inode, &new_bytenr,
1586                                       bytenr, num_bytes);
1587                if (ret > 0) {
1588                        WARN_ON(1);
1589                        continue;
1590                }
1591                BUG_ON(ret < 0);
1592
1593                btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1594                dirty = 1;
1595
1596                key.offset -= btrfs_file_extent_offset(leaf, fi);
1597                ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
1598                                           num_bytes, parent,
1599                                           btrfs_header_owner(leaf),
1600                                           key.objectid, key.offset);
1601                BUG_ON(ret);
1602
1603                ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1604                                        parent, btrfs_header_owner(leaf),
1605                                        key.objectid, key.offset);
1606                BUG_ON(ret);
1607        }
1608        if (dirty)
1609                btrfs_mark_buffer_dirty(leaf);
1610        if (inode)
1611                btrfs_add_delayed_iput(inode);
1612        return 0;
1613}
1614
1615static noinline_for_stack
1616int memcmp_node_keys(struct extent_buffer *eb, int slot,
1617                     struct btrfs_path *path, int level)
1618{
1619        struct btrfs_disk_key key1;
1620        struct btrfs_disk_key key2;
1621        btrfs_node_key(eb, &key1, slot);
1622        btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1623        return memcmp(&key1, &key2, sizeof(key1));
1624}
1625
1626/*
1627 * try to replace tree blocks in fs tree with the new blocks
1628 * in reloc tree. tree blocks haven't been modified since the
1629 * reloc tree was create can be replaced.
1630 *
1631 * if a block was replaced, level of the block + 1 is returned.
1632 * if no block got replaced, 0 is returned. if there are other
1633 * errors, a negative error number is returned.
1634 */
1635static noinline_for_stack
1636int replace_path(struct btrfs_trans_handle *trans,
1637                 struct btrfs_root *dest, struct btrfs_root *src,
1638                 struct btrfs_path *path, struct btrfs_key *next_key,
1639                 int lowest_level, int max_level)
1640{
1641        struct extent_buffer *eb;
1642        struct extent_buffer *parent;
1643        struct btrfs_key key;
1644        u64 old_bytenr;
1645        u64 new_bytenr;
1646        u64 old_ptr_gen;
1647        u64 new_ptr_gen;
1648        u64 last_snapshot;
1649        u32 blocksize;
1650        int cow = 0;
1651        int level;
1652        int ret;
1653        int slot;
1654
1655        BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1656        BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
1657
1658        last_snapshot = btrfs_root_last_snapshot(&src->root_item);
1659again:
1660        slot = path->slots[lowest_level];
1661        btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
1662
1663        eb = btrfs_lock_root_node(dest);
1664        btrfs_set_lock_blocking(eb);
1665        level = btrfs_header_level(eb);
1666
1667        if (level < lowest_level) {
1668                btrfs_tree_unlock(eb);
1669                free_extent_buffer(eb);
1670                return 0;
1671        }
1672
1673        if (cow) {
1674                ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
1675                BUG_ON(ret);
1676        }
1677        btrfs_set_lock_blocking(eb);
1678
1679        if (next_key) {
1680                next_key->objectid = (u64)-1;
1681                next_key->type = (u8)-1;
1682                next_key->offset = (u64)-1;
1683        }
1684
1685        parent = eb;
1686        while (1) {
1687                level = btrfs_header_level(parent);
1688                BUG_ON(level < lowest_level);
1689
1690                ret = btrfs_bin_search(parent, &key, level, &slot);
1691                if (ret && slot > 0)
1692                        slot--;
1693
1694                if (next_key && slot + 1 < btrfs_header_nritems(parent))
1695                        btrfs_node_key_to_cpu(parent, next_key, slot + 1);
1696
1697                old_bytenr = btrfs_node_blockptr(parent, slot);
1698                blocksize = btrfs_level_size(dest, level - 1);
1699                old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
1700
1701                if (level <= max_level) {
1702                        eb = path->nodes[level];
1703                        new_bytenr = btrfs_node_blockptr(eb,
1704                                                        path->slots[level]);
1705                        new_ptr_gen = btrfs_node_ptr_generation(eb,
1706                                                        path->slots[level]);
1707                } else {
1708                        new_bytenr = 0;
1709                        new_ptr_gen = 0;
1710                }
1711
1712                if (new_bytenr > 0 && new_bytenr == old_bytenr) {
1713                        WARN_ON(1);
1714                        ret = level;
1715                        break;
1716                }
1717
1718                if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
1719                    memcmp_node_keys(parent, slot, path, level)) {
1720                        if (level <= lowest_level) {
1721                                ret = 0;
1722                                break;
1723                        }
1724
1725                        eb = read_tree_block(dest, old_bytenr, blocksize,
1726                                             old_ptr_gen);
1727                        btrfs_tree_lock(eb);
1728                        if (cow) {
1729                                ret = btrfs_cow_block(trans, dest, eb, parent,
1730                                                      slot, &eb);
1731                                BUG_ON(ret);
1732                        }
1733                        btrfs_set_lock_blocking(eb);
1734
1735                        btrfs_tree_unlock(parent);
1736                        free_extent_buffer(parent);
1737
1738                        parent = eb;
1739                        continue;
1740                }
1741
1742                if (!cow) {
1743                        btrfs_tree_unlock(parent);
1744                        free_extent_buffer(parent);
1745                        cow = 1;
1746                        goto again;
1747                }
1748
1749                btrfs_node_key_to_cpu(path->nodes[level], &key,
1750                                      path->slots[level]);
1751                btrfs_release_path(src, path);
1752
1753                path->lowest_level = level;
1754                ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
1755                path->lowest_level = 0;
1756                BUG_ON(ret);
1757
1758                /*
1759                 * swap blocks in fs tree and reloc tree.
1760                 */
1761                btrfs_set_node_blockptr(parent, slot, new_bytenr);
1762                btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
1763                btrfs_mark_buffer_dirty(parent);
1764
1765                btrfs_set_node_blockptr(path->nodes[level],
1766                                        path->slots[level], old_bytenr);
1767                btrfs_set_node_ptr_generation(path->nodes[level],
1768                                              path->slots[level], old_ptr_gen);
1769                btrfs_mark_buffer_dirty(path->nodes[level]);
1770
1771                ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
1772                                        path->nodes[level]->start,
1773                                        src->root_key.objectid, level - 1, 0);
1774                BUG_ON(ret);
1775                ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
1776                                        0, dest->root_key.objectid, level - 1,
1777                                        0);
1778                BUG_ON(ret);
1779
1780                ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
1781                                        path->nodes[level]->start,
1782                                        src->root_key.objectid, level - 1, 0);
1783                BUG_ON(ret);
1784
1785                ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
1786                                        0, dest->root_key.objectid, level - 1,
1787                                        0);
1788                BUG_ON(ret);
1789
1790                btrfs_unlock_up_safe(path, 0);
1791
1792                ret = level;
1793                break;
1794        }
1795        btrfs_tree_unlock(parent);
1796        free_extent_buffer(parent);
1797        return ret;
1798}
1799
1800/*
1801 * helper to find next relocated block in reloc tree
1802 */
1803static noinline_for_stack
1804int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1805                       int *level)
1806{
1807        struct extent_buffer *eb;
1808        int i;
1809        u64 last_snapshot;
1810        u32 nritems;
1811
1812        last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1813
1814        for (i = 0; i < *level; i++) {
1815                free_extent_buffer(path->nodes[i]);
1816                path->nodes[i] = NULL;
1817        }
1818
1819        for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
1820                eb = path->nodes[i];
1821                nritems = btrfs_header_nritems(eb);
1822                while (path->slots[i] + 1 < nritems) {
1823                        path->slots[i]++;
1824                        if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
1825                            last_snapshot)
1826                                continue;
1827
1828                        *level = i;
1829                        return 0;
1830                }
1831                free_extent_buffer(path->nodes[i]);
1832                path->nodes[i] = NULL;
1833        }
1834        return 1;
1835}
1836
1837/*
1838 * walk down reloc tree to find relocated block of lowest level
1839 */
1840static noinline_for_stack
1841int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1842                         int *level)
1843{
1844        struct extent_buffer *eb = NULL;
1845        int i;
1846        u64 bytenr;
1847        u64 ptr_gen = 0;
1848        u64 last_snapshot;
1849        u32 blocksize;
1850        u32 nritems;
1851
1852        last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1853
1854        for (i = *level; i > 0; i--) {
1855                eb = path->nodes[i];
1856                nritems = btrfs_header_nritems(eb);
1857                while (path->slots[i] < nritems) {
1858                        ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
1859                        if (ptr_gen > last_snapshot)
1860                                break;
1861                        path->slots[i]++;
1862                }
1863                if (path->slots[i] >= nritems) {
1864                        if (i == *level)
1865                                break;
1866                        *level = i + 1;
1867                        return 0;
1868                }
1869                if (i == 1) {
1870                        *level = i;
1871                        return 0;
1872                }
1873
1874                bytenr = btrfs_node_blockptr(eb, path->slots[i]);
1875                blocksize = btrfs_level_size(root, i - 1);
1876                eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
1877                BUG_ON(btrfs_header_level(eb) != i - 1);
1878                path->nodes[i - 1] = eb;
1879                path->slots[i - 1] = 0;
1880        }
1881        return 1;
1882}
1883
1884/*
1885 * invalidate extent cache for file extents whose key in range of
1886 * [min_key, max_key)
1887 */
1888static int invalidate_extent_cache(struct btrfs_root *root,
1889                                   struct btrfs_key *min_key,
1890                                   struct btrfs_key *max_key)
1891{
1892        struct inode *inode = NULL;
1893        u64 objectid;
1894        u64 start, end;
1895
1896        objectid = min_key->objectid;
1897        while (1) {
1898                cond_resched();
1899                iput(inode);
1900
1901                if (objectid > max_key->objectid)
1902                        break;
1903
1904                inode = find_next_inode(root, objectid);
1905                if (!inode)
1906                        break;
1907
1908                if (inode->i_ino > max_key->objectid) {
1909                        iput(inode);
1910                        break;
1911                }
1912
1913                objectid = inode->i_ino + 1;
1914                if (!S_ISREG(inode->i_mode))
1915                        continue;
1916
1917                if (unlikely(min_key->objectid == inode->i_ino)) {
1918                        if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1919                                continue;
1920                        if (min_key->type < BTRFS_EXTENT_DATA_KEY)
1921                                start = 0;
1922                        else {
1923                                start = min_key->offset;
1924                                WARN_ON(!IS_ALIGNED(start, root->sectorsize));
1925                        }
1926                } else {
1927                        start = 0;
1928                }
1929
1930                if (unlikely(max_key->objectid == inode->i_ino)) {
1931                        if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1932                                continue;
1933                        if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
1934                                end = (u64)-1;
1935                        } else {
1936                                if (max_key->offset == 0)
1937                                        continue;
1938                                end = max_key->offset;
1939                                WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1940                                end--;
1941                        }
1942                } else {
1943                        end = (u64)-1;
1944                }
1945
1946                /* the lock_extent waits for readpage to complete */
1947                lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1948                btrfs_drop_extent_cache(inode, start, end, 1);
1949                unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1950        }
1951        return 0;
1952}
1953
1954static int find_next_key(struct btrfs_path *path, int level,
1955                         struct btrfs_key *key)
1956
1957{
1958        while (level < BTRFS_MAX_LEVEL) {
1959                if (!path->nodes[level])
1960                        break;
1961                if (path->slots[level] + 1 <
1962                    btrfs_header_nritems(path->nodes[level])) {
1963                        btrfs_node_key_to_cpu(path->nodes[level], key,
1964                                              path->slots[level] + 1);
1965                        return 0;
1966                }
1967                level++;
1968        }
1969        return 1;
1970}
1971
1972/*
1973 * merge the relocated tree blocks in reloc tree with corresponding
1974 * fs tree.
1975 */
1976static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1977                                               struct btrfs_root *root)
1978{
1979        LIST_HEAD(inode_list);
1980        struct btrfs_key key;
1981        struct btrfs_key next_key;
1982        struct btrfs_trans_handle *trans;
1983        struct btrfs_root *reloc_root;
1984        struct btrfs_root_item *root_item;
1985        struct btrfs_path *path;
1986        struct extent_buffer *leaf;
1987        unsigned long nr;
1988        int level;
1989        int max_level;
1990        int replaced = 0;
1991        int ret;
1992        int err = 0;
1993        u32 min_reserved;
1994
1995        path = btrfs_alloc_path();
1996        if (!path)
1997                return -ENOMEM;
1998
1999        reloc_root = root->reloc_root;
2000        root_item = &reloc_root->root_item;
2001
2002        if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2003                level = btrfs_root_level(root_item);
2004                extent_buffer_get(reloc_root->node);
2005                path->nodes[level] = reloc_root->node;
2006                path->slots[level] = 0;
2007        } else {
2008                btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2009
2010                level = root_item->drop_level;
2011                BUG_ON(level == 0);
2012                path->lowest_level = level;
2013                ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
2014                path->lowest_level = 0;
2015                if (ret < 0) {
2016                        btrfs_free_path(path);
2017                        return ret;
2018                }
2019
2020                btrfs_node_key_to_cpu(path->nodes[level], &next_key,
2021                                      path->slots[level]);
2022                WARN_ON(memcmp(&key, &next_key, sizeof(key)));
2023
2024                btrfs_unlock_up_safe(path, 0);
2025        }
2026
2027        min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2028        memset(&next_key, 0, sizeof(next_key));
2029
2030        while (1) {
2031                trans = btrfs_start_transaction(root, 0);
2032                BUG_ON(IS_ERR(trans));
2033                trans->block_rsv = rc->block_rsv;
2034
2035                ret = btrfs_block_rsv_check(trans, root, rc->block_rsv,
2036                                            min_reserved, 0);
2037                if (ret) {
2038                        BUG_ON(ret != -EAGAIN);
2039                        ret = btrfs_commit_transaction(trans, root);
2040                        BUG_ON(ret);
2041                        continue;
2042                }
2043
2044                replaced = 0;
2045                max_level = level;
2046
2047                ret = walk_down_reloc_tree(reloc_root, path, &level);
2048                if (ret < 0) {
2049                        err = ret;
2050                        goto out;
2051                }
2052                if (ret > 0)
2053                        break;
2054
2055                if (!find_next_key(path, level, &key) &&
2056                    btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
2057                        ret = 0;
2058                } else {
2059                        ret = replace_path(trans, root, reloc_root, path,
2060                                           &next_key, level, max_level);
2061                }
2062                if (ret < 0) {
2063                        err = ret;
2064                        goto out;
2065                }
2066
2067                if (ret > 0) {
2068                        level = ret;
2069                        btrfs_node_key_to_cpu(path->nodes[level], &key,
2070                                              path->slots[level]);
2071                        replaced = 1;
2072                }
2073
2074                ret = walk_up_reloc_tree(reloc_root, path, &level);
2075                if (ret > 0)
2076                        break;
2077
2078                BUG_ON(level == 0);
2079                /*
2080                 * save the merging progress in the drop_progress.
2081                 * this is OK since root refs == 1 in this case.
2082                 */
2083                btrfs_node_key(path->nodes[level], &root_item->drop_progress,
2084                               path->slots[level]);
2085                root_item->drop_level = level;
2086
2087                nr = trans->blocks_used;
2088                btrfs_end_transaction_throttle(trans, root);
2089
2090                btrfs_btree_balance_dirty(root, nr);
2091
2092                if (replaced && rc->stage == UPDATE_DATA_PTRS)
2093                        invalidate_extent_cache(root, &key, &next_key);
2094        }
2095
2096        /*
2097         * handle the case only one block in the fs tree need to be
2098         * relocated and the block is tree root.
2099         */
2100        leaf = btrfs_lock_root_node(root);
2101        ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
2102        btrfs_tree_unlock(leaf);
2103        free_extent_buffer(leaf);
2104        if (ret < 0)
2105                err = ret;
2106out:
2107        btrfs_free_path(path);
2108
2109        if (err == 0) {
2110                memset(&root_item->drop_progress, 0,
2111                       sizeof(root_item->drop_progress));
2112                root_item->drop_level = 0;
2113                btrfs_set_root_refs(root_item, 0);
2114                btrfs_update_reloc_root(trans, root);
2115        }
2116
2117        nr = trans->blocks_used;
2118        btrfs_end_transaction_throttle(trans, root);
2119
2120        btrfs_btree_balance_dirty(root, nr);
2121
2122        if (replaced && rc->stage == UPDATE_DATA_PTRS)
2123                invalidate_extent_cache(root, &key, &next_key);
2124
2125        return err;
2126}
2127
2128static noinline_for_stack
2129int prepare_to_merge(struct reloc_control *rc, int err)
2130{
2131        struct btrfs_root *root = rc->extent_root;
2132        struct btrfs_root *reloc_root;
2133        struct btrfs_trans_handle *trans;
2134        LIST_HEAD(reloc_roots);
2135        u64 num_bytes = 0;
2136        int ret;
2137
2138        mutex_lock(&root->fs_info->trans_mutex);
2139        rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2140        rc->merging_rsv_size += rc->nodes_relocated * 2;
2141        mutex_unlock(&root->fs_info->trans_mutex);
2142again:
2143        if (!err) {
2144                num_bytes = rc->merging_rsv_size;
2145                ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
2146                                          num_bytes);
2147                if (ret)
2148                        err = ret;
2149        }
2150
2151        trans = btrfs_join_transaction(rc->extent_root, 1);
2152        if (IS_ERR(trans)) {
2153                if (!err)
2154                        btrfs_block_rsv_release(rc->extent_root,
2155                                                rc->block_rsv, num_bytes);
2156                return PTR_ERR(trans);
2157        }
2158
2159        if (!err) {
2160                if (num_bytes != rc->merging_rsv_size) {
2161                        btrfs_end_transaction(trans, rc->extent_root);
2162                        btrfs_block_rsv_release(rc->extent_root,
2163                                                rc->block_rsv, num_bytes);
2164                        goto again;
2165                }
2166        }
2167
2168        rc->merge_reloc_tree = 1;
2169
2170        while (!list_empty(&rc->reloc_roots)) {
2171                reloc_root = list_entry(rc->reloc_roots.next,
2172                                        struct btrfs_root, root_list);
2173                list_del_init(&reloc_root->root_list);
2174
2175                root = read_fs_root(reloc_root->fs_info,
2176                                    reloc_root->root_key.offset);
2177                BUG_ON(IS_ERR(root));
2178                BUG_ON(root->reloc_root != reloc_root);
2179
2180                /*
2181                 * set reference count to 1, so btrfs_recover_relocation
2182                 * knows it should resumes merging
2183                 */
2184                if (!err)
2185                        btrfs_set_root_refs(&reloc_root->root_item, 1);
2186                btrfs_update_reloc_root(trans, root);
2187
2188                list_add(&reloc_root->root_list, &reloc_roots);
2189        }
2190
2191        list_splice(&reloc_roots, &rc->reloc_roots);
2192
2193        if (!err)
2194                btrfs_commit_transaction(trans, rc->extent_root);
2195        else
2196                btrfs_end_transaction(trans, rc->extent_root);
2197        return err;
2198}
2199
2200static noinline_for_stack
2201int merge_reloc_roots(struct reloc_control *rc)
2202{
2203        struct btrfs_root *root;
2204        struct btrfs_root *reloc_root;
2205        LIST_HEAD(reloc_roots);
2206        int found = 0;
2207        int ret;
2208again:
2209        root = rc->extent_root;
2210        mutex_lock(&root->fs_info->trans_mutex);
2211        list_splice_init(&rc->reloc_roots, &reloc_roots);
2212        mutex_unlock(&root->fs_info->trans_mutex);
2213
2214        while (!list_empty(&reloc_roots)) {
2215                found = 1;
2216                reloc_root = list_entry(reloc_roots.next,
2217                                        struct btrfs_root, root_list);
2218
2219                if (btrfs_root_refs(&reloc_root->root_item) > 0) {
2220                        root = read_fs_root(reloc_root->fs_info,
2221                                            reloc_root->root_key.offset);
2222                        BUG_ON(IS_ERR(root));
2223                        BUG_ON(root->reloc_root != reloc_root);
2224
2225                        ret = merge_reloc_root(rc, root);
2226                        BUG_ON(ret);
2227                } else {
2228                        list_del_init(&reloc_root->root_list);
2229                }
2230                btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0);
2231        }
2232
2233        if (found) {
2234                found = 0;
2235                goto again;
2236        }
2237        BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
2238        return 0;
2239}
2240
2241static void free_block_list(struct rb_root *blocks)
2242{
2243        struct tree_block *block;
2244        struct rb_node *rb_node;
2245        while ((rb_node = rb_first(blocks))) {
2246                block = rb_entry(rb_node, struct tree_block, rb_node);
2247                rb_erase(rb_node, blocks);
2248                kfree(block);
2249        }
2250}
2251
2252static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
2253                                      struct btrfs_root *reloc_root)
2254{
2255        struct btrfs_root *root;
2256
2257        if (reloc_root->last_trans == trans->transid)
2258                return 0;
2259
2260        root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
2261        BUG_ON(IS_ERR(root));
2262        BUG_ON(root->reloc_root != reloc_root);
2263
2264        return btrfs_record_root_in_trans(trans, root);
2265}
2266
2267static noinline_for_stack
2268struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
2269                                     struct reloc_control *rc,
2270                                     struct backref_node *node,
2271                                     struct backref_edge *edges[], int *nr)
2272{
2273        struct backref_node *next;
2274        struct btrfs_root *root;
2275        int index = 0;
2276
2277        next = node;
2278        while (1) {
2279                cond_resched();
2280                next = walk_up_backref(next, edges, &index);
2281                root = next->root;
2282                BUG_ON(!root);
2283                BUG_ON(!root->ref_cows);
2284
2285                if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2286                        record_reloc_root_in_trans(trans, root);
2287                        break;
2288                }
2289
2290                btrfs_record_root_in_trans(trans, root);
2291                root = root->reloc_root;
2292
2293                if (next->new_bytenr != root->node->start) {
2294                        BUG_ON(next->new_bytenr);
2295                        BUG_ON(!list_empty(&next->list));
2296                        next->new_bytenr = root->node->start;
2297                        next->root = root;
2298                        list_add_tail(&next->list,
2299                                      &rc->backref_cache.changed);
2300                        __mark_block_processed(rc, next);
2301                        break;
2302                }
2303
2304                WARN_ON(1);
2305                root = NULL;
2306                next = walk_down_backref(edges, &index);
2307                if (!next || next->level <= node->level)
2308                        break;
2309        }
2310        if (!root)
2311                return NULL;
2312
2313        *nr = index;
2314        next = node;
2315        /* setup backref node path for btrfs_reloc_cow_block */
2316        while (1) {
2317                rc->backref_cache.path[next->level] = next;
2318                if (--index < 0)
2319                        break;
2320                next = edges[index]->node[UPPER];
2321        }
2322        return root;
2323}
2324
2325/*
2326 * select a tree root for relocation. return NULL if the block
2327 * is reference counted. we should use do_relocation() in this
2328 * case. return a tree root pointer if the block isn't reference
2329 * counted. return -ENOENT if the block is root of reloc tree.
2330 */
2331static noinline_for_stack
2332struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
2333                                   struct backref_node *node)
2334{
2335        struct backref_node *next;
2336        struct btrfs_root *root;
2337        struct btrfs_root *fs_root = NULL;
2338        struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2339        int index = 0;
2340
2341        next = node;
2342        while (1) {
2343                cond_resched();
2344                next = walk_up_backref(next, edges, &index);
2345                root = next->root;
2346                BUG_ON(!root);
2347
2348                /* no other choice for non-refernce counted tree */
2349                if (!root->ref_cows)
2350                        return root;
2351
2352                if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
2353                        fs_root = root;
2354
2355                if (next != node)
2356                        return NULL;
2357
2358                next = walk_down_backref(edges, &index);
2359                if (!next || next->level <= node->level)
2360                        break;
2361        }
2362
2363        if (!fs_root)
2364                return ERR_PTR(-ENOENT);
2365        return fs_root;
2366}
2367
2368static noinline_for_stack
2369u64 calcu_metadata_size(struct reloc_control *rc,
2370                        struct backref_node *node, int reserve)
2371{
2372        struct backref_node *next = node;
2373        struct backref_edge *edge;
2374        struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2375        u64 num_bytes = 0;
2376        int index = 0;
2377
2378        BUG_ON(reserve && node->processed);
2379
2380        while (next) {
2381                cond_resched();
2382                while (1) {
2383                        if (next->processed && (reserve || next != node))
2384                                break;
2385
2386                        num_bytes += btrfs_level_size(rc->extent_root,
2387                                                      next->level);
2388
2389                        if (list_empty(&next->upper))
2390                                break;
2391
2392                        edge = list_entry(next->upper.next,
2393                                          struct backref_edge, list[LOWER]);
2394                        edges[index++] = edge;
2395                        next = edge->node[UPPER];
2396                }
2397                next = walk_down_backref(edges, &index);
2398        }
2399        return num_bytes;
2400}
2401
2402static int reserve_metadata_space(struct btrfs_trans_handle *trans,
2403                                  struct reloc_control *rc,
2404                                  struct backref_node *node)
2405{
2406        struct btrfs_root *root = rc->extent_root;
2407        u64 num_bytes;
2408        int ret;
2409
2410        num_bytes = calcu_metadata_size(rc, node, 1) * 2;
2411
2412        trans->block_rsv = rc->block_rsv;
2413        ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
2414        if (ret) {
2415                if (ret == -EAGAIN)
2416                        rc->commit_transaction = 1;
2417                return ret;
2418        }
2419
2420        return 0;
2421}
2422
2423static void release_metadata_space(struct reloc_control *rc,
2424                                   struct backref_node *node)
2425{
2426        u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2;
2427        btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes);
2428}
2429
2430/*
2431 * relocate a block tree, and then update pointers in upper level
2432 * blocks that reference the block to point to the new location.
2433 *
2434 * if called by link_to_upper, the block has already been relocated.
2435 * in that case this function just updates pointers.
2436 */
2437static int do_relocation(struct btrfs_trans_handle *trans,
2438                         struct reloc_control *rc,
2439                         struct backref_node *node,
2440                         struct btrfs_key *key,
2441                         struct btrfs_path *path, int lowest)
2442{
2443        struct backref_node *upper;
2444        struct backref_edge *edge;
2445        struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2446        struct btrfs_root *root;
2447        struct extent_buffer *eb;
2448        u32 blocksize;
2449        u64 bytenr;
2450        u64 generation;
2451        int nr;
2452        int slot;
2453        int ret;
2454        int err = 0;
2455
2456        BUG_ON(lowest && node->eb);
2457
2458        path->lowest_level = node->level + 1;
2459        rc->backref_cache.path[node->level] = node;
2460        list_for_each_entry(edge, &node->upper, list[LOWER]) {
2461                cond_resched();
2462
2463                upper = edge->node[UPPER];
2464                root = select_reloc_root(trans, rc, upper, edges, &nr);
2465                BUG_ON(!root);
2466
2467                if (upper->eb && !upper->locked) {
2468                        if (!lowest) {
2469                                ret = btrfs_bin_search(upper->eb, key,
2470                                                       upper->level, &slot);
2471                                BUG_ON(ret);
2472                                bytenr = btrfs_node_blockptr(upper->eb, slot);
2473                                if (node->eb->start == bytenr)
2474                                        goto next;
2475                        }
2476                        drop_node_buffer(upper);
2477                }
2478
2479                if (!upper->eb) {
2480                        ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2481                        if (ret < 0) {
2482                                err = ret;
2483                                break;
2484                        }
2485                        BUG_ON(ret > 0);
2486
2487                        if (!upper->eb) {
2488                                upper->eb = path->nodes[upper->level];
2489                                path->nodes[upper->level] = NULL;
2490                        } else {
2491                                BUG_ON(upper->eb != path->nodes[upper->level]);
2492                        }
2493
2494                        upper->locked = 1;
2495                        path->locks[upper->level] = 0;
2496
2497                        slot = path->slots[upper->level];
2498                        btrfs_release_path(NULL, path);
2499                } else {
2500                        ret = btrfs_bin_search(upper->eb, key, upper->level,
2501                                               &slot);
2502                        BUG_ON(ret);
2503                }
2504
2505                bytenr = btrfs_node_blockptr(upper->eb, slot);
2506                if (lowest) {
2507                        BUG_ON(bytenr != node->bytenr);
2508                } else {
2509                        if (node->eb->start == bytenr)
2510                                goto next;
2511                }
2512
2513                blocksize = btrfs_level_size(root, node->level);
2514                generation = btrfs_node_ptr_generation(upper->eb, slot);
2515                eb = read_tree_block(root, bytenr, blocksize, generation);
2516                btrfs_tree_lock(eb);
2517                btrfs_set_lock_blocking(eb);
2518
2519                if (!node->eb) {
2520                        ret = btrfs_cow_block(trans, root, eb, upper->eb,
2521                                              slot, &eb);
2522                        btrfs_tree_unlock(eb);
2523                        free_extent_buffer(eb);
2524                        if (ret < 0) {
2525                                err = ret;
2526                                goto next;
2527                        }
2528                        BUG_ON(node->eb != eb);
2529                } else {
2530                        btrfs_set_node_blockptr(upper->eb, slot,
2531                                                node->eb->start);
2532                        btrfs_set_node_ptr_generation(upper->eb, slot,
2533                                                      trans->transid);
2534                        btrfs_mark_buffer_dirty(upper->eb);
2535
2536                        ret = btrfs_inc_extent_ref(trans, root,
2537                                                node->eb->start, blocksize,
2538                                                upper->eb->start,
2539                                                btrfs_header_owner(upper->eb),
2540                                                node->level, 0);
2541                        BUG_ON(ret);
2542
2543                        ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
2544                        BUG_ON(ret);
2545                }
2546next:
2547                if (!upper->pending)
2548                        drop_node_buffer(upper);
2549                else
2550                        unlock_node_buffer(upper);
2551                if (err)
2552                        break;
2553        }
2554
2555        if (!err && node->pending) {
2556                drop_node_buffer(node);
2557                list_move_tail(&node->list, &rc->backref_cache.changed);
2558                node->pending = 0;
2559        }
2560
2561        path->lowest_level = 0;
2562        BUG_ON(err == -ENOSPC);
2563        return err;
2564}
2565
2566static int link_to_upper(struct btrfs_trans_handle *trans,
2567                         struct reloc_control *rc,
2568                         struct backref_node *node,
2569                         struct btrfs_path *path)
2570{
2571        struct btrfs_key key;
2572
2573        btrfs_node_key_to_cpu(node->eb, &key, 0);
2574        return do_relocation(trans, rc, node, &key, path, 0);
2575}
2576
2577static int finish_pending_nodes(struct btrfs_trans_handle *trans,
2578                                struct reloc_control *rc,
2579                                struct btrfs_path *path, int err)
2580{
2581        LIST_HEAD(list);
2582        struct backref_cache *cache = &rc->backref_cache;
2583        struct backref_node *node;
2584        int level;
2585        int ret;
2586
2587        for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2588                while (!list_empty(&cache->pending[level])) {
2589                        node = list_entry(cache->pending[level].next,
2590                                          struct backref_node, list);
2591                        list_move_tail(&node->list, &list);
2592                        BUG_ON(!node->pending);
2593
2594                        if (!err) {
2595                                ret = link_to_upper(trans, rc, node, path);
2596                                if (ret < 0)
2597                                        err = ret;
2598                        }
2599                }
2600                list_splice_init(&list, &cache->pending[level]);
2601        }
2602        return err;
2603}
2604
2605static void mark_block_processed(struct reloc_control *rc,
2606                                 u64 bytenr, u32 blocksize)
2607{
2608        set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1,
2609                        EXTENT_DIRTY, GFP_NOFS);
2610}
2611
2612static void __mark_block_processed(struct reloc_control *rc,
2613                                   struct backref_node *node)
2614{
2615        u32 blocksize;
2616        if (node->level == 0 ||
2617            in_block_group(node->bytenr, rc->block_group)) {
2618                blocksize = btrfs_level_size(rc->extent_root, node->level);
2619                mark_block_processed(rc, node->bytenr, blocksize);
2620        }
2621        node->processed = 1;
2622}
2623
2624/*
2625 * mark a block and all blocks directly/indirectly reference the block
2626 * as processed.
2627 */
2628static void update_processed_blocks(struct reloc_control *rc,
2629                                    struct backref_node *node)
2630{
2631        struct backref_node *next = node;
2632        struct backref_edge *edge;
2633        struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2634        int index = 0;
2635
2636        while (next) {
2637                cond_resched();
2638                while (1) {
2639                        if (next->processed)
2640                                break;
2641
2642                        __mark_block_processed(rc, next);
2643
2644                        if (list_empty(&next->upper))
2645                                break;
2646
2647                        edge = list_entry(next->upper.next,
2648                                          struct backref_edge, list[LOWER]);
2649                        edges[index++] = edge;
2650                        next = edge->node[UPPER];
2651                }
2652                next = walk_down_backref(edges, &index);
2653        }
2654}
2655
2656static int tree_block_processed(u64 bytenr, u32 blocksize,
2657                                struct reloc_control *rc)
2658{
2659        if (test_range_bit(&rc->processed_blocks, bytenr,
2660                           bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2661                return 1;
2662        return 0;
2663}
2664
2665static int get_tree_block_key(struct reloc_control *rc,
2666                              struct tree_block *block)
2667{
2668        struct extent_buffer *eb;
2669
2670        BUG_ON(block->key_ready);
2671        eb = read_tree_block(rc->extent_root, block->bytenr,
2672                             block->key.objectid, block->key.offset);
2673        WARN_ON(btrfs_header_level(eb) != block->level);
2674        if (block->level == 0)
2675                btrfs_item_key_to_cpu(eb, &block->key, 0);
2676        else
2677                btrfs_node_key_to_cpu(eb, &block->key, 0);
2678        free_extent_buffer(eb);
2679        block->key_ready = 1;
2680        return 0;
2681}
2682
2683static int reada_tree_block(struct reloc_control *rc,
2684                            struct tree_block *block)
2685{
2686        BUG_ON(block->key_ready);
2687        readahead_tree_block(rc->extent_root, block->bytenr,
2688                             block->key.objectid, block->key.offset);
2689        return 0;
2690}
2691
2692/*
2693 * helper function to relocate a tree block
2694 */
2695static int relocate_tree_block(struct btrfs_trans_handle *trans,
2696                                struct reloc_control *rc,
2697                                struct backref_node *node,
2698                                struct btrfs_key *key,
2699                                struct btrfs_path *path)
2700{
2701        struct btrfs_root *root;
2702        int release = 0;
2703        int ret = 0;
2704
2705        if (!node)
2706                return 0;
2707
2708        BUG_ON(node->processed);
2709        root = select_one_root(trans, node);
2710        if (root == ERR_PTR(-ENOENT)) {
2711                update_processed_blocks(rc, node);
2712                goto out;
2713        }
2714
2715        if (!root || root->ref_cows) {
2716                ret = reserve_metadata_space(trans, rc, node);
2717                if (ret)
2718                        goto out;
2719                release = 1;
2720        }
2721
2722        if (root) {
2723                if (root->ref_cows) {
2724                        BUG_ON(node->new_bytenr);
2725                        BUG_ON(!list_empty(&node->list));
2726                        btrfs_record_root_in_trans(trans, root);
2727                        root = root->reloc_root;
2728                        node->new_bytenr = root->node->start;
2729                        node->root = root;
2730                        list_add_tail(&node->list, &rc->backref_cache.changed);
2731                } else {
2732                        path->lowest_level = node->level;
2733                        ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2734                        btrfs_release_path(root, path);
2735                        if (ret > 0)
2736                                ret = 0;
2737                }
2738                if (!ret)
2739                        update_processed_blocks(rc, node);
2740        } else {
2741                ret = do_relocation(trans, rc, node, key, path, 1);
2742        }
2743out:
2744        if (ret || node->level == 0 || node->cowonly) {
2745                if (release)
2746                        release_metadata_space(rc, node);
2747                remove_backref_node(&rc->backref_cache, node);
2748        }
2749        return ret;
2750}
2751
2752/*
2753 * relocate a list of blocks
2754 */
2755static noinline_for_stack
2756int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2757                         struct reloc_control *rc, struct rb_root *blocks)
2758{
2759        struct backref_node *node;
2760        struct btrfs_path *path;
2761        struct tree_block *block;
2762        struct rb_node *rb_node;
2763        int ret;
2764        int err = 0;
2765
2766        path = btrfs_alloc_path();
2767        if (!path)
2768                return -ENOMEM;
2769
2770        rb_node = rb_first(blocks);
2771        while (rb_node) {
2772                block = rb_entry(rb_node, struct tree_block, rb_node);
2773                if (!block->key_ready)
2774                        reada_tree_block(rc, block);
2775                rb_node = rb_next(rb_node);
2776        }
2777
2778        rb_node = rb_first(blocks);
2779        while (rb_node) {
2780                block = rb_entry(rb_node, struct tree_block, rb_node);
2781                if (!block->key_ready)
2782                        get_tree_block_key(rc, block);
2783                rb_node = rb_next(rb_node);
2784        }
2785
2786        rb_node = rb_first(blocks);
2787        while (rb_node) {
2788                block = rb_entry(rb_node, struct tree_block, rb_node);
2789
2790                node = build_backref_tree(rc, &block->key,
2791                                          block->level, block->bytenr);
2792                if (IS_ERR(node)) {
2793                        err = PTR_ERR(node);
2794                        goto out;
2795                }
2796
2797                ret = relocate_tree_block(trans, rc, node, &block->key,
2798                                          path);
2799                if (ret < 0) {
2800                        if (ret != -EAGAIN || rb_node == rb_first(blocks))
2801                                err = ret;
2802                        goto out;
2803                }
2804                rb_node = rb_next(rb_node);
2805        }
2806out:
2807        free_block_list(blocks);
2808        err = finish_pending_nodes(trans, rc, path, err);
2809
2810        btrfs_free_path(path);
2811        return err;
2812}
2813
2814static noinline_for_stack
2815int prealloc_file_extent_cluster(struct inode *inode,
2816                                 struct file_extent_cluster *cluster)
2817{
2818        u64 alloc_hint = 0;
2819        u64 start;
2820        u64 end;
2821        u64 offset = BTRFS_I(inode)->index_cnt;
2822        u64 num_bytes;
2823        int nr = 0;
2824        int ret = 0;
2825
2826        BUG_ON(cluster->start != cluster->boundary[0]);
2827        mutex_lock(&inode->i_mutex);
2828
2829        ret = btrfs_check_data_free_space(inode, cluster->end +
2830                                          1 - cluster->start);
2831        if (ret)
2832                goto out;
2833
2834        while (nr < cluster->nr) {
2835                start = cluster->boundary[nr] - offset;
2836                if (nr + 1 < cluster->nr)
2837                        end = cluster->boundary[nr + 1] - 1 - offset;
2838                else
2839                        end = cluster->end - offset;
2840
2841                lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2842                num_bytes = end + 1 - start;
2843                ret = btrfs_prealloc_file_range(inode, 0, start,
2844                                                num_bytes, num_bytes,
2845                                                end + 1, &alloc_hint);
2846                unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2847                if (ret)
2848                        break;
2849                nr++;
2850        }
2851        btrfs_free_reserved_data_space(inode, cluster->end +
2852                                       1 - cluster->start);
2853out:
2854        mutex_unlock(&inode->i_mutex);
2855        return ret;
2856}
2857
2858static noinline_for_stack
2859int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2860                         u64 block_start)
2861{
2862        struct btrfs_root *root = BTRFS_I(inode)->root;
2863        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2864        struct extent_map *em;
2865        int ret = 0;
2866
2867        em = alloc_extent_map(GFP_NOFS);
2868        if (!em)
2869                return -ENOMEM;
2870
2871        em->start = start;
2872        em->len = end + 1 - start;
2873        em->block_len = em->len;
2874        em->block_start = block_start;
2875        em->bdev = root->fs_info->fs_devices->latest_bdev;
2876        set_bit(EXTENT_FLAG_PINNED, &em->flags);
2877
2878        lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2879        while (1) {
2880                write_lock(&em_tree->lock);
2881                ret = add_extent_mapping(em_tree, em);
2882                write_unlock(&em_tree->lock);
2883                if (ret != -EEXIST) {
2884                        free_extent_map(em);
2885                        break;
2886                }
2887                btrfs_drop_extent_cache(inode, start, end, 0);
2888        }
2889        unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2890        return ret;
2891}
2892
2893static int relocate_file_extent_cluster(struct inode *inode,
2894                                        struct file_extent_cluster *cluster)
2895{
2896        u64 page_start;
2897        u64 page_end;
2898        u64 offset = BTRFS_I(inode)->index_cnt;
2899        unsigned long index;
2900        unsigned long last_index;
2901        struct page *page;
2902        struct file_ra_state *ra;
2903        int nr = 0;
2904        int ret = 0;
2905
2906        if (!cluster->nr)
2907                return 0;
2908
2909        ra = kzalloc(sizeof(*ra), GFP_NOFS);
2910        if (!ra)
2911                return -ENOMEM;
2912
2913        ret = prealloc_file_extent_cluster(inode, cluster);
2914        if (ret)
2915                goto out;
2916
2917        file_ra_state_init(ra, inode->i_mapping);
2918
2919        ret = setup_extent_mapping(inode, cluster->start - offset,
2920                                   cluster->end - offset, cluster->start);
2921        if (ret)
2922                goto out;
2923
2924        index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
2925        last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
2926        while (index <= last_index) {
2927                ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
2928                if (ret)
2929                        goto out;
2930
2931                page = find_lock_page(inode->i_mapping, index);
2932                if (!page) {
2933                        page_cache_sync_readahead(inode->i_mapping,
2934                                                  ra, NULL, index,
2935                                                  last_index + 1 - index);
2936                        page = grab_cache_page(inode->i_mapping, index);
2937                        if (!page) {
2938                                btrfs_delalloc_release_metadata(inode,
2939                                                        PAGE_CACHE_SIZE);
2940                                ret = -ENOMEM;
2941                                goto out;
2942                        }
2943                }
2944
2945                if (PageReadahead(page)) {
2946                        page_cache_async_readahead(inode->i_mapping,
2947                                                   ra, NULL, page, index,
2948                                                   last_index + 1 - index);
2949                }
2950
2951                if (!PageUptodate(page)) {
2952                        btrfs_readpage(NULL, page);
2953                        lock_page(page);
2954                        if (!PageUptodate(page)) {
2955                                unlock_page(page);
2956                                page_cache_release(page);
2957                                btrfs_delalloc_release_metadata(inode,
2958                                                        PAGE_CACHE_SIZE);
2959                                ret = -EIO;
2960                                goto out;
2961                        }
2962                }
2963
2964                page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2965                page_end = page_start + PAGE_CACHE_SIZE - 1;
2966
2967                lock_extent(&BTRFS_I(inode)->io_tree,
2968                            page_start, page_end, GFP_NOFS);
2969
2970                set_page_extent_mapped(page);
2971
2972                if (nr < cluster->nr &&
2973                    page_start + offset == cluster->boundary[nr]) {
2974                        set_extent_bits(&BTRFS_I(inode)->io_tree,
2975                                        page_start, page_end,
2976                                        EXTENT_BOUNDARY, GFP_NOFS);
2977                        nr++;
2978                }
2979
2980                btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
2981                set_page_dirty(page);
2982
2983                unlock_extent(&BTRFS_I(inode)->io_tree,
2984                              page_start, page_end, GFP_NOFS);
2985                unlock_page(page);
2986                page_cache_release(page);
2987
2988                index++;
2989                balance_dirty_pages_ratelimited(inode->i_mapping);
2990                btrfs_throttle(BTRFS_I(inode)->root);
2991        }
2992        WARN_ON(nr != cluster->nr);
2993out:
2994        kfree(ra);
2995        return ret;
2996}
2997
2998static noinline_for_stack
2999int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
3000                         struct file_extent_cluster *cluster)
3001{
3002        int ret;
3003
3004        if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
3005                ret = relocate_file_extent_cluster(inode, cluster);
3006                if (ret)
3007                        return ret;
3008                cluster->nr = 0;
3009        }
3010
3011        if (!cluster->nr)
3012                cluster->start = extent_key->objectid;
3013        else
3014                BUG_ON(cluster->nr >= MAX_EXTENTS);
3015        cluster->end = extent_key->objectid + extent_key->offset - 1;
3016        cluster->boundary[cluster->nr] = extent_key->objectid;
3017        cluster->nr++;
3018
3019        if (cluster->nr >= MAX_EXTENTS) {
3020                ret = relocate_file_extent_cluster(inode, cluster);
3021                if (ret)
3022                        return ret;
3023                cluster->nr = 0;
3024        }
3025        return 0;
3026}
3027
3028#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3029static int get_ref_objectid_v0(struct reloc_control *rc,
3030                               struct btrfs_path *path,
3031                               struct btrfs_key *extent_key,
3032                               u64 *ref_objectid, int *path_change)
3033{
3034        struct btrfs_key key;
3035        struct extent_buffer *leaf;
3036        struct btrfs_extent_ref_v0 *ref0;
3037        int ret;
3038        int slot;
3039
3040        leaf = path->nodes[0];
3041        slot = path->slots[0];
3042        while (1) {
3043                if (slot >= btrfs_header_nritems(leaf)) {
3044                        ret = btrfs_next_leaf(rc->extent_root, path);
3045                        if (ret < 0)
3046                                return ret;
3047                        BUG_ON(ret > 0);
3048                        leaf = path->nodes[0];
3049                        slot = path->slots[0];
3050                        if (path_change)
3051                                *path_change = 1;
3052                }
3053                btrfs_item_key_to_cpu(leaf, &key, slot);
3054                if (key.objectid != extent_key->objectid)
3055                        return -ENOENT;
3056
3057                if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
3058                        slot++;
3059                        continue;
3060                }
3061                ref0 = btrfs_item_ptr(leaf, slot,
3062                                struct btrfs_extent_ref_v0);
3063                *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
3064                break;
3065        }
3066        return 0;
3067}
3068#endif
3069
3070/*
3071 * helper to add a tree block to the list.
3072 * the major work is getting the generation and level of the block
3073 */
3074static int add_tree_block(struct reloc_control *rc,
3075                          struct btrfs_key *extent_key,
3076                          struct btrfs_path *path,
3077                          struct rb_root *blocks)
3078{
3079        struct extent_buffer *eb;
3080        struct btrfs_extent_item *ei;
3081        struct btrfs_tree_block_info *bi;
3082        struct tree_block *block;
3083        struct rb_node *rb_node;
3084        u32 item_size;
3085        int level = -1;
3086        int generation;
3087
3088        eb =  path->nodes[0];
3089        item_size = btrfs_item_size_nr(eb, path->slots[0]);
3090
3091        if (item_size >= sizeof(*ei) + sizeof(*bi)) {
3092                ei = btrfs_item_ptr(eb, path->slots[0],
3093                                struct btrfs_extent_item);
3094                bi = (struct btrfs_tree_block_info *)(ei + 1);
3095                generation = btrfs_extent_generation(eb, ei);
3096                level = btrfs_tree_block_level(eb, bi);
3097        } else {
3098#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3099                u64 ref_owner;
3100                int ret;
3101
3102                BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3103                ret = get_ref_objectid_v0(rc, path, extent_key,
3104                                          &ref_owner, NULL);
3105                if (ret < 0)
3106                        return ret;
3107                BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
3108                level = (int)ref_owner;
3109                /* FIXME: get real generation */
3110                generation = 0;
3111#else
3112                BUG();
3113#endif
3114        }
3115
3116        btrfs_release_path(rc->extent_root, path);
3117
3118        BUG_ON(level == -1);
3119
3120        block = kmalloc(sizeof(*block), GFP_NOFS);
3121        if (!block)
3122                return -ENOMEM;
3123
3124        block->bytenr = extent_key->objectid;
3125        block->key.objectid = extent_key->offset;
3126        block->key.offset = generation;
3127        block->level = level;
3128        block->key_ready = 0;
3129
3130        rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
3131        BUG_ON(rb_node);
3132
3133        return 0;
3134}
3135
3136/*
3137 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
3138 */
3139static int __add_tree_block(struct reloc_control *rc,
3140                            u64 bytenr, u32 blocksize,
3141                            struct rb_root *blocks)
3142{
3143        struct btrfs_path *path;
3144        struct btrfs_key key;
3145        int ret;
3146
3147        if (tree_block_processed(bytenr, blocksize, rc))
3148                return 0;
3149
3150        if (tree_search(blocks, bytenr))
3151                return 0;
3152
3153        path = btrfs_alloc_path();
3154        if (!path)
3155                return -ENOMEM;
3156
3157        key.objectid = bytenr;
3158        key.type = BTRFS_EXTENT_ITEM_KEY;
3159        key.offset = blocksize;
3160
3161        path->search_commit_root = 1;
3162        path->skip_locking = 1;
3163        ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
3164        if (ret < 0)
3165                goto out;
3166        BUG_ON(ret);
3167
3168        btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3169        ret = add_tree_block(rc, &key, path, blocks);
3170out:
3171        btrfs_free_path(path);
3172        return ret;
3173}
3174
3175/*
3176 * helper to check if the block use full backrefs for pointers in it
3177 */
3178static int block_use_full_backref(struct reloc_control *rc,
3179                                  struct extent_buffer *eb)
3180{
3181        u64 flags;
3182        int ret;
3183
3184        if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
3185            btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
3186                return 1;
3187
3188        ret = btrfs_lookup_extent_info(NULL, rc->extent_root,
3189                                       eb->start, eb->len, NULL, &flags);
3190        BUG_ON(ret);
3191
3192        if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
3193                ret = 1;
3194        else
3195                ret = 0;
3196        return ret;
3197}
3198
3199static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3200                                    struct inode *inode, u64 ino)
3201{
3202        struct btrfs_key key;
3203        struct btrfs_path *path;
3204        struct btrfs_root *root = fs_info->tree_root;
3205        struct btrfs_trans_handle *trans;
3206        unsigned long nr;
3207        int ret = 0;
3208
3209        if (inode)
3210                goto truncate;
3211
3212        key.objectid = ino;
3213        key.type = BTRFS_INODE_ITEM_KEY;
3214        key.offset = 0;
3215
3216        inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3217        if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
3218                if (inode && !IS_ERR(inode))
3219                        iput(inode);
3220                return -ENOENT;
3221        }
3222
3223truncate:
3224        path = btrfs_alloc_path();
3225        if (!path) {
3226                ret = -ENOMEM;
3227                goto out;
3228        }
3229
3230        trans = btrfs_join_transaction(root, 0);
3231        if (IS_ERR(trans)) {
3232                btrfs_free_path(path);
3233                ret = PTR_ERR(trans);
3234                goto out;
3235        }
3236
3237        ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
3238
3239        btrfs_free_path(path);
3240        nr = trans->blocks_used;
3241        btrfs_end_transaction(trans, root);
3242        btrfs_btree_balance_dirty(root, nr);
3243out:
3244        iput(inode);
3245        return ret;
3246}
3247
3248/*
3249 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
3250 * this function scans fs tree to find blocks reference the data extent
3251 */
3252static int find_data_references(struct reloc_control *rc,
3253                                struct btrfs_key *extent_key,
3254                                struct extent_buffer *leaf,
3255                                struct btrfs_extent_data_ref *ref,
3256                                struct rb_root *blocks)
3257{
3258        struct btrfs_path *path;
3259        struct tree_block *block;
3260        struct btrfs_root *root;
3261        struct btrfs_file_extent_item *fi;
3262        struct rb_node *rb_node;
3263        struct btrfs_key key;
3264        u64 ref_root;
3265        u64 ref_objectid;
3266        u64 ref_offset;
3267        u32 ref_count;
3268        u32 nritems;
3269        int err = 0;
3270        int added = 0;
3271        int counted;
3272        int ret;
3273
3274        ref_root = btrfs_extent_data_ref_root(leaf, ref);
3275        ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
3276        ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
3277        ref_count = btrfs_extent_data_ref_count(leaf, ref);
3278
3279        /*
3280         * This is an extent belonging to the free space cache, lets just delete
3281         * it and redo the search.
3282         */
3283        if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
3284                ret = delete_block_group_cache(rc->extent_root->fs_info,
3285                                               NULL, ref_objectid);
3286                if (ret != -ENOENT)
3287                        return ret;
3288                ret = 0;
3289        }
3290
3291        path = btrfs_alloc_path();
3292        if (!path)
3293                return -ENOMEM;
3294
3295        root = read_fs_root(rc->extent_root->fs_info, ref_root);
3296        if (IS_ERR(root)) {
3297                err = PTR_ERR(root);
3298                goto out;
3299        }
3300
3301        key.objectid = ref_objectid;
3302        key.offset = ref_offset;
3303        key.type = BTRFS_EXTENT_DATA_KEY;
3304
3305        path->search_commit_root = 1;
3306        path->skip_locking = 1;
3307        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3308        if (ret < 0) {
3309                err = ret;
3310                goto out;
3311        }
3312
3313        leaf = path->nodes[0];
3314        nritems = btrfs_header_nritems(leaf);
3315        /*
3316         * the references in tree blocks that use full backrefs
3317         * are not counted in
3318         */
3319        if (block_use_full_backref(rc, leaf))
3320                counted = 0;
3321        else
3322                counted = 1;
3323        rb_node = tree_search(blocks, leaf->start);
3324        if (rb_node) {
3325                if (counted)
3326                        added = 1;
3327                else
3328                        path->slots[0] = nritems;
3329        }
3330
3331        while (ref_count > 0) {
3332                while (path->slots[0] >= nritems) {
3333                        ret = btrfs_next_leaf(root, path);
3334                        if (ret < 0) {
3335                                err = ret;
3336                                goto out;
3337                        }
3338                        if (ret > 0) {
3339                                WARN_ON(1);
3340                                goto out;
3341                        }
3342
3343                        leaf = path->nodes[0];
3344                        nritems = btrfs_header_nritems(leaf);
3345                        added = 0;
3346
3347                        if (block_use_full_backref(rc, leaf))
3348                                counted = 0;
3349                        else
3350                                counted = 1;
3351                        rb_node = tree_search(blocks, leaf->start);
3352                        if (rb_node) {
3353                                if (counted)
3354                                        added = 1;
3355                                else
3356                                        path->slots[0] = nritems;
3357                        }
3358                }
3359
3360                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3361                if (key.objectid != ref_objectid ||
3362                    key.type != BTRFS_EXTENT_DATA_KEY) {
3363                        WARN_ON(1);
3364                        break;
3365                }
3366
3367                fi = btrfs_item_ptr(leaf, path->slots[0],
3368                                    struct btrfs_file_extent_item);
3369
3370                if (btrfs_file_extent_type(leaf, fi) ==
3371                    BTRFS_FILE_EXTENT_INLINE)
3372                        goto next;
3373
3374                if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
3375                    extent_key->objectid)
3376                        goto next;
3377
3378                key.offset -= btrfs_file_extent_offset(leaf, fi);
3379                if (key.offset != ref_offset)
3380                        goto next;
3381
3382                if (counted)
3383                        ref_count--;
3384                if (added)
3385                        goto next;
3386
3387                if (!tree_block_processed(leaf->start, leaf->len, rc)) {
3388                        block = kmalloc(sizeof(*block), GFP_NOFS);
3389                        if (!block) {
3390                                err = -ENOMEM;
3391                                break;
3392                        }
3393                        block->bytenr = leaf->start;
3394                        btrfs_item_key_to_cpu(leaf, &block->key, 0);
3395                        block->level = 0;
3396                        block->key_ready = 1;
3397                        rb_node = tree_insert(blocks, block->bytenr,
3398                                              &block->rb_node);
3399                        BUG_ON(rb_node);
3400                }
3401                if (counted)
3402                        added = 1;
3403                else
3404                        path->slots[0] = nritems;
3405next:
3406                path->slots[0]++;
3407
3408        }
3409out:
3410        btrfs_free_path(path);
3411        return err;
3412}
3413
3414/*
3415 * hepler to find all tree blocks that reference a given data extent
3416 */
3417static noinline_for_stack
3418int add_data_references(struct reloc_control *rc,
3419                        struct btrfs_key *extent_key,
3420                        struct btrfs_path *path,
3421                        struct rb_root *blocks)
3422{
3423        struct btrfs_key key;
3424        struct extent_buffer *eb;
3425        struct btrfs_extent_data_ref *dref;
3426        struct btrfs_extent_inline_ref *iref;
3427        unsigned long ptr;
3428        unsigned long end;
3429        u32 blocksize = btrfs_level_size(rc->extent_root, 0);
3430        int ret;
3431        int err = 0;
3432
3433        eb = path->nodes[0];
3434        ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
3435        end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
3436#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3437        if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
3438                ptr = end;
3439        else
3440#endif
3441                ptr += sizeof(struct btrfs_extent_item);
3442
3443        while (ptr < end) {
3444                iref = (struct btrfs_extent_inline_ref *)ptr;
3445                key.type = btrfs_extent_inline_ref_type(eb, iref);
3446                if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3447                        key.offset = btrfs_extent_inline_ref_offset(eb, iref);
3448                        ret = __add_tree_block(rc, key.offset, blocksize,
3449                                               blocks);
3450                } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3451                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3452                        ret = find_data_references(rc, extent_key,
3453                                                   eb, dref, blocks);
3454                } else {
3455                        BUG();
3456                }
3457                ptr += btrfs_extent_inline_ref_size(key.type);
3458        }
3459        WARN_ON(ptr > end);
3460
3461        while (1) {
3462                cond_resched();
3463                eb = path->nodes[0];
3464                if (path->slots[0] >= btrfs_header_nritems(eb)) {
3465                        ret = btrfs_next_leaf(rc->extent_root, path);
3466                        if (ret < 0) {
3467                                err = ret;
3468                                break;
3469                        }
3470                        if (ret > 0)
3471                                break;
3472                        eb = path->nodes[0];
3473                }
3474
3475                btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
3476                if (key.objectid != extent_key->objectid)
3477                        break;
3478
3479#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3480                if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
3481                    key.type == BTRFS_EXTENT_REF_V0_KEY) {
3482#else
3483                BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
3484                if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3485#endif
3486                        ret = __add_tree_block(rc, key.offset, blocksize,
3487                                               blocks);
3488                } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3489                        dref = btrfs_item_ptr(eb, path->slots[0],
3490                                              struct btrfs_extent_data_ref);
3491                        ret = find_data_references(rc, extent_key,
3492                                                   eb, dref, blocks);
3493                } else {
3494                        ret = 0;
3495                }
3496                if (ret) {
3497                        err = ret;
3498                        break;
3499                }
3500                path->slots[0]++;
3501        }
3502        btrfs_release_path(rc->extent_root, path);
3503        if (err)
3504                free_block_list(blocks);
3505        return err;
3506}
3507
3508/*
3509 * hepler to find next unprocessed extent
3510 */
3511static noinline_for_stack
3512int find_next_extent(struct btrfs_trans_handle *trans,
3513                     struct reloc_control *rc, struct btrfs_path *path,
3514                     struct btrfs_key *extent_key)
3515{
3516        struct btrfs_key key;
3517        struct extent_buffer *leaf;
3518        u64 start, end, last;
3519        int ret;
3520
3521        last = rc->block_group->key.objectid + rc->block_group->key.offset;
3522        while (1) {
3523                cond_resched();
3524                if (rc->search_start >= last) {
3525                        ret = 1;
3526                        break;
3527                }
3528
3529                key.objectid = rc->search_start;
3530                key.type = BTRFS_EXTENT_ITEM_KEY;
3531                key.offset = 0;
3532
3533                path->search_commit_root = 1;
3534                path->skip_locking = 1;
3535                ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
3536                                        0, 0);
3537                if (ret < 0)
3538                        break;
3539next:
3540                leaf = path->nodes[0];
3541                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3542                        ret = btrfs_next_leaf(rc->extent_root, path);
3543                        if (ret != 0)
3544                                break;
3545                        leaf = path->nodes[0];
3546                }
3547
3548                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3549                if (key.objectid >= last) {
3550                        ret = 1;
3551                        break;
3552                }
3553
3554                if (key.type != BTRFS_EXTENT_ITEM_KEY ||
3555                    key.objectid + key.offset <= rc->search_start) {
3556                        path->slots[0]++;
3557                        goto next;
3558                }
3559
3560                ret = find_first_extent_bit(&rc->processed_blocks,
3561                                            key.objectid, &start, &end,
3562                                            EXTENT_DIRTY);
3563
3564                if (ret == 0 && start <= key.objectid) {
3565                        btrfs_release_path(rc->extent_root, path);
3566                        rc->search_start = end + 1;
3567                } else {
3568                        rc->search_start = key.objectid + key.offset;
3569                        memcpy(extent_key, &key, sizeof(key));
3570                        return 0;
3571                }
3572        }
3573        btrfs_release_path(rc->extent_root, path);
3574        return ret;
3575}
3576
3577static void set_reloc_control(struct reloc_control *rc)
3578{
3579        struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3580        mutex_lock(&fs_info->trans_mutex);
3581        fs_info->reloc_ctl = rc;
3582        mutex_unlock(&fs_info->trans_mutex);
3583}
3584
3585static void unset_reloc_control(struct reloc_control *rc)
3586{
3587        struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3588        mutex_lock(&fs_info->trans_mutex);
3589        fs_info->reloc_ctl = NULL;
3590        mutex_unlock(&fs_info->trans_mutex);
3591}
3592
3593static int check_extent_flags(u64 flags)
3594{
3595        if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3596            (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3597                return 1;
3598        if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
3599            !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3600                return 1;
3601        if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3602            (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
3603                return 1;
3604        return 0;
3605}
3606
3607static noinline_for_stack
3608int prepare_to_relocate(struct reloc_control *rc)
3609{
3610        struct btrfs_trans_handle *trans;
3611        int ret;
3612
3613        rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root);
3614        if (!rc->block_rsv)
3615                return -ENOMEM;
3616
3617        /*
3618         * reserve some space for creating reloc trees.
3619         * btrfs_init_reloc_root will use them when there
3620         * is no reservation in transaction handle.
3621         */
3622        ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
3623                                  rc->extent_root->nodesize * 256);
3624        if (ret)
3625                return ret;
3626
3627        rc->block_rsv->refill_used = 1;
3628        btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv);
3629
3630        memset(&rc->cluster, 0, sizeof(rc->cluster));
3631        rc->search_start = rc->block_group->key.objectid;
3632        rc->extents_found = 0;
3633        rc->nodes_relocated = 0;
3634        rc->merging_rsv_size = 0;
3635
3636        rc->create_reloc_tree = 1;
3637        set_reloc_control(rc);
3638
3639        trans = btrfs_join_transaction(rc->extent_root, 1);
3640        BUG_ON(IS_ERR(trans));
3641        btrfs_commit_transaction(trans, rc->extent_root);
3642        return 0;
3643}
3644
3645static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3646{
3647        struct rb_root blocks = RB_ROOT;
3648        struct btrfs_key key;
3649        struct btrfs_trans_handle *trans = NULL;
3650        struct btrfs_path *path;
3651        struct btrfs_extent_item *ei;
3652        unsigned long nr;
3653        u64 flags;
3654        u32 item_size;
3655        int ret;
3656        int err = 0;
3657        int progress = 0;
3658
3659        path = btrfs_alloc_path();
3660        if (!path)
3661                return -ENOMEM;
3662
3663        ret = prepare_to_relocate(rc);
3664        if (ret) {
3665                err = ret;
3666                goto out_free;
3667        }
3668
3669        while (1) {
3670                progress++;
3671                trans = btrfs_start_transaction(rc->extent_root, 0);
3672                BUG_ON(IS_ERR(trans));
3673restart:
3674                if (update_backref_cache(trans, &rc->backref_cache)) {
3675                        btrfs_end_transaction(trans, rc->extent_root);
3676                        continue;
3677                }
3678
3679                ret = find_next_extent(trans, rc, path, &key);
3680                if (ret < 0)
3681                        err = ret;
3682                if (ret != 0)
3683                        break;
3684
3685                rc->extents_found++;
3686
3687                ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3688                                    struct btrfs_extent_item);
3689                item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
3690                if (item_size >= sizeof(*ei)) {
3691                        flags = btrfs_extent_flags(path->nodes[0], ei);
3692                        ret = check_extent_flags(flags);
3693                        BUG_ON(ret);
3694
3695                } else {
3696#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3697                        u64 ref_owner;
3698                        int path_change = 0;
3699
3700                        BUG_ON(item_size !=
3701                               sizeof(struct btrfs_extent_item_v0));
3702                        ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
3703                                                  &path_change);
3704                        if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
3705                                flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
3706                        else
3707                                flags = BTRFS_EXTENT_FLAG_DATA;
3708
3709                        if (path_change) {
3710                                btrfs_release_path(rc->extent_root, path);
3711
3712                                path->search_commit_root = 1;
3713                                path->skip_locking = 1;
3714                                ret = btrfs_search_slot(NULL, rc->extent_root,
3715                                                        &key, path, 0, 0);
3716                                if (ret < 0) {
3717                                        err = ret;
3718                                        break;
3719                                }
3720                                BUG_ON(ret > 0);
3721                        }
3722#else
3723                        BUG();
3724#endif
3725                }
3726
3727                if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
3728                        ret = add_tree_block(rc, &key, path, &blocks);
3729                } else if (rc->stage == UPDATE_DATA_PTRS &&
3730                           (flags & BTRFS_EXTENT_FLAG_DATA)) {
3731                        ret = add_data_references(rc, &key, path, &blocks);
3732                } else {
3733                        btrfs_release_path(rc->extent_root, path);
3734                        ret = 0;
3735                }
3736                if (ret < 0) {
3737                        err = ret;
3738                        break;
3739                }
3740
3741                if (!RB_EMPTY_ROOT(&blocks)) {
3742                        ret = relocate_tree_blocks(trans, rc, &blocks);
3743                        if (ret < 0) {
3744                                if (ret != -EAGAIN) {
3745                                        err = ret;
3746                                        break;
3747                                }
3748                                rc->extents_found--;
3749                                rc->search_start = key.objectid;
3750                        }
3751                }
3752
3753                ret = btrfs_block_rsv_check(trans, rc->extent_root,
3754                                            rc->block_rsv, 0, 5);
3755                if (ret < 0) {
3756                        if (ret != -EAGAIN) {
3757                                err = ret;
3758                                WARN_ON(1);
3759                                break;
3760                        }
3761                        rc->commit_transaction = 1;
3762                }
3763
3764                if (rc->commit_transaction) {
3765                        rc->commit_transaction = 0;
3766                        ret = btrfs_commit_transaction(trans, rc->extent_root);
3767                        BUG_ON(ret);
3768                } else {
3769                        nr = trans->blocks_used;
3770                        btrfs_end_transaction_throttle(trans, rc->extent_root);
3771                        btrfs_btree_balance_dirty(rc->extent_root, nr);
3772                }
3773                trans = NULL;
3774
3775                if (rc->stage == MOVE_DATA_EXTENTS &&
3776                    (flags & BTRFS_EXTENT_FLAG_DATA)) {
3777                        rc->found_file_extent = 1;
3778                        ret = relocate_data_extent(rc->data_inode,
3779                                                   &key, &rc->cluster);
3780                        if (ret < 0) {
3781                                err = ret;
3782                                break;
3783                        }
3784                }
3785        }
3786        if (trans && progress && err == -ENOSPC) {
3787                ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
3788                                              rc->block_group->flags);
3789                if (ret == 0) {
3790                        err = 0;
3791                        progress = 0;
3792                        goto restart;
3793                }
3794        }
3795
3796        btrfs_release_path(rc->extent_root, path);
3797        clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
3798                          GFP_NOFS);
3799
3800        if (trans) {
3801                nr = trans->blocks_used;
3802                btrfs_end_transaction_throttle(trans, rc->extent_root);
3803                btrfs_btree_balance_dirty(rc->extent_root, nr);
3804        }
3805
3806        if (!err) {
3807                ret = relocate_file_extent_cluster(rc->data_inode,
3808                                                   &rc->cluster);
3809                if (ret < 0)
3810                        err = ret;
3811        }
3812
3813        rc->create_reloc_tree = 0;
3814        set_reloc_control(rc);
3815
3816        backref_cache_cleanup(&rc->backref_cache);
3817        btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
3818
3819        err = prepare_to_merge(rc, err);
3820
3821        merge_reloc_roots(rc);
3822
3823        rc->merge_reloc_tree = 0;
3824        unset_reloc_control(rc);
3825        btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
3826
3827        /* get rid of pinned extents */
3828        trans = btrfs_join_transaction(rc->extent_root, 1);
3829        if (IS_ERR(trans))
3830                err = PTR_ERR(trans);
3831        else
3832                btrfs_commit_transaction(trans, rc->extent_root);
3833out_free:
3834        btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
3835        btrfs_free_path(path);
3836        return err;
3837}
3838
3839static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3840                                 struct btrfs_root *root, u64 objectid)
3841{
3842        struct btrfs_path *path;
3843        struct btrfs_inode_item *item;
3844        struct extent_buffer *leaf;
3845        int ret;
3846
3847        path = btrfs_alloc_path();
3848        if (!path)
3849                return -ENOMEM;
3850
3851        ret = btrfs_insert_empty_inode(trans, root, path, objectid);
3852        if (ret)
3853                goto out;
3854
3855        leaf = path->nodes[0];
3856        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3857        memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
3858        btrfs_set_inode_generation(leaf, item, 1);
3859        btrfs_set_inode_size(leaf, item, 0);
3860        btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3861        btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
3862                                          BTRFS_INODE_PREALLOC);
3863        btrfs_mark_buffer_dirty(leaf);
3864        btrfs_release_path(root, path);
3865out:
3866        btrfs_free_path(path);
3867        return ret;
3868}
3869
3870/*
3871 * helper to create inode for data relocation.
3872 * the inode is in data relocation tree and its link count is 0
3873 */
3874static noinline_for_stack
3875struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3876                                 struct btrfs_block_group_cache *group)
3877{
3878        struct inode *inode = NULL;
3879        struct btrfs_trans_handle *trans;
3880        struct btrfs_root *root;
3881        struct btrfs_key key;
3882        unsigned long nr;
3883        u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
3884        int err = 0;
3885
3886        root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
3887        if (IS_ERR(root))
3888                return ERR_CAST(root);
3889
3890        trans = btrfs_start_transaction(root, 6);
3891        if (IS_ERR(trans))
3892                return ERR_CAST(trans);
3893
3894        err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
3895        if (err)
3896                goto out;
3897
3898        err = __insert_orphan_inode(trans, root, objectid);
3899        BUG_ON(err);
3900
3901        key.objectid = objectid;
3902        key.type = BTRFS_INODE_ITEM_KEY;
3903        key.offset = 0;
3904        inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
3905        BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
3906        BTRFS_I(inode)->index_cnt = group->key.objectid;
3907
3908        err = btrfs_orphan_add(trans, inode);
3909out:
3910        nr = trans->blocks_used;
3911        btrfs_end_transaction(trans, root);
3912        btrfs_btree_balance_dirty(root, nr);
3913        if (err) {
3914                if (inode)
3915                        iput(inode);
3916                inode = ERR_PTR(err);
3917        }
3918        return inode;
3919}
3920
3921static struct reloc_control *alloc_reloc_control(void)
3922{
3923        struct reloc_control *rc;
3924
3925        rc = kzalloc(sizeof(*rc), GFP_NOFS);
3926        if (!rc)
3927                return NULL;
3928
3929        INIT_LIST_HEAD(&rc->reloc_roots);
3930        backref_cache_init(&rc->backref_cache);
3931        mapping_tree_init(&rc->reloc_root_tree);
3932        extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
3933        return rc;
3934}
3935
3936/*
3937 * function to relocate all extents in a block group.
3938 */
3939int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3940{
3941        struct btrfs_fs_info *fs_info = extent_root->fs_info;
3942        struct reloc_control *rc;
3943        struct inode *inode;
3944        struct btrfs_path *path;
3945        int ret;
3946        int rw = 0;
3947        int err = 0;
3948
3949        rc = alloc_reloc_control();
3950        if (!rc)
3951                return -ENOMEM;
3952
3953        rc->extent_root = extent_root;
3954
3955        rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
3956        BUG_ON(!rc->block_group);
3957
3958        if (!rc->block_group->ro) {
3959                ret = btrfs_set_block_group_ro(extent_root, rc->block_group);
3960                if (ret) {
3961                        err = ret;
3962                        goto out;
3963                }
3964                rw = 1;
3965        }
3966
3967        path = btrfs_alloc_path();
3968        if (!path) {
3969                err = -ENOMEM;
3970                goto out;
3971        }
3972
3973        inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
3974                                        path);
3975        btrfs_free_path(path);
3976
3977        if (!IS_ERR(inode))
3978                ret = delete_block_group_cache(fs_info, inode, 0);
3979        else
3980                ret = PTR_ERR(inode);
3981
3982        if (ret && ret != -ENOENT) {
3983                err = ret;
3984                goto out;
3985        }
3986
3987        rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
3988        if (IS_ERR(rc->data_inode)) {
3989                err = PTR_ERR(rc->data_inode);
3990                rc->data_inode = NULL;
3991                goto out;
3992        }
3993
3994        printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
3995               (unsigned long long)rc->block_group->key.objectid,
3996               (unsigned long long)rc->block_group->flags);
3997
3998        btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
3999        btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
4000
4001        while (1) {
4002                mutex_lock(&fs_info->cleaner_mutex);
4003
4004                btrfs_clean_old_snapshots(fs_info->tree_root);
4005                ret = relocate_block_group(rc);
4006
4007                mutex_unlock(&fs_info->cleaner_mutex);
4008                if (ret < 0) {
4009                        err = ret;
4010                        goto out;
4011                }
4012
4013                if (rc->extents_found == 0)
4014                        break;
4015
4016                printk(KERN_INFO "btrfs: found %llu extents\n",
4017                        (unsigned long long)rc->extents_found);
4018
4019                if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
4020                        btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
4021                        invalidate_mapping_pages(rc->data_inode->i_mapping,
4022                                                 0, -1);
4023                        rc->stage = UPDATE_DATA_PTRS;
4024                }
4025        }
4026
4027        filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
4028                                     rc->block_group->key.objectid,
4029                                     rc->block_group->key.objectid +
4030                                     rc->block_group->key.offset - 1);
4031
4032        WARN_ON(rc->block_group->pinned > 0);
4033        WARN_ON(rc->block_group->reserved > 0);
4034        WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
4035out:
4036        if (err && rw)
4037                btrfs_set_block_group_rw(extent_root, rc->block_group);
4038        iput(rc->data_inode);
4039        btrfs_put_block_group(rc->block_group);
4040        kfree(rc);
4041        return err;
4042}
4043
4044static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
4045{
4046        struct btrfs_trans_handle *trans;
4047        int ret;
4048
4049        trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
4050        BUG_ON(IS_ERR(trans));
4051
4052        memset(&root->root_item.drop_progress, 0,
4053                sizeof(root->root_item.drop_progress));
4054        root->root_item.drop_level = 0;
4055        btrfs_set_root_refs(&root->root_item, 0);
4056        ret = btrfs_update_root(trans, root->fs_info->tree_root,
4057                                &root->root_key, &root->root_item);
4058        BUG_ON(ret);
4059
4060        ret = btrfs_end_transaction(trans, root->fs_info->tree_root);
4061        BUG_ON(ret);
4062        return 0;
4063}
4064
4065/*
4066 * recover relocation interrupted by system crash.
4067 *
4068 * this function resumes merging reloc trees with corresponding fs trees.
4069 * this is important for keeping the sharing of tree blocks
4070 */
4071int btrfs_recover_relocation(struct btrfs_root *root)
4072{
4073        LIST_HEAD(reloc_roots);
4074        struct btrfs_key key;
4075        struct btrfs_root *fs_root;
4076        struct btrfs_root *reloc_root;
4077        struct btrfs_path *path;
4078        struct extent_buffer *leaf;
4079        struct reloc_control *rc = NULL;
4080        struct btrfs_trans_handle *trans;
4081        int ret;
4082        int err = 0;
4083
4084        path = btrfs_alloc_path();
4085        if (!path)
4086                return -ENOMEM;
4087
4088        key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4089        key.type = BTRFS_ROOT_ITEM_KEY;
4090        key.offset = (u64)-1;
4091
4092        while (1) {
4093                ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key,
4094                                        path, 0, 0);
4095                if (ret < 0) {
4096                        err = ret;
4097                        goto out;
4098                }
4099                if (ret > 0) {
4100                        if (path->slots[0] == 0)
4101                                break;
4102                        path->slots[0]--;
4103                }
4104                leaf = path->nodes[0];
4105                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4106                btrfs_release_path(root->fs_info->tree_root, path);
4107
4108                if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
4109                    key.type != BTRFS_ROOT_ITEM_KEY)
4110                        break;
4111
4112                reloc_root = btrfs_read_fs_root_no_radix(root, &key);
4113                if (IS_ERR(reloc_root)) {
4114                        err = PTR_ERR(reloc_root);
4115                        goto out;
4116                }
4117
4118                list_add(&reloc_root->root_list, &reloc_roots);
4119
4120                if (btrfs_root_refs(&reloc_root->root_item) > 0) {
4121                        fs_root = read_fs_root(root->fs_info,
4122                                               reloc_root->root_key.offset);
4123                        if (IS_ERR(fs_root)) {
4124                                ret = PTR_ERR(fs_root);
4125                                if (ret != -ENOENT) {
4126                                        err = ret;
4127                                        goto out;
4128                                }
4129                                mark_garbage_root(reloc_root);
4130                        }
4131                }
4132
4133                if (key.offset == 0)
4134                        break;
4135
4136                key.offset--;
4137        }
4138        btrfs_release_path(root->fs_info->tree_root, path);
4139
4140        if (list_empty(&reloc_roots))
4141                goto out;
4142
4143        rc = alloc_reloc_control();
4144        if (!rc) {
4145                err = -ENOMEM;
4146                goto out;
4147        }
4148
4149        rc->extent_root = root->fs_info->extent_root;
4150
4151        set_reloc_control(rc);
4152
4153        trans = btrfs_join_transaction(rc->extent_root, 1);
4154        if (IS_ERR(trans)) {
4155                unset_reloc_control(rc);
4156                err = PTR_ERR(trans);
4157                goto out_free;
4158        }
4159
4160        rc->merge_reloc_tree = 1;
4161
4162        while (!list_empty(&reloc_roots)) {
4163                reloc_root = list_entry(reloc_roots.next,
4164                                        struct btrfs_root, root_list);
4165                list_del(&reloc_root->root_list);
4166
4167                if (btrfs_root_refs(&reloc_root->root_item) == 0) {
4168                        list_add_tail(&reloc_root->root_list,
4169                                      &rc->reloc_roots);
4170                        continue;
4171                }
4172
4173                fs_root = read_fs_root(root->fs_info,
4174                                       reloc_root->root_key.offset);
4175                BUG_ON(IS_ERR(fs_root));
4176
4177                __add_reloc_root(reloc_root);
4178                fs_root->reloc_root = reloc_root;
4179        }
4180
4181        btrfs_commit_transaction(trans, rc->extent_root);
4182
4183        merge_reloc_roots(rc);
4184
4185        unset_reloc_control(rc);
4186
4187        trans = btrfs_join_transaction(rc->extent_root, 1);
4188        if (IS_ERR(trans))
4189                err = PTR_ERR(trans);
4190        else
4191                btrfs_commit_transaction(trans, rc->extent_root);
4192out_free:
4193        kfree(rc);
4194out:
4195        while (!list_empty(&reloc_roots)) {
4196                reloc_root = list_entry(reloc_roots.next,
4197                                        struct btrfs_root, root_list);
4198                list_del(&reloc_root->root_list);
4199                free_extent_buffer(reloc_root->node);
4200                free_extent_buffer(reloc_root->commit_root);
4201                kfree(reloc_root);
4202        }
4203        btrfs_free_path(path);
4204
4205        if (err == 0) {
4206                /* cleanup orphan inode in data relocation tree */
4207                fs_root = read_fs_root(root->fs_info,
4208                                       BTRFS_DATA_RELOC_TREE_OBJECTID);
4209                if (IS_ERR(fs_root))
4210                        err = PTR_ERR(fs_root);
4211                else
4212                        btrfs_orphan_cleanup(fs_root);
4213        }
4214        return err;
4215}
4216
4217/*
4218 * helper to add ordered checksum for data relocation.
4219 *
4220 * cloning checksum properly handles the nodatasum extents.
4221 * it also saves CPU time to re-calculate the checksum.
4222 */
4223int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4224{
4225        struct btrfs_ordered_sum *sums;
4226        struct btrfs_sector_sum *sector_sum;
4227        struct btrfs_ordered_extent *ordered;
4228        struct btrfs_root *root = BTRFS_I(inode)->root;
4229        size_t offset;
4230        int ret;
4231        u64 disk_bytenr;
4232        LIST_HEAD(list);
4233
4234        ordered = btrfs_lookup_ordered_extent(inode, file_pos);
4235        BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
4236
4237        disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
4238        ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
4239                                       disk_bytenr + len - 1, &list);
4240
4241        while (!list_empty(&list)) {
4242                sums = list_entry(list.next, struct btrfs_ordered_sum, list);
4243                list_del_init(&sums->list);
4244
4245                sector_sum = sums->sums;
4246                sums->bytenr = ordered->start;
4247
4248                offset = 0;
4249                while (offset < sums->len) {
4250                        sector_sum->bytenr += ordered->start - disk_bytenr;
4251                        sector_sum++;
4252                        offset += root->sectorsize;
4253                }
4254
4255                btrfs_add_ordered_sum(inode, ordered, sums);
4256        }
4257        btrfs_put_ordered_extent(ordered);
4258        return ret;
4259}
4260
4261void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4262                           struct btrfs_root *root, struct extent_buffer *buf,
4263                           struct extent_buffer *cow)
4264{
4265        struct reloc_control *rc;
4266        struct backref_node *node;
4267        int first_cow = 0;
4268        int level;
4269        int ret;
4270
4271        rc = root->fs_info->reloc_ctl;
4272        if (!rc)
4273                return;
4274
4275        BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
4276               root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
4277
4278        level = btrfs_header_level(buf);
4279        if (btrfs_header_generation(buf) <=
4280            btrfs_root_last_snapshot(&root->root_item))
4281                first_cow = 1;
4282
4283        if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
4284            rc->create_reloc_tree) {
4285                WARN_ON(!first_cow && level == 0);
4286
4287                node = rc->backref_cache.path[level];
4288                BUG_ON(node->bytenr != buf->start &&
4289                       node->new_bytenr != buf->start);
4290
4291                drop_node_buffer(node);
4292                extent_buffer_get(cow);
4293                node->eb = cow;
4294                node->new_bytenr = cow->start;
4295
4296                if (!node->pending) {
4297                        list_move_tail(&node->list,
4298                                       &rc->backref_cache.pending[level]);
4299                        node->pending = 1;
4300                }
4301
4302                if (first_cow)
4303                        __mark_block_processed(rc, node);
4304
4305                if (first_cow && level > 0)
4306                        rc->nodes_relocated += buf->len;
4307        }
4308
4309        if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) {
4310                ret = replace_file_extents(trans, rc, root, cow);
4311                BUG_ON(ret);
4312        }
4313}
4314
4315/*
4316 * called before creating snapshot. it calculates metadata reservation
4317 * requried for relocating tree blocks in the snapshot
4318 */
4319void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
4320                              struct btrfs_pending_snapshot *pending,
4321                              u64 *bytes_to_reserve)
4322{
4323        struct btrfs_root *root;
4324        struct reloc_control *rc;
4325
4326        root = pending->root;
4327        if (!root->reloc_root)
4328                return;
4329
4330        rc = root->fs_info->reloc_ctl;
4331        if (!rc->merge_reloc_tree)
4332                return;
4333
4334        root = root->reloc_root;
4335        BUG_ON(btrfs_root_refs(&root->root_item) == 0);
4336        /*
4337         * relocation is in the stage of merging trees. the space
4338         * used by merging a reloc tree is twice the size of
4339         * relocated tree nodes in the worst case. half for cowing
4340         * the reloc tree, half for cowing the fs tree. the space
4341         * used by cowing the reloc tree will be freed after the
4342         * tree is dropped. if we create snapshot, cowing the fs
4343         * tree may use more space than it frees. so we need
4344         * reserve extra space.
4345         */
4346        *bytes_to_reserve += rc->nodes_relocated;
4347}
4348
4349/*
4350 * called after snapshot is created. migrate block reservation
4351 * and create reloc root for the newly created snapshot
4352 */
4353void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4354                               struct btrfs_pending_snapshot *pending)
4355{
4356        struct btrfs_root *root = pending->root;
4357        struct btrfs_root *reloc_root;
4358        struct btrfs_root *new_root;
4359        struct reloc_control *rc;
4360        int ret;
4361
4362        if (!root->reloc_root)
4363                return;
4364
4365        rc = root->fs_info->reloc_ctl;
4366        rc->merging_rsv_size += rc->nodes_relocated;
4367
4368        if (rc->merge_reloc_tree) {
4369                ret = btrfs_block_rsv_migrate(&pending->block_rsv,
4370                                              rc->block_rsv,
4371                                              rc->nodes_relocated);
4372                BUG_ON(ret);
4373        }
4374
4375        new_root = pending->snap;
4376        reloc_root = create_reloc_root(trans, root->reloc_root,
4377                                       new_root->root_key.objectid);
4378
4379        __add_reloc_root(reloc_root);
4380        new_root->reloc_root = reloc_root;
4381
4382        if (rc->create_reloc_tree) {
4383                ret = clone_backref_node(trans, rc, root, reloc_root);
4384                BUG_ON(ret);
4385        }
4386}
4387