linux/fs/btrfs/backref.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 */

#include <linux/mm.h>
#include <linux/rbtree.h>
#include <trace/events/btrfs.h>
#include "ctree.h"
#include "disk-io.h"
#include "backref.h"
#include "ulist.h"
#include "transaction.h"
#include "delayed-ref.h"
#include "locking.h"
#include "misc.h"

/* Just an arbitrary number so we can be sure this happened */
#define BACKREF_FOUND_SHARED 6

struct extent_inode_elem {
        u64 inum;
        u64 offset;
        struct extent_inode_elem *next;
};

static int check_extent_in_eb(const struct btrfs_key *key,
                              const struct extent_buffer *eb,
                              const struct btrfs_file_extent_item *fi,
                              u64 extent_item_pos,
                              struct extent_inode_elem **eie,
                              bool ignore_offset)
{
        u64 offset = 0;
        struct extent_inode_elem *e;

        if (!ignore_offset &&
            !btrfs_file_extent_compression(eb, fi) &&
            !btrfs_file_extent_encryption(eb, fi) &&
            !btrfs_file_extent_other_encoding(eb, fi)) {
                u64 data_offset;
                u64 data_len;

                data_offset = btrfs_file_extent_offset(eb, fi);
                data_len = btrfs_file_extent_num_bytes(eb, fi);

                if (extent_item_pos < data_offset ||
                    extent_item_pos >= data_offset + data_len)
                        return 1;
                offset = extent_item_pos - data_offset;
        }

        e = kmalloc(sizeof(*e), GFP_NOFS);
        if (!e)
                return -ENOMEM;

        e->next = *eie;
        e->inum = key->objectid;
        e->offset = key->offset + offset;
        *eie = e;

        return 0;
}
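
/*
 * Worked example for the offset math above (illustrative numbers, not
 * taken from any real filesystem): suppose a file extent item at file
 * offset key->offset == 64K references our extent with
 * data_offset == 4K and data_len == 8K.  Then only extent_item_pos
 * values in [4K, 12K) match this item, and for extent_item_pos == 6K
 * the new extent_inode_elem records the inode offset
 *
 *      e->offset = key->offset + (6K - 4K) == 66K;
 */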

static void free_inode_elem_list(struct extent_inode_elem *eie)
{
        struct extent_inode_elem *eie_next;

        for (; eie; eie = eie_next) {
                eie_next = eie->next;
                kfree(eie);
        }
}

static int find_extent_in_eb(const struct extent_buffer *eb,
                             u64 wanted_disk_byte, u64 extent_item_pos,
                             struct extent_inode_elem **eie,
                             bool ignore_offset)
{
        u64 disk_byte;
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        int slot;
        int nritems;
        int extent_type;
        int ret;

        /*
         * From the shared data ref, we only have the leaf but we need
         * the key.  Thus, we must look into all items and see whether we
         * find one (or some) with a reference to our extent item.
         */
        nritems = btrfs_header_nritems(eb);
        for (slot = 0; slot < nritems; ++slot) {
                btrfs_item_key_to_cpu(eb, &key, slot);
                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;
                fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
                extent_type = btrfs_file_extent_type(eb, fi);
                if (extent_type == BTRFS_FILE_EXTENT_INLINE)
                        continue;
                /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
                disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
                if (disk_byte != wanted_disk_byte)
                        continue;

                ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie, ignore_offset);
                if (ret < 0)
                        return ret;
        }

        return 0;
}

struct preftree {
        struct rb_root_cached root;
        unsigned int count;
};

#define PREFTREE_INIT   { .root = RB_ROOT_CACHED, .count = 0 }

struct preftrees {
        struct preftree direct;    /* BTRFS_SHARED_[DATA|BLOCK]_REF_KEY */
        struct preftree indirect;  /* BTRFS_[TREE_BLOCK|EXTENT_DATA]_REF_KEY */
        struct preftree indirect_missing_keys;
};
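
/*
 * A caller declares all three trees together and initializes each with
 * PREFTREE_INIT, as find_parent_nodes() does below.  Illustrative sketch:
 *
 *      struct preftrees preftrees = {
 *              .direct = PREFTREE_INIT,
 *              .indirect = PREFTREE_INIT,
 *              .indirect_missing_keys = PREFTREE_INIT
 *      };
 */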

/*
 * Checks for a shared extent during backref search.
 *
 * The share_count tracks prelim_refs (direct and indirect) having a
 * ref->count >0:
 *  - incremented when a ref->count transitions to >0
 *  - decremented when a ref->count transitions to <1
 */
struct share_check {
        u64 root_objectid;
        u64 inum;
        int share_count;
};

static inline int extent_is_shared(struct share_check *sc)
{
        return (sc && sc->share_count > 1) ? BACKREF_FOUND_SHARED : 0;
}
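
/*
 * Hedged usage sketch: a caller that wants to know whether an extent of
 * a given inode is shared (e.g. btrfs_check_shared(), outside this
 * excerpt) would typically set up the check like this before walking
 * the backrefs:
 *
 *      struct share_check shared = {
 *              .root_objectid = root->root_key.objectid,
 *              .inum = inum,
 *              .share_count = 0,
 *      };
 *
 * Any reference from a second root or inode then pushes share_count
 * above 1, and extent_is_shared() reports BACKREF_FOUND_SHARED.
 */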

static struct kmem_cache *btrfs_prelim_ref_cache;

int __init btrfs_prelim_ref_init(void)
{
        btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
                                        sizeof(struct prelim_ref),
                                        0,
                                        SLAB_MEM_SPREAD,
                                        NULL);
        if (!btrfs_prelim_ref_cache)
                return -ENOMEM;
        return 0;
}

void __cold btrfs_prelim_ref_exit(void)
{
        kmem_cache_destroy(btrfs_prelim_ref_cache);
}

static void free_pref(struct prelim_ref *ref)
{
        kmem_cache_free(btrfs_prelim_ref_cache, ref);
}

/*
 * Return 0 when both refs are for the same block (and can be merged).
 * A -1 return indicates ref1 is a 'lower' block than ref2, while 1
 * indicates a 'higher' block.
 */
static int prelim_ref_compare(struct prelim_ref *ref1,
                              struct prelim_ref *ref2)
{
        if (ref1->level < ref2->level)
                return -1;
        if (ref1->level > ref2->level)
                return 1;
        if (ref1->root_id < ref2->root_id)
                return -1;
        if (ref1->root_id > ref2->root_id)
                return 1;
        if (ref1->key_for_search.type < ref2->key_for_search.type)
                return -1;
        if (ref1->key_for_search.type > ref2->key_for_search.type)
                return 1;
        if (ref1->key_for_search.objectid < ref2->key_for_search.objectid)
                return -1;
        if (ref1->key_for_search.objectid > ref2->key_for_search.objectid)
                return 1;
        if (ref1->key_for_search.offset < ref2->key_for_search.offset)
                return -1;
        if (ref1->key_for_search.offset > ref2->key_for_search.offset)
                return 1;
        if (ref1->parent < ref2->parent)
                return -1;
        if (ref1->parent > ref2->parent)
                return 1;

        return 0;
}
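
/*
 * Illustrative consequence of the comparison order above: two refs that
 * agree on level, root_id and key_for_search but differ in ->parent
 * compare as distinct, so they are kept as separate tree entries rather
 * than being merged by prelim_ref_insert() below.  Only refs that
 * compare equal on every field are merged (and their counts summed).
 */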

static void update_share_count(struct share_check *sc, int oldcount,
                               int newcount)
{
        if ((!sc) || (oldcount == 0 && newcount < 1))
                return;

        if (oldcount > 0 && newcount < 1)
                sc->share_count--;
        else if (oldcount < 1 && newcount > 0)
                sc->share_count++;
}
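
/*
 * Worked example (hypothetical counts): merging a delayed DROP ref whose
 * contribution is -1 into an existing ref with count 1 results in
 * update_share_count(sc, 1, 0), which decrements share_count.  A fresh
 * ref transitioning 0 -> 1 results in update_share_count(sc, 0, 1),
 * which increments it.  Transitions that start and stay at or below
 * zero are ignored.
 */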

/*
 * Add @newref to the @root rbtree, merging identical refs.
 *
 * Callers should assume that newref has been freed after calling.
 */
static void prelim_ref_insert(const struct btrfs_fs_info *fs_info,
                              struct preftree *preftree,
                              struct prelim_ref *newref,
                              struct share_check *sc)
{
        struct rb_root_cached *root;
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct prelim_ref *ref;
        int result;
        bool leftmost = true;

        root = &preftree->root;
        p = &root->rb_root.rb_node;

        while (*p) {
                parent = *p;
                ref = rb_entry(parent, struct prelim_ref, rbnode);
                result = prelim_ref_compare(ref, newref);
                if (result < 0) {
                        p = &(*p)->rb_left;
                } else if (result > 0) {
                        p = &(*p)->rb_right;
                        leftmost = false;
                } else {
                        /* Identical refs, merge them and free @newref */
                        struct extent_inode_elem *eie = ref->inode_list;

                        while (eie && eie->next)
                                eie = eie->next;

                        if (!eie)
                                ref->inode_list = newref->inode_list;
                        else
                                eie->next = newref->inode_list;
                        trace_btrfs_prelim_ref_merge(fs_info, ref, newref,
                                                     preftree->count);
                        /*
                         * A delayed ref can have newref->count < 0.
                         * The ref->count is updated to follow any
                         * BTRFS_[ADD|DROP]_DELAYED_REF actions.
                         */
                        update_share_count(sc, ref->count,
                                           ref->count + newref->count);
                        ref->count += newref->count;
                        free_pref(newref);
                        return;
                }
        }

        update_share_count(sc, 0, newref->count);
        preftree->count++;
        trace_btrfs_prelim_ref_insert(fs_info, newref, NULL, preftree->count);
        rb_link_node(&newref->rbnode, parent, p);
        rb_insert_color_cached(&newref->rbnode, root, leftmost);
}

/*
 * Release the entire tree.  We don't care about internal consistency so
 * just free everything and then reset the tree root.
 */
static void prelim_release(struct preftree *preftree)
{
        struct prelim_ref *ref, *next_ref;

        rbtree_postorder_for_each_entry_safe(ref, next_ref,
                                             &preftree->root.rb_root, rbnode)
                free_pref(ref);

        preftree->root = RB_ROOT_CACHED;
        preftree->count = 0;
}

/*
 * the rules for all callers of this function are:
 * - obtaining the parent is the goal
 * - if you add a key, you must know that it is a correct key
 * - if you cannot add the parent or a correct key, then we will look into the
 *   block later to set a correct key
 *
 * delayed refs
 * ============
 *        backref type | shared | indirect | shared | indirect
 * information         |   tree |     tree |   data |     data
 * --------------------+--------+----------+--------+----------
 *      parent logical |    y   |     -    |    -   |     -
 *      key to resolve |    -   |     y    |    y   |     y
 *  tree block logical |    -   |     -    |    -   |     -
 *  root for resolving |    y   |     y    |    y   |     y
 *
 * - column 1:       we have the parent -> done
 * - column 2, 3, 4: we use the key to find the parent
 *
 * on disk refs (inline or keyed)
 * ==============================
 *        backref type | shared | indirect | shared | indirect
 * information         |   tree |     tree |   data |     data
 * --------------------+--------+----------+--------+----------
 *      parent logical |    y   |     -    |    y   |     -
 *      key to resolve |    -   |     -    |    -   |     y
 *  tree block logical |    y   |     y    |    y   |     y
 *  root for resolving |    -   |     y    |    y   |     y
 *
 * - column 1, 3: we have the parent -> done
 * - column 2:    we take the first key from the block to find the parent
 *                (see add_missing_keys)
 * - column 4:    we use the key to find the parent
 *
 * additional information that's available but not required to find the parent
 * block might help in merging entries to gain some speed.
 */
static int add_prelim_ref(const struct btrfs_fs_info *fs_info,
                          struct preftree *preftree, u64 root_id,
                          const struct btrfs_key *key, int level, u64 parent,
                          u64 wanted_disk_byte, int count,
                          struct share_check *sc, gfp_t gfp_mask)
{
        struct prelim_ref *ref;

        if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
                return 0;

        ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
        if (!ref)
                return -ENOMEM;

        ref->root_id = root_id;
        if (key)
                ref->key_for_search = *key;
        else
                memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));

        ref->inode_list = NULL;
        ref->level = level;
        ref->count = count;
        ref->parent = parent;
        ref->wanted_disk_byte = wanted_disk_byte;
        prelim_ref_insert(fs_info, preftree, ref, sc);
        return extent_is_shared(sc);
}

/* direct refs use root == 0, key == NULL */
static int add_direct_ref(const struct btrfs_fs_info *fs_info,
                          struct preftrees *preftrees, int level, u64 parent,
                          u64 wanted_disk_byte, int count,
                          struct share_check *sc, gfp_t gfp_mask)
{
        return add_prelim_ref(fs_info, &preftrees->direct, 0, NULL, level,
                              parent, wanted_disk_byte, count, sc, gfp_mask);
}

/* indirect refs use parent == 0 */
static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
                            struct preftrees *preftrees, u64 root_id,
                            const struct btrfs_key *key, int level,
                            u64 wanted_disk_byte, int count,
                            struct share_check *sc, gfp_t gfp_mask)
{
        struct preftree *tree = &preftrees->indirect;

        if (!key)
                tree = &preftrees->indirect_missing_keys;
        return add_prelim_ref(fs_info, tree, root_id, key, level, 0,
                              wanted_disk_byte, count, sc, gfp_mask);
}
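
/*
 * Illustrative contrast, mirroring the calls made from add_inline_refs()
 * below: a BTRFS_SHARED_BLOCK_REF_KEY carries the parent block directly,
 *
 *      ret = add_direct_ref(fs_info, preftrees, *info_level + 1, offset,
 *                           bytenr, 1, NULL, GFP_NOFS);
 *
 * while a BTRFS_TREE_BLOCK_REF_KEY only names the owning root, so the
 * parent must be resolved later (key == NULL lands the ref in
 * indirect_missing_keys):
 *
 *      ret = add_indirect_ref(fs_info, preftrees, offset, NULL,
 *                             *info_level + 1, bytenr, 1, NULL, GFP_NOFS);
 */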

static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr)
{
        struct rb_node **p = &preftrees->direct.root.rb_root.rb_node;
        struct rb_node *parent = NULL;
        struct prelim_ref *ref = NULL;
        struct prelim_ref target = {};
        int result;

        target.parent = bytenr;

        while (*p) {
                parent = *p;
                ref = rb_entry(parent, struct prelim_ref, rbnode);
                result = prelim_ref_compare(ref, &target);

                if (result < 0)
                        p = &(*p)->rb_left;
                else if (result > 0)
                        p = &(*p)->rb_right;
                else
                        return 1;
        }
        return 0;
}

static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
                           struct ulist *parents,
                           struct preftrees *preftrees, struct prelim_ref *ref,
                           int level, u64 time_seq, const u64 *extent_item_pos,
                           bool ignore_offset)
{
        int ret = 0;
        int slot;
        struct extent_buffer *eb;
        struct btrfs_key key;
        struct btrfs_key *key_for_search = &ref->key_for_search;
        struct btrfs_file_extent_item *fi;
        struct extent_inode_elem *eie = NULL, *old = NULL;
        u64 disk_byte;
        u64 wanted_disk_byte = ref->wanted_disk_byte;
        u64 count = 0;
        u64 data_offset;

        if (level != 0) {
                eb = path->nodes[level];
                ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
                if (ret < 0)
                        return ret;
                return 0;
        }

        /*
         * 1. We normally enter this function with the path already pointing to
         *    the first item to check. But sometimes, we may enter it with
         *    slot == nritems.
         * 2. We are searching for a normal backref but the bytenr of this
         *    leaf matches a shared data backref.
         * 3. The leaf owner is not equal to the root we are searching.
         *
         * For these cases, go to the next leaf before we continue.
         */
        eb = path->nodes[0];
        if (path->slots[0] >= btrfs_header_nritems(eb) ||
            is_shared_data_backref(preftrees, eb->start) ||
            ref->root_id != btrfs_header_owner(eb)) {
                if (time_seq == SEQ_LAST)
                        ret = btrfs_next_leaf(root, path);
                else
                        ret = btrfs_next_old_leaf(root, path, time_seq);
        }

        while (!ret && count < ref->count) {
                eb = path->nodes[0];
                slot = path->slots[0];

                btrfs_item_key_to_cpu(eb, &key, slot);

                if (key.objectid != key_for_search->objectid ||
                    key.type != BTRFS_EXTENT_DATA_KEY)
                        break;

                /*
                 * We are searching for a normal backref but the bytenr of
                 * this leaf matches a shared data backref, OR the leaf owner
                 * is not equal to the root we are searching for.
                 */
                if (slot == 0 &&
                    (is_shared_data_backref(preftrees, eb->start) ||
                     ref->root_id != btrfs_header_owner(eb))) {
                        if (time_seq == SEQ_LAST)
                                ret = btrfs_next_leaf(root, path);
                        else
                                ret = btrfs_next_old_leaf(root, path, time_seq);
                        continue;
                }
                fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
                disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
                data_offset = btrfs_file_extent_offset(eb, fi);

                if (disk_byte == wanted_disk_byte) {
                        eie = NULL;
                        old = NULL;
                        if (ref->key_for_search.offset == key.offset - data_offset)
                                count++;
                        else
                                goto next;
                        if (extent_item_pos) {
                                ret = check_extent_in_eb(&key, eb, fi,
                                                *extent_item_pos,
                                                &eie, ignore_offset);
                                if (ret < 0)
                                        break;
                        }
                        if (ret > 0)
                                goto next;
                        ret = ulist_add_merge_ptr(parents, eb->start,
                                                  eie, (void **)&old, GFP_NOFS);
                        if (ret < 0)
                                break;
                        if (!ret && extent_item_pos) {
                                while (old->next)
                                        old = old->next;
                                old->next = eie;
                        }
                        eie = NULL;
                }
next:
                if (time_seq == SEQ_LAST)
                        ret = btrfs_next_item(root, path);
                else
                        ret = btrfs_next_old_item(root, path, time_seq);
        }

        if (ret > 0)
                ret = 0;
        else if (ret < 0)
                free_inode_elem_list(eie);
        return ret;
}

/*
 * resolve an indirect backref in the form (root_id, key, level)
 * to a logical address
 */
static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
                                struct btrfs_path *path, u64 time_seq,
                                struct preftrees *preftrees,
                                struct prelim_ref *ref, struct ulist *parents,
                                const u64 *extent_item_pos, bool ignore_offset)
{
        struct btrfs_root *root;
        struct extent_buffer *eb;
        int ret = 0;
        int root_level;
        int level = ref->level;
        struct btrfs_key search_key = ref->key_for_search;

        /*
         * If we're search_commit_root we could possibly be holding locks on
         * other tree nodes.  This happens when qgroups do backref walks when
         * adding new delayed refs.  To deal with this we need to look in cache
         * for the root, and if we don't find it then we need to search the
         * tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage
         * here.
         */
        if (path->search_commit_root)
                root = btrfs_get_fs_root_commit_root(fs_info, path, ref->root_id);
        else
                root = btrfs_get_fs_root(fs_info, ref->root_id, false);
        if (IS_ERR(root)) {
                ret = PTR_ERR(root);
                goto out_free;
        }

        if (!path->search_commit_root &&
            test_bit(BTRFS_ROOT_DELETING, &root->state)) {
                ret = -ENOENT;
                goto out;
        }

        if (btrfs_is_testing(fs_info)) {
                ret = -ENOENT;
                goto out;
        }

        if (path->search_commit_root)
                root_level = btrfs_header_level(root->commit_root);
        else if (time_seq == SEQ_LAST)
                root_level = btrfs_header_level(root->node);
        else
                root_level = btrfs_old_root_level(root, time_seq);

        if (root_level + 1 == level)
                goto out;

        /*
         * We can often find data backrefs with an offset that is too large
         * (>= LLONG_MAX, maximum allowed file offset) due to underflows when
         * subtracting the data offset of the corresponding extent data item
         * from a file's offset. This can happen for example in the clone
         * ioctl.
         *
         * So if we detect such case we set the search key's offset to zero to
         * make sure we will find the matching file extent item at
         * add_all_parents(), otherwise we will miss it because the offset
         * taken from the backref is much larger than the offset of the file
         * extent item. This can make us scan a very large number of file
         * extent items, but at least it will not make us miss any.
         *
         * This is an ugly workaround for a behaviour that should have never
         * existed, but it does and a fix for the clone ioctl would touch a lot
         * of places, cause backwards incompatibility and would not fix the
         * problem for extents cloned with older kernels.
         */
        if (search_key.type == BTRFS_EXTENT_DATA_KEY &&
            search_key.offset >= LLONG_MAX)
                search_key.offset = 0;
        path->lowest_level = level;
        if (time_seq == SEQ_LAST)
                ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
        else
                ret = btrfs_search_old_slot(root, &search_key, path, time_seq);

        btrfs_debug(fs_info,
                "search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)",
                 ref->root_id, level, ref->count, ret,
                 ref->key_for_search.objectid, ref->key_for_search.type,
                 ref->key_for_search.offset);
        if (ret < 0)
                goto out;

        eb = path->nodes[level];
        while (!eb) {
                if (WARN_ON(!level)) {
                        ret = 1;
                        goto out;
                }
                level--;
                eb = path->nodes[level];
        }

        ret = add_all_parents(root, path, parents, preftrees, ref, level,
                              time_seq, extent_item_pos, ignore_offset);
out:
        btrfs_put_root(root);
out_free:
        path->lowest_level = 0;
        btrfs_release_path(path);
        return ret;
}

static struct extent_inode_elem *
unode_aux_to_inode_list(struct ulist_node *node)
{
        if (!node)
                return NULL;
        return (struct extent_inode_elem *)(uintptr_t)node->aux;
}

/*
 * We maintain three separate rbtrees: one for direct refs, one for
 * indirect refs which have a key, and one for indirect refs which do not
 * have a key. Each tree does merge on insertion.
 *
 * Once all of the references are located, we iterate over the tree of
 * indirect refs with missing keys. An appropriate key is located and
 * the ref is moved onto the tree for indirect refs. After all missing
 * keys are thus located, we iterate over the indirect ref tree, resolve
 * each reference, and then insert the resolved reference onto the
 * direct tree (merging there too).
 *
 * New backrefs (i.e., for parent nodes) are added to the appropriate
 * rbtree as they are encountered. The new backrefs are subsequently
 * resolved as above.
 */
static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
                                 struct btrfs_path *path, u64 time_seq,
                                 struct preftrees *preftrees,
                                 const u64 *extent_item_pos,
                                 struct share_check *sc, bool ignore_offset)
{
        int err;
        int ret = 0;
        struct ulist *parents;
        struct ulist_node *node;
        struct ulist_iterator uiter;
        struct rb_node *rnode;

        parents = ulist_alloc(GFP_NOFS);
        if (!parents)
                return -ENOMEM;

        /*
         * We could trade memory usage for performance here by iterating
         * the tree, allocating new refs for each insertion, and then
         * freeing the entire indirect tree when we're done.  In some test
         * cases, the tree can grow quite large (~200k objects).
         */
        while ((rnode = rb_first_cached(&preftrees->indirect.root))) {
                struct prelim_ref *ref;

                ref = rb_entry(rnode, struct prelim_ref, rbnode);
                if (WARN(ref->parent,
                         "BUG: direct ref found in indirect tree")) {
                        ret = -EINVAL;
                        goto out;
                }

                rb_erase_cached(&ref->rbnode, &preftrees->indirect.root);
                preftrees->indirect.count--;

                if (ref->count == 0) {
                        free_pref(ref);
                        continue;
                }

                if (sc && sc->root_objectid &&
                    ref->root_id != sc->root_objectid) {
                        free_pref(ref);
                        ret = BACKREF_FOUND_SHARED;
                        goto out;
                }
                err = resolve_indirect_ref(fs_info, path, time_seq, preftrees,
                                           ref, parents, extent_item_pos,
                                           ignore_offset);
                /*
                 * We can only tolerate -ENOENT; any other error should be
                 * caught and returned directly.
                 */
                if (err == -ENOENT) {
                        prelim_ref_insert(fs_info, &preftrees->direct, ref,
                                          NULL);
                        continue;
                } else if (err) {
                        free_pref(ref);
                        ret = err;
                        goto out;
                }

                /* we put the first parent into the ref at hand */
                ULIST_ITER_INIT(&uiter);
                node = ulist_next(parents, &uiter);
                ref->parent = node ? node->val : 0;
                ref->inode_list = unode_aux_to_inode_list(node);

                /* Add a prelim_ref(s) for any other parent(s). */
                while ((node = ulist_next(parents, &uiter))) {
                        struct prelim_ref *new_ref;

                        new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
                                                   GFP_NOFS);
                        if (!new_ref) {
                                free_pref(ref);
                                ret = -ENOMEM;
                                goto out;
                        }
                        memcpy(new_ref, ref, sizeof(*ref));
                        new_ref->parent = node->val;
                        new_ref->inode_list = unode_aux_to_inode_list(node);
                        prelim_ref_insert(fs_info, &preftrees->direct,
                                          new_ref, NULL);
                }

                /*
                 * Now it's a direct ref, put it in the direct tree. We must
                 * do this last because the ref could be merged/freed here.
                 */
                prelim_ref_insert(fs_info, &preftrees->direct, ref, NULL);

                ulist_reinit(parents);
                cond_resched();
        }
out:
        ulist_free(parents);
        return ret;
}
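
/*
 * Pipeline sketch, mirroring find_parent_nodes() below: missing keys are
 * filled in first, which drains the indirect_missing_keys tree into the
 * indirect tree, and only then are the indirect refs resolved into the
 * direct tree:
 *
 *      ret = add_missing_keys(fs_info, &preftrees, ...);
 *      ...
 *      ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
 *                                  extent_item_pos, sc, ignore_offset);
 *
 * After both steps the indirect trees must be empty, which
 * find_parent_nodes() asserts with WARN_ON(!RB_EMPTY_ROOT(...)).
 */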

/*
 * read tree blocks and add keys where required.
 */
static int add_missing_keys(struct btrfs_fs_info *fs_info,
                            struct preftrees *preftrees, bool lock)
{
        struct prelim_ref *ref;
        struct extent_buffer *eb;
        struct preftree *tree = &preftrees->indirect_missing_keys;
        struct rb_node *node;

        while ((node = rb_first_cached(&tree->root))) {
                ref = rb_entry(node, struct prelim_ref, rbnode);
                rb_erase_cached(node, &tree->root);

                BUG_ON(ref->parent);    /* should not be a direct ref */
                BUG_ON(ref->key_for_search.type);
                BUG_ON(!ref->wanted_disk_byte);

                eb = read_tree_block(fs_info, ref->wanted_disk_byte,
                                     ref->root_id, 0, ref->level - 1, NULL);
                if (IS_ERR(eb)) {
                        free_pref(ref);
                        return PTR_ERR(eb);
                } else if (!extent_buffer_uptodate(eb)) {
                        free_pref(ref);
                        free_extent_buffer(eb);
                        return -EIO;
                }
                if (lock)
                        btrfs_tree_read_lock(eb);
                if (btrfs_header_level(eb) == 0)
                        btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
                else
                        btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
                if (lock)
                        btrfs_tree_read_unlock(eb);
                free_extent_buffer(eb);
                prelim_ref_insert(fs_info, &preftrees->indirect, ref, NULL);
                cond_resched();
        }
        return 0;
}

/*
 * add all currently queued delayed refs from this head whose seq nr is
 * smaller than or equal to seq to the list
 */
static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
                            struct btrfs_delayed_ref_head *head, u64 seq,
                            struct preftrees *preftrees, struct share_check *sc)
{
        struct btrfs_delayed_ref_node *node;
        struct btrfs_delayed_extent_op *extent_op = head->extent_op;
        struct btrfs_key key;
        struct btrfs_key tmp_op_key;
        struct rb_node *n;
        int count;
        int ret = 0;

        if (extent_op && extent_op->update_key)
                btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);

        spin_lock(&head->lock);
        for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
                node = rb_entry(n, struct btrfs_delayed_ref_node,
                                ref_node);
                if (node->seq > seq)
                        continue;

                switch (node->action) {
                case BTRFS_ADD_DELAYED_EXTENT:
                case BTRFS_UPDATE_DELAYED_HEAD:
                        WARN_ON(1);
                        continue;
                case BTRFS_ADD_DELAYED_REF:
                        count = node->ref_mod;
                        break;
                case BTRFS_DROP_DELAYED_REF:
                        count = node->ref_mod * -1;
                        break;
                default:
                        BUG();
                }
                switch (node->type) {
                case BTRFS_TREE_BLOCK_REF_KEY: {
                        /* NORMAL INDIRECT METADATA backref */
                        struct btrfs_delayed_tree_ref *ref;

                        ref = btrfs_delayed_node_to_tree_ref(node);
                        ret = add_indirect_ref(fs_info, preftrees, ref->root,
                                               &tmp_op_key, ref->level + 1,
                                               node->bytenr, count, sc,
                                               GFP_ATOMIC);
                        break;
                }
                case BTRFS_SHARED_BLOCK_REF_KEY: {
                        /* SHARED DIRECT METADATA backref */
                        struct btrfs_delayed_tree_ref *ref;

                        ref = btrfs_delayed_node_to_tree_ref(node);

                        ret = add_direct_ref(fs_info, preftrees, ref->level + 1,
                                             ref->parent, node->bytenr, count,
                                             sc, GFP_ATOMIC);
                        break;
                }
                case BTRFS_EXTENT_DATA_REF_KEY: {
                        /* NORMAL INDIRECT DATA backref */
                        struct btrfs_delayed_data_ref *ref;
                        ref = btrfs_delayed_node_to_data_ref(node);

                        key.objectid = ref->objectid;
                        key.type = BTRFS_EXTENT_DATA_KEY;
                        key.offset = ref->offset;

                        /*
                         * Found an inum that doesn't match our known inum, we
                         * know it's shared.
                         */
                        if (sc && sc->inum && ref->objectid != sc->inum) {
                                ret = BACKREF_FOUND_SHARED;
                                goto out;
                        }

                        ret = add_indirect_ref(fs_info, preftrees, ref->root,
                                               &key, 0, node->bytenr, count, sc,
                                               GFP_ATOMIC);
                        break;
                }
                case BTRFS_SHARED_DATA_REF_KEY: {
                        /* SHARED DIRECT FULL backref */
                        struct btrfs_delayed_data_ref *ref;

                        ref = btrfs_delayed_node_to_data_ref(node);

                        ret = add_direct_ref(fs_info, preftrees, 0, ref->parent,
                                             node->bytenr, count, sc,
                                             GFP_ATOMIC);
                        break;
                }
                default:
                        WARN_ON(1);
                }
                /*
                 * We must ignore BACKREF_FOUND_SHARED until all delayed
                 * refs have been checked.
                 */
                if (ret && (ret != BACKREF_FOUND_SHARED))
                        break;
        }
        if (!ret)
                ret = extent_is_shared(sc);
out:
        spin_unlock(&head->lock);
        return ret;
}

/*
 * add all inline backrefs for bytenr to the list
 *
 * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED.
 */
static int add_inline_refs(const struct btrfs_fs_info *fs_info,
                           struct btrfs_path *path, u64 bytenr,
                           int *info_level, struct preftrees *preftrees,
                           struct share_check *sc)
{
        int ret = 0;
        int slot;
        struct extent_buffer *leaf;
        struct btrfs_key key;
        struct btrfs_key found_key;
        unsigned long ptr;
        unsigned long end;
        struct btrfs_extent_item *ei;
        u64 flags;
        u64 item_size;

        /*
         * enumerate all inline refs
         */
        leaf = path->nodes[0];
        slot = path->slots[0];

        item_size = btrfs_item_size_nr(leaf, slot);
        BUG_ON(item_size < sizeof(*ei));

        ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
        flags = btrfs_extent_flags(leaf, ei);
        btrfs_item_key_to_cpu(leaf, &found_key, slot);

        ptr = (unsigned long)(ei + 1);
        end = (unsigned long)ei + item_size;

        if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
            flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                struct btrfs_tree_block_info *info;

                info = (struct btrfs_tree_block_info *)ptr;
                *info_level = btrfs_tree_block_level(leaf, info);
                ptr += sizeof(struct btrfs_tree_block_info);
                BUG_ON(ptr > end);
        } else if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
                *info_level = found_key.offset;
        } else {
                BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
        }

        while (ptr < end) {
                struct btrfs_extent_inline_ref *iref;
                u64 offset;
                int type;

                iref = (struct btrfs_extent_inline_ref *)ptr;
                type = btrfs_get_extent_inline_ref_type(leaf, iref,
                                                        BTRFS_REF_TYPE_ANY);
                if (type == BTRFS_REF_TYPE_INVALID)
                        return -EUCLEAN;

                offset = btrfs_extent_inline_ref_offset(leaf, iref);

                switch (type) {
                case BTRFS_SHARED_BLOCK_REF_KEY:
                        ret = add_direct_ref(fs_info, preftrees,
                                             *info_level + 1, offset,
                                             bytenr, 1, NULL, GFP_NOFS);
                        break;
                case BTRFS_SHARED_DATA_REF_KEY: {
                        struct btrfs_shared_data_ref *sdref;
                        int count;

                        sdref = (struct btrfs_shared_data_ref *)(iref + 1);
                        count = btrfs_shared_data_ref_count(leaf, sdref);

                        ret = add_direct_ref(fs_info, preftrees, 0, offset,
                                             bytenr, count, sc, GFP_NOFS);
                        break;
                }
                case BTRFS_TREE_BLOCK_REF_KEY:
                        ret = add_indirect_ref(fs_info, preftrees, offset,
                                               NULL, *info_level + 1,
                                               bytenr, 1, NULL, GFP_NOFS);
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY: {
                        struct btrfs_extent_data_ref *dref;
                        int count;
                        u64 root;

                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
                        count = btrfs_extent_data_ref_count(leaf, dref);
                        key.objectid = btrfs_extent_data_ref_objectid(leaf,
                                                                      dref);
                        key.type = BTRFS_EXTENT_DATA_KEY;
                        key.offset = btrfs_extent_data_ref_offset(leaf, dref);

                        if (sc && sc->inum && key.objectid != sc->inum) {
                                ret = BACKREF_FOUND_SHARED;
                                break;
                        }

                        root = btrfs_extent_data_ref_root(leaf, dref);

                        ret = add_indirect_ref(fs_info, preftrees, root,
                                               &key, 0, bytenr, count,
                                               sc, GFP_NOFS);
                        break;
                }
                default:
                        WARN_ON(1);
                }
                if (ret)
                        return ret;
                ptr += btrfs_extent_inline_ref_size(type);
        }

        return 0;
}
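
/*
 * On-disk layout walked above (sketch): the inline references live in
 * the same leaf item as the extent item itself, e.g. for a tree block
 * without SKINNY_METADATA:
 *
 *      [ btrfs_extent_item ][ btrfs_tree_block_info ][ inline ref ]...
 *      ^ei                  ^ptr after the header    ...          ^end
 *
 * For BTRFS_METADATA_ITEM_KEY there is no btrfs_tree_block_info and the
 * level comes from found_key.offset instead.  Each inline ref begins
 * with a btrfs_extent_inline_ref whose size depends on its type, hence
 * the ptr += btrfs_extent_inline_ref_size(type) stride.
 */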

/*
 * add all non-inline backrefs for bytenr to the list
 *
 * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED.
 */
static int add_keyed_refs(struct btrfs_fs_info *fs_info,
                          struct btrfs_path *path, u64 bytenr,
                          int info_level, struct preftrees *preftrees,
                          struct share_check *sc)
{
        struct btrfs_root *extent_root = fs_info->extent_root;
        int ret;
        int slot;
        struct extent_buffer *leaf;
        struct btrfs_key key;

        while (1) {
                ret = btrfs_next_item(extent_root, path);
                if (ret < 0)
                        break;
                if (ret) {
                        ret = 0;
                        break;
                }

                slot = path->slots[0];
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, slot);

                if (key.objectid != bytenr)
                        break;
                if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
                        continue;
                if (key.type > BTRFS_SHARED_DATA_REF_KEY)
                        break;

                switch (key.type) {
                case BTRFS_SHARED_BLOCK_REF_KEY:
                        /* SHARED DIRECT METADATA backref */
                        ret = add_direct_ref(fs_info, preftrees,
                                             info_level + 1, key.offset,
                                             bytenr, 1, NULL, GFP_NOFS);
                        break;
                case BTRFS_SHARED_DATA_REF_KEY: {
                        /* SHARED DIRECT FULL backref */
                        struct btrfs_shared_data_ref *sdref;
                        int count;

                        sdref = btrfs_item_ptr(leaf, slot,
                                              struct btrfs_shared_data_ref);
                        count = btrfs_shared_data_ref_count(leaf, sdref);
                        ret = add_direct_ref(fs_info, preftrees, 0,
                                             key.offset, bytenr, count,
                                             sc, GFP_NOFS);
                        break;
                }
                case BTRFS_TREE_BLOCK_REF_KEY:
                        /* NORMAL INDIRECT METADATA backref */
                        ret = add_indirect_ref(fs_info, preftrees, key.offset,
                                               NULL, info_level + 1, bytenr,
                                               1, NULL, GFP_NOFS);
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY: {
                        /* NORMAL INDIRECT DATA backref */
                        struct btrfs_extent_data_ref *dref;
                        int count;
                        u64 root;

                        dref = btrfs_item_ptr(leaf, slot,
                                              struct btrfs_extent_data_ref);
                        count = btrfs_extent_data_ref_count(leaf, dref);
                        key.objectid = btrfs_extent_data_ref_objectid(leaf,
                                                                      dref);
                        key.type = BTRFS_EXTENT_DATA_KEY;
                        key.offset = btrfs_extent_data_ref_offset(leaf, dref);

                        if (sc && sc->inum && key.objectid != sc->inum) {
                                ret = BACKREF_FOUND_SHARED;
                                break;
                        }

                        root = btrfs_extent_data_ref_root(leaf, dref);
                        ret = add_indirect_ref(fs_info, preftrees, root,
                                               &key, 0, bytenr, count,
                                               sc, GFP_NOFS);
                        break;
                }
                default:
                        WARN_ON(1);
                }
                if (ret)
                        return ret;

        }

        return ret;
}
1143
1144/*
1145 * this adds all existing backrefs (inline backrefs, backrefs and delayed
1146 * refs) for the given bytenr to the refs list, merges duplicates and resolves
1147 * indirect refs to their parent bytenr.
1148 * When roots are found, they're added to the roots list
1149 *
1150 * If time_seq is set to SEQ_LAST, it will not search delayed_refs, and behave
1151 * much like trans == NULL case, the difference only lies in it will not
1152 * commit root.
1153 * The special case is for qgroup to search roots in commit_transaction().
1154 *
1155 * @sc - if !NULL, then immediately return BACKREF_FOUND_SHARED when a
1156 * shared extent is detected.
1157 *
1158 * Otherwise this returns 0 for success and <0 for an error.
1159 *
1160 * If ignore_offset is set to false, only extent refs whose offsets match
1161 * extent_item_pos are returned.  If true, every extent ref is returned
1162 * and extent_item_pos is ignored.
1163 *
1164 * FIXME some caching might speed things up
1165 */
1166static int find_parent_nodes(struct btrfs_trans_handle *trans,
1167                             struct btrfs_fs_info *fs_info, u64 bytenr,
1168                             u64 time_seq, struct ulist *refs,
1169                             struct ulist *roots, const u64 *extent_item_pos,
1170                             struct share_check *sc, bool ignore_offset)
1171{
1172        struct btrfs_key key;
1173        struct btrfs_path *path;
1174        struct btrfs_delayed_ref_root *delayed_refs = NULL;
1175        struct btrfs_delayed_ref_head *head;
1176        int info_level = 0;
1177        int ret;
1178        struct prelim_ref *ref;
1179        struct rb_node *node;
1180        struct extent_inode_elem *eie = NULL;
1181        struct preftrees preftrees = {
1182                .direct = PREFTREE_INIT,
1183                .indirect = PREFTREE_INIT,
1184                .indirect_missing_keys = PREFTREE_INIT
1185        };
1186
1187        key.objectid = bytenr;
1188        key.offset = (u64)-1;
1189        if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
1190                key.type = BTRFS_METADATA_ITEM_KEY;
1191        else
1192                key.type = BTRFS_EXTENT_ITEM_KEY;
1193
1194        path = btrfs_alloc_path();
1195        if (!path)
1196                return -ENOMEM;
1197        if (!trans) {
1198                path->search_commit_root = 1;
1199                path->skip_locking = 1;
1200        }
1201
1202        if (time_seq == SEQ_LAST)
1203                path->skip_locking = 1;
1204
1205        /*
1206         * grab both a lock on the path and a lock on the delayed ref head.
1207         * We need both to get a consistent picture of how the refs look
1208         * at a specified point in time
1209         */
1210again:
1211        head = NULL;
1212
1213        ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
1214        if (ret < 0)
1215                goto out;
1216        BUG_ON(ret == 0);
1217
1218#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
1219        if (trans && likely(trans->type != __TRANS_DUMMY) &&
1220            time_seq != SEQ_LAST) {
1221#else
1222        if (trans && time_seq != SEQ_LAST) {
1223#endif
1224                /*
1225                 * look if there are updates for this ref queued and lock the
1226                 * head
1227                 */
1228                delayed_refs = &trans->transaction->delayed_refs;
1229                spin_lock(&delayed_refs->lock);
1230                head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
1231                if (head) {
1232                        if (!mutex_trylock(&head->mutex)) {
1233                                refcount_inc(&head->refs);
1234                                spin_unlock(&delayed_refs->lock);
1235
1236                                btrfs_release_path(path);
1237
1238                                /*
1239                                 * Mutex was contended, block until it's
1240                                 * released and try again
1241                                 */
1242                                mutex_lock(&head->mutex);
1243                                mutex_unlock(&head->mutex);
1244                                btrfs_put_delayed_ref_head(head);
1245                                goto again;
1246                        }
1247                        spin_unlock(&delayed_refs->lock);
1248                        ret = add_delayed_refs(fs_info, head, time_seq,
1249                                               &preftrees, sc);
1250                        mutex_unlock(&head->mutex);
1251                        if (ret)
1252                                goto out;
1253                } else {
1254                        spin_unlock(&delayed_refs->lock);
1255                }
1256        }
1257
1258        if (path->slots[0]) {
1259                struct extent_buffer *leaf;
1260                int slot;
1261
1262                path->slots[0]--;
1263                leaf = path->nodes[0];
1264                slot = path->slots[0];
1265                btrfs_item_key_to_cpu(leaf, &key, slot);
1266                if (key.objectid == bytenr &&
1267                    (key.type == BTRFS_EXTENT_ITEM_KEY ||
1268                     key.type == BTRFS_METADATA_ITEM_KEY)) {
1269                        ret = add_inline_refs(fs_info, path, bytenr,
1270                                              &info_level, &preftrees, sc);
1271                        if (ret)
1272                                goto out;
1273                        ret = add_keyed_refs(fs_info, path, bytenr, info_level,
1274                                             &preftrees, sc);
1275                        if (ret)
1276                                goto out;
1277                }
1278        }
1279
1280        btrfs_release_path(path);
1281
1282        ret = add_missing_keys(fs_info, &preftrees, path->skip_locking == 0);
1283        if (ret)
1284                goto out;
1285
1286        WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root));
1287
1288        ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
1289                                    extent_item_pos, sc, ignore_offset);
1290        if (ret)
1291                goto out;
1292
1293        WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect.root.rb_root));
1294
1295        /*
1296         * This walks the tree of merged and resolved refs. Tree blocks are
1297         * read in as needed. Unique entries are added to the ulist, and
1298         * the list of found roots is updated.
1299         *
1300         * We release the entire tree in one go before returning.
1301         */
1302        node = rb_first_cached(&preftrees.direct.root);
1303        while (node) {
1304                ref = rb_entry(node, struct prelim_ref, rbnode);
1305                node = rb_next(&ref->rbnode);
1306                /*
1307                 * ref->count < 0 can happen here if there are delayed
1308                 * refs with a node->action of BTRFS_DROP_DELAYED_REF.
1309                 * prelim_ref_insert() relies on this when merging
1310                 * identical refs to keep the overall count correct.
1311                 * prelim_ref_insert() will merge only those refs
1312                 * which compare identically.  Any refs having
1313                 * e.g. different offsets would not be merged,
1314                 * and would retain their original ref->count < 0.
1315                 */
1316                if (roots && ref->count && ref->root_id && ref->parent == 0) {
1317                        if (sc && sc->root_objectid &&
1318                            ref->root_id != sc->root_objectid) {
1319                                ret = BACKREF_FOUND_SHARED;
1320                                goto out;
1321                        }
1322
1323                        /* no parent == root of tree */
1324                        ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
1325                        if (ret < 0)
1326                                goto out;
1327                }
1328                if (ref->count && ref->parent) {
1329                        if (extent_item_pos && !ref->inode_list &&
1330                            ref->level == 0) {
1331                                struct extent_buffer *eb;
1332
1333                                eb = read_tree_block(fs_info, ref->parent, 0,
1334                                                     0, ref->level, NULL);
1335                                if (IS_ERR(eb)) {
1336                                        ret = PTR_ERR(eb);
1337                                        goto out;
1338                                } else if (!extent_buffer_uptodate(eb)) {
1339                                        free_extent_buffer(eb);
1340                                        ret = -EIO;
1341                                        goto out;
1342                                }
1343
1344                                if (!path->skip_locking)
1345                                        btrfs_tree_read_lock(eb);
1346                                ret = find_extent_in_eb(eb, bytenr,
1347                                                        *extent_item_pos, &eie, ignore_offset);
1348                                if (!path->skip_locking)
1349                                        btrfs_tree_read_unlock(eb);
1350                                free_extent_buffer(eb);
1351                                if (ret < 0)
1352                                        goto out;
1353                                ref->inode_list = eie;
1354                        }
1355                        ret = ulist_add_merge_ptr(refs, ref->parent,
1356                                                  ref->inode_list,
1357                                                  (void **)&eie, GFP_NOFS);
1358                        if (ret < 0)
1359                                goto out;
1360                        if (!ret && extent_item_pos) {
1361                                /*
1362                                 * We've recorded that parent, so we must extend
1363                                 * its inode list here.
1364                                 */
1365                                BUG_ON(!eie);
1366                                while (eie->next)
1367                                        eie = eie->next;
1368                                eie->next = ref->inode_list;
1369                        }
1370                        eie = NULL;
1371                }
1372                cond_resched();
1373        }
1374
1375out:
1376        btrfs_free_path(path);
1377
1378        prelim_release(&preftrees.direct);
1379        prelim_release(&preftrees.indirect);
1380        prelim_release(&preftrees.indirect_missing_keys);
1381
1382        if (ret < 0)
1383                free_inode_elem_list(eie);
1384        return ret;
1385}
1386
1387static void free_leaf_list(struct ulist *blocks)
1388{
1389        struct ulist_node *node = NULL;
1390        struct extent_inode_elem *eie;
1391        struct ulist_iterator uiter;
1392
1393        ULIST_ITER_INIT(&uiter);
1394        while ((node = ulist_next(blocks, &uiter))) {
1395                if (!node->aux)
1396                        continue;
1397                eie = unode_aux_to_inode_list(node);
1398                free_inode_elem_list(eie);
1399                node->aux = 0;
1400        }
1401
1402        ulist_free(blocks);
1403}
1404
1405/*
1406 * Finds all leafs with a reference to the specified combination of bytenr and
1407 * offset. The leafs will be stored in the leafs ulist, which must be freed
1408 * with free_leaf_list() so that the inode element lists attached to each
1409 * ulist node via node->aux are released as well.
1410 *
1411 * returns 0 on success, <0 on error
1412 */
1413int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
1414                         struct btrfs_fs_info *fs_info, u64 bytenr,
1415                         u64 time_seq, struct ulist **leafs,
1416                         const u64 *extent_item_pos, bool ignore_offset)
1417{
1418        int ret;
1419
1420        *leafs = ulist_alloc(GFP_NOFS);
1421        if (!*leafs)
1422                return -ENOMEM;
1423
1424        ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
1425                                *leafs, NULL, extent_item_pos, NULL, ignore_offset);
1426        if (ret < 0 && ret != -ENOENT) {
1427                free_leaf_list(*leafs);
1428                return ret;
1429        }
1430
1431        return 0;
1432}
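
/*
 * Illustrative sketch (hypothetical caller, error handling trimmed) of
 * collecting the leaves that reference an extent; each returned node->val is
 * the logical bytenr of one referencing leaf, and use_leaf_bytenr() is a
 * stand-in for the caller's work. A NULL trans is allowed, but the caller is
 * then assumed to hold fs_info->commit_root_sem, as iterate_extent_inodes()
 * below does around its call:
 *
 *        struct ulist *leafs = NULL;
 *        struct ulist_iterator uiter;
 *        struct ulist_node *node;
 *        int ret;
 *
 *        ret = btrfs_find_all_leafs(NULL, fs_info, bytenr, 0, &leafs,
 *                                   NULL, false);
 *        if (ret)
 *                return ret;
 *        ULIST_ITER_INIT(&uiter);
 *        while ((node = ulist_next(leafs, &uiter)))
 *                use_leaf_bytenr(node->val);
 *        free_leaf_list(leafs);
 */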
1433
1434/*
1435 * walk all backrefs for a given extent to find all roots that reference this
1436 * extent. Walking a backref means finding all extents that reference this
1437 * extent and in turn walk the backrefs of those, too. Naturally this is a
1438 * recursive process, but here it is implemented in an iterative fashion: We
1439 * find all referencing extents for the extent in question and put them on a
1440 * list. In turn, we find all referencing extents for those, further appending
1441 * to the list. The way we iterate the list allows adding more elements after
1442 * the current while iterating. The process stops when we reach the end of the
1443 * list. Found roots are added to the roots list.
1444 *
1445 * returns 0 on success, < 0 on error.
1446 */
1447static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
1448                                     struct btrfs_fs_info *fs_info, u64 bytenr,
1449                                     u64 time_seq, struct ulist **roots,
1450                                     bool ignore_offset)
1451{
1452        struct ulist *tmp;
1453        struct ulist_node *node = NULL;
1454        struct ulist_iterator uiter;
1455        int ret;
1456
1457        tmp = ulist_alloc(GFP_NOFS);
1458        if (!tmp)
1459                return -ENOMEM;
1460        *roots = ulist_alloc(GFP_NOFS);
1461        if (!*roots) {
1462                ulist_free(tmp);
1463                return -ENOMEM;
1464        }
1465
1466        ULIST_ITER_INIT(&uiter);
1467        while (1) {
1468                ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
1469                                        tmp, *roots, NULL, NULL, ignore_offset);
1470                if (ret < 0 && ret != -ENOENT) {
1471                        ulist_free(tmp);
1472                        ulist_free(*roots);
1473                        *roots = NULL;
1474                        return ret;
1475                }
1476                node = ulist_next(tmp, &uiter);
1477                if (!node)
1478                        break;
1479                bytenr = node->val;
1480                cond_resched();
1481        }
1482
1483        ulist_free(tmp);
1484        return 0;
1485}
1486
1487int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
1488                         struct btrfs_fs_info *fs_info, u64 bytenr,
1489                         u64 time_seq, struct ulist **roots,
1490                         bool ignore_offset)
1491{
1492        int ret;
1493
1494        if (!trans)
1495                down_read(&fs_info->commit_root_sem);
1496        ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
1497                                        time_seq, roots, ignore_offset);
1498        if (!trans)
1499                up_read(&fs_info->commit_root_sem);
1500        return ret;
1501}
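
/*
 * Illustrative sketch (hypothetical caller, error handling trimmed) of
 * listing every root that references a given extent; each node->val is one
 * root objectid, and use_root_id() is a stand-in for the caller's work.
 * With a NULL trans, btrfs_find_all_roots() takes commit_root_sem itself:
 *
 *        struct ulist *roots = NULL;
 *        struct ulist_iterator uiter;
 *        struct ulist_node *node;
 *        int ret;
 *
 *        ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &roots, false);
 *        if (ret)
 *                return ret;
 *        ULIST_ITER_INIT(&uiter);
 *        while ((node = ulist_next(roots, &uiter)))
 *                use_root_id(node->val);
 *        ulist_free(roots);
 */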
1502
1503/**
1504 * btrfs_check_shared - check if an extent is shared or not
1505 *
1506 * @root:   the root the inode belongs to
1507 * @inum:   inode number of the inode whose extent we are checking
1508 * @bytenr: logical bytenr of the extent we are checking
1509 * @roots:  list of roots this extent is shared among
1510 * @tmp:    temporary list used for iteration
1511 *
1512 * btrfs_check_shared uses the backref walking code but will short
1513 * circuit as soon as it finds a root or inode that doesn't match the
1514 * one passed in. This provides a significant performance benefit for
1515 * callers (such as fiemap) which want to know whether the extent is
1516 * shared but do not need a ref count.
1517 *
1518 * This attempts to attach to the running transaction in order to account for
1519 * delayed refs, but continues on even when no running transaction exists.
1520 *
1521 * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
1522 */
1523int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
1524                struct ulist *roots, struct ulist *tmp)
1525{
1526        struct btrfs_fs_info *fs_info = root->fs_info;
1527        struct btrfs_trans_handle *trans;
1528        struct ulist_iterator uiter;
1529        struct ulist_node *node;
1530        struct seq_list elem = SEQ_LIST_INIT(elem);
1531        int ret = 0;
1532        struct share_check shared = {
1533                .root_objectid = root->root_key.objectid,
1534                .inum = inum,
1535                .share_count = 0,
1536        };
1537
1538        ulist_init(roots);
1539        ulist_init(tmp);
1540
1541        trans = btrfs_join_transaction_nostart(root);
1542        if (IS_ERR(trans)) {
1543                if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
1544                        ret = PTR_ERR(trans);
1545                        goto out;
1546                }
1547                trans = NULL;
1548                down_read(&fs_info->commit_root_sem);
1549        } else {
1550                btrfs_get_tree_mod_seq(fs_info, &elem);
1551        }
1552
1553        ULIST_ITER_INIT(&uiter);
1554        while (1) {
1555                ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
1556                                        roots, NULL, &shared, false);
1557                if (ret == BACKREF_FOUND_SHARED) {
1558                        /* this is the only condition under which we return 1 */
1559                        ret = 1;
1560                        break;
1561                }
1562                if (ret < 0 && ret != -ENOENT)
1563                        break;
1564                ret = 0;
1565                node = ulist_next(tmp, &uiter);
1566                if (!node)
1567                        break;
1568                bytenr = node->val;
1569                shared.share_count = 0;
1570                cond_resched();
1571        }
1572
1573        if (trans) {
1574                btrfs_put_tree_mod_seq(fs_info, &elem);
1575                btrfs_end_transaction(trans);
1576        } else {
1577                up_read(&fs_info->commit_root_sem);
1578        }
1579out:
1580        ulist_release(roots);
1581        ulist_release(tmp);
1582        return ret;
1583}
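
/*
 * Illustrative sketch of a btrfs_check_shared() caller in the style of
 * fiemap (hypothetical; the btrfs_inode, disk_bytenr and flags are assumed
 * to exist in the caller). The two ulists are scratch space only; the
 * function initializes and releases them itself:
 *
 *        struct ulist roots;
 *        struct ulist tmp;
 *        int shared;
 *
 *        shared = btrfs_check_shared(root, btrfs_ino(inode), disk_bytenr,
 *                                    &roots, &tmp);
 *        if (shared < 0)
 *                return shared;
 *        if (shared)
 *                flags |= FIEMAP_EXTENT_SHARED;
 */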
1584
1585int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
1586                          u64 start_off, struct btrfs_path *path,
1587                          struct btrfs_inode_extref **ret_extref,
1588                          u64 *found_off)
1589{
1590        int ret, slot;
1591        struct btrfs_key key;
1592        struct btrfs_key found_key;
1593        struct btrfs_inode_extref *extref;
1594        const struct extent_buffer *leaf;
1595        unsigned long ptr;
1596
1597        key.objectid = inode_objectid;
1598        key.type = BTRFS_INODE_EXTREF_KEY;
1599        key.offset = start_off;
1600
1601        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1602        if (ret < 0)
1603                return ret;
1604
1605        while (1) {
1606                leaf = path->nodes[0];
1607                slot = path->slots[0];
1608                if (slot >= btrfs_header_nritems(leaf)) {
1609                        /*
1610                         * If the item at offset is not found,
1611                         * btrfs_search_slot will point us to the slot
1612                         * where it should be inserted. In our case
1613                         * that will be the slot directly before the
1614                         * next BTRFS_INODE_EXTREF_KEY item. In the case
1615                         * that we're pointing to the last slot in a
1616                         * leaf, we must move one leaf over.
1617                         */
1618                        ret = btrfs_next_leaf(root, path);
1619                        if (ret) {
1620                                if (ret >= 1)
1621                                        ret = -ENOENT;
1622                                break;
1623                        }
1624                        continue;
1625                }
1626
1627                btrfs_item_key_to_cpu(leaf, &found_key, slot);
1628
1629                /*
1630                 * Check that we're still looking at an extended ref key for
1631                 * this particular objectid. If the objectid or the
1632                 * type differs, there are no more to be found in the
1633                 * tree and we can exit.
1634                 */
1635                ret = -ENOENT;
1636                if (found_key.objectid != inode_objectid)
1637                        break;
1638                if (found_key.type != BTRFS_INODE_EXTREF_KEY)
1639                        break;
1640
1641                ret = 0;
1642                ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
1643                extref = (struct btrfs_inode_extref *)ptr;
1644                *ret_extref = extref;
1645                if (found_off)
1646                        *found_off = found_key.offset;
1647                break;
1648        }
1649
1650        return ret;
1651}
1652
1653/*
1654 * This iterates to turn a name (from iref/extref) into a full filesystem
1655 * path. Elements of the path are separated by '/' and the path is
1656 * guaranteed to be 0-terminated. The path is only given within the current
1657 * file system, therefore it never starts with a '/'. The caller is
1658 * responsible for providing "size" bytes in "dest". The dest buffer is
1659 * filled backwards and the start of the resulting string is returned; this
1660 * pointer normally lies within dest.
1661 * In case the path buffer would overflow, the pointer is decremented
1662 * further as if output had been written to the buffer, though no more
1663 * output is actually generated. That way, the caller can determine how much
1664 * space would be required for the path to fit into the buffer. In that
1665 * case, the returned value will be smaller than dest. Callers must check this!
1666 */
1667char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
1668                        u32 name_len, unsigned long name_off,
1669                        struct extent_buffer *eb_in, u64 parent,
1670                        char *dest, u32 size)
1671{
1672        int slot;
1673        u64 next_inum;
1674        int ret;
1675        s64 bytes_left = ((s64)size) - 1;
1676        struct extent_buffer *eb = eb_in;
1677        struct btrfs_key found_key;
1678        struct btrfs_inode_ref *iref;
1679
1680        if (bytes_left >= 0)
1681                dest[bytes_left] = '\0';
1682
1683        while (1) {
1684                bytes_left -= name_len;
1685                if (bytes_left >= 0)
1686                        read_extent_buffer(eb, dest + bytes_left,
1687                                           name_off, name_len);
1688                if (eb != eb_in) {
1689                        if (!path->skip_locking)
1690                                btrfs_tree_read_unlock(eb);
1691                        free_extent_buffer(eb);
1692                }
1693                ret = btrfs_find_item(fs_root, path, parent, 0,
1694                                BTRFS_INODE_REF_KEY, &found_key);
1695                if (ret > 0)
1696                        ret = -ENOENT;
1697                if (ret)
1698                        break;
1699
1700                next_inum = found_key.offset;
1701
1702                /* regular exit ahead */
1703                if (parent == next_inum)
1704                        break;
1705
1706                slot = path->slots[0];
1707                eb = path->nodes[0];
1708                /* make sure we can use eb after releasing the path */
1709                if (eb != eb_in) {
1710                        path->nodes[0] = NULL;
1711                        path->locks[0] = 0;
1712                }
1713                btrfs_release_path(path);
1714                iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1715
1716                name_len = btrfs_inode_ref_name_len(eb, iref);
1717                name_off = (unsigned long)(iref + 1);
1718
1719                parent = next_inum;
1720                --bytes_left;
1721                if (bytes_left >= 0)
1722                        dest[bytes_left] = '/';
1723        }
1724
1725        btrfs_release_path(path);
1726
1727        if (ret)
1728                return ERR_PTR(ret);
1729
1730        return dest + bytes_left;
1731}
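
/*
 * Illustrative sketch of the buffer contract above (hypothetical caller;
 * name_len/name_off would come from an inode ref, as in inode_to_path()
 * further down, and use_path() is a stand-in). A return value below dest
 * means the path was truncated and dest - fspath extra bytes would have
 * been needed; bailing out with -ENAMETOOLONG is just one possible
 * reaction:
 *
 *        char *fspath;
 *
 *        fspath = btrfs_ref_to_path(fs_root, path, name_len, name_off,
 *                                   eb, parent, dest, size);
 *        if (IS_ERR(fspath))
 *                return PTR_ERR(fspath);
 *        if (fspath < dest)
 *                return -ENAMETOOLONG;
 *        use_path(fspath);
 */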
1732
1733/*
1734 * This makes the path point to the extent item for the given logical address.
1735 * Returns 0 on success, storing BTRFS_EXTENT_FLAG_DATA or _TREE_BLOCK in
1736 * *flags_ret for data and tree blocks respectively, and <0 on error.
1737 */
1738int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
1739                        struct btrfs_path *path, struct btrfs_key *found_key,
1740                        u64 *flags_ret)
1741{
1742        int ret;
1743        u64 flags;
1744        u64 size = 0;
1745        u32 item_size;
1746        const struct extent_buffer *eb;
1747        struct btrfs_extent_item *ei;
1748        struct btrfs_key key;
1749
1750        if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
1751                key.type = BTRFS_METADATA_ITEM_KEY;
1752        else
1753                key.type = BTRFS_EXTENT_ITEM_KEY;
1754        key.objectid = logical;
1755        key.offset = (u64)-1;
1756
1757        ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
1758        if (ret < 0)
1759                return ret;
1760
1761        ret = btrfs_previous_extent_item(fs_info->extent_root, path, 0);
1762        if (ret) {
1763                if (ret > 0)
1764                        ret = -ENOENT;
1765                return ret;
1766        }
1767        btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
1768        if (found_key->type == BTRFS_METADATA_ITEM_KEY)
1769                size = fs_info->nodesize;
1770        else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
1771                size = found_key->offset;
1772
1773        if (found_key->objectid > logical ||
1774            found_key->objectid + size <= logical) {
1775                btrfs_debug(fs_info,
1776                        "logical %llu is not within any extent", logical);
1777                return -ENOENT;
1778        }
1779
1780        eb = path->nodes[0];
1781        item_size = btrfs_item_size_nr(eb, path->slots[0]);
1782        BUG_ON(item_size < sizeof(*ei));
1783
1784        ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
1785        flags = btrfs_extent_flags(eb, ei);
1786
1787        btrfs_debug(fs_info,
1788                "logical %llu is at position %llu within the extent (%llu EXTENT_ITEM %llu) flags %#llx size %u",
1789                 logical, logical - found_key->objectid, found_key->objectid,
1790                 found_key->offset, flags, item_size);
1791
1792        WARN_ON(!flags_ret);
1793        if (flags_ret) {
1794                if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1795                        *flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK;
1796                else if (flags & BTRFS_EXTENT_FLAG_DATA)
1797                        *flags_ret = BTRFS_EXTENT_FLAG_DATA;
1798                else
1799                        BUG();
1800                return 0;
1801        }
1802
1803        return -EIO;
1804}
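
/*
 * Illustrative sketch (hypothetical caller, error handling trimmed),
 * mirroring iterate_inodes_from_logical() below: resolve a logical address
 * to its extent item and compute the offset into the extent, which is what
 * data-extent walkers pass around as extent_item_pos:
 *
 *        struct btrfs_key found_key;
 *        u64 flags = 0;
 *        u64 extent_item_pos;
 *        int ret;
 *
 *        ret = extent_from_logical(fs_info, logical, path, &found_key,
 *                                  &flags);
 *        btrfs_release_path(path);
 *        if (ret < 0)
 *                return ret;
 *        if (flags & BTRFS_EXTENT_FLAG_DATA)
 *                extent_item_pos = logical - found_key.objectid;
 */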
1805
1806/*
1807 * Helper function to iterate extent inline refs. ptr must point to a 0 value
1808 * for the first call and may be modified; it is used to track state.
1809 * If more refs exist, 0 is returned and the next call to
1810 * get_extent_inline_ref must pass the modified ptr parameter to get the
1811 * next ref. After the last ref was processed, 1 is returned.
1812 * Returns <0 on error.
1813 */
1814static int get_extent_inline_ref(unsigned long *ptr,
1815                                 const struct extent_buffer *eb,
1816                                 const struct btrfs_key *key,
1817                                 const struct btrfs_extent_item *ei,
1818                                 u32 item_size,
1819                                 struct btrfs_extent_inline_ref **out_eiref,
1820                                 int *out_type)
1821{
1822        unsigned long end;
1823        u64 flags;
1824        struct btrfs_tree_block_info *info;
1825
1826        if (!*ptr) {
1827                /* first call */
1828                flags = btrfs_extent_flags(eb, ei);
1829                if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1830                        if (key->type == BTRFS_METADATA_ITEM_KEY) {
1831                                /* a skinny metadata extent */
1832                                *out_eiref =
1833                                     (struct btrfs_extent_inline_ref *)(ei + 1);
1834                        } else {
1835                                WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
1836                                info = (struct btrfs_tree_block_info *)(ei + 1);
1837                                *out_eiref =
1838                                   (struct btrfs_extent_inline_ref *)(info + 1);
1839                        }
1840                } else {
1841                        *out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
1842                }
1843                *ptr = (unsigned long)*out_eiref;
1844                if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size)
1845                        return -ENOENT;
1846        }
1847
1848        end = (unsigned long)ei + item_size;
1849        *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
1850        *out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref,
1851                                                     BTRFS_REF_TYPE_ANY);
1852        if (*out_type == BTRFS_REF_TYPE_INVALID)
1853                return -EUCLEAN;
1854
1855        *ptr += btrfs_extent_inline_ref_size(*out_type);
1856        WARN_ON(*ptr > end);
1857        if (*ptr == end)
1858                return 1; /* last */
1859
1860        return 0;
1861}
1862
1863/*
1864 * Reads the tree block backref for an extent. Tree level and root are returned
1865 * through out_level and out_root. ptr must point to a 0 value for the first
1866 * call and may be modified (see the get_extent_inline_ref comment).
1867 * Returns 0 if data was provided, 1 if there was no more data to provide, or
1868 * <0 on error.
1869 */
1870int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
1871                            struct btrfs_key *key, struct btrfs_extent_item *ei,
1872                            u32 item_size, u64 *out_root, u8 *out_level)
1873{
1874        int ret;
1875        int type;
1876        struct btrfs_extent_inline_ref *eiref;
1877
1878        if (*ptr == (unsigned long)-1)
1879                return 1;
1880
1881        while (1) {
1882                ret = get_extent_inline_ref(ptr, eb, key, ei, item_size,
1883                                              &eiref, &type);
1884                if (ret < 0)
1885                        return ret;
1886
1887                if (type == BTRFS_TREE_BLOCK_REF_KEY ||
1888                    type == BTRFS_SHARED_BLOCK_REF_KEY)
1889                        break;
1890
1891                if (ret == 1)
1892                        return 1;
1893        }
1894
1895        /* we can treat both ref types equally here */
1896        *out_root = btrfs_extent_inline_ref_offset(eb, eiref);
1897
1898        if (key->type == BTRFS_EXTENT_ITEM_KEY) {
1899                struct btrfs_tree_block_info *info;
1900
1901                info = (struct btrfs_tree_block_info *)(ei + 1);
1902                *out_level = btrfs_tree_block_level(eb, info);
1903        } else {
1904                ASSERT(key->type == BTRFS_METADATA_ITEM_KEY);
1905                *out_level = (u8)key->offset;
1906        }
1907
1908        if (ret == 1)
1909                *ptr = (unsigned long)-1;
1910
1911        return 0;
1912}
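
/*
 * Illustrative sketch of the iteration contract above (hypothetical caller;
 * eb, found_key, ei and item_size are assumed to describe a tree block
 * extent item, e.g. from extent_from_logical(), and use_backref() is a
 * stand-in for the caller's work):
 *
 *        unsigned long ptr = 0;
 *        u64 root;
 *        u8 level;
 *        int ret;
 *
 *        do {
 *                ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
 *                                              item_size, &root, &level);
 *                if (ret < 0)
 *                        return ret;
 *                if (ret == 0)
 *                        use_backref(root, level);
 *        } while (ret == 0);
 */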
1913
1914static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
1915                             struct extent_inode_elem *inode_list,
1916                             u64 root, u64 extent_item_objectid,
1917                             iterate_extent_inodes_t *iterate, void *ctx)
1918{
1919        struct extent_inode_elem *eie;
1920        int ret = 0;
1921
1922        for (eie = inode_list; eie; eie = eie->next) {
1923                btrfs_debug(fs_info,
1924                            "ref for %llu resolved, key (%llu EXTENT_DATA %llu), root %llu",
1925                            extent_item_objectid, eie->inum,
1926                            eie->offset, root);
1927                ret = iterate(eie->inum, eie->offset, root, ctx);
1928                if (ret) {
1929                        btrfs_debug(fs_info,
1930                                    "stopping iteration for %llu due to ret=%d",
1931                                    extent_item_objectid, ret);
1932                        break;
1933                }
1934        }
1935
1936        return ret;
1937}
1938
1939/*
1940 * Calls iterate() for every inode that references the extent identified by
1941 * the given parameters.
1942 * When the iterator function returns a non-zero value, iteration stops.
1943 */
1944int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1945                                u64 extent_item_objectid, u64 extent_item_pos,
1946                                int search_commit_root,
1947                                iterate_extent_inodes_t *iterate, void *ctx,
1948                                bool ignore_offset)
1949{
1950        int ret;
1951        struct btrfs_trans_handle *trans = NULL;
1952        struct ulist *refs = NULL;
1953        struct ulist *roots = NULL;
1954        struct ulist_node *ref_node = NULL;
1955        struct ulist_node *root_node = NULL;
1956        struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
1957        struct ulist_iterator ref_uiter;
1958        struct ulist_iterator root_uiter;
1959
1960        btrfs_debug(fs_info, "resolving all inodes for extent %llu",
1961                        extent_item_objectid);
1962
1963        if (!search_commit_root) {
1964                trans = btrfs_attach_transaction(fs_info->extent_root);
1965                if (IS_ERR(trans)) {
1966                        if (PTR_ERR(trans) != -ENOENT &&
1967                            PTR_ERR(trans) != -EROFS)
1968                                return PTR_ERR(trans);
1969                        trans = NULL;
1970                }
1971        }
1972
1973        if (trans)
1974                btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1975        else
1976                down_read(&fs_info->commit_root_sem);
1977
1978        ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
1979                                   tree_mod_seq_elem.seq, &refs,
1980                                   &extent_item_pos, ignore_offset);
1981        if (ret)
1982                goto out;
1983
1984        ULIST_ITER_INIT(&ref_uiter);
1985        while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
1986                ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val,
1987                                                tree_mod_seq_elem.seq, &roots,
1988                                                ignore_offset);
1989                if (ret)
1990                        break;
1991                ULIST_ITER_INIT(&root_uiter);
1992                while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
1993                        btrfs_debug(fs_info,
1994                                    "root %llu references leaf %llu, data list %#llx",
1995                                    root_node->val, ref_node->val,
1996                                    ref_node->aux);
1997                        ret = iterate_leaf_refs(fs_info,
1998                                                (struct extent_inode_elem *)
1999                                                (uintptr_t)ref_node->aux,
2000                                                root_node->val,
2001                                                extent_item_objectid,
2002                                                iterate, ctx);
2003                }
2004                ulist_free(roots);
2005        }
2006
2007        free_leaf_list(refs);
2008out:
2009        if (trans) {
2010                btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2011                btrfs_end_transaction(trans);
2012        } else {
2013                up_read(&fs_info->commit_root_sem);
2014        }
2015
2016        return ret;
2017}
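
/*
 * Illustrative sketch of an iterate_extent_inodes_t callback and its use
 * (hypothetical names, error handling trimmed; the 1 selects a commit-root
 * search). Returning non-zero from the callback stops the iteration, as
 * noted above:
 *
 *        static int count_one_ref(u64 inum, u64 offset, u64 root, void *ctx)
 *        {
 *                u64 *refs = ctx;
 *
 *                (*refs)++;
 *                return 0;
 *        }
 *
 *        u64 refs = 0;
 *
 *        ret = iterate_extent_inodes(fs_info, extent_bytenr,
 *                                    extent_item_pos, 1, count_one_ref,
 *                                    &refs, false);
 */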
2018
2019int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
2020                                struct btrfs_path *path,
2021                                iterate_extent_inodes_t *iterate, void *ctx,
2022                                bool ignore_offset)
2023{
2024        int ret;
2025        u64 extent_item_pos;
2026        u64 flags = 0;
2027        struct btrfs_key found_key;
2028        int search_commit_root = path->search_commit_root;
2029
2030        ret = extent_from_logical(fs_info, logical, path, &found_key, &flags);
2031        btrfs_release_path(path);
2032        if (ret < 0)
2033                return ret;
2034        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
2035                return -EINVAL;
2036
2037        extent_item_pos = logical - found_key.objectid;
2038        ret = iterate_extent_inodes(fs_info, found_key.objectid,
2039                                        extent_item_pos, search_commit_root,
2040                                        iterate, ctx, ignore_offset);
2041
2042        return ret;
2043}
2044
2045typedef int (iterate_irefs_t)(u64 parent, u32 name_len, unsigned long name_off,
2046                              struct extent_buffer *eb, void *ctx);
2047
2048static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
2049                              struct btrfs_path *path,
2050                              iterate_irefs_t *iterate, void *ctx)
2051{
2052        int ret = 0;
2053        int slot;
2054        u32 cur;
2055        u32 len;
2056        u32 name_len;
2057        u64 parent = 0;
2058        int found = 0;
2059        struct extent_buffer *eb;
2060        struct btrfs_item *item;
2061        struct btrfs_inode_ref *iref;
2062        struct btrfs_key found_key;
2063
2064        while (!ret) {
2065                ret = btrfs_find_item(fs_root, path, inum,
2066                                parent ? parent + 1 : 0, BTRFS_INODE_REF_KEY,
2067                                &found_key);
2068
2069                if (ret < 0)
2070                        break;
2071                if (ret) {
2072                        ret = found ? 0 : -ENOENT;
2073                        break;
2074                }
2075                ++found;
2076
2077                parent = found_key.offset;
2078                slot = path->slots[0];
2079                eb = btrfs_clone_extent_buffer(path->nodes[0]);
2080                if (!eb) {
2081                        ret = -ENOMEM;
2082                        break;
2083                }
2084                btrfs_release_path(path);
2085
2086                item = btrfs_item_nr(slot);
2087                iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
2088
2089                for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
2090                        name_len = btrfs_inode_ref_name_len(eb, iref);
2091                        /* path must be released before calling iterate()! */
2092                        btrfs_debug(fs_root->fs_info,
2093                                "following ref at offset %u for inode %llu in tree %llu",
2094                                cur, found_key.objectid,
2095                                fs_root->root_key.objectid);
2096                        ret = iterate(parent, name_len,
2097                                      (unsigned long)(iref + 1), eb, ctx);
2098                        if (ret)
2099                                break;
2100                        len = sizeof(*iref) + name_len;
2101                        iref = (struct btrfs_inode_ref *)((char *)iref + len);
2102                }
2103                free_extent_buffer(eb);
2104        }
2105
2106        btrfs_release_path(path);
2107
2108        return ret;
2109}
2110
2111static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
2112                                 struct btrfs_path *path,
2113                                 iterate_irefs_t *iterate, void *ctx)
2114{
2115        int ret;
2116        int slot;
2117        u64 offset = 0;
2118        u64 parent;
2119        int found = 0;
2120        struct extent_buffer *eb;
2121        struct btrfs_inode_extref *extref;
2122        u32 item_size;
2123        u32 cur_offset;
2124        unsigned long ptr;
2125
2126        while (1) {
2127                ret = btrfs_find_one_extref(fs_root, inum, offset, path, &extref,
2128                                            &offset);
2129                if (ret < 0)
2130                        break;
2131                if (ret) {
2132                        ret = found ? 0 : -ENOENT;
2133                        break;
2134                }
2135                ++found;
2136
2137                slot = path->slots[0];
2138                eb = btrfs_clone_extent_buffer(path->nodes[0]);
2139                if (!eb) {
2140                        ret = -ENOMEM;
2141                        break;
2142                }
2143                btrfs_release_path(path);
2144
2145                item_size = btrfs_item_size_nr(eb, slot);
2146                ptr = btrfs_item_ptr_offset(eb, slot);
2147                cur_offset = 0;
2148
2149                while (cur_offset < item_size) {
2150                        u32 name_len;
2151
2152                        extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
2153                        parent = btrfs_inode_extref_parent(eb, extref);
2154                        name_len = btrfs_inode_extref_name_len(eb, extref);
2155                        ret = iterate(parent, name_len,
2156                                      (unsigned long)&extref->name, eb, ctx);
2157                        if (ret)
2158                                break;
2159
2160                        cur_offset += btrfs_inode_extref_name_len(eb, extref);
2161                        cur_offset += sizeof(*extref);
2162                }
2163                free_extent_buffer(eb);
2164
2165                offset++;
2166        }
2167
2168        btrfs_release_path(path);
2169
2170        return ret;
2171}
2172
2173static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
2174                         struct btrfs_path *path, iterate_irefs_t *iterate,
2175                         void *ctx)
2176{
2177        int ret;
2178        int found_refs = 0;
2179
2180        ret = iterate_inode_refs(inum, fs_root, path, iterate, ctx);
2181        if (!ret)
2182                ++found_refs;
2183        else if (ret != -ENOENT)
2184                return ret;
2185
2186        ret = iterate_inode_extrefs(inum, fs_root, path, iterate, ctx);
2187        if (ret == -ENOENT && found_refs)
2188                return 0;
2189
2190        return ret;
2191}
2192
2193/*
2194 * Returns 0 if the path could be dumped (possibly truncated),
2195 * <0 in case of an error.
2196 */
2197static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
2198                         struct extent_buffer *eb, void *ctx)
2199{
2200        struct inode_fs_paths *ipath = ctx;
2201        char *fspath;
2202        char *fspath_min;
2203        int i = ipath->fspath->elem_cnt;
2204        const int s_ptr = sizeof(char *);
2205        u32 bytes_left;
2206
2207        bytes_left = ipath->fspath->bytes_left > s_ptr ?
2208                                        ipath->fspath->bytes_left - s_ptr : 0;
2209
2210        fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr;
2211        fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len,
2212                                   name_off, eb, inum, fspath_min, bytes_left);
2213        if (IS_ERR(fspath))
2214                return PTR_ERR(fspath);
2215
2216        if (fspath > fspath_min) {
2217                ipath->fspath->val[i] = (u64)(unsigned long)fspath;
2218                ++ipath->fspath->elem_cnt;
2219                ipath->fspath->bytes_left = fspath - fspath_min;
2220        } else {
2221                ++ipath->fspath->elem_missed;
2222                ipath->fspath->bytes_missing += fspath_min - fspath;
2223                ipath->fspath->bytes_left = 0;
2224        }
2225
2226        return 0;
2227}
2228
2229/*
2230 * This dumps all file system paths to the inode into the ipath struct,
2231 * provided it has been created large enough. Each path is zero-terminated
2232 * and accessed from ipath->fspath->val[i].
2233 * When it returns, ipath->fspath->elem_cnt paths are available in
2234 * ipath->fspath->val[]. When the allocated space wasn't sufficient, the
2235 * number of missed paths is recorded in ipath->fspath->elem_missed;
2236 * otherwise it's zero. ipath->fspath->bytes_missing holds the number of
2237 * bytes that would have been needed to return all paths.
2238 */
2239int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
2240{
2241        return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path,
2242                             inode_to_path, ipath);
2243}
2244
2245struct btrfs_data_container *init_data_container(u32 total_bytes)
2246{
2247        struct btrfs_data_container *data;
2248        size_t alloc_bytes;
2249
2250        alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
2251        data = kvmalloc(alloc_bytes, GFP_KERNEL);
2252        if (!data)
2253                return ERR_PTR(-ENOMEM);
2254
2255        if (total_bytes >= sizeof(*data)) {
2256                data->bytes_left = total_bytes - sizeof(*data);
2257                data->bytes_missing = 0;
2258        } else {
2259                data->bytes_missing = sizeof(*data) - total_bytes;
2260                data->bytes_left = 0;
2261        }
2262
2263        data->elem_cnt = 0;
2264        data->elem_missed = 0;
2265
2266        return data;
2267}
2268
2269/*
2270 * Allocates space to return multiple file system paths for an inode.
2271 * total_bytes to allocate are passed; note that space usable for actual path
2272 * information will be total_bytes - sizeof(struct btrfs_data_container).
2273 * The returned pointer must be freed with free_ipath() when done.
2274 */
2275struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
2276                                        struct btrfs_path *path)
2277{
2278        struct inode_fs_paths *ifp;
2279        struct btrfs_data_container *fspath;
2280
2281        fspath = init_data_container(total_bytes);
2282        if (IS_ERR(fspath))
2283                return ERR_CAST(fspath);
2284
2285        ifp = kmalloc(sizeof(*ifp), GFP_KERNEL);
2286        if (!ifp) {
2287                kvfree(fspath);
2288                return ERR_PTR(-ENOMEM);
2289        }
2290
2291        ifp->btrfs_path = path;
2292        ifp->fspath = fspath;
2293        ifp->fs_root = fs_root;
2294
2295        return ifp;
2296}
2297
2298void free_ipath(struct inode_fs_paths *ipath)
2299{
2300        if (!ipath)
2301                return;
2302        kvfree(ipath->fspath);
2303        kfree(ipath);
2304}
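
/*
 * Illustrative sketch tying init_ipath(), paths_from_inode() and
 * free_ipath() above together (hypothetical caller with an arbitrary
 * 4096-byte container; error handling trimmed). Each val[i] entry holds a
 * pointer to one zero-terminated path, and use_path() is a stand-in:
 *
 *        struct btrfs_path *path = btrfs_alloc_path();
 *        struct inode_fs_paths *ipath;
 *        int i;
 *
 *        ipath = init_ipath(4096, fs_root, path);
 *        if (IS_ERR(ipath))
 *                return PTR_ERR(ipath);
 *        ret = paths_from_inode(inum, ipath);
 *        for (i = 0; i < ipath->fspath->elem_cnt; i++)
 *                use_path((char *)(unsigned long)ipath->fspath->val[i]);
 *        free_ipath(ipath);
 *        btrfs_free_path(path);
 */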
2305
2306struct btrfs_backref_iter *btrfs_backref_iter_alloc(
2307                struct btrfs_fs_info *fs_info, gfp_t gfp_flag)
2308{
2309        struct btrfs_backref_iter *ret;
2310
2311        ret = kzalloc(sizeof(*ret), gfp_flag);
2312        if (!ret)
2313                return NULL;
2314
2315        ret->path = btrfs_alloc_path();
2316        if (!ret->path) {
2317                kfree(ret);
2318                return NULL;
2319        }
2320
2321        /* Current backref iterator only supports iteration in commit root */
2322        ret->path->search_commit_root = 1;
2323        ret->path->skip_locking = 1;
2324        ret->fs_info = fs_info;
2325
2326        return ret;
2327}
2328
2329int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
2330{
2331        struct btrfs_fs_info *fs_info = iter->fs_info;
2332        struct btrfs_path *path = iter->path;
2333        struct btrfs_extent_item *ei;
2334        struct btrfs_key key;
2335        int ret;
2336
2337        key.objectid = bytenr;
2338        key.type = BTRFS_METADATA_ITEM_KEY;
2339        key.offset = (u64)-1;
2340        iter->bytenr = bytenr;
2341
2342        ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
2343        if (ret < 0)
2344                return ret;
2345        if (ret == 0) {
2346                ret = -EUCLEAN;
2347                goto release;
2348        }
2349        if (path->slots[0] == 0) {
2350                WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
2351                ret = -EUCLEAN;
2352                goto release;
2353        }
2354        path->slots[0]--;
2355
2356        btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2357        if ((key.type != BTRFS_EXTENT_ITEM_KEY &&
2358             key.type != BTRFS_METADATA_ITEM_KEY) || key.objectid != bytenr) {
2359                ret = -ENOENT;
2360                goto release;
2361        }
2362        memcpy(&iter->cur_key, &key, sizeof(key));
2363        iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
2364                                                    path->slots[0]);
2365        iter->end_ptr = (u32)(iter->item_ptr +
2366                        btrfs_item_size_nr(path->nodes[0], path->slots[0]));
2367        ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2368                            struct btrfs_extent_item);
2369
2370        /*
2371         * Only tree backref iteration is supported for now.
2372         *
2373         * This is an extra precaution for non skinny-metadata, where
2374         * EXTENT_ITEM is also used for tree blocks, so we can only use the
2375         * extent flags to determine if it's a tree block.
2376         */
2377        if (btrfs_extent_flags(path->nodes[0], ei) & BTRFS_EXTENT_FLAG_DATA) {
2378                ret = -ENOTSUPP;
2379                goto release;
2380        }
2381        iter->cur_ptr = (u32)(iter->item_ptr + sizeof(*ei));
2382
2383        /* If there is no inline backref, go search for keyed backref */
2384        if (iter->cur_ptr >= iter->end_ptr) {
2385                ret = btrfs_next_item(fs_info->extent_root, path);
2386
2387                /* No inline nor keyed ref */
2388                if (ret > 0) {
2389                        ret = -ENOENT;
2390                        goto release;
2391                }
2392                if (ret < 0)
2393                        goto release;
2394
2395                btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key,
2396                                path->slots[0]);
2397                if (iter->cur_key.objectid != bytenr ||
2398                    (iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
2399                     iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY)) {
2400                        ret = -ENOENT;
2401                        goto release;
2402                }
2403                iter->cur_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
2404                                                           path->slots[0]);
2405                iter->item_ptr = iter->cur_ptr;
2406                iter->end_ptr = (u32)(iter->item_ptr + btrfs_item_size_nr(
2407                                      path->nodes[0], path->slots[0]));
2408        }
2409
2410        return 0;
2411release:
2412        btrfs_backref_iter_release(iter);
2413        return ret;
2414}
2415
2416/*
2417 * Go to the next backref item of the current bytenr, which can be either
2418 * inlined or keyed.
2419 *
2420 * Caller needs to check whether it's an inline ref or not via iter->cur_key.
2421 *
2422 * Return 0 if we get the next backref without problem.
2423 * Return >0 if there is no extra backref for this bytenr.
2424 * Return <0 if something went wrong.
2425 */
2426int btrfs_backref_iter_next(struct btrfs_backref_iter *iter)
2427{
2428        struct extent_buffer *eb = btrfs_backref_get_eb(iter);
2429        struct btrfs_path *path = iter->path;
2430        struct btrfs_extent_inline_ref *iref;
2431        int ret;
2432        u32 size;
2433
2434        if (btrfs_backref_iter_is_inline_ref(iter)) {
2435                /* We're still inside the inline refs */
2436                ASSERT(iter->cur_ptr < iter->end_ptr);
2437
2438                if (btrfs_backref_has_tree_block_info(iter)) {
2439                        /* First tree block info */
2440                        size = sizeof(struct btrfs_tree_block_info);
2441                } else {
2442                        /* Use inline ref type to determine the size */
2443                        int type;
2444
2445                        iref = (struct btrfs_extent_inline_ref *)
2446                                ((unsigned long)iter->cur_ptr);
2447                        type = btrfs_extent_inline_ref_type(eb, iref);
2448
2449                        size = btrfs_extent_inline_ref_size(type);
2450                }
2451                iter->cur_ptr += size;
2452                if (iter->cur_ptr < iter->end_ptr)
2453                        return 0;
2454
2455                /* All inline items iterated, fall through */
2456        }
2457
2458        /* We're at keyed items, there is no inline item, go to the next one */
2459        ret = btrfs_next_item(iter->fs_info->extent_root, iter->path);
2460        if (ret)
2461                return ret;
2462
2463        btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key, path->slots[0]);
2464        if (iter->cur_key.objectid != iter->bytenr ||
2465            (iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
2466             iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY))
2467                return 1;
2468        iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
2469                                        path->slots[0]);
2470        iter->cur_ptr = iter->item_ptr;
2471        iter->end_ptr = iter->item_ptr + (u32)btrfs_item_size_nr(path->nodes[0],
2472                                                path->slots[0]);
2473        return 0;
2474}
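
/*
 * Illustrative sketch of the iterator above (hypothetical caller, error
 * handling trimmed): walk every backref of a tree block, inline and keyed.
 * iter->cur_key tells the two cases apart, use_backref_item() is a stand-in
 * for the caller's work, and btrfs_backref_iter_free() is assumed from the
 * backref header:
 *
 *        struct btrfs_backref_iter *iter;
 *        int ret;
 *
 *        iter = btrfs_backref_iter_alloc(fs_info, GFP_NOFS);
 *        if (!iter)
 *                return -ENOMEM;
 *        ret = btrfs_backref_iter_start(iter, bytenr);
 *        if (ret < 0)
 *                goto out;
 *        do {
 *                use_backref_item(&iter->cur_key, iter->cur_ptr);
 *        } while ((ret = btrfs_backref_iter_next(iter)) == 0);
 *        btrfs_backref_iter_release(iter);
 * out:
 *        btrfs_backref_iter_free(iter);
 */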
2475
2476void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
2477                              struct btrfs_backref_cache *cache, int is_reloc)
2478{
2479        int i;
2480
2481        cache->rb_root = RB_ROOT;
2482        for (i = 0; i < BTRFS_MAX_LEVEL; i++)
2483                INIT_LIST_HEAD(&cache->pending[i]);
2484        INIT_LIST_HEAD(&cache->changed);
2485        INIT_LIST_HEAD(&cache->detached);
2486        INIT_LIST_HEAD(&cache->leaves);
2487        INIT_LIST_HEAD(&cache->pending_edge);
2488        INIT_LIST_HEAD(&cache->useless_node);
2489        cache->fs_info = fs_info;
2490        cache->is_reloc = is_reloc;
2491}
2492
2493struct btrfs_backref_node *btrfs_backref_alloc_node(
2494                struct btrfs_backref_cache *cache, u64 bytenr, int level)
2495{
2496        struct btrfs_backref_node *node;
2497
2498        ASSERT(level >= 0 && level < BTRFS_MAX_LEVEL);
2499        node = kzalloc(sizeof(*node), GFP_NOFS);
2500        if (!node)
2501                return node;
2502
2503        INIT_LIST_HEAD(&node->list);
2504        INIT_LIST_HEAD(&node->upper);
2505        INIT_LIST_HEAD(&node->lower);
2506        RB_CLEAR_NODE(&node->rb_node);
2507        cache->nr_nodes++;
2508        node->level = level;
2509        node->bytenr = bytenr;
2510
2511        return node;
2512}
2513
2514struct btrfs_backref_edge *btrfs_backref_alloc_edge(
2515                struct btrfs_backref_cache *cache)
2516{
2517        struct btrfs_backref_edge *edge;
2518
2519        edge = kzalloc(sizeof(*edge), GFP_NOFS);
2520        if (edge)
2521                cache->nr_edges++;
2522        return edge;
2523}
2524
2525/*
2526 * Drop the backref node from cache, also cleaning up all its
2527 * upper edges and any uncached nodes in the path.
2528 *
2529 * This cleanup happens bottom up, thus the node should either
2530 * be the lowest node in the cache or a detached node.
2531 */
2532void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
2533                                struct btrfs_backref_node *node)
2534{
2535        struct btrfs_backref_node *upper;
2536        struct btrfs_backref_edge *edge;
2537
2538        if (!node)
2539                return;
2540
2541        BUG_ON(!node->lowest && !node->detached);
2542        while (!list_empty(&node->upper)) {
2543                edge = list_entry(node->upper.next, struct btrfs_backref_edge,
2544                                  list[LOWER]);
2545                upper = edge->node[UPPER];
2546                list_del(&edge->list[LOWER]);
2547                list_del(&edge->list[UPPER]);
2548                btrfs_backref_free_edge(cache, edge);
2549
2550                /*
2551                 * Add the node to the leaf node list if no other child
2552                 * block is cached.
2553                 */
2554                if (list_empty(&upper->lower)) {
2555                        list_add_tail(&upper->lower, &cache->leaves);
2556                        upper->lowest = 1;
2557                }
2558        }
2559
2560        btrfs_backref_drop_node(cache, node);
2561}
2562
2563/*
2564 * Release all nodes/edges from current cache
2565 */
2566void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
2567{
2568        struct btrfs_backref_node *node;
2569        int i;
2570
2571        while (!list_empty(&cache->detached)) {
2572                node = list_entry(cache->detached.next,
2573                                  struct btrfs_backref_node, list);
2574                btrfs_backref_cleanup_node(cache, node);
2575        }
2576
2577        while (!list_empty(&cache->leaves)) {
2578                node = list_entry(cache->leaves.next,
2579                                  struct btrfs_backref_node, lower);
2580                btrfs_backref_cleanup_node(cache, node);
2581        }
2582
2583        cache->last_trans = 0;
2584
2585        for (i = 0; i < BTRFS_MAX_LEVEL; i++)
2586                ASSERT(list_empty(&cache->pending[i]));
2587        ASSERT(list_empty(&cache->pending_edge));
2588        ASSERT(list_empty(&cache->useless_node));
2589        ASSERT(list_empty(&cache->changed));
2590        ASSERT(list_empty(&cache->detached));
2591        ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
2592        ASSERT(!cache->nr_nodes);
2593        ASSERT(!cache->nr_edges);
2594}
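
/*
 * Illustrative sketch of the expected cache lifecycle (hypothetical user;
 * in-tree users build the nodes and edges while walking backrefs with the
 * iterator above and the handle_*_tree_backref() helpers below):
 *
 *        struct btrfs_backref_cache cache;
 *
 *        btrfs_backref_init_cache(fs_info, &cache, 0);
 *        node = btrfs_backref_alloc_node(&cache, bytenr, level);
 *        edge = btrfs_backref_alloc_edge(&cache);
 *        ... link the nodes with the edge and walk the graph ...
 *        btrfs_backref_release_cache(&cache);
 */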
2595
2596/*
2597 * Handle direct tree backref
2598 *
2599 * Direct tree backref means, the backref item shows its parent bytenr
2600 * directly. This is for SHARED_BLOCK_REF backref (keyed or inlined).
2601 *
2602 * @ref_key:    The converted backref key.
2603 *              For keyed backref, it's the item key.
2604 *              For inlined backref, objectid is the bytenr,
2605 *              type is btrfs_inline_ref_type, offset is
2606 *              btrfs_inline_ref_offset.
2607 */
2608static int handle_direct_tree_backref(struct btrfs_backref_cache *cache,
2609                                      struct btrfs_key *ref_key,
2610                                      struct btrfs_backref_node *cur)
2611{
2612        struct btrfs_backref_edge *edge;
2613        struct btrfs_backref_node *upper;
2614        struct rb_node *rb_node;
2615
2616        ASSERT(ref_key->type == BTRFS_SHARED_BLOCK_REF_KEY);
2617
2618        /* Only reloc root uses backref pointing to itself */
2619        if (ref_key->objectid == ref_key->offset) {
2620                struct btrfs_root *root;
2621
2622                cur->is_reloc_root = 1;
2623                /* Only reloc backref cache cares about a specific root */
2624                if (cache->is_reloc) {
2625                        root = find_reloc_root(cache->fs_info, cur->bytenr);
2626                        if (!root)
2627                                return -ENOENT;
2628                        cur->root = root;
2629                } else {
2630                        /*
2631                         * For generic purpose backref cache, reloc root node
2632                         * is useless.
2633                         */
2634                        list_add(&cur->list, &cache->useless_node);
2635                }
2636                return 0;
2637        }
2638
2639        edge = btrfs_backref_alloc_edge(cache);
2640        if (!edge)
2641                return -ENOMEM;
2642
2643        rb_node = rb_simple_search(&cache->rb_root, ref_key->offset);
2644        if (!rb_node) {
2645                /* Parent node not yet cached */
2646                upper = btrfs_backref_alloc_node(cache, ref_key->offset,
2647                                           cur->level + 1);
2648                if (!upper) {
2649                        btrfs_backref_free_edge(cache, edge);
2650                        return -ENOMEM;
2651                }
2652
2653                /*
2654                 * Backrefs for the upper level block aren't cached, add the
2655                 * block to the pending list.
2656                 */
2657                list_add_tail(&edge->list[UPPER], &cache->pending_edge);
2658        } else {
2659                /* Parent node already cached */
2660                upper = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
2661                ASSERT(upper->checked);
2662                INIT_LIST_HEAD(&edge->list[UPPER]);
2663        }
2664        btrfs_backref_link_edge(edge, cur, upper, LINK_LOWER);
2665        return 0;
2666}
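
/*
 * Example of the conversion handled above, with hypothetical bytenrs: an
 * inline shared block ref for a block at bytenr 12582912 whose parent node
 * sits at bytenr 8388608 arrives as the key
 * (12582912 SHARED_BLOCK_REF 8388608), so ref_key->offset names the parent
 * directly and no tree search is needed.
 */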
2667
2668/*
2669 * Handle indirect tree backref
2670 *
2671 * Indirect tree backref means we only know which tree the node belongs to.
2672 * We still need to do a tree search to find out the parents. This is for
2673 * TREE_BLOCK_REF backref (keyed or inlined).
2674 *
2675 * @ref_key:    The same as @ref_key in  handle_direct_tree_backref()
2676 * @tree_key:   The first key of this tree block.
2677 * @path:       A clean (released) path, to avoid allocating a path every
2678 *              time the function gets called.
2679 */
2680static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache,
2681                                        struct btrfs_path *path,
2682                                        struct btrfs_key *ref_key,
2683                                        struct btrfs_key *tree_key,
2684                                        struct btrfs_backref_node *cur)
2685{
2686        struct btrfs_fs_info *fs_info = cache->fs_info;
2687        struct btrfs_backref_node *upper;
2688        struct btrfs_backref_node *lower;
2689        struct btrfs_backref_edge *edge;
2690        struct extent_buffer *eb;
2691        struct btrfs_root *root;
2692        struct rb_node *rb_node;
2693        int level;
2694        bool need_check = true;
2695        int ret;
2696
2697        root = btrfs_get_fs_root(fs_info, ref_key->offset, false);
2698        if (IS_ERR(root))
2699                return PTR_ERR(root);
2700        if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
2701                cur->cowonly = 1;
2702
2703        if (btrfs_root_level(&root->root_item) == cur->level) {
2704                /* Tree root */
2705                ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr);
2706                /*
2707                 * For reloc backref cache, we may ignore reloc root.  But for
2708                 * general purpose backref cache, we can't rely on
2709                 * btrfs_should_ignore_reloc_root() as it may conflict with
2710                 * current running relocation and lead to missing root.
2711                 *
2712                 * For the general purpose backref cache, reloc root detection
2713                 * relies completely on direct backrefs (key->offset is the
2714                 * parent bytenr), thus we only do such a check for the reloc cache.
2715                 */
2716                if (btrfs_should_ignore_reloc_root(root) && cache->is_reloc) {
2717                        btrfs_put_root(root);
2718                        list_add(&cur->list, &cache->useless_node);
2719                } else {
2720                        cur->root = root;
2721                }
2722                return 0;
2723        }
2724
2725        level = cur->level + 1;
2726
2727        /* Search the tree to find parent blocks referring to the block */
2728        path->search_commit_root = 1;
2729        path->skip_locking = 1;
2730        path->lowest_level = level;
2731        ret = btrfs_search_slot(NULL, root, tree_key, path, 0, 0);
2732        path->lowest_level = 0;
2733        if (ret < 0) {
2734                btrfs_put_root(root);
2735                return ret;
2736        }
2737        if (ret > 0 && path->slots[level] > 0)
2738                path->slots[level]--;
2739
2740        eb = path->nodes[level];
2741        if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) {
2742                btrfs_err(fs_info,
2743"couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
2744                          cur->bytenr, level - 1, root->root_key.objectid,
2745                          tree_key->objectid, tree_key->type, tree_key->offset);
2746                btrfs_put_root(root);
2747                ret = -ENOENT;
2748                goto out;
2749        }
2750        lower = cur;
2751
2752        /* Add all nodes and edges in the path */
2753        for (; level < BTRFS_MAX_LEVEL; level++) {
2754                if (!path->nodes[level]) {
2755                        ASSERT(btrfs_root_bytenr(&root->root_item) ==
2756                               lower->bytenr);
2757                        /* Same as the previous btrfs_should_ignore_reloc_root() call */
2758                        if (btrfs_should_ignore_reloc_root(root) &&
2759                            cache->is_reloc) {
2760                                btrfs_put_root(root);
2761                                list_add(&lower->list, &cache->useless_node);
2762                        } else {
2763                                lower->root = root;
2764                        }
2765                        break;
2766                }
2767
2768                edge = btrfs_backref_alloc_edge(cache);
2769                if (!edge) {
2770                        btrfs_put_root(root);
2771                        ret = -ENOMEM;
2772                        goto out;
2773                }
2774
2775                eb = path->nodes[level];
2776                rb_node = rb_simple_search(&cache->rb_root, eb->start);
2777                if (!rb_node) {
2778                        upper = btrfs_backref_alloc_node(cache, eb->start,
2779                                                         lower->level + 1);
2780                        if (!upper) {
2781                                btrfs_put_root(root);
2782                                btrfs_backref_free_edge(cache, edge);
2783                                ret = -ENOMEM;
2784                                goto out;
2785                        }
2786                        upper->owner = btrfs_header_owner(eb);
2787                        if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
2788                                upper->cowonly = 1;
2789
2790                        /*
2791                         * If we know the block isn't shared we can avoid
2792                         * checking its backrefs.
2793                         */
2794                        if (btrfs_block_can_be_shared(root, eb))
2795                                upper->checked = 0;
2796                        else
2797                                upper->checked = 1;
2798
2799                        /*
2800                         * Add the block to the pending list if we need to check
2801                         * its backrefs.  We only do this once while walking up a
2802                         * tree as we will catch anything else later on.
2803                         */
2804                        if (!upper->checked && need_check) {
2805                                need_check = false;
2806                                list_add_tail(&edge->list[UPPER],
2807                                              &cache->pending_edge);
2808                        } else {
2809                                if (upper->checked)
2810                                        need_check = true;
2811                                INIT_LIST_HEAD(&edge->list[UPPER]);
2812                        }
2813                } else {
2814                        upper = rb_entry(rb_node, struct btrfs_backref_node,
2815                                         rb_node);
2816                        ASSERT(upper->checked);
2817                        INIT_LIST_HEAD(&edge->list[UPPER]);
2818                        if (!upper->owner)
2819                                upper->owner = btrfs_header_owner(eb);
2820                }
2821                btrfs_backref_link_edge(edge, lower, upper, LINK_LOWER);
2822
2823                if (rb_node) {
2824                        btrfs_put_root(root);
2825                        break;
2826                }
2827                lower = upper;
2828                upper = NULL;
2829        }
2830out:
2831        btrfs_release_path(path);
2832        return ret;
2833}
2834
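/*
 * Standalone toy of the upward linking loop above, under simplified
 * assumptions: a "path" is just an array of block addresses indexed by
 * level, with 0 meaning "no node at this level".  All toy_* names are
 * hypothetical, not kernel API; this only sketches how each block gets
 * linked to its parent one level up until the tree root is reached.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_MAX_LEVEL 8

struct toy_edge {
	uint64_t lower;		/* child block */
	uint64_t upper;		/* parent block */
};

/* Emit one edge per adjacent level pair; path[final level] is the root. */
static int toy_link_path(const uint64_t path[TOY_MAX_LEVEL], int level,
			 struct toy_edge edges[TOY_MAX_LEVEL])
{
	int nr = 0;

	for (; level + 1 < TOY_MAX_LEVEL && path[level + 1]; level++) {
		edges[nr].lower = path[level];
		edges[nr].upper = path[level + 1];
		nr++;
	}
	return nr;
}

int main(void)
{
	const uint64_t path[TOY_MAX_LEVEL] = { 4096, 8192, 12288 };
	struct toy_edge edges[TOY_MAX_LEVEL];
	int nr = toy_link_path(path, 0, edges);

	for (int i = 0; i < nr; i++)
		printf("%llu -> %llu\n",
		       (unsigned long long)edges[i].lower,
		       (unsigned long long)edges[i].upper);
	return 0;
}
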
2835/*
2836 * Add backref node @cur into @cache.
2837 *
2838 * NOTE: Even if the function returns 0, @cur is not yet cached as its upper
2839 *       links aren't yet bi-directional; the caller must finish the linkage
2840 *       with btrfs_backref_finish_upper_links().
2841 *
2842 * @path:       Released path for indirect tree backref lookup
2843 * @iter:       Released backref iter for extent tree search
2844 * @node_key:   The first key of the tree block
2845 */
2846int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
2847                                struct btrfs_path *path,
2848                                struct btrfs_backref_iter *iter,
2849                                struct btrfs_key *node_key,
2850                                struct btrfs_backref_node *cur)
2851{
2852        struct btrfs_fs_info *fs_info = cache->fs_info;
2853        struct btrfs_backref_edge *edge;
2854        struct btrfs_backref_node *exist;
2855        int ret;
2856
2857        ret = btrfs_backref_iter_start(iter, cur->bytenr);
2858        if (ret < 0)
2859                return ret;
2860        /*
2861         * We skip the first btrfs_tree_block_info, as we don't use the key
2862         * stored in it, but fetch it from the tree block
2863         */
2864        if (btrfs_backref_has_tree_block_info(iter)) {
2865                ret = btrfs_backref_iter_next(iter);
2866                if (ret < 0)
2867                        goto out;
2868                /* No extra backref? This means the tree block is corrupted */
2869                if (ret > 0) {
2870                        ret = -EUCLEAN;
2871                        goto out;
2872                }
2873        }
2874        WARN_ON(cur->checked);
2875        if (!list_empty(&cur->upper)) {
2876                /*
2877                 * The backref was added previously when processing a backref
2878                 * of type BTRFS_TREE_BLOCK_REF_KEY
2879                 */
2880                ASSERT(list_is_singular(&cur->upper));
2881                edge = list_entry(cur->upper.next, struct btrfs_backref_edge,
2882                                  list[LOWER]);
2883                ASSERT(list_empty(&edge->list[UPPER]));
2884                exist = edge->node[UPPER];
2885                /*
2886                 * Add the upper level block to the pending list if we need
2887                 * to check its backrefs
2888                 */
2889                if (!exist->checked)
2890                        list_add_tail(&edge->list[UPPER], &cache->pending_edge);
2891        } else {
2892                exist = NULL;
2893        }
2894
2895        for (; ret == 0; ret = btrfs_backref_iter_next(iter)) {
2896                struct extent_buffer *eb;
2897                struct btrfs_key key;
2898                int type;
2899
2900                cond_resched();
2901                eb = btrfs_backref_get_eb(iter);
2902
2903                key.objectid = iter->bytenr;
2904                if (btrfs_backref_iter_is_inline_ref(iter)) {
2905                        struct btrfs_extent_inline_ref *iref;
2906
2907                        /* Update key for inline backref */
2908                        iref = (struct btrfs_extent_inline_ref *)
2909                                ((unsigned long)iter->cur_ptr);
2910                        type = btrfs_get_extent_inline_ref_type(eb, iref,
2911                                                        BTRFS_REF_TYPE_BLOCK);
2912                        if (type == BTRFS_REF_TYPE_INVALID) {
2913                                ret = -EUCLEAN;
2914                                goto out;
2915                        }
2916                        key.type = type;
2917                        key.offset = btrfs_extent_inline_ref_offset(eb, iref);
2918                } else {
2919                        key.type = iter->cur_key.type;
2920                        key.offset = iter->cur_key.offset;
2921                }
2922
2923                /*
2924                 * Parent node found and matches the current inline ref; no
2925                 * need to rebuild this node for this inline ref
2926                 */
2927                if (exist &&
2928                    ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
2929                      exist->owner == key.offset) ||
2930                     (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
2931                      exist->bytenr == key.offset))) {
2932                        exist = NULL;
2933                        continue;
2934                }
2935
2936                /* SHARED_BLOCK_REF means key.offset is the parent bytenr */
2937                if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
2938                        ret = handle_direct_tree_backref(cache, &key, cur);
2939                        if (ret < 0)
2940                                goto out;
2941                        continue;
2942                } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
2943                        ret = -EINVAL;
2944                        btrfs_print_v0_err(fs_info);
2945                        btrfs_handle_fs_error(fs_info, ret, NULL);
2946                        goto out;
2947                } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
2948                        continue;
2949                }
2950
2951                /*
2952                 * key.type == BTRFS_TREE_BLOCK_REF_KEY, so the inline ref
2953                 * offset is the root objectid.  We need to search that tree
2954                 * to find the parent bytenr.
2955                 */
2956                ret = handle_indirect_tree_backref(cache, path, &key, node_key,
2957                                                   cur);
2958                if (ret < 0)
2959                        goto out;
2960        }
2961        ret = 0;
2962        cur->checked = 1;
2963        WARN_ON(exist);
2964out:
2965        btrfs_backref_iter_release(iter);
2966        return ret;
2967}
2968
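/*
 * The start/next/release pattern used by btrfs_backref_add_tree_node()
 * above, as a self-contained toy: 0 means "positioned on an item",
 * > 0 "no more items", < 0 an error.  The toy_iter_* names are
 * hypothetical stand-ins for btrfs_backref_iter_*(), nothing more.
 */
#include <stdio.h>

struct toy_iter {
	int pos;
	int nr_items;
};

static int toy_iter_start(struct toy_iter *it, int nr_items)
{
	it->pos = 0;
	it->nr_items = nr_items;
	return it->nr_items ? 0 : 1;
}

static int toy_iter_next(struct toy_iter *it)
{
	return ++it->pos < it->nr_items ? 0 : 1;
}

int main(void)
{
	struct toy_iter it;
	int ret;

	for (ret = toy_iter_start(&it, 3); ret == 0; ret = toy_iter_next(&it))
		printf("item %d\n", it.pos);
	/* the kernel code pairs iter_start with iter_release; the toy has
	 * nothing to release */
	return ret < 0 ? 1 : 0;
}
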
2969/*
2970 * Finish the upwards linkage created by btrfs_backref_add_tree_node()
2971 */
2972int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
2973                                     struct btrfs_backref_node *start)
2974{
2975        struct list_head *useless_node = &cache->useless_node;
2976        struct btrfs_backref_edge *edge;
2977        struct rb_node *rb_node;
2978        LIST_HEAD(pending_edge);
2979
2980        ASSERT(start->checked);
2981
2982        /* Insert this node into the cache if it's not COW-only */
2983        if (!start->cowonly) {
2984                rb_node = rb_simple_insert(&cache->rb_root, start->bytenr,
2985                                           &start->rb_node);
2986                if (rb_node)
2987                        btrfs_backref_panic(cache->fs_info, start->bytenr,
2988                                            -EEXIST);
2989                list_add_tail(&start->lower, &cache->leaves);
2990        }
2991
2992        /*
2993         * Use breadth-first search to iterate over all related edges.
2994         *
2995         * The starting points are all the upper edges of this node.
2996         */
2997        list_for_each_entry(edge, &start->upper, list[LOWER])
2998                list_add_tail(&edge->list[UPPER], &pending_edge);
2999
3000        while (!list_empty(&pending_edge)) {
3001                struct btrfs_backref_node *upper;
3002                struct btrfs_backref_node *lower;
3003
3004                edge = list_first_entry(&pending_edge,
3005                                struct btrfs_backref_edge, list[UPPER]);
3006                list_del_init(&edge->list[UPPER]);
3007                upper = edge->node[UPPER];
3008                lower = edge->node[LOWER];
3009
3010                /* Parent is detached, no need to keep any edges */
3011                if (upper->detached) {
3012                        list_del(&edge->list[LOWER]);
3013                        btrfs_backref_free_edge(cache, edge);
3014
3015                        /* Lower node is orphan, queue for cleanup */
3016                        if (list_empty(&lower->upper))
3017                                list_add(&lower->list, useless_node);
3018                        continue;
3019                }
3020
3021                /*
3022                 * All new nodes added in the current build_backref_tree() call
3023                 * haven't been linked to the cache rb tree yet.
3024                 * So if we have upper->rb_node populated, this means a cache
3025                 * hit. We only need to link the edge, as @upper and all its
3026                 * parents have already been linked.
3027                 */
3028                if (!RB_EMPTY_NODE(&upper->rb_node)) {
3029                        if (upper->lowest) {
3030                                list_del_init(&upper->lower);
3031                                upper->lowest = 0;
3032                        }
3033
3034                        list_add_tail(&edge->list[UPPER], &upper->lower);
3035                        continue;
3036                }
3037
3038                /* Sanity check, we shouldn't have any unchecked nodes */
3039                if (!upper->checked) {
3040                        ASSERT(0);
3041                        return -EUCLEAN;
3042                }
3043
3044                /* Sanity check, COW-only status must match between child and parent */
3045                if (start->cowonly != upper->cowonly) {
3046                        ASSERT(0);
3047                        return -EUCLEAN;
3048                }
3049
3050                /* Only cache non-COW-only tree blocks (i.e. subvolume tree blocks) */
3051                if (!upper->cowonly) {
3052                        rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
3053                                                   &upper->rb_node);
3054                        if (rb_node) {
3055                                btrfs_backref_panic(cache->fs_info,
3056                                                upper->bytenr, -EEXIST);
3057                                return -EUCLEAN;
3058                        }
3059                }
3060
3061                list_add_tail(&edge->list[UPPER], &upper->lower);
3062
3063                /*
3064                 * Also queue all the parent edges of this uncached node
3065                 * to finish the upper linkage
3066                 */
3067                list_for_each_entry(edge, &upper->upper, list[LOWER])
3068                        list_add_tail(&edge->list[UPPER], &pending_edge);
3069        }
3070        return 0;
3071}
3072
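/*
 * Toy version of the breadth-first walk above, with each node reduced
 * to a single parent pointer and a "cached" flag standing in for
 * !RB_EMPTY_NODE(&node->rb_node).  Hitting a cached upper node stops
 * the ascent, exactly like the cache-hit case in the real function.
 * Purely illustrative; no kernel list API involved.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_node {
	int id;
	bool cached;
	struct toy_node *parent;
};

static void toy_finish_upper_links(struct toy_node *start)
{
	struct toy_node *queue[16];
	int head = 0, tail = 0;

	if (start->parent)
		queue[tail++] = start->parent;

	while (head < tail) {
		struct toy_node *upper = queue[head++];

		if (upper->cached)	/* cache hit: parents already linked */
			continue;
		upper->cached = true;	/* analog of rb_simple_insert() */
		if (upper->parent)
			queue[tail++] = upper->parent;
	}
}

int main(void)
{
	struct toy_node root = { .id = 1 };
	struct toy_node mid = { .id = 2, .parent = &root };
	struct toy_node leaf = { .id = 3, .cached = true, .parent = &mid };

	toy_finish_upper_links(&leaf);
	printf("root cached: %d\n", root.cached);
	return 0;
}
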
3073void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
3074                                 struct btrfs_backref_node *node)
3075{
3076        struct btrfs_backref_node *lower;
3077        struct btrfs_backref_node *upper;
3078        struct btrfs_backref_edge *edge;
3079
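        /*
         * Unlink everything queued as useless so far; nodes that become
         * fully orphaned while the pending edges are freed below are
         * queued again and finally dropped in the last loop.
         */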
3080        while (!list_empty(&cache->useless_node)) {
3081                lower = list_first_entry(&cache->useless_node,
3082                                   struct btrfs_backref_node, list);
3083                list_del_init(&lower->list);
3084        }
3085        while (!list_empty(&cache->pending_edge)) {
3086                edge = list_first_entry(&cache->pending_edge,
3087                                struct btrfs_backref_edge, list[UPPER]);
3088                list_del(&edge->list[UPPER]);
3089                list_del(&edge->list[LOWER]);
3090                lower = edge->node[LOWER];
3091                upper = edge->node[UPPER];
3092                btrfs_backref_free_edge(cache, edge);
3093
3094                /*
3095                 * Lower is no longer linked to any upper backref nodes and
3096                 * isn't in the cache, so queue it to be freed below.
3097                 */
3098                if (list_empty(&lower->upper) &&
3099                    RB_EMPTY_NODE(&lower->rb_node))
3100                        list_add(&lower->list, &cache->useless_node);
3101
3102                if (!RB_EMPTY_NODE(&upper->rb_node))
3103                        continue;
3104
3105                /* Add this node's upper edges to the list to process */
3106                list_for_each_entry(edge, &upper->upper, list[LOWER])
3107                        list_add_tail(&edge->list[UPPER],
3108                                      &cache->pending_edge);
3109                if (list_empty(&upper->upper))
3110                        list_add(&upper->list, &cache->useless_node);
3111        }
3112
3113        while (!list_empty(&cache->useless_node)) {
3114                lower = list_first_entry(&cache->useless_node,
3115                                   struct btrfs_backref_node, list);
3116                list_del_init(&lower->list);
3117                if (lower == node)
3118                        node = NULL;
3119                btrfs_backref_drop_node(cache, lower);
3120        }
3121
3122        btrfs_backref_cleanup_node(cache, node);
3123        ASSERT(list_empty(&cache->useless_node) &&
3124               list_empty(&cache->pending_edge));
3125}
3126