linux/fs/btrfs/backref.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2011 STRATO.  All rights reserved.
   4 */
   5
   6#include <linux/mm.h>
   7#include <linux/rbtree.h>
   8#include <trace/events/btrfs.h>
   9#include "ctree.h"
  10#include "disk-io.h"
  11#include "backref.h"
  12#include "ulist.h"
  13#include "transaction.h"
  14#include "delayed-ref.h"
  15#include "locking.h"
  16
  17/* Just an arbitrary number so we can be sure this happened */
  18#define BACKREF_FOUND_SHARED 6
  19
  20struct extent_inode_elem {
  21        u64 inum;
  22        u64 offset;
  23        struct extent_inode_elem *next;
  24};
  25
  26static int check_extent_in_eb(const struct btrfs_key *key,
  27                              const struct extent_buffer *eb,
  28                              const struct btrfs_file_extent_item *fi,
  29                              u64 extent_item_pos,
  30                              struct extent_inode_elem **eie,
  31                              bool ignore_offset)
  32{
  33        u64 offset = 0;
  34        struct extent_inode_elem *e;
  35
  36        if (!ignore_offset &&
  37            !btrfs_file_extent_compression(eb, fi) &&
  38            !btrfs_file_extent_encryption(eb, fi) &&
  39            !btrfs_file_extent_other_encoding(eb, fi)) {
  40                u64 data_offset;
  41                u64 data_len;
  42
  43                data_offset = btrfs_file_extent_offset(eb, fi);
  44                data_len = btrfs_file_extent_num_bytes(eb, fi);
  45
  46                if (extent_item_pos < data_offset ||
  47                    extent_item_pos >= data_offset + data_len)
  48                        return 1;
  49                offset = extent_item_pos - data_offset;
  50        }
  51
  52        e = kmalloc(sizeof(*e), GFP_NOFS);
  53        if (!e)
  54                return -ENOMEM;
  55
  56        e->next = *eie;
  57        e->inum = key->objectid;
  58        e->offset = key->offset + offset;
  59        *eie = e;
  60
  61        return 0;
  62}
  63
  64static void free_inode_elem_list(struct extent_inode_elem *eie)
  65{
  66        struct extent_inode_elem *eie_next;
  67
  68        for (; eie; eie = eie_next) {
  69                eie_next = eie->next;
  70                kfree(eie);
  71        }
  72}
  73
  74static int find_extent_in_eb(const struct extent_buffer *eb,
  75                             u64 wanted_disk_byte, u64 extent_item_pos,
  76                             struct extent_inode_elem **eie,
  77                             bool ignore_offset)
  78{
  79        u64 disk_byte;
  80        struct btrfs_key key;
  81        struct btrfs_file_extent_item *fi;
  82        int slot;
  83        int nritems;
  84        int extent_type;
  85        int ret;
  86
  87        /*
  88         * from the shared data ref, we only have the leaf but we need
  89         * the key. thus, we must look into all items and see that we
  90         * find one (some) with a reference to our extent item.
  91         */
  92        nritems = btrfs_header_nritems(eb);
  93        for (slot = 0; slot < nritems; ++slot) {
  94                btrfs_item_key_to_cpu(eb, &key, slot);
  95                if (key.type != BTRFS_EXTENT_DATA_KEY)
  96                        continue;
  97                fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
  98                extent_type = btrfs_file_extent_type(eb, fi);
  99                if (extent_type == BTRFS_FILE_EXTENT_INLINE)
 100                        continue;
 101                /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
 102                disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
 103                if (disk_byte != wanted_disk_byte)
 104                        continue;
 105
 106                ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie, ignore_offset);
 107                if (ret < 0)
 108                        return ret;
 109        }
 110
 111        return 0;
 112}
 113
 114struct preftree {
 115        struct rb_root_cached root;
 116        unsigned int count;
 117};
 118
 119#define PREFTREE_INIT   { .root = RB_ROOT_CACHED, .count = 0 }
 120
 121struct preftrees {
 122        struct preftree direct;    /* BTRFS_SHARED_[DATA|BLOCK]_REF_KEY */
 123        struct preftree indirect;  /* BTRFS_[TREE_BLOCK|EXTENT_DATA]_REF_KEY */
 124        struct preftree indirect_missing_keys;
 125};
 126
 127/*
 128 * Checks for a shared extent during backref search.
 129 *
 130 * The share_count tracks prelim_refs (direct and indirect) having a
 131 * ref->count >0:
 132 *  - incremented when a ref->count transitions to >0
 133 *  - decremented when a ref->count transitions to <1
 134 */
 135struct share_check {
 136        u64 root_objectid;
 137        u64 inum;
 138        int share_count;
 139};
 140
 141static inline int extent_is_shared(struct share_check *sc)
 142{
 143        return (sc && sc->share_count > 1) ? BACKREF_FOUND_SHARED : 0;
 144}
 145
 146static struct kmem_cache *btrfs_prelim_ref_cache;
 147
 148int __init btrfs_prelim_ref_init(void)
 149{
 150        btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
 151                                        sizeof(struct prelim_ref),
 152                                        0,
 153                                        SLAB_MEM_SPREAD,
 154                                        NULL);
 155        if (!btrfs_prelim_ref_cache)
 156                return -ENOMEM;
 157        return 0;
 158}
 159
 160void __cold btrfs_prelim_ref_exit(void)
 161{
 162        kmem_cache_destroy(btrfs_prelim_ref_cache);
 163}
 164
 165static void free_pref(struct prelim_ref *ref)
 166{
 167        kmem_cache_free(btrfs_prelim_ref_cache, ref);
 168}
 169
 170/*
 171 * Return 0 when both refs are for the same block (and can be merged).
 172 * A -1 return indicates ref1 is a 'lower' block than ref2, while 1
 173 * indicates a 'higher' block.
 174 */
 175static int prelim_ref_compare(struct prelim_ref *ref1,
 176                              struct prelim_ref *ref2)
 177{
 178        if (ref1->level < ref2->level)
 179                return -1;
 180        if (ref1->level > ref2->level)
 181                return 1;
 182        if (ref1->root_id < ref2->root_id)
 183                return -1;
 184        if (ref1->root_id > ref2->root_id)
 185                return 1;
 186        if (ref1->key_for_search.type < ref2->key_for_search.type)
 187                return -1;
 188        if (ref1->key_for_search.type > ref2->key_for_search.type)
 189                return 1;
 190        if (ref1->key_for_search.objectid < ref2->key_for_search.objectid)
 191                return -1;
 192        if (ref1->key_for_search.objectid > ref2->key_for_search.objectid)
 193                return 1;
 194        if (ref1->key_for_search.offset < ref2->key_for_search.offset)
 195                return -1;
 196        if (ref1->key_for_search.offset > ref2->key_for_search.offset)
 197                return 1;
 198        if (ref1->parent < ref2->parent)
 199                return -1;
 200        if (ref1->parent > ref2->parent)
 201                return 1;
 202
 203        return 0;
 204}
 205
 206static void update_share_count(struct share_check *sc, int oldcount,
 207                               int newcount)
 208{
 209        if ((!sc) || (oldcount == 0 && newcount < 1))
 210                return;
 211
 212        if (oldcount > 0 && newcount < 1)
 213                sc->share_count--;
 214        else if (oldcount < 1 && newcount > 0)
 215                sc->share_count++;
 216}
 217
 218/*
 219 * Add @newref to the @root rbtree, merging identical refs.
 220 *
 221 * Callers should assume that newref has been freed after calling.
 222 */
 223static void prelim_ref_insert(const struct btrfs_fs_info *fs_info,
 224                              struct preftree *preftree,
 225                              struct prelim_ref *newref,
 226                              struct share_check *sc)
 227{
 228        struct rb_root_cached *root;
 229        struct rb_node **p;
 230        struct rb_node *parent = NULL;
 231        struct prelim_ref *ref;
 232        int result;
 233        bool leftmost = true;
 234
 235        root = &preftree->root;
 236        p = &root->rb_root.rb_node;
 237
 238        while (*p) {
 239                parent = *p;
 240                ref = rb_entry(parent, struct prelim_ref, rbnode);
 241                result = prelim_ref_compare(ref, newref);
 242                if (result < 0) {
 243                        p = &(*p)->rb_left;
 244                } else if (result > 0) {
 245                        p = &(*p)->rb_right;
 246                        leftmost = false;
 247                } else {
 248                        /* Identical refs, merge them and free @newref */
 249                        struct extent_inode_elem *eie = ref->inode_list;
 250
 251                        while (eie && eie->next)
 252                                eie = eie->next;
 253
 254                        if (!eie)
 255                                ref->inode_list = newref->inode_list;
 256                        else
 257                                eie->next = newref->inode_list;
 258                        trace_btrfs_prelim_ref_merge(fs_info, ref, newref,
 259                                                     preftree->count);
 260                        /*
 261                         * A delayed ref can have newref->count < 0.
 262                         * The ref->count is updated to follow any
 263                         * BTRFS_[ADD|DROP]_DELAYED_REF actions.
 264                         */
 265                        update_share_count(sc, ref->count,
 266                                           ref->count + newref->count);
 267                        ref->count += newref->count;
 268                        free_pref(newref);
 269                        return;
 270                }
 271        }
 272
 273        update_share_count(sc, 0, newref->count);
 274        preftree->count++;
 275        trace_btrfs_prelim_ref_insert(fs_info, newref, NULL, preftree->count);
 276        rb_link_node(&newref->rbnode, parent, p);
 277        rb_insert_color_cached(&newref->rbnode, root, leftmost);
 278}
 279
 280/*
 281 * Release the entire tree.  We don't care about internal consistency so
 282 * just free everything and then reset the tree root.
 283 */
 284static void prelim_release(struct preftree *preftree)
 285{
 286        struct prelim_ref *ref, *next_ref;
 287
 288        rbtree_postorder_for_each_entry_safe(ref, next_ref,
 289                                             &preftree->root.rb_root, rbnode)
 290                free_pref(ref);
 291
 292        preftree->root = RB_ROOT_CACHED;
 293        preftree->count = 0;
 294}
 295
 296/*
 297 * the rules for all callers of this function are:
 298 * - obtaining the parent is the goal
 299 * - if you add a key, you must know that it is a correct key
 300 * - if you cannot add the parent or a correct key, then we will look into the
 301 *   block later to set a correct key
 302 *
 303 * delayed refs
 304 * ============
 305 *        backref type | shared | indirect | shared | indirect
 306 * information         |   tree |     tree |   data |     data
 307 * --------------------+--------+----------+--------+----------
 308 *      parent logical |    y   |     -    |    -   |     -
 309 *      key to resolve |    -   |     y    |    y   |     y
 310 *  tree block logical |    -   |     -    |    -   |     -
 311 *  root for resolving |    y   |     y    |    y   |     y
 312 *
 313 * - column 1:       we've the parent -> done
 314 * - column 2, 3, 4: we use the key to find the parent
 315 *
 316 * on disk refs (inline or keyed)
 317 * ==============================
 318 *        backref type | shared | indirect | shared | indirect
 319 * information         |   tree |     tree |   data |     data
 320 * --------------------+--------+----------+--------+----------
 321 *      parent logical |    y   |     -    |    y   |     -
 322 *      key to resolve |    -   |     -    |    -   |     y
 323 *  tree block logical |    y   |     y    |    y   |     y
 324 *  root for resolving |    -   |     y    |    y   |     y
 325 *
 326 * - column 1, 3: we've the parent -> done
 327 * - column 2:    we take the first key from the block to find the parent
 328 *                (see add_missing_keys)
 329 * - column 4:    we use the key to find the parent
 330 *
 331 * additional information that's available but not required to find the parent
 332 * block might help in merging entries to gain some speed.
 333 */
 334static int add_prelim_ref(const struct btrfs_fs_info *fs_info,
 335                          struct preftree *preftree, u64 root_id,
 336                          const struct btrfs_key *key, int level, u64 parent,
 337                          u64 wanted_disk_byte, int count,
 338                          struct share_check *sc, gfp_t gfp_mask)
 339{
 340        struct prelim_ref *ref;
 341
 342        if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
 343                return 0;
 344
 345        ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
 346        if (!ref)
 347                return -ENOMEM;
 348
 349        ref->root_id = root_id;
 350        if (key)
 351                ref->key_for_search = *key;
 352        else
 353                memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
 354
 355        ref->inode_list = NULL;
 356        ref->level = level;
 357        ref->count = count;
 358        ref->parent = parent;
 359        ref->wanted_disk_byte = wanted_disk_byte;
 360        prelim_ref_insert(fs_info, preftree, ref, sc);
 361        return extent_is_shared(sc);
 362}
 363
 364/* direct refs use root == 0, key == NULL */
 365static int add_direct_ref(const struct btrfs_fs_info *fs_info,
 366                          struct preftrees *preftrees, int level, u64 parent,
 367                          u64 wanted_disk_byte, int count,
 368                          struct share_check *sc, gfp_t gfp_mask)
 369{
 370        return add_prelim_ref(fs_info, &preftrees->direct, 0, NULL, level,
 371                              parent, wanted_disk_byte, count, sc, gfp_mask);
 372}
 373
 374/* indirect refs use parent == 0 */
 375static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
 376                            struct preftrees *preftrees, u64 root_id,
 377                            const struct btrfs_key *key, int level,
 378                            u64 wanted_disk_byte, int count,
 379                            struct share_check *sc, gfp_t gfp_mask)
 380{
 381        struct preftree *tree = &preftrees->indirect;
 382
 383        if (!key)
 384                tree = &preftrees->indirect_missing_keys;
 385        return add_prelim_ref(fs_info, tree, root_id, key, level, 0,
 386                              wanted_disk_byte, count, sc, gfp_mask);
 387}
 388
 389static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr)
 390{
 391        struct rb_node **p = &preftrees->direct.root.rb_root.rb_node;
 392        struct rb_node *parent = NULL;
 393        struct prelim_ref *ref = NULL;
 394        struct prelim_ref target = {};
 395        int result;
 396
 397        target.parent = bytenr;
 398
 399        while (*p) {
 400                parent = *p;
 401                ref = rb_entry(parent, struct prelim_ref, rbnode);
 402                result = prelim_ref_compare(ref, &target);
 403
 404                if (result < 0)
 405                        p = &(*p)->rb_left;
 406                else if (result > 0)
 407                        p = &(*p)->rb_right;
 408                else
 409                        return 1;
 410        }
 411        return 0;
 412}
 413
 414static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 415                           struct ulist *parents,
 416                           struct preftrees *preftrees, struct prelim_ref *ref,
 417                           int level, u64 time_seq, const u64 *extent_item_pos,
 418                           bool ignore_offset)
 419{
 420        int ret = 0;
 421        int slot;
 422        struct extent_buffer *eb;
 423        struct btrfs_key key;
 424        struct btrfs_key *key_for_search = &ref->key_for_search;
 425        struct btrfs_file_extent_item *fi;
 426        struct extent_inode_elem *eie = NULL, *old = NULL;
 427        u64 disk_byte;
 428        u64 wanted_disk_byte = ref->wanted_disk_byte;
 429        u64 count = 0;
 430        u64 data_offset;
 431
 432        if (level != 0) {
 433                eb = path->nodes[level];
 434                ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
 435                if (ret < 0)
 436                        return ret;
 437                return 0;
 438        }
 439
 440        /*
 441         * 1. We normally enter this function with the path already pointing to
 442         *    the first item to check. But sometimes, we may enter it with
 443         *    slot == nritems.
 444         * 2. We are searching for normal backref but bytenr of this leaf
 445         *    matches shared data backref
 446         * 3. The leaf owner is not equal to the root we are searching
 447         *
 448         * For these cases, go to the next leaf before we continue.
 449         */
 450        eb = path->nodes[0];
 451        if (path->slots[0] >= btrfs_header_nritems(eb) ||
 452            is_shared_data_backref(preftrees, eb->start) ||
 453            ref->root_id != btrfs_header_owner(eb)) {
 454                if (time_seq == SEQ_LAST)
 455                        ret = btrfs_next_leaf(root, path);
 456                else
 457                        ret = btrfs_next_old_leaf(root, path, time_seq);
 458        }
 459
 460        while (!ret && count < ref->count) {
 461                eb = path->nodes[0];
 462                slot = path->slots[0];
 463
 464                btrfs_item_key_to_cpu(eb, &key, slot);
 465
 466                if (key.objectid != key_for_search->objectid ||
 467                    key.type != BTRFS_EXTENT_DATA_KEY)
 468                        break;
 469
 470                /*
 471                 * We are searching for normal backref but bytenr of this leaf
 472                 * matches shared data backref, OR
 473                 * the leaf owner is not equal to the root we are searching for
 474                 */
 475                if (slot == 0 &&
 476                    (is_shared_data_backref(preftrees, eb->start) ||
 477                     ref->root_id != btrfs_header_owner(eb))) {
 478                        if (time_seq == SEQ_LAST)
 479                                ret = btrfs_next_leaf(root, path);
 480                        else
 481                                ret = btrfs_next_old_leaf(root, path, time_seq);
 482                        continue;
 483                }
 484                fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
 485                disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
 486                data_offset = btrfs_file_extent_offset(eb, fi);
 487
 488                if (disk_byte == wanted_disk_byte) {
 489                        eie = NULL;
 490                        old = NULL;
 491                        if (ref->key_for_search.offset == key.offset - data_offset)
 492                                count++;
 493                        else
 494                                goto next;
 495                        if (extent_item_pos) {
 496                                ret = check_extent_in_eb(&key, eb, fi,
 497                                                *extent_item_pos,
 498                                                &eie, ignore_offset);
 499                                if (ret < 0)
 500                                        break;
 501                        }
 502                        if (ret > 0)
 503                                goto next;
 504                        ret = ulist_add_merge_ptr(parents, eb->start,
 505                                                  eie, (void **)&old, GFP_NOFS);
 506                        if (ret < 0)
 507                                break;
 508                        if (!ret && extent_item_pos) {
 509                                while (old->next)
 510                                        old = old->next;
 511                                old->next = eie;
 512                        }
 513                        eie = NULL;
 514                }
 515next:
 516                if (time_seq == SEQ_LAST)
 517                        ret = btrfs_next_item(root, path);
 518                else
 519                        ret = btrfs_next_old_item(root, path, time_seq);
 520        }
 521
 522        if (ret > 0)
 523                ret = 0;
 524        else if (ret < 0)
 525                free_inode_elem_list(eie);
 526        return ret;
 527}
 528
 529/*
 530 * resolve an indirect backref in the form (root_id, key, level)
 531 * to a logical address
 532 */
 533static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 534                                struct btrfs_path *path, u64 time_seq,
 535                                struct preftrees *preftrees,
 536                                struct prelim_ref *ref, struct ulist *parents,
 537                                const u64 *extent_item_pos, bool ignore_offset)
 538{
 539        struct btrfs_root *root;
 540        struct btrfs_key root_key;
 541        struct extent_buffer *eb;
 542        int ret = 0;
 543        int root_level;
 544        int level = ref->level;
 545        struct btrfs_key search_key = ref->key_for_search;
 546
 547        root_key.objectid = ref->root_id;
 548        root_key.type = BTRFS_ROOT_ITEM_KEY;
 549        root_key.offset = (u64)-1;
 550
 551        root = btrfs_get_fs_root(fs_info, &root_key, false);
 552        if (IS_ERR(root)) {
 553                ret = PTR_ERR(root);
 554                goto out_free;
 555        }
 556
 557        if (!path->search_commit_root &&
 558            test_bit(BTRFS_ROOT_DELETING, &root->state)) {
 559                ret = -ENOENT;
 560                goto out;
 561        }
 562
 563        if (btrfs_is_testing(fs_info)) {
 564                ret = -ENOENT;
 565                goto out;
 566        }
 567
 568        if (path->search_commit_root)
 569                root_level = btrfs_header_level(root->commit_root);
 570        else if (time_seq == SEQ_LAST)
 571                root_level = btrfs_header_level(root->node);
 572        else
 573                root_level = btrfs_old_root_level(root, time_seq);
 574
 575        if (root_level + 1 == level)
 576                goto out;
 577
 578        /*
 579         * We can often find data backrefs with an offset that is too large
 580         * (>= LLONG_MAX, maximum allowed file offset) due to underflows when
 581         * subtracting a file's offset with the data offset of its
 582         * corresponding extent data item. This can happen for example in the
 583         * clone ioctl.
 584         *
 585         * So if we detect such case we set the search key's offset to zero to
 586         * make sure we will find the matching file extent item at
 587         * add_all_parents(), otherwise we will miss it because the offset
 588         * taken form the backref is much larger then the offset of the file
 589         * extent item. This can make us scan a very large number of file
 590         * extent items, but at least it will not make us miss any.
 591         *
 592         * This is an ugly workaround for a behaviour that should have never
 593         * existed, but it does and a fix for the clone ioctl would touch a lot
 594         * of places, cause backwards incompatibility and would not fix the
 595         * problem for extents cloned with older kernels.
 596         */
 597        if (search_key.type == BTRFS_EXTENT_DATA_KEY &&
 598            search_key.offset >= LLONG_MAX)
 599                search_key.offset = 0;
 600        path->lowest_level = level;
 601        if (time_seq == SEQ_LAST)
 602                ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
 603        else
 604                ret = btrfs_search_old_slot(root, &search_key, path, time_seq);
 605
 606        btrfs_debug(fs_info,
 607                "search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)",
 608                 ref->root_id, level, ref->count, ret,
 609                 ref->key_for_search.objectid, ref->key_for_search.type,
 610                 ref->key_for_search.offset);
 611        if (ret < 0)
 612                goto out;
 613
 614        eb = path->nodes[level];
 615        while (!eb) {
 616                if (WARN_ON(!level)) {
 617                        ret = 1;
 618                        goto out;
 619                }
 620                level--;
 621                eb = path->nodes[level];
 622        }
 623
 624        ret = add_all_parents(root, path, parents, preftrees, ref, level,
 625                              time_seq, extent_item_pos, ignore_offset);
 626out:
 627        btrfs_put_root(root);
 628out_free:
 629        path->lowest_level = 0;
 630        btrfs_release_path(path);
 631        return ret;
 632}
 633
 634static struct extent_inode_elem *
 635unode_aux_to_inode_list(struct ulist_node *node)
 636{
 637        if (!node)
 638                return NULL;
 639        return (struct extent_inode_elem *)(uintptr_t)node->aux;
 640}
 641
 642/*
 643 * We maintain three separate rbtrees: one for direct refs, one for
 644 * indirect refs which have a key, and one for indirect refs which do not
 645 * have a key. Each tree does merge on insertion.
 646 *
 647 * Once all of the references are located, we iterate over the tree of
 648 * indirect refs with missing keys. An appropriate key is located and
 649 * the ref is moved onto the tree for indirect refs. After all missing
 650 * keys are thus located, we iterate over the indirect ref tree, resolve
 651 * each reference, and then insert the resolved reference onto the
 652 * direct tree (merging there too).
 653 *
 654 * New backrefs (i.e., for parent nodes) are added to the appropriate
 655 * rbtree as they are encountered. The new backrefs are subsequently
 656 * resolved as above.
 657 */
 658static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 659                                 struct btrfs_path *path, u64 time_seq,
 660                                 struct preftrees *preftrees,
 661                                 const u64 *extent_item_pos,
 662                                 struct share_check *sc, bool ignore_offset)
 663{
 664        int err;
 665        int ret = 0;
 666        struct ulist *parents;
 667        struct ulist_node *node;
 668        struct ulist_iterator uiter;
 669        struct rb_node *rnode;
 670
 671        parents = ulist_alloc(GFP_NOFS);
 672        if (!parents)
 673                return -ENOMEM;
 674
 675        /*
 676         * We could trade memory usage for performance here by iterating
 677         * the tree, allocating new refs for each insertion, and then
 678         * freeing the entire indirect tree when we're done.  In some test
 679         * cases, the tree can grow quite large (~200k objects).
 680         */
 681        while ((rnode = rb_first_cached(&preftrees->indirect.root))) {
 682                struct prelim_ref *ref;
 683
 684                ref = rb_entry(rnode, struct prelim_ref, rbnode);
 685                if (WARN(ref->parent,
 686                         "BUG: direct ref found in indirect tree")) {
 687                        ret = -EINVAL;
 688                        goto out;
 689                }
 690
 691                rb_erase_cached(&ref->rbnode, &preftrees->indirect.root);
 692                preftrees->indirect.count--;
 693
 694                if (ref->count == 0) {
 695                        free_pref(ref);
 696                        continue;
 697                }
 698
 699                if (sc && sc->root_objectid &&
 700                    ref->root_id != sc->root_objectid) {
 701                        free_pref(ref);
 702                        ret = BACKREF_FOUND_SHARED;
 703                        goto out;
 704                }
 705                err = resolve_indirect_ref(fs_info, path, time_seq, preftrees,
 706                                           ref, parents, extent_item_pos,
 707                                           ignore_offset);
 708                /*
 709                 * we can only tolerate ENOENT,otherwise,we should catch error
 710                 * and return directly.
 711                 */
 712                if (err == -ENOENT) {
 713                        prelim_ref_insert(fs_info, &preftrees->direct, ref,
 714                                          NULL);
 715                        continue;
 716                } else if (err) {
 717                        free_pref(ref);
 718                        ret = err;
 719                        goto out;
 720                }
 721
 722                /* we put the first parent into the ref at hand */
 723                ULIST_ITER_INIT(&uiter);
 724                node = ulist_next(parents, &uiter);
 725                ref->parent = node ? node->val : 0;
 726                ref->inode_list = unode_aux_to_inode_list(node);
 727
 728                /* Add a prelim_ref(s) for any other parent(s). */
 729                while ((node = ulist_next(parents, &uiter))) {
 730                        struct prelim_ref *new_ref;
 731
 732                        new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
 733                                                   GFP_NOFS);
 734                        if (!new_ref) {
 735                                free_pref(ref);
 736                                ret = -ENOMEM;
 737                                goto out;
 738                        }
 739                        memcpy(new_ref, ref, sizeof(*ref));
 740                        new_ref->parent = node->val;
 741                        new_ref->inode_list = unode_aux_to_inode_list(node);
 742                        prelim_ref_insert(fs_info, &preftrees->direct,
 743                                          new_ref, NULL);
 744                }
 745
 746                /*
 747                 * Now it's a direct ref, put it in the direct tree. We must
 748                 * do this last because the ref could be merged/freed here.
 749                 */
 750                prelim_ref_insert(fs_info, &preftrees->direct, ref, NULL);
 751
 752                ulist_reinit(parents);
 753                cond_resched();
 754        }
 755out:
 756        ulist_free(parents);
 757        return ret;
 758}
 759
 760/*
 761 * read tree blocks and add keys where required.
 762 */
 763static int add_missing_keys(struct btrfs_fs_info *fs_info,
 764                            struct preftrees *preftrees, bool lock)
 765{
 766        struct prelim_ref *ref;
 767        struct extent_buffer *eb;
 768        struct preftree *tree = &preftrees->indirect_missing_keys;
 769        struct rb_node *node;
 770
 771        while ((node = rb_first_cached(&tree->root))) {
 772                ref = rb_entry(node, struct prelim_ref, rbnode);
 773                rb_erase_cached(node, &tree->root);
 774
 775                BUG_ON(ref->parent);    /* should not be a direct ref */
 776                BUG_ON(ref->key_for_search.type);
 777                BUG_ON(!ref->wanted_disk_byte);
 778
 779                eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0,
 780                                     ref->level - 1, NULL);
 781                if (IS_ERR(eb)) {
 782                        free_pref(ref);
 783                        return PTR_ERR(eb);
 784                } else if (!extent_buffer_uptodate(eb)) {
 785                        free_pref(ref);
 786                        free_extent_buffer(eb);
 787                        return -EIO;
 788                }
 789                if (lock)
 790                        btrfs_tree_read_lock(eb);
 791                if (btrfs_header_level(eb) == 0)
 792                        btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
 793                else
 794                        btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
 795                if (lock)
 796                        btrfs_tree_read_unlock(eb);
 797                free_extent_buffer(eb);
 798                prelim_ref_insert(fs_info, &preftrees->indirect, ref, NULL);
 799                cond_resched();
 800        }
 801        return 0;
 802}
 803
 804/*
 805 * add all currently queued delayed refs from this head whose seq nr is
 806 * smaller or equal that seq to the list
 807 */
 808static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
 809                            struct btrfs_delayed_ref_head *head, u64 seq,
 810                            struct preftrees *preftrees, struct share_check *sc)
 811{
 812        struct btrfs_delayed_ref_node *node;
 813        struct btrfs_delayed_extent_op *extent_op = head->extent_op;
 814        struct btrfs_key key;
 815        struct btrfs_key tmp_op_key;
 816        struct rb_node *n;
 817        int count;
 818        int ret = 0;
 819
 820        if (extent_op && extent_op->update_key)
 821                btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
 822
 823        spin_lock(&head->lock);
 824        for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
 825                node = rb_entry(n, struct btrfs_delayed_ref_node,
 826                                ref_node);
 827                if (node->seq > seq)
 828                        continue;
 829
 830                switch (node->action) {
 831                case BTRFS_ADD_DELAYED_EXTENT:
 832                case BTRFS_UPDATE_DELAYED_HEAD:
 833                        WARN_ON(1);
 834                        continue;
 835                case BTRFS_ADD_DELAYED_REF:
 836                        count = node->ref_mod;
 837                        break;
 838                case BTRFS_DROP_DELAYED_REF:
 839                        count = node->ref_mod * -1;
 840                        break;
 841                default:
 842                        BUG();
 843                }
 844                switch (node->type) {
 845                case BTRFS_TREE_BLOCK_REF_KEY: {
 846                        /* NORMAL INDIRECT METADATA backref */
 847                        struct btrfs_delayed_tree_ref *ref;
 848
 849                        ref = btrfs_delayed_node_to_tree_ref(node);
 850                        ret = add_indirect_ref(fs_info, preftrees, ref->root,
 851                                               &tmp_op_key, ref->level + 1,
 852                                               node->bytenr, count, sc,
 853                                               GFP_ATOMIC);
 854                        break;
 855                }
 856                case BTRFS_SHARED_BLOCK_REF_KEY: {
 857                        /* SHARED DIRECT METADATA backref */
 858                        struct btrfs_delayed_tree_ref *ref;
 859
 860                        ref = btrfs_delayed_node_to_tree_ref(node);
 861
 862                        ret = add_direct_ref(fs_info, preftrees, ref->level + 1,
 863                                             ref->parent, node->bytenr, count,
 864                                             sc, GFP_ATOMIC);
 865                        break;
 866                }
 867                case BTRFS_EXTENT_DATA_REF_KEY: {
 868                        /* NORMAL INDIRECT DATA backref */
 869                        struct btrfs_delayed_data_ref *ref;
 870                        ref = btrfs_delayed_node_to_data_ref(node);
 871
 872                        key.objectid = ref->objectid;
 873                        key.type = BTRFS_EXTENT_DATA_KEY;
 874                        key.offset = ref->offset;
 875
 876                        /*
 877                         * Found a inum that doesn't match our known inum, we
 878                         * know it's shared.
 879                         */
 880                        if (sc && sc->inum && ref->objectid != sc->inum) {
 881                                ret = BACKREF_FOUND_SHARED;
 882                                goto out;
 883                        }
 884
 885                        ret = add_indirect_ref(fs_info, preftrees, ref->root,
 886                                               &key, 0, node->bytenr, count, sc,
 887                                               GFP_ATOMIC);
 888                        break;
 889                }
 890                case BTRFS_SHARED_DATA_REF_KEY: {
 891                        /* SHARED DIRECT FULL backref */
 892                        struct btrfs_delayed_data_ref *ref;
 893
 894                        ref = btrfs_delayed_node_to_data_ref(node);
 895
 896                        ret = add_direct_ref(fs_info, preftrees, 0, ref->parent,
 897                                             node->bytenr, count, sc,
 898                                             GFP_ATOMIC);
 899                        break;
 900                }
 901                default:
 902                        WARN_ON(1);
 903                }
 904                /*
 905                 * We must ignore BACKREF_FOUND_SHARED until all delayed
 906                 * refs have been checked.
 907                 */
 908                if (ret && (ret != BACKREF_FOUND_SHARED))
 909                        break;
 910        }
 911        if (!ret)
 912                ret = extent_is_shared(sc);
 913out:
 914        spin_unlock(&head->lock);
 915        return ret;
 916}
 917
 918/*
 919 * add all inline backrefs for bytenr to the list
 920 *
 921 * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED.
 922 */
 923static int add_inline_refs(const struct btrfs_fs_info *fs_info,
 924                           struct btrfs_path *path, u64 bytenr,
 925                           int *info_level, struct preftrees *preftrees,
 926                           struct share_check *sc)
 927{
 928        int ret = 0;
 929        int slot;
 930        struct extent_buffer *leaf;
 931        struct btrfs_key key;
 932        struct btrfs_key found_key;
 933        unsigned long ptr;
 934        unsigned long end;
 935        struct btrfs_extent_item *ei;
 936        u64 flags;
 937        u64 item_size;
 938
 939        /*
 940         * enumerate all inline refs
 941         */
 942        leaf = path->nodes[0];
 943        slot = path->slots[0];
 944
 945        item_size = btrfs_item_size_nr(leaf, slot);
 946        BUG_ON(item_size < sizeof(*ei));
 947
 948        ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
 949        flags = btrfs_extent_flags(leaf, ei);
 950        btrfs_item_key_to_cpu(leaf, &found_key, slot);
 951
 952        ptr = (unsigned long)(ei + 1);
 953        end = (unsigned long)ei + item_size;
 954
 955        if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
 956            flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 957                struct btrfs_tree_block_info *info;
 958
 959                info = (struct btrfs_tree_block_info *)ptr;
 960                *info_level = btrfs_tree_block_level(leaf, info);
 961                ptr += sizeof(struct btrfs_tree_block_info);
 962                BUG_ON(ptr > end);
 963        } else if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
 964                *info_level = found_key.offset;
 965        } else {
 966                BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
 967        }
 968
 969        while (ptr < end) {
 970                struct btrfs_extent_inline_ref *iref;
 971                u64 offset;
 972                int type;
 973
 974                iref = (struct btrfs_extent_inline_ref *)ptr;
 975                type = btrfs_get_extent_inline_ref_type(leaf, iref,
 976                                                        BTRFS_REF_TYPE_ANY);
 977                if (type == BTRFS_REF_TYPE_INVALID)
 978                        return -EUCLEAN;
 979
 980                offset = btrfs_extent_inline_ref_offset(leaf, iref);
 981
 982                switch (type) {
 983                case BTRFS_SHARED_BLOCK_REF_KEY:
 984                        ret = add_direct_ref(fs_info, preftrees,
 985                                             *info_level + 1, offset,
 986                                             bytenr, 1, NULL, GFP_NOFS);
 987                        break;
 988                case BTRFS_SHARED_DATA_REF_KEY: {
 989                        struct btrfs_shared_data_ref *sdref;
 990                        int count;
 991
 992                        sdref = (struct btrfs_shared_data_ref *)(iref + 1);
 993                        count = btrfs_shared_data_ref_count(leaf, sdref);
 994
 995                        ret = add_direct_ref(fs_info, preftrees, 0, offset,
 996                                             bytenr, count, sc, GFP_NOFS);
 997                        break;
 998                }
 999                case BTRFS_TREE_BLOCK_REF_KEY:
1000                        ret = add_indirect_ref(fs_info, preftrees, offset,
1001                                               NULL, *info_level + 1,
1002                                               bytenr, 1, NULL, GFP_NOFS);
1003                        break;
1004                case BTRFS_EXTENT_DATA_REF_KEY: {
1005                        struct btrfs_extent_data_ref *dref;
1006                        int count;
1007                        u64 root;
1008
1009                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1010                        count = btrfs_extent_data_ref_count(leaf, dref);
1011                        key.objectid = btrfs_extent_data_ref_objectid(leaf,
1012                                                                      dref);
1013                        key.type = BTRFS_EXTENT_DATA_KEY;
1014                        key.offset = btrfs_extent_data_ref_offset(leaf, dref);
1015
1016                        if (sc && sc->inum && key.objectid != sc->inum) {
1017                                ret = BACKREF_FOUND_SHARED;
1018                                break;
1019                        }
1020
1021                        root = btrfs_extent_data_ref_root(leaf, dref);
1022
1023                        ret = add_indirect_ref(fs_info, preftrees, root,
1024                                               &key, 0, bytenr, count,
1025                                               sc, GFP_NOFS);
1026                        break;
1027                }
1028                default:
1029                        WARN_ON(1);
1030                }
1031                if (ret)
1032                        return ret;
1033                ptr += btrfs_extent_inline_ref_size(type);
1034        }
1035
1036        return 0;
1037}
1038
1039/*
1040 * add all non-inline backrefs for bytenr to the list
1041 *
1042 * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED.
1043 */
1044static int add_keyed_refs(struct btrfs_fs_info *fs_info,
1045                          struct btrfs_path *path, u64 bytenr,
1046                          int info_level, struct preftrees *preftrees,
1047                          struct share_check *sc)
1048{
1049        struct btrfs_root *extent_root = fs_info->extent_root;
1050        int ret;
1051        int slot;
1052        struct extent_buffer *leaf;
1053        struct btrfs_key key;
1054
1055        while (1) {
1056                ret = btrfs_next_item(extent_root, path);
1057                if (ret < 0)
1058                        break;
1059                if (ret) {
1060                        ret = 0;
1061                        break;
1062                }
1063
1064                slot = path->slots[0];
1065                leaf = path->nodes[0];
1066                btrfs_item_key_to_cpu(leaf, &key, slot);
1067
1068                if (key.objectid != bytenr)
1069                        break;
1070                if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
1071                        continue;
1072                if (key.type > BTRFS_SHARED_DATA_REF_KEY)
1073                        break;
1074
1075                switch (key.type) {
1076                case BTRFS_SHARED_BLOCK_REF_KEY:
1077                        /* SHARED DIRECT METADATA backref */
1078                        ret = add_direct_ref(fs_info, preftrees,
1079                                             info_level + 1, key.offset,
1080                                             bytenr, 1, NULL, GFP_NOFS);
1081                        break;
1082                case BTRFS_SHARED_DATA_REF_KEY: {
1083                        /* SHARED DIRECT FULL backref */
1084                        struct btrfs_shared_data_ref *sdref;
1085                        int count;
1086
1087                        sdref = btrfs_item_ptr(leaf, slot,
1088                                              struct btrfs_shared_data_ref);
1089                        count = btrfs_shared_data_ref_count(leaf, sdref);
1090                        ret = add_direct_ref(fs_info, preftrees, 0,
1091                                             key.offset, bytenr, count,
1092                                             sc, GFP_NOFS);
1093                        break;
1094                }
1095                case BTRFS_TREE_BLOCK_REF_KEY:
1096                        /* NORMAL INDIRECT METADATA backref */
1097                        ret = add_indirect_ref(fs_info, preftrees, key.offset,
1098                                               NULL, info_level + 1, bytenr,
1099                                               1, NULL, GFP_NOFS);
1100                        break;
1101                case BTRFS_EXTENT_DATA_REF_KEY: {
1102                        /* NORMAL INDIRECT DATA backref */
1103                        struct btrfs_extent_data_ref *dref;
1104                        int count;
1105                        u64 root;
1106
1107                        dref = btrfs_item_ptr(leaf, slot,
1108                                              struct btrfs_extent_data_ref);
1109                        count = btrfs_extent_data_ref_count(leaf, dref);
1110                        key.objectid = btrfs_extent_data_ref_objectid(leaf,
1111                                                                      dref);
1112                        key.type = BTRFS_EXTENT_DATA_KEY;
1113                        key.offset = btrfs_extent_data_ref_offset(leaf, dref);
1114
1115                        if (sc && sc->inum && key.objectid != sc->inum) {
1116                                ret = BACKREF_FOUND_SHARED;
1117                                break;
1118                        }
1119
1120                        root = btrfs_extent_data_ref_root(leaf, dref);
1121                        ret = add_indirect_ref(fs_info, preftrees, root,
1122                                               &key, 0, bytenr, count,
1123                                               sc, GFP_NOFS);
1124                        break;
1125                }
1126                default:
1127                        WARN_ON(1);
1128                }
1129                if (ret)
1130                        return ret;
1131
1132        }
1133
1134        return ret;
1135}
1136
1137/*
1138 * this adds all existing backrefs (inline backrefs, backrefs and delayed
1139 * refs) for the given bytenr to the refs list, merges duplicates and resolves
1140 * indirect refs to their parent bytenr.
1141 * When roots are found, they're added to the roots list
1142 *
1143 * If time_seq is set to SEQ_LAST, it will not search delayed_refs, and behave
1144 * much like trans == NULL case, the difference only lies in it will not
1145 * commit root.
1146 * The special case is for qgroup to search roots in commit_transaction().
1147 *
1148 * @sc - if !NULL, then immediately return BACKREF_FOUND_SHARED when a
1149 * shared extent is detected.
1150 *
1151 * Otherwise this returns 0 for success and <0 for an error.
1152 *
1153 * If ignore_offset is set to false, only extent refs whose offsets match
1154 * extent_item_pos are returned.  If true, every extent ref is returned
1155 * and extent_item_pos is ignored.
1156 *
1157 * FIXME some caching might speed things up
1158 */
1159static int find_parent_nodes(struct btrfs_trans_handle *trans,
1160                             struct btrfs_fs_info *fs_info, u64 bytenr,
1161                             u64 time_seq, struct ulist *refs,
1162                             struct ulist *roots, const u64 *extent_item_pos,
1163                             struct share_check *sc, bool ignore_offset)
1164{
1165        struct btrfs_key key;
1166        struct btrfs_path *path;
1167        struct btrfs_delayed_ref_root *delayed_refs = NULL;
1168        struct btrfs_delayed_ref_head *head;
1169        int info_level = 0;
1170        int ret;
1171        struct prelim_ref *ref;
1172        struct rb_node *node;
1173        struct extent_inode_elem *eie = NULL;
1174        struct preftrees preftrees = {
1175                .direct = PREFTREE_INIT,
1176                .indirect = PREFTREE_INIT,
1177                .indirect_missing_keys = PREFTREE_INIT
1178        };
1179
1180        key.objectid = bytenr;
1181        key.offset = (u64)-1;
1182        if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
1183                key.type = BTRFS_METADATA_ITEM_KEY;
1184        else
1185                key.type = BTRFS_EXTENT_ITEM_KEY;
1186
1187        path = btrfs_alloc_path();
1188        if (!path)
1189                return -ENOMEM;
1190        if (!trans) {
1191                path->search_commit_root = 1;
1192                path->skip_locking = 1;
1193        }
1194
1195        if (time_seq == SEQ_LAST)
1196                path->skip_locking = 1;
1197
1198        /*
1199         * grab both a lock on the path and a lock on the delayed ref head.
1200         * We need both to get a consistent picture of how the refs look
1201         * at a specified point in time
1202         */
1203again:
1204        head = NULL;
1205
1206        ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
1207        if (ret < 0)
1208                goto out;
1209        BUG_ON(ret == 0);
1210
1211#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
1212        if (trans && likely(trans->type != __TRANS_DUMMY) &&
1213            time_seq != SEQ_LAST) {
1214#else
1215        if (trans && time_seq != SEQ_LAST) {
1216#endif
1217                /*
1218                 * look if there are updates for this ref queued and lock the
1219                 * head
1220                 */
1221                delayed_refs = &trans->transaction->delayed_refs;
1222                spin_lock(&delayed_refs->lock);
1223                head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
1224                if (head) {
1225                        if (!mutex_trylock(&head->mutex)) {
1226                                refcount_inc(&head->refs);
1227                                spin_unlock(&delayed_refs->lock);
1228
1229                                btrfs_release_path(path);
1230
1231                                /*
1232                                 * Mutex was contended, block until it's
1233                                 * released and try again
1234                                 */
1235                                mutex_lock(&head->mutex);
1236                                mutex_unlock(&head->mutex);
1237                                btrfs_put_delayed_ref_head(head);
1238                                goto again;
1239                        }
1240                        spin_unlock(&delayed_refs->lock);
1241                        ret = add_delayed_refs(fs_info, head, time_seq,
1242                                               &preftrees, sc);
1243                        mutex_unlock(&head->mutex);
1244                        if (ret)
1245                                goto out;
1246                } else {
1247                        spin_unlock(&delayed_refs->lock);
1248                }
1249        }
1250
1251        if (path->slots[0]) {
1252                struct extent_buffer *leaf;
1253                int slot;
1254
1255                path->slots[0]--;
1256                leaf = path->nodes[0];
1257                slot = path->slots[0];
1258                btrfs_item_key_to_cpu(leaf, &key, slot);
1259                if (key.objectid == bytenr &&
1260                    (key.type == BTRFS_EXTENT_ITEM_KEY ||
1261                     key.type == BTRFS_METADATA_ITEM_KEY)) {
1262                        ret = add_inline_refs(fs_info, path, bytenr,
1263                                              &info_level, &preftrees, sc);
1264                        if (ret)
1265                                goto out;
1266                        ret = add_keyed_refs(fs_info, path, bytenr, info_level,
1267                                             &preftrees, sc);
1268                        if (ret)
1269                                goto out;
1270                }
1271        }
1272
1273        btrfs_release_path(path);
1274
1275        ret = add_missing_keys(fs_info, &preftrees, path->skip_locking == 0);
1276        if (ret)
1277                goto out;
1278
1279        WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root));
1280
1281        ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
1282                                    extent_item_pos, sc, ignore_offset);
1283        if (ret)
1284                goto out;
1285
1286        WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect.root.rb_root));
1287
1288        /*
1289         * This walks the tree of merged and resolved refs. Tree blocks are
1290         * read in as needed. Unique entries are added to the ulist, and
1291         * the list of found roots is updated.
1292         *
1293         * We release the entire tree in one go before returning.
1294         */
1295        node = rb_first_cached(&preftrees.direct.root);
1296        while (node) {
1297                ref = rb_entry(node, struct prelim_ref, rbnode);
1298                node = rb_next(&ref->rbnode);
1299                /*
1300                 * ref->count < 0 can happen here if there are delayed
1301                 * refs with a node->action of BTRFS_DROP_DELAYED_REF.
1302                 * prelim_ref_insert() relies on this when merging
1303                 * identical refs to keep the overall count correct.
1304                 * prelim_ref_insert() will merge only those refs
1305                 * which compare identically.  Any refs having
1306                 * e.g. different offsets would not be merged,
1307                 * and would retain their original ref->count < 0.
1308                 */
1309                if (roots && ref->count && ref->root_id && ref->parent == 0) {
1310                        if (sc && sc->root_objectid &&
1311                            ref->root_id != sc->root_objectid) {
1312                                ret = BACKREF_FOUND_SHARED;
1313                                goto out;
1314                        }
1315
1316                        /* no parent == root of tree */
1317                        ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
1318                        if (ret < 0)
1319                                goto out;
1320                }
1321                if (ref->count && ref->parent) {
1322                        if (extent_item_pos && !ref->inode_list &&
1323                            ref->level == 0) {
1324                                struct extent_buffer *eb;
1325
1326                                eb = read_tree_block(fs_info, ref->parent, 0,
1327                                                     ref->level, NULL);
1328                                if (IS_ERR(eb)) {
1329                                        ret = PTR_ERR(eb);
1330                                        goto out;
1331                                } else if (!extent_buffer_uptodate(eb)) {
1332                                        free_extent_buffer(eb);
1333                                        ret = -EIO;
1334                                        goto out;
1335                                }
1336
1337                                if (!path->skip_locking) {
1338                                        btrfs_tree_read_lock(eb);
1339                                        btrfs_set_lock_blocking_read(eb);
1340                                }
1341                                ret = find_extent_in_eb(eb, bytenr,
1342                                                        *extent_item_pos, &eie, ignore_offset);
1343                                if (!path->skip_locking)
1344                                        btrfs_tree_read_unlock_blocking(eb);
1345                                free_extent_buffer(eb);
1346                                if (ret < 0)
1347                                        goto out;
1348                                ref->inode_list = eie;
1349                        }
1350                        ret = ulist_add_merge_ptr(refs, ref->parent,
1351                                                  ref->inode_list,
1352                                                  (void **)&eie, GFP_NOFS);
1353                        if (ret < 0)
1354                                goto out;
1355                        if (!ret && extent_item_pos) {
1356                                /*
1357                                 * we've recorded that parent, so we must extend
1358                                 * its inode list here
1359                                 */
1360                                BUG_ON(!eie);
1361                                while (eie->next)
1362                                        eie = eie->next;
1363                                eie->next = ref->inode_list;
1364                        }
1365                        eie = NULL;
1366                }
1367                cond_resched();
1368        }
1369
1370out:
1371        btrfs_free_path(path);
1372
1373        prelim_release(&preftrees.direct);
1374        prelim_release(&preftrees.indirect);
1375        prelim_release(&preftrees.indirect_missing_keys);
1376
1377        if (ret < 0)
1378                free_inode_elem_list(eie);
1379        return ret;
1380}
1381
1382static void free_leaf_list(struct ulist *blocks)
1383{
1384        struct ulist_node *node = NULL;
1385        struct extent_inode_elem *eie;
1386        struct ulist_iterator uiter;
1387
1388        ULIST_ITER_INIT(&uiter);
1389        while ((node = ulist_next(blocks, &uiter))) {
1390                if (!node->aux)
1391                        continue;
1392                eie = unode_aux_to_inode_list(node);
1393                free_inode_elem_list(eie);
1394                node->aux = 0;
1395        }
1396
1397        ulist_free(blocks);
1398}
1399
1400/*
1401 * Finds all leafs with a reference to the specified combination of bytenr and
1402 * offset. key_list_head will point to a list of corresponding keys (caller must
1403 * free each list element). The leafs will be stored in the leafs ulist, which
1404 * must be freed with ulist_free.
1405 *
1406 * returns 0 on success, <0 on error
1407 */
1408int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
1409                         struct btrfs_fs_info *fs_info, u64 bytenr,
1410                         u64 time_seq, struct ulist **leafs,
1411                         const u64 *extent_item_pos, bool ignore_offset)
1412{
1413        int ret;
1414
1415        *leafs = ulist_alloc(GFP_NOFS);
1416        if (!*leafs)
1417                return -ENOMEM;
1418
1419        ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
1420                                *leafs, NULL, extent_item_pos, NULL, ignore_offset);
1421        if (ret < 0 && ret != -ENOENT) {
1422                free_leaf_list(*leafs);
1423                return ret;
1424        }
1425
1426        return 0;
1427}
1428
1429/*
1430 * walk all backrefs for a given extent to find all roots that reference this
1431 * extent. Walking a backref means finding all extents that reference this
1432 * extent and in turn walk the backrefs of those, too. Naturally this is a
1433 * recursive process, but here it is implemented in an iterative fashion: We
1434 * find all referencing extents for the extent in question and put them on a
1435 * list. In turn, we find all referencing extents for those, further appending
1436 * to the list. The way we iterate the list allows adding more elements after
1437 * the current while iterating. The process stops when we reach the end of the
1438 * list. Found roots are added to the roots list.
1439 *
1440 * returns 0 on success, < 0 on error.
1441 */
1442static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
1443                                     struct btrfs_fs_info *fs_info, u64 bytenr,
1444                                     u64 time_seq, struct ulist **roots,
1445                                     bool ignore_offset)
1446{
1447        struct ulist *tmp;
1448        struct ulist_node *node = NULL;
1449        struct ulist_iterator uiter;
1450        int ret;
1451
1452        tmp = ulist_alloc(GFP_NOFS);
1453        if (!tmp)
1454                return -ENOMEM;
1455        *roots = ulist_alloc(GFP_NOFS);
1456        if (!*roots) {
1457                ulist_free(tmp);
1458                return -ENOMEM;
1459        }
1460
1461        ULIST_ITER_INIT(&uiter);
1462        while (1) {
1463                ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
1464                                        tmp, *roots, NULL, NULL, ignore_offset);
1465                if (ret < 0 && ret != -ENOENT) {
1466                        ulist_free(tmp);
1467                        ulist_free(*roots);
1468                        return ret;
1469                }
1470                node = ulist_next(tmp, &uiter);
1471                if (!node)
1472                        break;
1473                bytenr = node->val;
1474                cond_resched();
1475        }
1476
1477        ulist_free(tmp);
1478        return 0;
1479}
1480
1481int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
1482                         struct btrfs_fs_info *fs_info, u64 bytenr,
1483                         u64 time_seq, struct ulist **roots,
1484                         bool ignore_offset)
1485{
1486        int ret;
1487
1488        if (!trans)
1489                down_read(&fs_info->commit_root_sem);
1490        ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
1491                                        time_seq, roots, ignore_offset);
1492        if (!trans)
1493                up_read(&fs_info->commit_root_sem);
1494        return ret;
1495}
1496
1497/**
1498 * btrfs_check_shared - tell us whether an extent is shared
1499 *
1500 * btrfs_check_shared uses the backref walking code but will short
1501 * circuit as soon as it finds a root or inode that doesn't match the
1502 * one passed in. This provides a significant performance benefit for
1503 * callers (such as fiemap) which want to know whether the extent is
1504 * shared but do not need a ref count.
1505 *
1506 * This attempts to attach to the running transaction in order to account for
1507 * delayed refs, but continues on even when no running transaction exists.
1508 *
1509 * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
1510 */
1511int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
1512                struct ulist *roots, struct ulist *tmp)
1513{
1514        struct btrfs_fs_info *fs_info = root->fs_info;
1515        struct btrfs_trans_handle *trans;
1516        struct ulist_iterator uiter;
1517        struct ulist_node *node;
1518        struct seq_list elem = SEQ_LIST_INIT(elem);
1519        int ret = 0;
1520        struct share_check shared = {
1521                .root_objectid = root->root_key.objectid,
1522                .inum = inum,
1523                .share_count = 0,
1524        };
1525
1526        ulist_init(roots);
1527        ulist_init(tmp);
1528
1529        trans = btrfs_join_transaction_nostart(root);
1530        if (IS_ERR(trans)) {
1531                if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
1532                        ret = PTR_ERR(trans);
1533                        goto out;
1534                }
1535                trans = NULL;
1536                down_read(&fs_info->commit_root_sem);
1537        } else {
1538                btrfs_get_tree_mod_seq(fs_info, &elem);
1539        }
1540
1541        ULIST_ITER_INIT(&uiter);
1542        while (1) {
1543                ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
1544                                        roots, NULL, &shared, false);
1545                if (ret == BACKREF_FOUND_SHARED) {
1546                        /* this is the only condition under which we return 1 */
1547                        ret = 1;
1548                        break;
1549                }
1550                if (ret < 0 && ret != -ENOENT)
1551                        break;
1552                ret = 0;
1553                node = ulist_next(tmp, &uiter);
1554                if (!node)
1555                        break;
1556                bytenr = node->val;
1557                shared.share_count = 0;
1558                cond_resched();
1559        }
1560
1561        if (trans) {
1562                btrfs_put_tree_mod_seq(fs_info, &elem);
1563                btrfs_end_transaction(trans);
1564        } else {
1565                up_read(&fs_info->commit_root_sem);
1566        }
1567out:
1568        ulist_release(roots);
1569        ulist_release(tmp);
1570        return ret;
1571}
1572
1573int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
1574                          u64 start_off, struct btrfs_path *path,
1575                          struct btrfs_inode_extref **ret_extref,
1576                          u64 *found_off)
1577{
1578        int ret, slot;
1579        struct btrfs_key key;
1580        struct btrfs_key found_key;
1581        struct btrfs_inode_extref *extref;
1582        const struct extent_buffer *leaf;
1583        unsigned long ptr;
1584
1585        key.objectid = inode_objectid;
1586        key.type = BTRFS_INODE_EXTREF_KEY;
1587        key.offset = start_off;
1588
1589        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1590        if (ret < 0)
1591                return ret;
1592
1593        while (1) {
1594                leaf = path->nodes[0];
1595                slot = path->slots[0];
1596                if (slot >= btrfs_header_nritems(leaf)) {
1597                        /*
1598                         * If the item at offset is not found,
1599                         * btrfs_search_slot will point us to the slot
1600                         * where it should be inserted. In our case
1601                         * that will be the slot directly before the
1602                         * next INODE_REF_KEY_V2 item. In the case
1603                         * that we're pointing to the last slot in a
1604                         * leaf, we must move one leaf over.
1605                         */
1606                        ret = btrfs_next_leaf(root, path);
1607                        if (ret) {
1608                                if (ret >= 1)
1609                                        ret = -ENOENT;
1610                                break;
1611                        }
1612                        continue;
1613                }
1614
1615                btrfs_item_key_to_cpu(leaf, &found_key, slot);
1616
1617                /*
1618                 * Check that we're still looking at an extended ref key for
1619                 * this particular objectid. If we have different
1620                 * objectid or type then there are no more to be found
1621                 * in the tree and we can exit.
1622                 */
1623                ret = -ENOENT;
1624                if (found_key.objectid != inode_objectid)
1625                        break;
1626                if (found_key.type != BTRFS_INODE_EXTREF_KEY)
1627                        break;
1628
1629                ret = 0;
1630                ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
1631                extref = (struct btrfs_inode_extref *)ptr;
1632                *ret_extref = extref;
1633                if (found_off)
1634                        *found_off = found_key.offset;
1635                break;
1636        }
1637
1638        return ret;
1639}
1640
1641/*
1642 * this iterates to turn a name (from iref/extref) into a full filesystem path.
1643 * Elements of the path are separated by '/' and the path is guaranteed to be
1644 * 0-terminated. the path is only given within the current file system.
1645 * Therefore, it never starts with a '/'. the caller is responsible to provide
1646 * "size" bytes in "dest". the dest buffer will be filled backwards. finally,
1647 * the start point of the resulting string is returned. this pointer is within
1648 * dest, normally.
1649 * in case the path buffer would overflow, the pointer is decremented further
1650 * as if output was written to the buffer, though no more output is actually
1651 * generated. that way, the caller can determine how much space would be
1652 * required for the path to fit into the buffer. in that case, the returned
1653 * value will be smaller than dest. callers must check this!
1654 */
1655char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
1656                        u32 name_len, unsigned long name_off,
1657                        struct extent_buffer *eb_in, u64 parent,
1658                        char *dest, u32 size)
1659{
1660        int slot;
1661        u64 next_inum;
1662        int ret;
1663        s64 bytes_left = ((s64)size) - 1;
1664        struct extent_buffer *eb = eb_in;
1665        struct btrfs_key found_key;
1666        int leave_spinning = path->leave_spinning;
1667        struct btrfs_inode_ref *iref;
1668
1669        if (bytes_left >= 0)
1670                dest[bytes_left] = '\0';
1671
1672        path->leave_spinning = 1;
1673        while (1) {
1674                bytes_left -= name_len;
1675                if (bytes_left >= 0)
1676                        read_extent_buffer(eb, dest + bytes_left,
1677                                           name_off, name_len);
1678                if (eb != eb_in) {
1679                        if (!path->skip_locking)
1680                                btrfs_tree_read_unlock_blocking(eb);
1681                        free_extent_buffer(eb);
1682                }
1683                ret = btrfs_find_item(fs_root, path, parent, 0,
1684                                BTRFS_INODE_REF_KEY, &found_key);
1685                if (ret > 0)
1686                        ret = -ENOENT;
1687                if (ret)
1688                        break;
1689
1690                next_inum = found_key.offset;
1691
1692                /* regular exit ahead */
1693                if (parent == next_inum)
1694                        break;
1695
1696                slot = path->slots[0];
1697                eb = path->nodes[0];
1698                /* make sure we can use eb after releasing the path */
1699                if (eb != eb_in) {
1700                        if (!path->skip_locking)
1701                                btrfs_set_lock_blocking_read(eb);
1702                        path->nodes[0] = NULL;
1703                        path->locks[0] = 0;
1704                }
1705                btrfs_release_path(path);
1706                iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1707
1708                name_len = btrfs_inode_ref_name_len(eb, iref);
1709                name_off = (unsigned long)(iref + 1);
1710
1711                parent = next_inum;
1712                --bytes_left;
1713                if (bytes_left >= 0)
1714                        dest[bytes_left] = '/';
1715        }
1716
1717        btrfs_release_path(path);
1718        path->leave_spinning = leave_spinning;
1719
1720        if (ret)
1721                return ERR_PTR(ret);
1722
1723        return dest + bytes_left;
1724}
1725
1726/*
1727 * this makes the path point to (logical EXTENT_ITEM *)
1728 * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for
1729 * tree blocks and <0 on error.
1730 */
1731int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
1732                        struct btrfs_path *path, struct btrfs_key *found_key,
1733                        u64 *flags_ret)
1734{
1735        int ret;
1736        u64 flags;
1737        u64 size = 0;
1738        u32 item_size;
1739        const struct extent_buffer *eb;
1740        struct btrfs_extent_item *ei;
1741        struct btrfs_key key;
1742
1743        if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
1744                key.type = BTRFS_METADATA_ITEM_KEY;
1745        else
1746                key.type = BTRFS_EXTENT_ITEM_KEY;
1747        key.objectid = logical;
1748        key.offset = (u64)-1;
1749
1750        ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
1751        if (ret < 0)
1752                return ret;
1753
1754        ret = btrfs_previous_extent_item(fs_info->extent_root, path, 0);
1755        if (ret) {
1756                if (ret > 0)
1757                        ret = -ENOENT;
1758                return ret;
1759        }
1760        btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
1761        if (found_key->type == BTRFS_METADATA_ITEM_KEY)
1762                size = fs_info->nodesize;
1763        else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
1764                size = found_key->offset;
1765
1766        if (found_key->objectid > logical ||
1767            found_key->objectid + size <= logical) {
1768                btrfs_debug(fs_info,
1769                        "logical %llu is not within any extent", logical);
1770                return -ENOENT;
1771        }
1772
1773        eb = path->nodes[0];
1774        item_size = btrfs_item_size_nr(eb, path->slots[0]);
1775        BUG_ON(item_size < sizeof(*ei));
1776
1777        ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
1778        flags = btrfs_extent_flags(eb, ei);
1779
1780        btrfs_debug(fs_info,
1781                "logical %llu is at position %llu within the extent (%llu EXTENT_ITEM %llu) flags %#llx size %u",
1782                 logical, logical - found_key->objectid, found_key->objectid,
1783                 found_key->offset, flags, item_size);
1784
1785        WARN_ON(!flags_ret);
1786        if (flags_ret) {
1787                if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1788                        *flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK;
1789                else if (flags & BTRFS_EXTENT_FLAG_DATA)
1790                        *flags_ret = BTRFS_EXTENT_FLAG_DATA;
1791                else
1792                        BUG();
1793                return 0;
1794        }
1795
1796        return -EIO;
1797}
1798
1799/*
1800 * helper function to iterate extent inline refs. ptr must point to a 0 value
1801 * for the first call and may be modified. it is used to track state.
1802 * if more refs exist, 0 is returned and the next call to
1803 * get_extent_inline_ref must pass the modified ptr parameter to get the
1804 * next ref. after the last ref was processed, 1 is returned.
1805 * returns <0 on error
1806 */
1807static int get_extent_inline_ref(unsigned long *ptr,
1808                                 const struct extent_buffer *eb,
1809                                 const struct btrfs_key *key,
1810                                 const struct btrfs_extent_item *ei,
1811                                 u32 item_size,
1812                                 struct btrfs_extent_inline_ref **out_eiref,
1813                                 int *out_type)
1814{
1815        unsigned long end;
1816        u64 flags;
1817        struct btrfs_tree_block_info *info;
1818
1819        if (!*ptr) {
1820                /* first call */
1821                flags = btrfs_extent_flags(eb, ei);
1822                if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1823                        if (key->type == BTRFS_METADATA_ITEM_KEY) {
1824                                /* a skinny metadata extent */
1825                                *out_eiref =
1826                                     (struct btrfs_extent_inline_ref *)(ei + 1);
1827                        } else {
1828                                WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
1829                                info = (struct btrfs_tree_block_info *)(ei + 1);
1830                                *out_eiref =
1831                                   (struct btrfs_extent_inline_ref *)(info + 1);
1832                        }
1833                } else {
1834                        *out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
1835                }
1836                *ptr = (unsigned long)*out_eiref;
1837                if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size)
1838                        return -ENOENT;
1839        }
1840
1841        end = (unsigned long)ei + item_size;
1842        *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
1843        *out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref,
1844                                                     BTRFS_REF_TYPE_ANY);
1845        if (*out_type == BTRFS_REF_TYPE_INVALID)
1846                return -EUCLEAN;
1847
1848        *ptr += btrfs_extent_inline_ref_size(*out_type);
1849        WARN_ON(*ptr > end);
1850        if (*ptr == end)
1851                return 1; /* last */
1852
1853        return 0;
1854}
1855
1856/*
1857 * reads the tree block backref for an extent. tree level and root are returned
1858 * through out_level and out_root. ptr must point to a 0 value for the first
1859 * call and may be modified (see get_extent_inline_ref comment).
1860 * returns 0 if data was provided, 1 if there was no more data to provide or
1861 * <0 on error.
1862 */
1863int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
1864                            struct btrfs_key *key, struct btrfs_extent_item *ei,
1865                            u32 item_size, u64 *out_root, u8 *out_level)
1866{
1867        int ret;
1868        int type;
1869        struct btrfs_extent_inline_ref *eiref;
1870
1871        if (*ptr == (unsigned long)-1)
1872                return 1;
1873
1874        while (1) {
1875                ret = get_extent_inline_ref(ptr, eb, key, ei, item_size,
1876                                              &eiref, &type);
1877                if (ret < 0)
1878                        return ret;
1879
1880                if (type == BTRFS_TREE_BLOCK_REF_KEY ||
1881                    type == BTRFS_SHARED_BLOCK_REF_KEY)
1882                        break;
1883
1884                if (ret == 1)
1885                        return 1;
1886        }
1887
1888        /* we can treat both ref types equally here */
1889        *out_root = btrfs_extent_inline_ref_offset(eb, eiref);
1890
1891        if (key->type == BTRFS_EXTENT_ITEM_KEY) {
1892                struct btrfs_tree_block_info *info;
1893
1894                info = (struct btrfs_tree_block_info *)(ei + 1);
1895                *out_level = btrfs_tree_block_level(eb, info);
1896        } else {
1897                ASSERT(key->type == BTRFS_METADATA_ITEM_KEY);
1898                *out_level = (u8)key->offset;
1899        }
1900
1901        if (ret == 1)
1902                *ptr = (unsigned long)-1;
1903
1904        return 0;
1905}
1906
1907static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
1908                             struct extent_inode_elem *inode_list,
1909                             u64 root, u64 extent_item_objectid,
1910                             iterate_extent_inodes_t *iterate, void *ctx)
1911{
1912        struct extent_inode_elem *eie;
1913        int ret = 0;
1914
1915        for (eie = inode_list; eie; eie = eie->next) {
1916                btrfs_debug(fs_info,
1917                            "ref for %llu resolved, key (%llu EXTEND_DATA %llu), root %llu",
1918                            extent_item_objectid, eie->inum,
1919                            eie->offset, root);
1920                ret = iterate(eie->inum, eie->offset, root, ctx);
1921                if (ret) {
1922                        btrfs_debug(fs_info,
1923                                    "stopping iteration for %llu due to ret=%d",
1924                                    extent_item_objectid, ret);
1925                        break;
1926                }
1927        }
1928
1929        return ret;
1930}
1931
1932/*
1933 * calls iterate() for every inode that references the extent identified by
1934 * the given parameters.
1935 * when the iterator function returns a non-zero value, iteration stops.
1936 */
1937int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1938                                u64 extent_item_objectid, u64 extent_item_pos,
1939                                int search_commit_root,
1940                                iterate_extent_inodes_t *iterate, void *ctx,
1941                                bool ignore_offset)
1942{
1943        int ret;
1944        struct btrfs_trans_handle *trans = NULL;
1945        struct ulist *refs = NULL;
1946        struct ulist *roots = NULL;
1947        struct ulist_node *ref_node = NULL;
1948        struct ulist_node *root_node = NULL;
1949        struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
1950        struct ulist_iterator ref_uiter;
1951        struct ulist_iterator root_uiter;
1952
1953        btrfs_debug(fs_info, "resolving all inodes for extent %llu",
1954                        extent_item_objectid);
1955
1956        if (!search_commit_root) {
1957                trans = btrfs_attach_transaction(fs_info->extent_root);
1958                if (IS_ERR(trans)) {
1959                        if (PTR_ERR(trans) != -ENOENT &&
1960                            PTR_ERR(trans) != -EROFS)
1961                                return PTR_ERR(trans);
1962                        trans = NULL;
1963                }
1964        }
1965
1966        if (trans)
1967                btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1968        else
1969                down_read(&fs_info->commit_root_sem);
1970
1971        ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
1972                                   tree_mod_seq_elem.seq, &refs,
1973                                   &extent_item_pos, ignore_offset);
1974        if (ret)
1975                goto out;
1976
1977        ULIST_ITER_INIT(&ref_uiter);
1978        while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
1979                ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val,
1980                                                tree_mod_seq_elem.seq, &roots,
1981                                                ignore_offset);
1982                if (ret)
1983                        break;
1984                ULIST_ITER_INIT(&root_uiter);
1985                while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
1986                        btrfs_debug(fs_info,
1987                                    "root %llu references leaf %llu, data list %#llx",
1988                                    root_node->val, ref_node->val,
1989                                    ref_node->aux);
1990                        ret = iterate_leaf_refs(fs_info,
1991                                                (struct extent_inode_elem *)
1992                                                (uintptr_t)ref_node->aux,
1993                                                root_node->val,
1994                                                extent_item_objectid,
1995                                                iterate, ctx);
1996                }
1997                ulist_free(roots);
1998        }
1999
2000        free_leaf_list(refs);
2001out:
2002        if (trans) {
2003                btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2004                btrfs_end_transaction(trans);
2005        } else {
2006                up_read(&fs_info->commit_root_sem);
2007        }
2008
2009        return ret;
2010}
2011
2012int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
2013                                struct btrfs_path *path,
2014                                iterate_extent_inodes_t *iterate, void *ctx,
2015                                bool ignore_offset)
2016{
2017        int ret;
2018        u64 extent_item_pos;
2019        u64 flags = 0;
2020        struct btrfs_key found_key;
2021        int search_commit_root = path->search_commit_root;
2022
2023        ret = extent_from_logical(fs_info, logical, path, &found_key, &flags);
2024        btrfs_release_path(path);
2025        if (ret < 0)
2026                return ret;
2027        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
2028                return -EINVAL;
2029
2030        extent_item_pos = logical - found_key.objectid;
2031        ret = iterate_extent_inodes(fs_info, found_key.objectid,
2032                                        extent_item_pos, search_commit_root,
2033                                        iterate, ctx, ignore_offset);
2034
2035        return ret;
2036}
2037
2038typedef int (iterate_irefs_t)(u64 parent, u32 name_len, unsigned long name_off,
2039                              struct extent_buffer *eb, void *ctx);
2040
2041static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
2042                              struct btrfs_path *path,
2043                              iterate_irefs_t *iterate, void *ctx)
2044{
2045        int ret = 0;
2046        int slot;
2047        u32 cur;
2048        u32 len;
2049        u32 name_len;
2050        u64 parent = 0;
2051        int found = 0;
2052        struct extent_buffer *eb;
2053        struct btrfs_item *item;
2054        struct btrfs_inode_ref *iref;
2055        struct btrfs_key found_key;
2056
2057        while (!ret) {
2058                ret = btrfs_find_item(fs_root, path, inum,
2059                                parent ? parent + 1 : 0, BTRFS_INODE_REF_KEY,
2060                                &found_key);
2061
2062                if (ret < 0)
2063                        break;
2064                if (ret) {
2065                        ret = found ? 0 : -ENOENT;
2066                        break;
2067                }
2068                ++found;
2069
2070                parent = found_key.offset;
2071                slot = path->slots[0];
2072                eb = btrfs_clone_extent_buffer(path->nodes[0]);
2073                if (!eb) {
2074                        ret = -ENOMEM;
2075                        break;
2076                }
2077                btrfs_release_path(path);
2078
2079                item = btrfs_item_nr(slot);
2080                iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
2081
2082                for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
2083                        name_len = btrfs_inode_ref_name_len(eb, iref);
2084                        /* path must be released before calling iterate()! */
2085                        btrfs_debug(fs_root->fs_info,
2086                                "following ref at offset %u for inode %llu in tree %llu",
2087                                cur, found_key.objectid,
2088                                fs_root->root_key.objectid);
2089                        ret = iterate(parent, name_len,
2090                                      (unsigned long)(iref + 1), eb, ctx);
2091                        if (ret)
2092                                break;
2093                        len = sizeof(*iref) + name_len;
2094                        iref = (struct btrfs_inode_ref *)((char *)iref + len);
2095                }
2096                free_extent_buffer(eb);
2097        }
2098
2099        btrfs_release_path(path);
2100
2101        return ret;
2102}
2103
2104static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
2105                                 struct btrfs_path *path,
2106                                 iterate_irefs_t *iterate, void *ctx)
2107{
2108        int ret;
2109        int slot;
2110        u64 offset = 0;
2111        u64 parent;
2112        int found = 0;
2113        struct extent_buffer *eb;
2114        struct btrfs_inode_extref *extref;
2115        u32 item_size;
2116        u32 cur_offset;
2117        unsigned long ptr;
2118
2119        while (1) {
2120                ret = btrfs_find_one_extref(fs_root, inum, offset, path, &extref,
2121                                            &offset);
2122                if (ret < 0)
2123                        break;
2124                if (ret) {
2125                        ret = found ? 0 : -ENOENT;
2126                        break;
2127                }
2128                ++found;
2129
2130                slot = path->slots[0];
2131                eb = btrfs_clone_extent_buffer(path->nodes[0]);
2132                if (!eb) {
2133                        ret = -ENOMEM;
2134                        break;
2135                }
2136                btrfs_release_path(path);
2137
2138                item_size = btrfs_item_size_nr(eb, slot);
2139                ptr = btrfs_item_ptr_offset(eb, slot);
2140                cur_offset = 0;
2141
2142                while (cur_offset < item_size) {
2143                        u32 name_len;
2144
2145                        extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
2146                        parent = btrfs_inode_extref_parent(eb, extref);
2147                        name_len = btrfs_inode_extref_name_len(eb, extref);
2148                        ret = iterate(parent, name_len,
2149                                      (unsigned long)&extref->name, eb, ctx);
2150                        if (ret)
2151                                break;
2152
2153                        cur_offset += btrfs_inode_extref_name_len(eb, extref);
2154                        cur_offset += sizeof(*extref);
2155                }
2156                free_extent_buffer(eb);
2157
2158                offset++;
2159        }
2160
2161        btrfs_release_path(path);
2162
2163        return ret;
2164}
2165
2166static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
2167                         struct btrfs_path *path, iterate_irefs_t *iterate,
2168                         void *ctx)
2169{
2170        int ret;
2171        int found_refs = 0;
2172
2173        ret = iterate_inode_refs(inum, fs_root, path, iterate, ctx);
2174        if (!ret)
2175                ++found_refs;
2176        else if (ret != -ENOENT)
2177                return ret;
2178
2179        ret = iterate_inode_extrefs(inum, fs_root, path, iterate, ctx);
2180        if (ret == -ENOENT && found_refs)
2181                return 0;
2182
2183        return ret;
2184}
2185
2186/*
2187 * returns 0 if the path could be dumped (probably truncated)
2188 * returns <0 in case of an error
2189 */
2190static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
2191                         struct extent_buffer *eb, void *ctx)
2192{
2193        struct inode_fs_paths *ipath = ctx;
2194        char *fspath;
2195        char *fspath_min;
2196        int i = ipath->fspath->elem_cnt;
2197        const int s_ptr = sizeof(char *);
2198        u32 bytes_left;
2199
2200        bytes_left = ipath->fspath->bytes_left > s_ptr ?
2201                                        ipath->fspath->bytes_left - s_ptr : 0;
2202
2203        fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr;
2204        fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len,
2205                                   name_off, eb, inum, fspath_min, bytes_left);
2206        if (IS_ERR(fspath))
2207                return PTR_ERR(fspath);
2208
2209        if (fspath > fspath_min) {
2210                ipath->fspath->val[i] = (u64)(unsigned long)fspath;
2211                ++ipath->fspath->elem_cnt;
2212                ipath->fspath->bytes_left = fspath - fspath_min;
2213        } else {
2214                ++ipath->fspath->elem_missed;
2215                ipath->fspath->bytes_missing += fspath_min - fspath;
2216                ipath->fspath->bytes_left = 0;
2217        }
2218
2219        return 0;
2220}
2221
2222/*
2223 * this dumps all file system paths to the inode into the ipath struct, provided
2224 * is has been created large enough. each path is zero-terminated and accessed
2225 * from ipath->fspath->val[i].
2226 * when it returns, there are ipath->fspath->elem_cnt number of paths available
2227 * in ipath->fspath->val[]. when the allocated space wasn't sufficient, the
2228 * number of missed paths is recorded in ipath->fspath->elem_missed, otherwise,
2229 * it's zero. ipath->fspath->bytes_missing holds the number of bytes that would
2230 * have been needed to return all paths.
2231 */
2232int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
2233{
2234        return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path,
2235                             inode_to_path, ipath);
2236}
2237
2238struct btrfs_data_container *init_data_container(u32 total_bytes)
2239{
2240        struct btrfs_data_container *data;
2241        size_t alloc_bytes;
2242
2243        alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
2244        data = kvmalloc(alloc_bytes, GFP_KERNEL);
2245        if (!data)
2246                return ERR_PTR(-ENOMEM);
2247
2248        if (total_bytes >= sizeof(*data)) {
2249                data->bytes_left = total_bytes - sizeof(*data);
2250                data->bytes_missing = 0;
2251        } else {
2252                data->bytes_missing = sizeof(*data) - total_bytes;
2253                data->bytes_left = 0;
2254        }
2255
2256        data->elem_cnt = 0;
2257        data->elem_missed = 0;
2258
2259        return data;
2260}
2261
2262/*
2263 * allocates space to return multiple file system paths for an inode.
2264 * total_bytes to allocate are passed, note that space usable for actual path
2265 * information will be total_bytes - sizeof(struct inode_fs_paths).
2266 * the returned pointer must be freed with free_ipath() in the end.
2267 */
2268struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
2269                                        struct btrfs_path *path)
2270{
2271        struct inode_fs_paths *ifp;
2272        struct btrfs_data_container *fspath;
2273
2274        fspath = init_data_container(total_bytes);
2275        if (IS_ERR(fspath))
2276                return ERR_CAST(fspath);
2277
2278        ifp = kmalloc(sizeof(*ifp), GFP_KERNEL);
2279        if (!ifp) {
2280                kvfree(fspath);
2281                return ERR_PTR(-ENOMEM);
2282        }
2283
2284        ifp->btrfs_path = path;
2285        ifp->fspath = fspath;
2286        ifp->fs_root = fs_root;
2287
2288        return ifp;
2289}
2290
2291void free_ipath(struct inode_fs_paths *ipath)
2292{
2293        if (!ipath)
2294                return;
2295        kvfree(ipath->fspath);
2296        kfree(ipath);
2297}
2298