linux/fs/btrfs/delayed-ref.c
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
 * delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

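/*
 * Illustrative overview (a sketch, not invoked by the code below): a tree
 * or data extent modification queues a delayed ref with
 * btrfs_add_delayed_tree_ref()/btrfs_add_delayed_data_ref(), and the refs
 * accumulate under a per-extent head node in the transaction's rbtree.
 * They are only applied to the extent allocation tree later, when the
 * transaction runs its delayed refs (see btrfs_run_delayed_refs() in
 * extent-tree.c), which pulls heads off the tree in clusters via
 * btrfs_find_ref_cluster() below.
 */
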
/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
                          struct btrfs_delayed_tree_ref *ref1, int type)
{
        if (type == BTRFS_TREE_BLOCK_REF_KEY) {
                if (ref1->root < ref2->root)
                        return -1;
                if (ref1->root > ref2->root)
                        return 1;
        } else {
                if (ref1->parent < ref2->parent)
                        return -1;
                if (ref1->parent > ref2->parent)
                        return 1;
        }
        return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
                          struct btrfs_delayed_data_ref *ref1)
{
        if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
                if (ref1->root < ref2->root)
                        return -1;
                if (ref1->root > ref2->root)
                        return 1;
                if (ref1->objectid < ref2->objectid)
                        return -1;
                if (ref1->objectid > ref2->objectid)
                        return 1;
                if (ref1->offset < ref2->offset)
                        return -1;
                if (ref1->offset > ref2->offset)
                        return 1;
        } else {
                if (ref1->parent < ref2->parent)
                        return -1;
                if (ref1->parent > ref2->parent)
                        return 1;
        }
        return 0;
}

/*
 * entries in the rb tree are ordered by the byte number of the extent,
 * type of the delayed backrefs and content of delayed backrefs.
 */
static int comp_entry(struct btrfs_delayed_ref_node *ref2,
                      struct btrfs_delayed_ref_node *ref1,
                      bool compare_seq)
{
        if (ref1->bytenr < ref2->bytenr)
                return -1;
        if (ref1->bytenr > ref2->bytenr)
                return 1;
        if (ref1->is_head && ref2->is_head)
                return 0;
        if (ref2->is_head)
                return -1;
        if (ref1->is_head)
                return 1;
        if (ref1->type < ref2->type)
                return -1;
        if (ref1->type > ref2->type)
                return 1;
        /* merging of sequenced refs is not allowed */
        if (compare_seq) {
                if (ref1->seq < ref2->seq)
                        return -1;
                if (ref1->seq > ref2->seq)
                        return 1;
        }
        if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
            ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
                return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
                                      btrfs_delayed_node_to_tree_ref(ref1),
                                      ref1->type);
        } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
                   ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
                return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
                                      btrfs_delayed_node_to_data_ref(ref1));
        }
        BUG();
        return 0;
}

/*
 * insert a new ref into the rbtree.  This returns any existing refs
 * for the same (bytenr,parent) tuple, or NULL if the new node was properly
 * inserted.
 */
static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
                                                  struct rb_node *node)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent_node = NULL;
        struct btrfs_delayed_ref_node *entry;
        struct btrfs_delayed_ref_node *ins;
        int cmp;

        ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
        while (*p) {
                parent_node = *p;
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
                                 rb_node);

                cmp = comp_entry(entry, ins, 1);
                if (cmp < 0)
                        p = &(*p)->rb_left;
                else if (cmp > 0)
                        p = &(*p)->rb_right;
                else
                        return entry;
        }

        rb_link_node(node, parent_node, p);
        rb_insert_color(node, root);
        return NULL;
}

/*
 * find a head entry based on bytenr. This returns the delayed ref
 * head if it was able to find one, or NULL if nothing was in that spot.
 * If return_bigger is given, the next bigger entry is returned if no exact
 * match is found.
 */
static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
                                  u64 bytenr,
                                  struct btrfs_delayed_ref_node **last,
                                  int return_bigger)
{
        struct rb_node *n;
        struct btrfs_delayed_ref_node *entry;
        int cmp = 0;

again:
        n = root->rb_node;
        entry = NULL;
        while (n) {
                entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
                WARN_ON(!entry->in_tree);
                if (last)
                        *last = entry;

                if (bytenr < entry->bytenr)
                        cmp = -1;
                else if (bytenr > entry->bytenr)
                        cmp = 1;
                else if (!btrfs_delayed_ref_is_head(entry))
                        cmp = 1;
                else
                        cmp = 0;

                if (cmp < 0)
                        n = n->rb_left;
                else if (cmp > 0)
                        n = n->rb_right;
                else
                        return entry;
        }
        if (entry && return_bigger) {
                if (cmp > 0) {
                        n = rb_next(&entry->rb_node);
                        if (!n)
                                n = rb_first(root);
                        entry = rb_entry(n, struct btrfs_delayed_ref_node,
                                         rb_node);
                        bytenr = entry->bytenr;
                        return_bigger = 0;
                        goto again;
                }
                return entry;
        }
        return NULL;
}

int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
                           struct btrfs_delayed_ref_head *head)
{
        struct btrfs_delayed_ref_root *delayed_refs;

        delayed_refs = &trans->transaction->delayed_refs;
        assert_spin_locked(&delayed_refs->lock);
        if (mutex_trylock(&head->mutex))
                return 0;

        atomic_inc(&head->node.refs);
        spin_unlock(&delayed_refs->lock);

        mutex_lock(&head->mutex);
        spin_lock(&delayed_refs->lock);
        if (!head->node.in_tree) {
                mutex_unlock(&head->mutex);
                btrfs_put_delayed_ref(&head->node);
                return -EAGAIN;
        }
        btrfs_put_delayed_ref(&head->node);
        return 0;
}
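
/*
 * Callers of btrfs_delayed_ref_lock() must be prepared for -EAGAIN, which
 * means the head was removed from the rbtree while we slept on its mutex
 * and the lookup has to be restarted.  Illustrative sketch (not the exact
 * caller code, which lives in extent-tree.c):
 *
 *      ret = btrfs_delayed_ref_lock(trans, locked_ref);
 *      if (ret == -EAGAIN) {
 *              locked_ref = NULL;      // head vanished; pick a new one
 *              continue;
 *      }
 */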

static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                                    struct btrfs_delayed_ref_root *delayed_refs,
                                    struct btrfs_delayed_ref_node *ref)
{
        rb_erase(&ref->rb_node, &delayed_refs->root);
        ref->in_tree = 0;
        btrfs_put_delayed_ref(ref);
        delayed_refs->num_entries--;
        if (trans->delayed_ref_updates)
                trans->delayed_ref_updates--;
}

static int merge_ref(struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_root *delayed_refs,
                     struct btrfs_delayed_ref_node *ref, u64 seq)
{
        struct rb_node *node;
        int merged = 0;
        int mod = 0;
        int done = 0;

        node = rb_prev(&ref->rb_node);
        while (node) {
                struct btrfs_delayed_ref_node *next;

                next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
                node = rb_prev(node);
                if (next->bytenr != ref->bytenr)
                        break;
                if (seq && next->seq >= seq)
                        break;
                if (comp_entry(ref, next, 0))
                        continue;

                if (ref->action == next->action) {
                        mod = next->ref_mod;
                } else {
                        if (ref->ref_mod < next->ref_mod) {
                                struct btrfs_delayed_ref_node *tmp;

                                tmp = ref;
                                ref = next;
                                next = tmp;
                                done = 1;
                        }
                        mod = -next->ref_mod;
                }

                merged++;
                drop_delayed_ref(trans, delayed_refs, next);
                ref->ref_mod += mod;
                if (ref->ref_mod == 0) {
                        drop_delayed_ref(trans, delayed_refs, ref);
                        break;
                } else {
                        /*
                         * You can't have multiples of the same ref on a tree
                         * block.
                         */
                        WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
                }

                if (done)
                        break;
                node = rb_prev(&ref->rb_node);
        }

        return merged;
}
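
/*
 * Worked example (illustrative): if the rbtree holds, for the same bytenr
 * and root, an ADD ref with ref_mod == 1 and a later DROP ref with
 * ref_mod == 1, merge_ref() pairs them up: mod becomes -1, the DROP entry
 * is dropped from the tree, and the ADD's ref_mod falls to 0, so it is
 * dropped as well and no update ever reaches the extent allocation tree.
 */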

void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info,
                              struct btrfs_delayed_ref_root *delayed_refs,
                              struct btrfs_delayed_ref_head *head)
{
        struct rb_node *node;
        u64 seq = 0;

        spin_lock(&fs_info->tree_mod_seq_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                struct seq_list *elem;

                elem = list_first_entry(&fs_info->tree_mod_seq_list,
                                        struct seq_list, list);
                seq = elem->seq;
        }
        spin_unlock(&fs_info->tree_mod_seq_lock);

        node = rb_prev(&head->node.rb_node);
        while (node) {
                struct btrfs_delayed_ref_node *ref;

                ref = rb_entry(node, struct btrfs_delayed_ref_node,
                               rb_node);
                if (ref->bytenr != head->node.bytenr)
                        break;

                /* We can't merge refs that are outside of our seq count */
                if (seq && ref->seq >= seq)
                        break;
                if (merge_ref(trans, delayed_refs, ref, seq))
                        node = rb_prev(&head->node.rb_node);
                else
                        node = rb_prev(node);
        }
}

int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
                            struct btrfs_delayed_ref_root *delayed_refs,
                            u64 seq)
{
        struct seq_list *elem;
        int ret = 0;

        spin_lock(&fs_info->tree_mod_seq_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                elem = list_first_entry(&fs_info->tree_mod_seq_list,
                                        struct seq_list, list);
                if (seq >= elem->seq) {
                        pr_debug("holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)\n",
                                 (u32)(seq >> 32), (u32)seq,
                                 (u32)(elem->seq >> 32), (u32)elem->seq,
                                 delayed_refs);
                        ret = 1;
                }
        }

        spin_unlock(&fs_info->tree_mod_seq_lock);
        return ret;
}

int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
                           struct list_head *cluster, u64 start)
{
        int count = 0;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct rb_node *node;
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_ref_head *head;

        delayed_refs = &trans->transaction->delayed_refs;
        if (start == 0) {
                node = rb_first(&delayed_refs->root);
        } else {
                ref = NULL;
                find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
                if (ref) {
                        node = &ref->rb_node;
                } else
                        node = rb_first(&delayed_refs->root);
        }
again:
        while (node && count < 32) {
                ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
                if (btrfs_delayed_ref_is_head(ref)) {
                        head = btrfs_delayed_node_to_head(ref);
                        if (list_empty(&head->cluster)) {
                                list_add_tail(&head->cluster, cluster);
                                delayed_refs->run_delayed_start =
                                        head->node.bytenr;
                                count++;

                                WARN_ON(delayed_refs->num_heads_ready == 0);
                                delayed_refs->num_heads_ready--;
                        } else if (count) {
                                /* the goal of the clustering is to find extents
                                 * that are likely to end up in the same extent
                                 * leaf on disk.  So, we don't want them spread
                                 * all over the tree.  Stop now if we've hit
                                 * a head that was already in use
                                 */
                                break;
                        }
                }
                node = rb_next(node);
        }
        if (count) {
                return 0;
        } else if (start) {
                /*
                 * we've gone to the end of the rbtree without finding any
                 * clusters.  start from the beginning and try again
                 */
                start = 0;
                node = rb_first(&delayed_refs->root);
                goto again;
        }
        return 1;
}

void btrfs_release_ref_cluster(struct list_head *cluster)
{
        struct list_head *pos, *q;

        list_for_each_safe(pos, q, cluster)
                list_del_init(pos);
}
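
/*
 * Typical cluster lifecycle, sketched for illustration (the real caller is
 * btrfs_run_delayed_refs() in extent-tree.c):
 *
 *      LIST_HEAD(cluster);
 *
 *      ret = btrfs_find_ref_cluster(trans, &cluster,
 *                                   delayed_refs->run_delayed_start);
 *      if (!ret) {
 *              // process the (up to 32) heads collected on the list, then
 *              // put back any heads that were not fully run
 *              btrfs_release_ref_cluster(&cluster);
 *      }
 */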

/*
 * helper function to update an extent delayed ref in the
 * rbtree.  existing and update must both have the same
 * bytenr and parent
 *
 * This may free existing if the update cancels out whatever
 * operation it was doing.
 */
static noinline void
update_existing_ref(struct btrfs_trans_handle *trans,
                    struct btrfs_delayed_ref_root *delayed_refs,
                    struct btrfs_delayed_ref_node *existing,
                    struct btrfs_delayed_ref_node *update)
{
        if (update->action != existing->action) {
                /*
                 * this is effectively undoing either an add or a
                 * drop.  We decrement the ref_mod, and if it goes
                 * down to zero we just delete the entry without
                 * ever changing the extent allocation tree.
                 */
                existing->ref_mod--;
                if (existing->ref_mod == 0)
                        drop_delayed_ref(trans, delayed_refs, existing);
                else
                        WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
        } else {
                WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
                        existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
                /*
                 * the action on the existing ref matches
                 * the action on the ref we're trying to add.
                 * Bump the ref_mod by one so the backref that
                 * is eventually added/removed has the correct
                 * reference count
                 */
                existing->ref_mod += update->ref_mod;
        }
}

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
                         struct btrfs_delayed_ref_node *update)
{
        struct btrfs_delayed_ref_head *existing_ref;
        struct btrfs_delayed_ref_head *ref;

        existing_ref = btrfs_delayed_node_to_head(existing);
        ref = btrfs_delayed_node_to_head(update);
        BUG_ON(existing_ref->is_data != ref->is_data);

        if (ref->must_insert_reserved) {
                /* if the extent was freed and then
                 * reallocated before the delayed ref
                 * entries were processed, we can end up
                 * with an existing head ref without
                 * the must_insert_reserved flag set.
                 * Set it again here
                 */
                existing_ref->must_insert_reserved = ref->must_insert_reserved;

                /*
                 * update the num_bytes so we make sure the accounting
                 * is done correctly
                 */
                existing->num_bytes = update->num_bytes;

        }

        if (ref->extent_op) {
                if (!existing_ref->extent_op) {
                        existing_ref->extent_op = ref->extent_op;
                } else {
                        if (ref->extent_op->update_key) {
                                memcpy(&existing_ref->extent_op->key,
                                       &ref->extent_op->key,
                                       sizeof(ref->extent_op->key));
                                existing_ref->extent_op->update_key = 1;
                        }
                        if (ref->extent_op->update_flags) {
                                existing_ref->extent_op->flags_to_set |=
                                        ref->extent_op->flags_to_set;
                                existing_ref->extent_op->update_flags = 1;
                        }
                        btrfs_free_delayed_extent_op(ref->extent_op);
                }
        }
        /*
         * update the reference mod on the head to reflect this new operation
         */
        existing->ref_mod += update->ref_mod;
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                                        struct btrfs_trans_handle *trans,
                                        struct btrfs_delayed_ref_node *ref,
                                        u64 bytenr, u64 num_bytes,
                                        int action, int is_data)
{
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_ref_head *head_ref = NULL;
        struct btrfs_delayed_ref_root *delayed_refs;
        int count_mod = 1;
        int must_insert_reserved = 0;

        /*
         * the head node stores the sum of all the mods, so dropping a ref
         * should drop the sum in the head node by one.
         */
        if (action == BTRFS_UPDATE_DELAYED_HEAD)
                count_mod = 0;
        else if (action == BTRFS_DROP_DELAYED_REF)
                count_mod = -1;

        /*
         * BTRFS_ADD_DELAYED_EXTENT means that we need to update
         * the reserved accounting when the extent is finally added, or
         * if a later modification deletes the delayed ref without ever
         * inserting the extent into the extent allocation tree.
         * ref->must_insert_reserved is the flag used to record
         * that accounting mods are required.
         *
         * Once we record must_insert_reserved, switch the action to
         * BTRFS_ADD_DELAYED_REF because other special casing is not required.
         */
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                must_insert_reserved = 1;
        else
                must_insert_reserved = 0;

        delayed_refs = &trans->transaction->delayed_refs;

        /* first set the basic ref node struct up */
        atomic_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = count_mod;
        ref->type = 0;
        ref->action = 0;
        ref->is_head = 1;
        ref->in_tree = 1;
        ref->seq = 0;

        head_ref = btrfs_delayed_node_to_head(ref);
        head_ref->must_insert_reserved = must_insert_reserved;
        head_ref->is_data = is_data;

        INIT_LIST_HEAD(&head_ref->cluster);
        mutex_init(&head_ref->mutex);

        trace_btrfs_delayed_ref_head(ref, head_ref, action);

        existing = tree_insert(&delayed_refs->root, &ref->rb_node);

        if (existing) {
                update_existing_head_ref(existing, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
        } else {
                delayed_refs->num_heads++;
                delayed_refs->num_heads_ready++;
                delayed_refs->num_entries++;
                trans->delayed_ref_updates++;
        }
}

/*
 * helper to insert a delayed tree ref into the rbtree.
 */
static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                                         struct btrfs_trans_handle *trans,
                                         struct btrfs_delayed_ref_node *ref,
                                         u64 bytenr, u64 num_bytes, u64 parent,
                                         u64 ref_root, int level, int action,
                                         int for_cow)
{
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_tree_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;

        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;

        delayed_refs = &trans->transaction->delayed_refs;

        /* first set the basic ref node struct up */
        atomic_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = 1;
        ref->action = action;
        ref->is_head = 0;
        ref->in_tree = 1;

        if (need_ref_seq(for_cow, ref_root))
                seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
        ref->seq = seq;

        full_ref = btrfs_delayed_node_to_tree_ref(ref);
        full_ref->parent = parent;
        full_ref->root = ref_root;
        if (parent)
                ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
        else
                ref->type = BTRFS_TREE_BLOCK_REF_KEY;
        full_ref->level = level;

        trace_btrfs_delayed_tree_ref(ref, full_ref, action);

        existing = tree_insert(&delayed_refs->root, &ref->rb_node);

        if (existing) {
                update_existing_ref(trans, delayed_refs, existing, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
        } else {
                delayed_refs->num_entries++;
                trans->delayed_ref_updates++;
        }
}

/*
 * helper to insert a delayed data ref into the rbtree.
 */
static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                                         struct btrfs_trans_handle *trans,
                                         struct btrfs_delayed_ref_node *ref,
                                         u64 bytenr, u64 num_bytes, u64 parent,
                                         u64 ref_root, u64 owner, u64 offset,
                                         int action, int for_cow)
{
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_data_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;

        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;

        delayed_refs = &trans->transaction->delayed_refs;

        /* first set the basic ref node struct up */
        atomic_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = 1;
        ref->action = action;
        ref->is_head = 0;
        ref->in_tree = 1;

        if (need_ref_seq(for_cow, ref_root))
                seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
        ref->seq = seq;

        full_ref = btrfs_delayed_node_to_data_ref(ref);
        full_ref->parent = parent;
        full_ref->root = ref_root;
        if (parent)
                ref->type = BTRFS_SHARED_DATA_REF_KEY;
        else
                ref->type = BTRFS_EXTENT_DATA_REF_KEY;

        full_ref->objectid = owner;
        full_ref->offset = offset;

        trace_btrfs_delayed_data_ref(ref, full_ref, action);

        existing = tree_insert(&delayed_refs->root, &ref->rb_node);

        if (existing) {
                update_existing_ref(trans, delayed_refs, existing, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
        } else {
                delayed_refs->num_entries++;
                trans->delayed_ref_updates++;
        }
}

/*
 * add a delayed tree ref.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes, u64 parent,
                               u64 ref_root, int level, int action,
                               struct btrfs_delayed_extent_op *extent_op,
                               int for_cow)
{
        struct btrfs_delayed_tree_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        BUG_ON(extent_op && extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref) {
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
                return -ENOMEM;
        }

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
                                   num_bytes, action, 0);

        add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
                                   num_bytes, parent, ref_root, level, action,
                                   for_cow);
        spin_unlock(&delayed_refs->lock);
        if (need_ref_seq(for_cow, ref_root))
                btrfs_qgroup_record_ref(trans, &ref->node, extent_op);

        return 0;
}
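
/*
 * Illustrative call (a sketch, the values are made up): when a block at
 * 'level' of a subvolume tree gains a reference, e.g. while COWing a
 * shared block, a caller in extent-tree.c queues the update roughly like:
 *
 *      ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
 *                                       buf->start, buf->len,
 *                                       parent, root_objectid, level,
 *                                       BTRFS_ADD_DELAYED_REF, NULL,
 *                                       for_cow);
 *      if (ret)
 *              return ret;
 */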

/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes,
                               u64 parent, u64 ref_root,
                               u64 owner, u64 offset, int action,
                               struct btrfs_delayed_extent_op *extent_op,
                               int for_cow)
{
        struct btrfs_delayed_data_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        BUG_ON(extent_op && !extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref) {
                kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
                return -ENOMEM;
        }

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
                                   num_bytes, action, 1);

        add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
                                   num_bytes, parent, ref_root, owner, offset,
                                   action, for_cow);
        spin_unlock(&delayed_refs->lock);
        if (need_ref_seq(for_cow, ref_root))
                btrfs_qgroup_record_ref(trans, &ref->node, extent_op);

        return 0;
}
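
/*
 * For data refs the extra arguments identify the file extent item
 * (illustrative sketch mirroring the tree-ref example above, variable
 * names are hypothetical): 'owner' is the objectid of the inode that owns
 * the extent and 'offset' is the file offset it is mapped at:
 *
 *      ret = btrfs_add_delayed_data_ref(root->fs_info, trans,
 *                                       disk_bytenr, disk_num_bytes,
 *                                       0, root->root_key.objectid,
 *                                       btrfs_ino(inode), file_offset,
 *                                       BTRFS_ADD_DELAYED_REF, NULL, 0);
 */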

int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                struct btrfs_delayed_extent_op *extent_op)
{
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref)
                return -ENOMEM;

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
                                   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
                                   extent_op->is_data);

        spin_unlock(&delayed_refs->lock);
        return 0;
}

/*
 * this does a simple search for the head node for a given extent.
 * It must be called with the delayed ref spinlock held, and it returns
 * the head node if one was found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
{
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        delayed_refs = &trans->transaction->delayed_refs;
        ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
        if (ref)
                return btrfs_delayed_node_to_head(ref);
        return NULL;
}

void btrfs_delayed_ref_exit(void)
{
        if (btrfs_delayed_ref_head_cachep)
                kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
        if (btrfs_delayed_tree_ref_cachep)
                kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
        if (btrfs_delayed_data_ref_cachep)
                kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
        if (btrfs_delayed_extent_op_cachep)
                kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

int btrfs_delayed_ref_init(void)
{
        btrfs_delayed_ref_head_cachep = kmem_cache_create(
                                "btrfs_delayed_ref_head",
                                sizeof(struct btrfs_delayed_ref_head), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_ref_head_cachep)
                goto fail;

        btrfs_delayed_tree_ref_cachep = kmem_cache_create(
                                "btrfs_delayed_tree_ref",
                                sizeof(struct btrfs_delayed_tree_ref), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_tree_ref_cachep)
                goto fail;

        btrfs_delayed_data_ref_cachep = kmem_cache_create(
                                "btrfs_delayed_data_ref",
                                sizeof(struct btrfs_delayed_data_ref), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_data_ref_cachep)
                goto fail;

        btrfs_delayed_extent_op_cachep = kmem_cache_create(
                                "btrfs_delayed_extent_op",
                                sizeof(struct btrfs_delayed_extent_op), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_extent_op_cachep)
                goto fail;

        return 0;
fail:
        btrfs_delayed_ref_exit();
        return -ENOMEM;
}
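
/*
 * Illustrative note (a sketch of the expected usage, not code from this
 * file): btrfs_delayed_ref_init() is called once from the module init
 * path in super.c and paired with btrfs_delayed_ref_exit() on failure or
 * module unload, e.g.:
 *
 *      err = btrfs_delayed_ref_init();
 *      if (err)
 *              goto unwind_earlier_init;
 *      ...
 *      btrfs_delayed_ref_exit();
 */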