linux/fs/btrfs/delayed-ref.c
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
  18
  19#include <linux/sched.h>
  20#include <linux/slab.h>
  21#include <linux/sort.h>
  22#include "ctree.h"
  23#include "delayed-ref.h"
  24#include "transaction.h"
  25
  26struct kmem_cache *btrfs_delayed_ref_head_cachep;
  27struct kmem_cache *btrfs_delayed_tree_ref_cachep;
  28struct kmem_cache *btrfs_delayed_data_ref_cachep;
  29struct kmem_cache *btrfs_delayed_extent_op_cachep;
  30/*
  31 * delayed back reference update tracking.  For subvolume trees
  32 * we queue up extent allocations and backref maintenance for
  33 * delayed processing.   This avoids deep call chains where we
  34 * add extents in the middle of btrfs_search_slot, and it allows
  35 * us to buffer up frequently modified backrefs in an rb tree instead
  36 * of hammering updates on the extent allocation tree.
  37 */
  38
  39/*
  40 * compare two delayed tree backrefs with same bytenr and type
  41 */
  42static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
  43                          struct btrfs_delayed_tree_ref *ref1, int type)
  44{
  45        if (type == BTRFS_TREE_BLOCK_REF_KEY) {
  46                if (ref1->root < ref2->root)
  47                        return -1;
  48                if (ref1->root > ref2->root)
  49                        return 1;
  50        } else {
  51                if (ref1->parent < ref2->parent)
  52                        return -1;
  53                if (ref1->parent > ref2->parent)
  54                        return 1;
  55        }
  56        return 0;
  57}
  58
  59/*
  60 * compare two delayed data backrefs with same bytenr and type
  61 */
  62static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
  63                          struct btrfs_delayed_data_ref *ref1)
  64{
  65        if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
  66                if (ref1->root < ref2->root)
  67                        return -1;
  68                if (ref1->root > ref2->root)
  69                        return 1;
  70                if (ref1->objectid < ref2->objectid)
  71                        return -1;
  72                if (ref1->objectid > ref2->objectid)
  73                        return 1;
  74                if (ref1->offset < ref2->offset)
  75                        return -1;
  76                if (ref1->offset > ref2->offset)
  77                        return 1;
  78        } else {
  79                if (ref1->parent < ref2->parent)
  80                        return -1;
  81                if (ref1->parent > ref2->parent)
  82                        return 1;
  83        }
  84        return 0;
  85}
  86
  87/*
  88 * entries in the rb tree are ordered by the byte number of the extent,
  89 * type of the delayed backrefs and content of delayed backrefs.
  90 */
  91static int comp_entry(struct btrfs_delayed_ref_node *ref2,
  92                      struct btrfs_delayed_ref_node *ref1,
  93                      bool compare_seq)
  94{
  95        if (ref1->bytenr < ref2->bytenr)
  96                return -1;
  97        if (ref1->bytenr > ref2->bytenr)
  98                return 1;
  99        if (ref1->is_head && ref2->is_head)
 100                return 0;
 101        if (ref2->is_head)
 102                return -1;
 103        if (ref1->is_head)
 104                return 1;
 105        if (ref1->type < ref2->type)
 106                return -1;
 107        if (ref1->type > ref2->type)
 108                return 1;
 109        if (ref1->no_quota > ref2->no_quota)
 110                return 1;
 111        if (ref1->no_quota < ref2->no_quota)
 112                return -1;
 113        /* merging of sequenced refs is not allowed */
 114        if (compare_seq) {
 115                if (ref1->seq < ref2->seq)
 116                        return -1;
 117                if (ref1->seq > ref2->seq)
 118                        return 1;
 119        }
 120        if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
 121            ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
 122                return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
 123                                      btrfs_delayed_node_to_tree_ref(ref1),
 124                                      ref1->type);
 125        } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
 126                   ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
 127                return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
 128                                      btrfs_delayed_node_to_data_ref(ref1));
 129        }
 130        BUG();
 131        return 0;
 132}
 133
/*
 * insert a new ref into the rbtree.  This returns the existing ref if an
 * equivalent one is already present, or NULL if the new node was properly
 * inserted.
 */
static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
                                                  struct rb_node *node)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent_node = NULL;
        struct btrfs_delayed_ref_node *entry;
        struct btrfs_delayed_ref_node *ins;
        int cmp;

        ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
        while (*p) {
                parent_node = *p;
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
                                 rb_node);

                cmp = comp_entry(entry, ins, 1);
                if (cmp < 0)
                        p = &(*p)->rb_left;
                else if (cmp > 0)
                        p = &(*p)->rb_right;
                else
                        return entry;
        }

        rb_link_node(node, parent_node, p);
        rb_insert_color(node, root);
        return NULL;
}

/* insert a new ref to head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
                                                   struct rb_node *node)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent_node = NULL;
        struct btrfs_delayed_ref_head *entry;
        struct btrfs_delayed_ref_head *ins;
        u64 bytenr;

        ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
        bytenr = ins->node.bytenr;
        while (*p) {
                parent_node = *p;
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
                                 href_node);

                if (bytenr < entry->node.bytenr)
                        p = &(*p)->rb_left;
                else if (bytenr > entry->node.bytenr)
                        p = &(*p)->rb_right;
                else
                        return entry;
        }

        rb_link_node(node, parent_node, p);
        rb_insert_color(node, root);
        return NULL;
}

/*
 * find a head entry based on bytenr. This returns the delayed ref
 * head if it was able to find one, or NULL if nothing was in that spot.
 * If return_bigger is given, the next bigger entry is returned if no
 * exact match is found.
 */
static struct btrfs_delayed_ref_head *
find_ref_head(struct rb_root *root, u64 bytenr,
              int return_bigger)
{
        struct rb_node *n;
        struct btrfs_delayed_ref_head *entry;

        n = root->rb_node;
        entry = NULL;
        while (n) {
                entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

                if (bytenr < entry->node.bytenr)
                        n = n->rb_left;
                else if (bytenr > entry->node.bytenr)
                        n = n->rb_right;
                else
                        return entry;
        }
        if (entry && return_bigger) {
                if (bytenr > entry->node.bytenr) {
                        n = rb_next(&entry->href_node);
                        if (!n)
                                n = rb_first(root);
                        entry = rb_entry(n, struct btrfs_delayed_ref_head,
                                         href_node);
                        return entry;
                }
                return entry;
        }
        return NULL;
}

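/*
 * lock the given delayed ref head's mutex, dropping and retaking the
 * delayed_refs spinlock if we have to sleep on it.  Returns -EAGAIN if
 * the head was removed from the tree while we waited, in which case the
 * caller must retry.
 */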
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
                           struct btrfs_delayed_ref_head *head)
{
        struct btrfs_delayed_ref_root *delayed_refs;

        delayed_refs = &trans->transaction->delayed_refs;
        assert_spin_locked(&delayed_refs->lock);
        if (mutex_trylock(&head->mutex))
                return 0;

        atomic_inc(&head->node.refs);
        spin_unlock(&delayed_refs->lock);

        mutex_lock(&head->mutex);
        spin_lock(&delayed_refs->lock);
        if (!head->node.in_tree) {
                mutex_unlock(&head->mutex);
                btrfs_put_delayed_ref(&head->node);
                return -EAGAIN;
        }
        btrfs_put_delayed_ref(&head->node);
        return 0;
}

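/*
 * remove a ref from its rbtree (the href tree for head refs, the head's
 * ref tree otherwise), drop our reference on it and update the entry
 * counters.  The appropriate lock for that tree must be held.
 */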
static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                                    struct btrfs_delayed_ref_root *delayed_refs,
                                    struct btrfs_delayed_ref_head *head,
                                    struct btrfs_delayed_ref_node *ref)
{
        if (btrfs_delayed_ref_is_head(ref)) {
                head = btrfs_delayed_node_to_head(ref);
                rb_erase(&head->href_node, &delayed_refs->href_root);
        } else {
                assert_spin_locked(&head->lock);
                rb_erase(&ref->rb_node, &head->ref_root);
        }
        ref->in_tree = 0;
        btrfs_put_delayed_ref(ref);
        atomic_dec(&delayed_refs->num_entries);
        if (trans->delayed_ref_updates)
                trans->delayed_ref_updates--;
}

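/*
 * try to merge the ref with the entries that follow it in the head's
 * tree, stopping at refs pinned by @seq.  Matching add/drop pairs cancel
 * out.  Returns nonzero if the ref we were passed was itself dropped, in
 * which case the caller must rescan from the start of the tree.
 */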
static int merge_ref(struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_root *delayed_refs,
                     struct btrfs_delayed_ref_head *head,
                     struct btrfs_delayed_ref_node *ref, u64 seq)
{
        struct rb_node *node;
        int mod = 0;
        int done = 0;

        node = rb_next(&ref->rb_node);
        while (!done && node) {
                struct btrfs_delayed_ref_node *next;

                next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
                node = rb_next(node);
                if (seq && next->seq >= seq)
                        break;
                if (comp_entry(ref, next, 0))
                        continue;

                if (ref->action == next->action) {
                        mod = next->ref_mod;
                } else {
                        if (ref->ref_mod < next->ref_mod) {
                                struct btrfs_delayed_ref_node *tmp;

                                tmp = ref;
                                ref = next;
                                next = tmp;
                                done = 1;
                        }
                        mod = -next->ref_mod;
                }

                drop_delayed_ref(trans, delayed_refs, head, next);
                ref->ref_mod += mod;
                if (ref->ref_mod == 0) {
                        drop_delayed_ref(trans, delayed_refs, head, ref);
                        done = 1;
                } else {
                        /*
                         * You can't have multiples of the same ref on a tree
                         * block.
                         */
                        WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
                }
        }
        return done;
}

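/*
 * merge all mergeable refs on a head.  Delayed data refs are left alone
 * since there are rarely enough of them to be worth it, and refs at or
 * beyond the lowest outstanding tree mod log sequence number must be
 * preserved as-is.
 */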
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info,
                              struct btrfs_delayed_ref_root *delayed_refs,
                              struct btrfs_delayed_ref_head *head)
{
        struct rb_node *node;
        u64 seq = 0;

        assert_spin_locked(&head->lock);
        /*
         * We don't have too many refs to merge in the case of delayed data
         * refs.
         */
        if (head->is_data)
                return;

        spin_lock(&fs_info->tree_mod_seq_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                struct seq_list *elem;

                elem = list_first_entry(&fs_info->tree_mod_seq_list,
                                        struct seq_list, list);
                seq = elem->seq;
        }
        spin_unlock(&fs_info->tree_mod_seq_lock);

        node = rb_first(&head->ref_root);
        while (node) {
                struct btrfs_delayed_ref_node *ref;

                ref = rb_entry(node, struct btrfs_delayed_ref_node,
                               rb_node);
                /* We can't merge refs that are outside of our seq count */
                if (seq && ref->seq >= seq)
                        break;
                if (merge_ref(trans, delayed_refs, head, ref, seq))
                        node = rb_first(&head->ref_root);
                else
                        node = rb_next(&ref->rb_node);
        }
}

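/*
 * return 1 if the given seq is at or beyond the lowest sequence number
 * still held in the tree mod log, meaning the corresponding delayed ref
 * must be held back until that log entry goes away.
 */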
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
                            struct btrfs_delayed_ref_root *delayed_refs,
                            u64 seq)
{
        struct seq_list *elem;
        int ret = 0;

        spin_lock(&fs_info->tree_mod_seq_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                elem = list_first_entry(&fs_info->tree_mod_seq_list,
                                        struct seq_list, list);
                if (seq >= elem->seq) {
                        pr_debug("holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)\n",
                                 (u32)(seq >> 32), (u32)seq,
                                 (u32)(elem->seq >> 32), (u32)elem->seq,
                                 delayed_refs);
                        ret = 1;
                }
        }

        spin_unlock(&fs_info->tree_mod_seq_lock);
        return ret;
}

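/*
 * pick the next ref head to run, starting at run_delayed_start and
 * wrapping around to the start of the tree exactly once.  Heads that are
 * already being processed are skipped.  The returned head is marked as
 * processing; NULL means there is nothing left to do.
 */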
struct btrfs_delayed_ref_head *
btrfs_select_ref_head(struct btrfs_trans_handle *trans)
{
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_delayed_ref_head *head;
        u64 start;
        bool loop = false;

        delayed_refs = &trans->transaction->delayed_refs;

again:
        start = delayed_refs->run_delayed_start;
        head = find_ref_head(&delayed_refs->href_root, start, 1);
        if (!head && !loop) {
                delayed_refs->run_delayed_start = 0;
                start = 0;
                loop = true;
                head = find_ref_head(&delayed_refs->href_root, start, 1);
                if (!head)
                        return NULL;
        } else if (!head && loop) {
                return NULL;
        }

        while (head->processing) {
                struct rb_node *node;

                node = rb_next(&head->href_node);
                if (!node) {
                        if (loop)
                                return NULL;
                        delayed_refs->run_delayed_start = 0;
                        start = 0;
                        loop = true;
                        goto again;
                }
                head = rb_entry(node, struct btrfs_delayed_ref_head,
                                href_node);
        }

        head->processing = 1;
        WARN_ON(delayed_refs->num_heads_ready == 0);
        delayed_refs->num_heads_ready--;
        delayed_refs->run_delayed_start = head->node.bytenr +
                head->node.num_bytes;
        return head;
}

/*
 * helper function to update an extent delayed ref in the
 * rbtree.  existing and update must both have the same
 * bytenr and parent
 *
 * This may free existing if the update cancels out whatever
 * operation it was doing.
 */
static noinline void
update_existing_ref(struct btrfs_trans_handle *trans,
                    struct btrfs_delayed_ref_root *delayed_refs,
                    struct btrfs_delayed_ref_head *head,
                    struct btrfs_delayed_ref_node *existing,
                    struct btrfs_delayed_ref_node *update)
{
        if (update->action != existing->action) {
                /*
                 * this is effectively undoing either an add or a
                 * drop.  We decrement the ref_mod, and if it goes
                 * down to zero we just delete the entry without
                 * ever changing the extent allocation tree.
                 */
                existing->ref_mod--;
                if (existing->ref_mod == 0)
                        drop_delayed_ref(trans, delayed_refs, head, existing);
                else
                        WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
        } else {
                WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
                        existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
                /*
                 * the action on the existing ref matches
                 * the action on the ref we're trying to add.
                 * Bump the ref_mod by one so the backref that
                 * is eventually added/removed has the correct
                 * reference count
                 */
                existing->ref_mod += update->ref_mod;
        }
}

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
                         struct btrfs_delayed_ref_node *existing,
                         struct btrfs_delayed_ref_node *update)
{
        struct btrfs_delayed_ref_head *existing_ref;
        struct btrfs_delayed_ref_head *ref;
        int old_ref_mod;

        existing_ref = btrfs_delayed_node_to_head(existing);
        ref = btrfs_delayed_node_to_head(update);
        BUG_ON(existing_ref->is_data != ref->is_data);

        spin_lock(&existing_ref->lock);
        if (ref->must_insert_reserved) {
                /* if the extent was freed and then
                 * reallocated before the delayed ref
                 * entries were processed, we can end up
                 * with an existing head ref without
                 * the must_insert_reserved flag set.
                 * Set it again here
                 */
                existing_ref->must_insert_reserved = ref->must_insert_reserved;

                /*
                 * update the num_bytes so we make sure the accounting
                 * is done correctly
                 */
                existing->num_bytes = update->num_bytes;
        }

        if (ref->extent_op) {
                if (!existing_ref->extent_op) {
                        existing_ref->extent_op = ref->extent_op;
                } else {
                        if (ref->extent_op->update_key) {
                                memcpy(&existing_ref->extent_op->key,
                                       &ref->extent_op->key,
                                       sizeof(ref->extent_op->key));
                                existing_ref->extent_op->update_key = 1;
                        }
                        if (ref->extent_op->update_flags) {
                                existing_ref->extent_op->flags_to_set |=
                                        ref->extent_op->flags_to_set;
                                existing_ref->extent_op->update_flags = 1;
                        }
                        btrfs_free_delayed_extent_op(ref->extent_op);
                }
        }
        /*
         * update the reference mod on the head to reflect this new operation.
         * We only need the lock for this case because the head could be under
         * processing right now; for refs we just added we know we're a-ok.
         */
        old_ref_mod = existing_ref->total_ref_mod;
        existing->ref_mod += update->ref_mod;
        existing_ref->total_ref_mod += update->ref_mod;

        /*
         * If we are going from a positive ref mod to a negative one, or
         * vice versa, we need to make sure to adjust pending_csums
         * accordingly.
         */
        if (existing_ref->is_data) {
                if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
                        delayed_refs->pending_csums -= existing->num_bytes;
                if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
                        delayed_refs->pending_csums += existing->num_bytes;
        }
        spin_unlock(&existing_ref->lock);
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                     struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_node *ref, u64 bytenr,
                     u64 num_bytes, int action, int is_data)
{
        struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_head *head_ref = NULL;
        struct btrfs_delayed_ref_root *delayed_refs;
        int count_mod = 1;
        int must_insert_reserved = 0;

        /*
         * the head node stores the sum of all the mods, so dropping a ref
         * should drop the sum in the head node by one.
         */
        if (action == BTRFS_UPDATE_DELAYED_HEAD)
                count_mod = 0;
        else if (action == BTRFS_DROP_DELAYED_REF)
                count_mod = -1;

        /*
         * BTRFS_ADD_DELAYED_EXTENT means that we need to update
         * the reserved accounting when the extent is finally added, or
         * if a later modification deletes the delayed ref without ever
         * inserting the extent into the extent allocation tree.
         * ref->must_insert_reserved is the flag used to record
         * that accounting mods are required.
         *
         * Once we record must_insert_reserved, switch the action to
         * BTRFS_ADD_DELAYED_REF because other special casing is not required.
         */
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                must_insert_reserved = 1;
        else
                must_insert_reserved = 0;

        delayed_refs = &trans->transaction->delayed_refs;

        /* first set the basic ref node struct up */
        atomic_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = count_mod;
        ref->type = 0;
        ref->action = 0;
        ref->is_head = 1;
        ref->in_tree = 1;
        ref->seq = 0;

        head_ref = btrfs_delayed_node_to_head(ref);
        head_ref->must_insert_reserved = must_insert_reserved;
        head_ref->is_data = is_data;
        head_ref->ref_root = RB_ROOT;
        head_ref->processing = 0;
        head_ref->total_ref_mod = count_mod;

        spin_lock_init(&head_ref->lock);
        mutex_init(&head_ref->mutex);

        trace_add_delayed_ref_head(ref, head_ref, action);

        existing = htree_insert(&delayed_refs->href_root,
                                &head_ref->href_node);
        if (existing) {
                update_existing_head_ref(delayed_refs, &existing->node, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
                head_ref = existing;
        } else {
                if (is_data && count_mod < 0)
                        delayed_refs->pending_csums += num_bytes;
                delayed_refs->num_heads++;
                delayed_refs->num_heads_ready++;
                atomic_inc(&delayed_refs->num_entries);
                trans->delayed_ref_updates++;
        }
        return head_ref;
}

/*
 * helper to insert a delayed tree ref into the rbtree.
 */
static noinline void
add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                     struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_head *head_ref,
                     struct btrfs_delayed_ref_node *ref, u64 bytenr,
                     u64 num_bytes, u64 parent, u64 ref_root, int level,
                     int action, int no_quota)
{
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_tree_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;

        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;

        if (is_fstree(ref_root))
                seq = atomic64_read(&fs_info->tree_mod_seq);
        delayed_refs = &trans->transaction->delayed_refs;

        /* first set the basic ref node struct up */
        atomic_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = 1;
        ref->action = action;
        ref->is_head = 0;
        ref->in_tree = 1;
        ref->no_quota = no_quota;
        ref->seq = seq;

        full_ref = btrfs_delayed_node_to_tree_ref(ref);
        full_ref->parent = parent;
        full_ref->root = ref_root;
        if (parent)
                ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
        else
                ref->type = BTRFS_TREE_BLOCK_REF_KEY;
        full_ref->level = level;

        trace_add_delayed_tree_ref(ref, full_ref, action);

        spin_lock(&head_ref->lock);
        existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
        if (existing) {
                update_existing_ref(trans, delayed_refs, head_ref, existing,
                                    ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
        } else {
                atomic_inc(&delayed_refs->num_entries);
                trans->delayed_ref_updates++;
        }
        spin_unlock(&head_ref->lock);
}

/*
 * helper to insert a delayed data ref into the rbtree.
 */
static noinline void
add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                     struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_head *head_ref,
                     struct btrfs_delayed_ref_node *ref, u64 bytenr,
                     u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
                     u64 offset, int action, int no_quota)
{
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_data_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;

        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;

        delayed_refs = &trans->transaction->delayed_refs;

        if (is_fstree(ref_root))
                seq = atomic64_read(&fs_info->tree_mod_seq);

        /* first set the basic ref node struct up */
        atomic_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = 1;
        ref->action = action;
        ref->is_head = 0;
        ref->in_tree = 1;
        ref->no_quota = no_quota;
        ref->seq = seq;

        full_ref = btrfs_delayed_node_to_data_ref(ref);
        full_ref->parent = parent;
        full_ref->root = ref_root;
        if (parent)
                ref->type = BTRFS_SHARED_DATA_REF_KEY;
        else
                ref->type = BTRFS_EXTENT_DATA_REF_KEY;

        full_ref->objectid = owner;
        full_ref->offset = offset;

        trace_add_delayed_data_ref(ref, full_ref, action);

        spin_lock(&head_ref->lock);
        existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
        if (existing) {
                update_existing_ref(trans, delayed_refs, head_ref, existing,
                                    ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
        } else {
                atomic_inc(&delayed_refs->num_entries);
                trans->delayed_ref_updates++;
        }
        spin_unlock(&head_ref->lock);
}

/*
 * add a delayed tree ref.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes, u64 parent,
                               u64 ref_root, int level, int action,
                               struct btrfs_delayed_extent_op *extent_op,
                               int no_quota)
{
        struct btrfs_delayed_tree_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        if (!is_fstree(ref_root) || !fs_info->quota_enabled)
                no_quota = 0;

        BUG_ON(extent_op && extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref) {
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
                return -ENOMEM;
        }

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
                                        bytenr, num_bytes, action, 0);

        add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                             num_bytes, parent, ref_root, level, action,
                             no_quota);
        spin_unlock(&delayed_refs->lock);

        return 0;
}

/*
 * add a delayed data ref.  It's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes,
                               u64 parent, u64 ref_root,
                               u64 owner, u64 offset, int action,
                               struct btrfs_delayed_extent_op *extent_op,
                               int no_quota)
{
        struct btrfs_delayed_data_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        if (!is_fstree(ref_root) || !fs_info->quota_enabled)
                no_quota = 0;

        BUG_ON(extent_op && !extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref) {
                kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
                return -ENOMEM;
        }

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
                                        bytenr, num_bytes, action, 1);

        add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                             num_bytes, parent, ref_root, owner, offset,
                             action, no_quota);
        spin_unlock(&delayed_refs->lock);

        return 0;
}

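/*
 * record a pending extent op (a key and/or flags update) against the
 * head ref for this extent, creating the head ref if it does not exist
 * yet.
 */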
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                struct btrfs_delayed_extent_op *extent_op)
{
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref)
                return -ENOMEM;

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
                             num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
                             extent_op->is_data);

        spin_unlock(&delayed_refs->lock);
        return 0;
}

/*
 * this does a simple search for the head node for a given extent.
 * It must be called with the delayed ref spinlock held, and it returns
 * the head node if one was found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
{
        struct btrfs_delayed_ref_root *delayed_refs;

        delayed_refs = &trans->transaction->delayed_refs;
        return find_ref_head(&delayed_refs->href_root, bytenr, 0);
}

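/* destroy the slab caches; used on module exit and on init failure */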
void btrfs_delayed_ref_exit(void)
{
        if (btrfs_delayed_ref_head_cachep)
                kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
        if (btrfs_delayed_tree_ref_cachep)
                kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
        if (btrfs_delayed_data_ref_cachep)
                kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
        if (btrfs_delayed_extent_op_cachep)
                kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

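/*
 * create the slab caches for the four delayed ref object types.  On any
 * failure, everything created so far is torn down again.
 */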
int btrfs_delayed_ref_init(void)
{
        btrfs_delayed_ref_head_cachep = kmem_cache_create(
                                "btrfs_delayed_ref_head",
                                sizeof(struct btrfs_delayed_ref_head), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_ref_head_cachep)
                goto fail;

        btrfs_delayed_tree_ref_cachep = kmem_cache_create(
                                "btrfs_delayed_tree_ref",
                                sizeof(struct btrfs_delayed_tree_ref), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_tree_ref_cachep)
                goto fail;

        btrfs_delayed_data_ref_cachep = kmem_cache_create(
                                "btrfs_delayed_data_ref",
                                sizeof(struct btrfs_delayed_data_ref), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_data_ref_cachep)
                goto fail;

        btrfs_delayed_extent_op_cachep = kmem_cache_create(
                                "btrfs_delayed_extent_op",
                                sizeof(struct btrfs_delayed_extent_op), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_extent_op_cachep)
                goto fail;

        return 0;
fail:
        btrfs_delayed_ref_exit();
        return -ENOMEM;
}