linux/fs/btrfs/delayed-ref.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
#include "qgroup.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
 * delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
                          struct btrfs_delayed_tree_ref *ref2)
{
        if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
                if (ref1->root < ref2->root)
                        return -1;
                if (ref1->root > ref2->root)
                        return 1;
        } else {
                if (ref1->parent < ref2->parent)
                        return -1;
                if (ref1->parent > ref2->parent)
                        return 1;
        }
        return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
                          struct btrfs_delayed_data_ref *ref2)
{
        if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
                if (ref1->root < ref2->root)
                        return -1;
                if (ref1->root > ref2->root)
                        return 1;
                if (ref1->objectid < ref2->objectid)
                        return -1;
                if (ref1->objectid > ref2->objectid)
                        return 1;
                if (ref1->offset < ref2->offset)
                        return -1;
                if (ref1->offset > ref2->offset)
                        return 1;
        } else {
                if (ref1->parent < ref2->parent)
                        return -1;
                if (ref1->parent > ref2->parent)
                        return 1;
        }
        return 0;
}

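/*
 * compare two delayed refs that share a bytenr: order by type, then by the
 * type specific fields, and finally by seq when check_seq is set
 */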
static int comp_refs(struct btrfs_delayed_ref_node *ref1,
                     struct btrfs_delayed_ref_node *ref2,
                     bool check_seq)
{
        int ret = 0;

        if (ref1->type < ref2->type)
                return -1;
        if (ref1->type > ref2->type)
                return 1;
        if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
            ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
                ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
                                     btrfs_delayed_node_to_tree_ref(ref2));
        else
                ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
                                     btrfs_delayed_node_to_data_ref(ref2));
        if (ret)
                return ret;
        if (check_seq) {
                if (ref1->seq < ref2->seq)
                        return -1;
                if (ref1->seq > ref2->seq)
                        return 1;
        }
        return 0;
}

/* insert a new ref to head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
                                                   struct rb_node *node)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent_node = NULL;
        struct btrfs_delayed_ref_head *entry;
        struct btrfs_delayed_ref_head *ins;
        u64 bytenr;

        ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
        bytenr = ins->bytenr;
        while (*p) {
                parent_node = *p;
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
                                 href_node);

                if (bytenr < entry->bytenr)
                        p = &(*p)->rb_left;
                else if (bytenr > entry->bytenr)
                        p = &(*p)->rb_right;
                else
                        return entry;
        }

        rb_link_node(node, parent_node, p);
        rb_insert_color(node, root);
        return NULL;
}

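/*
 * insert a delayed ref into the per-head rbtree, returning the existing node
 * if an identical ref is already queued so the caller can merge into it
 */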
static struct btrfs_delayed_ref_node* tree_insert(struct rb_root *root,
                struct btrfs_delayed_ref_node *ins)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *node = &ins->ref_node;
        struct rb_node *parent_node = NULL;
        struct btrfs_delayed_ref_node *entry;

        while (*p) {
                int comp;

                parent_node = *p;
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
                                 ref_node);
                comp = comp_refs(ins, entry, true);
                if (comp < 0)
                        p = &(*p)->rb_left;
                else if (comp > 0)
                        p = &(*p)->rb_right;
                else
                        return entry;
        }

        rb_link_node(node, parent_node, p);
        rb_insert_color(node, root);
        return NULL;
}

/*
 * find a head entry based on bytenr. This returns the delayed ref
 * head if it was able to find one, or NULL if nothing was in that spot.
 * If return_bigger is given, the next bigger entry is returned if no exact
 * match is found.
 */
static struct btrfs_delayed_ref_head *
find_ref_head(struct rb_root *root, u64 bytenr,
              int return_bigger)
{
        struct rb_node *n;
        struct btrfs_delayed_ref_head *entry;

        n = root->rb_node;
        entry = NULL;
        while (n) {
                entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

                if (bytenr < entry->bytenr)
                        n = n->rb_left;
                else if (bytenr > entry->bytenr)
                        n = n->rb_right;
                else
                        return entry;
        }
        if (entry && return_bigger) {
                if (bytenr > entry->bytenr) {
                        n = rb_next(&entry->href_node);
                        if (!n)
                                n = rb_first(root);
                        entry = rb_entry(n, struct btrfs_delayed_ref_head,
                                         href_node);
                        return entry;
                }
                return entry;
        }
        return NULL;
}

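/*
 * Try to take the per-head mutex without giving up delayed_refs->lock.  If
 * that fails, drop the spinlock, sleep on the mutex and retake the spinlock.
 * Returns -EAGAIN if the head was run and removed from the rbtree while we
 * slept, so the caller has to start over.
 */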
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
                           struct btrfs_delayed_ref_head *head)
{
        struct btrfs_delayed_ref_root *delayed_refs;

        delayed_refs = &trans->transaction->delayed_refs;
        lockdep_assert_held(&delayed_refs->lock);
        if (mutex_trylock(&head->mutex))
                return 0;

        refcount_inc(&head->refs);
        spin_unlock(&delayed_refs->lock);

        mutex_lock(&head->mutex);
        spin_lock(&delayed_refs->lock);
        if (RB_EMPTY_NODE(&head->href_node)) {
                mutex_unlock(&head->mutex);
                btrfs_put_delayed_ref_head(head);
                return -EAGAIN;
        }
        btrfs_put_delayed_ref_head(head);
        return 0;
}

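/*
 * remove a ref from its head: unlink it from the rbtree and the add_list and
 * drop the reference the tree held on it
 */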
static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                                    struct btrfs_delayed_ref_root *delayed_refs,
                                    struct btrfs_delayed_ref_head *head,
                                    struct btrfs_delayed_ref_node *ref)
{
        lockdep_assert_held(&head->lock);
        rb_erase(&ref->ref_node, &head->ref_tree);
        RB_CLEAR_NODE(&ref->ref_node);
        if (!list_empty(&ref->add_list))
                list_del(&ref->add_list);
        ref->in_tree = 0;
        btrfs_put_delayed_ref(ref);
        atomic_dec(&delayed_refs->num_entries);
        if (trans->delayed_ref_updates)
                trans->delayed_ref_updates--;
}

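/*
 * Try to fold the refs that follow @ref in the head's rbtree into @ref,
 * stopping at the first ref with a different identity or with a seq that is
 * still protected by a tree mod log user.  Returns true when the node we
 * started from is no longer in the tree, in which case the caller must
 * restart its scan.
 */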
static bool merge_ref(struct btrfs_trans_handle *trans,
                      struct btrfs_delayed_ref_root *delayed_refs,
                      struct btrfs_delayed_ref_head *head,
                      struct btrfs_delayed_ref_node *ref,
                      u64 seq)
{
        struct btrfs_delayed_ref_node *next;
        struct rb_node *node = rb_next(&ref->ref_node);
        bool done = false;

        while (!done && node) {
                int mod;

                next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
                node = rb_next(node);
                if (seq && next->seq >= seq)
                        break;
                if (comp_refs(ref, next, false))
                        break;

                if (ref->action == next->action) {
                        mod = next->ref_mod;
                } else {
                        if (ref->ref_mod < next->ref_mod) {
                                swap(ref, next);
                                done = true;
                        }
                        mod = -next->ref_mod;
                }

                drop_delayed_ref(trans, delayed_refs, head, next);
                ref->ref_mod += mod;
                if (ref->ref_mod == 0) {
                        drop_delayed_ref(trans, delayed_refs, head, ref);
                        done = true;
                } else {
                        /*
                         * Can't have multiples of the same ref on a tree block.
                         */
                        WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
                }
        }

        return done;
}

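/*
 * Merge refs queued on a head that cancel each other out or stack up, so that
 * running the head later has less work to do.  Refs newer than the oldest
 * tree mod log seq still in use must be left alone, and data refs are not
 * merged at all.
 */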
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info,
                              struct btrfs_delayed_ref_root *delayed_refs,
                              struct btrfs_delayed_ref_head *head)
{
        struct btrfs_delayed_ref_node *ref;
        struct rb_node *node;
        u64 seq = 0;

        lockdep_assert_held(&head->lock);

        if (RB_EMPTY_ROOT(&head->ref_tree))
                return;

        /* We don't have too many refs to merge for data. */
        if (head->is_data)
                return;

        spin_lock(&fs_info->tree_mod_seq_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                struct seq_list *elem;

                elem = list_first_entry(&fs_info->tree_mod_seq_list,
                                        struct seq_list, list);
                seq = elem->seq;
        }
        spin_unlock(&fs_info->tree_mod_seq_lock);

again:
        for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) {
                ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
                if (seq && ref->seq >= seq)
                        continue;
                if (merge_ref(trans, delayed_refs, head, ref, seq))
                        goto again;
        }
}

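/*
 * Check if the given seq is still protected by a tree mod log user; if so
 * the delayed refs at that seq must not be run yet.  Returns 1 to hold the
 * ref back, 0 if it is safe to process.
 */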
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
                            struct btrfs_delayed_ref_root *delayed_refs,
                            u64 seq)
{
        struct seq_list *elem;
        int ret = 0;

        spin_lock(&fs_info->tree_mod_seq_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                elem = list_first_entry(&fs_info->tree_mod_seq_list,
                                        struct seq_list, list);
                if (seq >= elem->seq) {
                        btrfs_debug(fs_info,
                                "holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)",
                                (u32)(seq >> 32), (u32)seq,
                                (u32)(elem->seq >> 32), (u32)elem->seq,
                                delayed_refs);
                        ret = 1;
                }
        }

        spin_unlock(&fs_info->tree_mod_seq_lock);
        return ret;
}

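/*
 * Pick the next delayed ref head to run, starting at run_delayed_start and
 * wrapping around to the beginning of the rbtree once, skipping heads that
 * are already being processed.  The returned head is marked as processing.
 */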
struct btrfs_delayed_ref_head *
btrfs_select_ref_head(struct btrfs_trans_handle *trans)
{
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_delayed_ref_head *head;
        u64 start;
        bool loop = false;

        delayed_refs = &trans->transaction->delayed_refs;

again:
        start = delayed_refs->run_delayed_start;
        head = find_ref_head(&delayed_refs->href_root, start, 1);
        if (!head && !loop) {
                delayed_refs->run_delayed_start = 0;
                start = 0;
                loop = true;
                head = find_ref_head(&delayed_refs->href_root, start, 1);
                if (!head)
                        return NULL;
        } else if (!head && loop) {
                return NULL;
        }

        while (head->processing) {
                struct rb_node *node;

                node = rb_next(&head->href_node);
                if (!node) {
                        if (loop)
                                return NULL;
                        delayed_refs->run_delayed_start = 0;
                        start = 0;
                        loop = true;
                        goto again;
                }
                head = rb_entry(node, struct btrfs_delayed_ref_head,
                                href_node);
        }

        head->processing = 1;
        WARN_ON(delayed_refs->num_heads_ready == 0);
        delayed_refs->num_heads_ready--;
        delayed_refs->run_delayed_start = head->bytenr +
                head->num_bytes;
        return head;
}

/*
 * Helper to insert the ref_node to the tail or merge with tail.
 *
 * Return 0 for insert.
 * Return >0 for merge.
 */
static int insert_delayed_ref(struct btrfs_trans_handle *trans,
                              struct btrfs_delayed_ref_root *root,
                              struct btrfs_delayed_ref_head *href,
                              struct btrfs_delayed_ref_node *ref)
{
        struct btrfs_delayed_ref_node *exist;
        int mod;
        int ret = 0;

        spin_lock(&href->lock);
        exist = tree_insert(&href->ref_tree, ref);
        if (!exist)
                goto inserted;

        /* Now we are sure we can merge */
        ret = 1;
        if (exist->action == ref->action) {
                mod = ref->ref_mod;
        } else {
                /* Need to change action */
                if (exist->ref_mod < ref->ref_mod) {
                        exist->action = ref->action;
                        mod = -exist->ref_mod;
                        exist->ref_mod = ref->ref_mod;
                        if (ref->action == BTRFS_ADD_DELAYED_REF)
                                list_add_tail(&exist->add_list,
                                              &href->ref_add_list);
                        else if (ref->action == BTRFS_DROP_DELAYED_REF) {
                                ASSERT(!list_empty(&exist->add_list));
                                list_del(&exist->add_list);
                        } else {
                                ASSERT(0);
                        }
                } else
                        mod = -ref->ref_mod;
        }
        exist->ref_mod += mod;

        /* remove existing tail if its ref_mod is zero */
        if (exist->ref_mod == 0)
                drop_delayed_ref(trans, root, href, exist);
        spin_unlock(&href->lock);
        return ret;
inserted:
        if (ref->action == BTRFS_ADD_DELAYED_REF)
                list_add_tail(&ref->add_list, &href->ref_add_list);
        atomic_inc(&root->num_entries);
        trans->delayed_ref_updates++;
        spin_unlock(&href->lock);
        return ret;
}

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
                         struct btrfs_delayed_ref_head *existing,
                         struct btrfs_delayed_ref_head *update,
                         int *old_ref_mod_ret)
{
        int old_ref_mod;

        BUG_ON(existing->is_data != update->is_data);

        spin_lock(&existing->lock);
        if (update->must_insert_reserved) {
                /* if the extent was freed and then
                 * reallocated before the delayed ref
                 * entries were processed, we can end up
                 * with an existing head ref without
                 * the must_insert_reserved flag set.
                 * Set it again here
                 */
                existing->must_insert_reserved = update->must_insert_reserved;

                /*
                 * update the num_bytes so we make sure the accounting
                 * is done correctly
                 */
                existing->num_bytes = update->num_bytes;
        }

        if (update->extent_op) {
                if (!existing->extent_op) {
                        existing->extent_op = update->extent_op;
                } else {
                        if (update->extent_op->update_key) {
                                memcpy(&existing->extent_op->key,
                                       &update->extent_op->key,
                                       sizeof(update->extent_op->key));
                                existing->extent_op->update_key = true;
                        }
                        if (update->extent_op->update_flags) {
                                existing->extent_op->flags_to_set |=
                                        update->extent_op->flags_to_set;
                                existing->extent_op->update_flags = true;
                        }
                        btrfs_free_delayed_extent_op(update->extent_op);
                }
        }
        /*
         * update the reference mod on the head to reflect this new operation.
         * we only need the lock for this case because we could be processing
         * the head right now; for refs we just added we know we're a-ok.
         */
        old_ref_mod = existing->total_ref_mod;
        if (old_ref_mod_ret)
                *old_ref_mod_ret = old_ref_mod;
        existing->ref_mod += update->ref_mod;
        existing->total_ref_mod += update->ref_mod;

        /*
         * If we are going from a positive ref mod to a negative or vice
         * versa we need to make sure to adjust pending_csums accordingly.
         */
        if (existing->is_data) {
                if (existing->total_ref_mod >= 0 && old_ref_mod < 0)
                        delayed_refs->pending_csums -= existing->num_bytes;
                if (existing->total_ref_mod < 0 && old_ref_mod >= 0)
                        delayed_refs->pending_csums += existing->num_bytes;
        }
        spin_unlock(&existing->lock);
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                     struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_head *head_ref,
                     struct btrfs_qgroup_extent_record *qrecord,
                     u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
                     int action, int is_data, int is_system,
                     int *qrecord_inserted_ret,
                     int *old_ref_mod, int *new_ref_mod)
{
        struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_root *delayed_refs;
        int count_mod = 1;
        int must_insert_reserved = 0;
        int qrecord_inserted = 0;

        /* If reserved is provided, it must be a data extent. */
        BUG_ON(!is_data && reserved);

        /*
         * the head node stores the sum of all the mods, so dropping a ref
         * should drop the sum in the head node by one.
         */
        if (action == BTRFS_UPDATE_DELAYED_HEAD)
                count_mod = 0;
        else if (action == BTRFS_DROP_DELAYED_REF)
                count_mod = -1;

        /*
         * BTRFS_ADD_DELAYED_EXTENT means that we need to update
         * the reserved accounting when the extent is finally added, or
         * if a later modification deletes the delayed ref without ever
         * inserting the extent into the extent allocation tree.
         * ref->must_insert_reserved is the flag used to record
         * that accounting mods are required.
         *
         * Once we record must_insert_reserved, switch the action to
         * BTRFS_ADD_DELAYED_REF because other special casing is not required.
         */
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                must_insert_reserved = 1;
        else
                must_insert_reserved = 0;

        delayed_refs = &trans->transaction->delayed_refs;

        refcount_set(&head_ref->refs, 1);
        head_ref->bytenr = bytenr;
        head_ref->num_bytes = num_bytes;
        head_ref->ref_mod = count_mod;
        head_ref->must_insert_reserved = must_insert_reserved;
        head_ref->is_data = is_data;
        head_ref->is_system = is_system;
        head_ref->ref_tree = RB_ROOT;
        INIT_LIST_HEAD(&head_ref->ref_add_list);
        RB_CLEAR_NODE(&head_ref->href_node);
        head_ref->processing = 0;
        head_ref->total_ref_mod = count_mod;
        head_ref->qgroup_reserved = 0;
        head_ref->qgroup_ref_root = 0;
        spin_lock_init(&head_ref->lock);
        mutex_init(&head_ref->mutex);

        /* Record qgroup extent info if provided */
        if (qrecord) {
                if (ref_root && reserved) {
                        head_ref->qgroup_ref_root = ref_root;
                        head_ref->qgroup_reserved = reserved;
                }

                qrecord->bytenr = bytenr;
                qrecord->num_bytes = num_bytes;
                qrecord->old_roots = NULL;

                if (btrfs_qgroup_trace_extent_nolock(fs_info,
                                        delayed_refs, qrecord))
                        kfree(qrecord);
                else
                        qrecord_inserted = 1;
        }

        trace_add_delayed_ref_head(fs_info, head_ref, action);

        existing = htree_insert(&delayed_refs->href_root,
                                &head_ref->href_node);
        if (existing) {
                WARN_ON(ref_root && reserved && existing->qgroup_ref_root
                        && existing->qgroup_reserved);
                update_existing_head_ref(delayed_refs, existing, head_ref,
                                         old_ref_mod);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
                head_ref = existing;
        } else {
                if (old_ref_mod)
                        *old_ref_mod = 0;
                if (is_data && count_mod < 0)
                        delayed_refs->pending_csums += num_bytes;
                delayed_refs->num_heads++;
                delayed_refs->num_heads_ready++;
                atomic_inc(&delayed_refs->num_entries);
                trans->delayed_ref_updates++;
        }
        if (qrecord_inserted_ret)
                *qrecord_inserted_ret = qrecord_inserted;
        if (new_ref_mod)
                *new_ref_mod = head_ref->total_ref_mod;
        return head_ref;
}

/*
 * helper to insert a delayed tree ref into the rbtree.
 */
static noinline void
add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                     struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_head *head_ref,
                     struct btrfs_delayed_ref_node *ref, u64 bytenr,
                     u64 num_bytes, u64 parent, u64 ref_root, int level,
                     int action)
{
        struct btrfs_delayed_tree_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;
        int ret;

        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;

        if (is_fstree(ref_root))
                seq = atomic64_read(&fs_info->tree_mod_seq);
        delayed_refs = &trans->transaction->delayed_refs;

        /* first set the basic ref node struct up */
        refcount_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = 1;
        ref->action = action;
        ref->is_head = 0;
        ref->in_tree = 1;
        ref->seq = seq;
        RB_CLEAR_NODE(&ref->ref_node);
        INIT_LIST_HEAD(&ref->add_list);

        full_ref = btrfs_delayed_node_to_tree_ref(ref);
        full_ref->parent = parent;
        full_ref->root = ref_root;
        if (parent)
                ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
        else
                ref->type = BTRFS_TREE_BLOCK_REF_KEY;
        full_ref->level = level;

        trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);

        ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);

        /*
         * XXX: memory should be freed at the same level it was allocated,
         * but that bad practice is everywhere in this code.  Follow it for
         * now; this needs a cleanup.
         */
        if (ret > 0)
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
}

/*
 * helper to insert a delayed data ref into the rbtree.
 */
static noinline void
add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                     struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_head *head_ref,
                     struct btrfs_delayed_ref_node *ref, u64 bytenr,
                     u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
                     u64 offset, int action)
{
        struct btrfs_delayed_data_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;
        int ret;

        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;

        delayed_refs = &trans->transaction->delayed_refs;

        if (is_fstree(ref_root))
                seq = atomic64_read(&fs_info->tree_mod_seq);

        /* first set the basic ref node struct up */
        refcount_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = 1;
        ref->action = action;
        ref->is_head = 0;
        ref->in_tree = 1;
        ref->seq = seq;
        RB_CLEAR_NODE(&ref->ref_node);
        INIT_LIST_HEAD(&ref->add_list);

        full_ref = btrfs_delayed_node_to_data_ref(ref);
        full_ref->parent = parent;
        full_ref->root = ref_root;
        if (parent)
                ref->type = BTRFS_SHARED_DATA_REF_KEY;
        else
                ref->type = BTRFS_EXTENT_DATA_REF_KEY;

        full_ref->objectid = owner;
        full_ref->offset = offset;

        trace_add_delayed_data_ref(fs_info, ref, full_ref, action);

        ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
        if (ret > 0)
                kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
}

/*
 * add a delayed tree ref.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes, u64 parent,
                               u64 ref_root, int level, int action,
                               struct btrfs_delayed_extent_op *extent_op,
                               int *old_ref_mod, int *new_ref_mod)
{
        struct btrfs_delayed_tree_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_qgroup_extent_record *record = NULL;
        int qrecord_inserted;
        int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);

        BUG_ON(extent_op && extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref)
                goto free_ref;

        if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
            is_fstree(ref_root)) {
                record = kmalloc(sizeof(*record), GFP_NOFS);
                if (!record)
                        goto free_head_ref;
        }

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
                                        bytenr, num_bytes, 0, 0, action, 0,
                                        is_system, &qrecord_inserted,
                                        old_ref_mod, new_ref_mod);

        add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                             num_bytes, parent, ref_root, level, action);
        spin_unlock(&delayed_refs->lock);

        if (qrecord_inserted)
                btrfs_qgroup_trace_extent_post(fs_info, record);

        return 0;

free_head_ref:
        kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
free_ref:
        kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);

        return -ENOMEM;
}

/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes,
                               u64 parent, u64 ref_root,
                               u64 owner, u64 offset, u64 reserved, int action,
                               int *old_ref_mod, int *new_ref_mod)
{
        struct btrfs_delayed_data_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_qgroup_extent_record *record = NULL;
        int qrecord_inserted;

        ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref) {
                kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
                return -ENOMEM;
        }

        if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
            is_fstree(ref_root)) {
                record = kmalloc(sizeof(*record), GFP_NOFS);
                if (!record) {
                        kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
                        kmem_cache_free(btrfs_delayed_ref_head_cachep,
                                        head_ref);
                        return -ENOMEM;
                }
        }

        head_ref->extent_op = NULL;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
                                        bytenr, num_bytes, ref_root, reserved,
                                        action, 1, 0, &qrecord_inserted,
                                        old_ref_mod, new_ref_mod);

        add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                             num_bytes, parent, ref_root, owner, offset,
                             action);
        spin_unlock(&delayed_refs->lock);

        if (qrecord_inserted)
                return btrfs_qgroup_trace_extent_post(fs_info, record);
        return 0;
}

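/*
 * Queue a head-only update that just carries an extent_op (flag or key
 * changes) for the given extent, without changing its reference count.
 */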
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                struct btrfs_delayed_extent_op *extent_op)
{
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref)
                return -ENOMEM;

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * extent_ops just modify the flags of an extent and they don't result
         * in ref count changes, hence it's safe to pass 0 for the is_system
         * argument
         */
        add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
                             num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
                             extent_op->is_data, 0, NULL, NULL, NULL);

        spin_unlock(&delayed_refs->lock);
        return 0;
}

/*
 * this does a simple search for the head node for a given extent.
 * It must be called with the delayed ref spinlock held, and it returns
 * the head node if one was found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
        return find_ref_head(&delayed_refs->href_root, bytenr, 0);
}

void __cold btrfs_delayed_ref_exit(void)
{
        kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
        kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
        kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
        kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

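/* create the slab caches used by the delayed ref machinery */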
int __init btrfs_delayed_ref_init(void)
{
        btrfs_delayed_ref_head_cachep = kmem_cache_create(
                                "btrfs_delayed_ref_head",
                                sizeof(struct btrfs_delayed_ref_head), 0,
                                SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_ref_head_cachep)
                goto fail;

        btrfs_delayed_tree_ref_cachep = kmem_cache_create(
                                "btrfs_delayed_tree_ref",
                                sizeof(struct btrfs_delayed_tree_ref), 0,
                                SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_tree_ref_cachep)
                goto fail;

        btrfs_delayed_data_ref_cachep = kmem_cache_create(
                                "btrfs_delayed_data_ref",
                                sizeof(struct btrfs_delayed_data_ref), 0,
                                SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_data_ref_cachep)
                goto fail;

        btrfs_delayed_extent_op_cachep = kmem_cache_create(
                                "btrfs_delayed_extent_op",
                                sizeof(struct btrfs_delayed_extent_op), 0,
                                SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_extent_op_cachep)
                goto fail;

        return 0;
fail:
        btrfs_delayed_ref_exit();
        return -ENOMEM;
}