linux/fs/btrfs/delayed-ref.c
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;

/*
 * delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
                          struct btrfs_delayed_tree_ref *ref1)
{
        if (ref1->root < ref2->root)
                return -1;
        if (ref1->root > ref2->root)
                return 1;
        if (ref1->parent < ref2->parent)
                return -1;
        if (ref1->parent > ref2->parent)
                return 1;
        return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
                          struct btrfs_delayed_data_ref *ref1)
{
        if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
                if (ref1->root < ref2->root)
                        return -1;
                if (ref1->root > ref2->root)
                        return 1;
                if (ref1->objectid < ref2->objectid)
                        return -1;
                if (ref1->objectid > ref2->objectid)
                        return 1;
                if (ref1->offset < ref2->offset)
                        return -1;
                if (ref1->offset > ref2->offset)
                        return 1;
        } else {
                if (ref1->parent < ref2->parent)
                        return -1;
                if (ref1->parent > ref2->parent)
                        return 1;
        }
        return 0;
}

/*
 * entries in the rb tree are ordered by the byte number of the extent,
 * type of the delayed backrefs and content of delayed backrefs.
 */
static int comp_entry(struct btrfs_delayed_ref_node *ref2,
                      struct btrfs_delayed_ref_node *ref1,
                      bool compare_seq)
{
        if (ref1->bytenr < ref2->bytenr)
                return -1;
        if (ref1->bytenr > ref2->bytenr)
                return 1;
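        /*
         * a ref head sorts after every other delayed ref that shares its
         * bytenr, so two heads for the same extent always compare equal.
         */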
        if (ref1->is_head && ref2->is_head)
                return 0;
        if (ref2->is_head)
                return -1;
        if (ref1->is_head)
                return 1;
        if (ref1->type < ref2->type)
                return -1;
        if (ref1->type > ref2->type)
                return 1;
        /* merging of sequenced refs is not allowed */
        if (compare_seq) {
                if (ref1->seq < ref2->seq)
                        return -1;
                if (ref1->seq > ref2->seq)
                        return 1;
        }
        if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
            ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
                return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
                                      btrfs_delayed_node_to_tree_ref(ref1));
        } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
                   ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
                return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
                                      btrfs_delayed_node_to_data_ref(ref1));
        }
        BUG();
        return 0;
}

/*
 * insert a new ref into the rbtree.  This returns the existing ref that
 * collides with the new node, or NULL if the new node was properly
 * inserted.
 */
static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
                                                  struct rb_node *node)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent_node = NULL;
        struct btrfs_delayed_ref_node *entry;
        struct btrfs_delayed_ref_node *ins;
        int cmp;

        ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
        while (*p) {
                parent_node = *p;
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
                                 rb_node);

                cmp = comp_entry(entry, ins, 1);
                if (cmp < 0)
                        p = &(*p)->rb_left;
                else if (cmp > 0)
                        p = &(*p)->rb_right;
                else
                        return entry;
        }

        rb_link_node(node, parent_node, p);
        rb_insert_color(node, root);
        return NULL;
}

/*
 * find a head entry based on bytenr. This returns the delayed ref
 * head if it was able to find one, or NULL if nothing was in that spot.
 * If return_bigger is given, the next bigger entry is returned if no exact
 * match is found.
 */
static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
                                  u64 bytenr,
                                  struct btrfs_delayed_ref_node **last,
                                  int return_bigger)
{
        struct rb_node *n;
        struct btrfs_delayed_ref_node *entry;
        int cmp = 0;

again:
        n = root->rb_node;
        entry = NULL;
        while (n) {
                entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
                WARN_ON(!entry->in_tree);
                if (last)
                        *last = entry;

                if (bytenr < entry->bytenr)
                        cmp = -1;
                else if (bytenr > entry->bytenr)
                        cmp = 1;
                else if (!btrfs_delayed_ref_is_head(entry))
                        cmp = 1;
                else
                        cmp = 0;

                if (cmp < 0)
                        n = n->rb_left;
                else if (cmp > 0)
                        n = n->rb_right;
                else
                        return entry;
        }
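        /*
         * no exact match.  If the caller asked for the next bigger entry,
         * step past the last node visited (wrapping to the start of the
         * tree if we fell off the end) and search again for that bytenr.
         */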
        if (entry && return_bigger) {
                if (cmp > 0) {
                        n = rb_next(&entry->rb_node);
                        if (!n)
                                n = rb_first(root);
                        entry = rb_entry(n, struct btrfs_delayed_ref_node,
                                         rb_node);
                        bytenr = entry->bytenr;
                        return_bigger = 0;
                        goto again;
                }
                return entry;
        }
        return NULL;
}

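/*
 * try to take the per-head mutex while holding delayed_refs->lock.  If the
 * trylock fails, take a reference on the head, drop the spinlock and block
 * on the mutex.  -EAGAIN means the head was run and removed from the tree
 * in the meantime, so the caller must look it up again.
 */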
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
                           struct btrfs_delayed_ref_head *head)
{
        struct btrfs_delayed_ref_root *delayed_refs;

        delayed_refs = &trans->transaction->delayed_refs;
        assert_spin_locked(&delayed_refs->lock);
        if (mutex_trylock(&head->mutex))
                return 0;

        atomic_inc(&head->node.refs);
        spin_unlock(&delayed_refs->lock);

        mutex_lock(&head->mutex);
        spin_lock(&delayed_refs->lock);
        if (!head->node.in_tree) {
                mutex_unlock(&head->mutex);
                btrfs_put_delayed_ref(&head->node);
                return -EAGAIN;
        }
        btrfs_put_delayed_ref(&head->node);
        return 0;
}

static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                                    struct btrfs_delayed_ref_root *delayed_refs,
                                    struct btrfs_delayed_ref_node *ref)
{
        rb_erase(&ref->rb_node, &delayed_refs->root);
        ref->in_tree = 0;
        btrfs_put_delayed_ref(ref);
        delayed_refs->num_entries--;
        if (trans->delayed_ref_updates)
                trans->delayed_ref_updates--;
}

static int merge_ref(struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_root *delayed_refs,
                     struct btrfs_delayed_ref_node *ref, u64 seq)
{
        struct rb_node *node;
        int merged = 0;
        int mod = 0;
        int done = 0;

        node = rb_prev(&ref->rb_node);
        while (node) {
                struct btrfs_delayed_ref_node *next;

                next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
                node = rb_prev(node);
                if (next->bytenr != ref->bytenr)
                        break;
                if (seq && next->seq >= seq)
                        break;
                if (comp_entry(ref, next, 0))
                        continue;

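                /*
                 * next is identical to ref apart from seq and action, so
                 * fold it into ref.  Matching actions add up; opposite
                 * actions cancel, and if next outweighs ref the two are
                 * swapped so the surviving node keeps the larger ref_mod
                 * and we stop after this iteration.
                 */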
                if (ref->action == next->action) {
                        mod = next->ref_mod;
                } else {
                        if (ref->ref_mod < next->ref_mod) {
                                struct btrfs_delayed_ref_node *tmp;

                                tmp = ref;
                                ref = next;
                                next = tmp;
                                done = 1;
                        }
                        mod = -next->ref_mod;
                }

                merged++;
                drop_delayed_ref(trans, delayed_refs, next);
                ref->ref_mod += mod;
                if (ref->ref_mod == 0) {
                        drop_delayed_ref(trans, delayed_refs, ref);
                        break;
                } else {
                        /*
                         * You can't have multiples of the same ref on a tree
                         * block.
                         */
                        WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
                }

                if (done)
                        break;
                node = rb_prev(&ref->rb_node);
        }

        return merged;
}

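/*
 * merge delayed refs queued behind the given head (same bytenr) when they
 * duplicate or cancel each other, but never across the oldest tree mod log
 * sequence number that is still in use.
 */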
void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info,
                              struct btrfs_delayed_ref_root *delayed_refs,
                              struct btrfs_delayed_ref_head *head)
{
        struct rb_node *node;
        u64 seq = 0;

        spin_lock(&fs_info->tree_mod_seq_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                struct seq_list *elem;

                elem = list_first_entry(&fs_info->tree_mod_seq_list,
                                        struct seq_list, list);
                seq = elem->seq;
        }
        spin_unlock(&fs_info->tree_mod_seq_lock);

        node = rb_prev(&head->node.rb_node);
        while (node) {
                struct btrfs_delayed_ref_node *ref;

                ref = rb_entry(node, struct btrfs_delayed_ref_node,
                               rb_node);
                if (ref->bytenr != head->node.bytenr)
                        break;

                /* We can't merge refs that are outside of our seq count */
                if (seq && ref->seq >= seq)
                        break;
                if (merge_ref(trans, delayed_refs, ref, seq))
                        node = rb_prev(&head->node.rb_node);
                else
                        node = rb_prev(node);
        }
}

int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
                            struct btrfs_delayed_ref_root *delayed_refs,
                            u64 seq)
{
        struct seq_list *elem;
        int ret = 0;

        spin_lock(&fs_info->tree_mod_seq_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                elem = list_first_entry(&fs_info->tree_mod_seq_list,
                                        struct seq_list, list);
                if (seq >= elem->seq) {
                        pr_debug("holding back delayed_ref %llu, lowest is "
                                 "%llu (%p)\n", seq, elem->seq, delayed_refs);
                        ret = 1;
                }
        }

        spin_unlock(&fs_info->tree_mod_seq_lock);
        return ret;
}

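/*
 * collect up to 32 ref heads, starting at the given bytenr, onto the cluster
 * list so the caller can run them back to back.  Returns 0 if at least one
 * head was added, or 1 if there was nothing left to cluster.
 */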
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
                           struct list_head *cluster, u64 start)
{
        int count = 0;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct rb_node *node;
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_ref_head *head;

        delayed_refs = &trans->transaction->delayed_refs;
        if (start == 0) {
                node = rb_first(&delayed_refs->root);
        } else {
                ref = NULL;
                find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
                if (ref) {
                        node = &ref->rb_node;
                } else
                        node = rb_first(&delayed_refs->root);
        }
again:
        while (node && count < 32) {
                ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
                if (btrfs_delayed_ref_is_head(ref)) {
                        head = btrfs_delayed_node_to_head(ref);
                        if (list_empty(&head->cluster)) {
                                list_add_tail(&head->cluster, cluster);
                                delayed_refs->run_delayed_start =
                                        head->node.bytenr;
                                count++;

                                WARN_ON(delayed_refs->num_heads_ready == 0);
                                delayed_refs->num_heads_ready--;
                        } else if (count) {
                                /* the goal of the clustering is to find extents
                                 * that are likely to end up in the same extent
                                 * leaf on disk.  So, we don't want them spread
                                 * all over the tree.  Stop now if we've hit
                                 * a head that was already in use
                                 */
                                break;
                        }
                }
                node = rb_next(node);
        }
        if (count) {
                return 0;
        } else if (start) {
                /*
                 * we've gone to the end of the rbtree without finding any
                 * clusters.  start from the beginning and try again
                 */
                start = 0;
                node = rb_first(&delayed_refs->root);
                goto again;
        }
        return 1;
}

void btrfs_release_ref_cluster(struct list_head *cluster)
{
        struct list_head *pos, *q;

        list_for_each_safe(pos, q, cluster)
                list_del_init(pos);
}

/*
 * helper function to update an extent delayed ref in the
 * rbtree.  existing and update must both have the same
 * bytenr and parent
 *
 * This may free existing if the update cancels out whatever
 * operation it was doing.
 */
static noinline void
update_existing_ref(struct btrfs_trans_handle *trans,
                    struct btrfs_delayed_ref_root *delayed_refs,
                    struct btrfs_delayed_ref_node *existing,
                    struct btrfs_delayed_ref_node *update)
{
        if (update->action != existing->action) {
                /*
                 * this is effectively undoing either an add or a
                 * drop.  We decrement the ref_mod, and if it goes
                 * down to zero we just delete the entry without
                 * ever changing the extent allocation tree.
                 */
                existing->ref_mod--;
                if (existing->ref_mod == 0)
                        drop_delayed_ref(trans, delayed_refs, existing);
                else
                        WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
        } else {
                WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
                        existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
                /*
                 * the action on the existing ref matches
                 * the action on the ref we're trying to add.
                 * Bump the ref_mod by one so the backref that
                 * is eventually added/removed has the correct
                 * reference count
                 */
                existing->ref_mod += update->ref_mod;
        }
}

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
                         struct btrfs_delayed_ref_node *update)
{
        struct btrfs_delayed_ref_head *existing_ref;
        struct btrfs_delayed_ref_head *ref;

        existing_ref = btrfs_delayed_node_to_head(existing);
        ref = btrfs_delayed_node_to_head(update);
        BUG_ON(existing_ref->is_data != ref->is_data);

        if (ref->must_insert_reserved) {
                /* if the extent was freed and then
                 * reallocated before the delayed ref
                 * entries were processed, we can end up
                 * with an existing head ref without
                 * the must_insert_reserved flag set.
                 * Set it again here
                 */
                existing_ref->must_insert_reserved = ref->must_insert_reserved;

                /*
                 * update the num_bytes so we make sure the accounting
                 * is done correctly
                 */
                existing->num_bytes = update->num_bytes;

        }

        if (ref->extent_op) {
                if (!existing_ref->extent_op) {
                        existing_ref->extent_op = ref->extent_op;
                } else {
                        if (ref->extent_op->update_key) {
                                memcpy(&existing_ref->extent_op->key,
                                       &ref->extent_op->key,
                                       sizeof(ref->extent_op->key));
                                existing_ref->extent_op->update_key = 1;
                        }
                        if (ref->extent_op->update_flags) {
                                existing_ref->extent_op->flags_to_set |=
                                        ref->extent_op->flags_to_set;
                                existing_ref->extent_op->update_flags = 1;
                        }
                        btrfs_free_delayed_extent_op(ref->extent_op);
                }
        }
        /*
         * update the reference mod on the head to reflect this new operation
         */
        existing->ref_mod += update->ref_mod;
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                                        struct btrfs_trans_handle *trans,
                                        struct btrfs_delayed_ref_node *ref,
                                        u64 bytenr, u64 num_bytes,
                                        int action, int is_data)
{
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_ref_head *head_ref = NULL;
        struct btrfs_delayed_ref_root *delayed_refs;
        int count_mod = 1;
        int must_insert_reserved = 0;

        /*
         * the head node stores the sum of all the mods, so dropping a ref
         * should drop the sum in the head node by one.
         */
        if (action == BTRFS_UPDATE_DELAYED_HEAD)
                count_mod = 0;
        else if (action == BTRFS_DROP_DELAYED_REF)
                count_mod = -1;

        /*
         * BTRFS_ADD_DELAYED_EXTENT means that we need to update
         * the reserved accounting when the extent is finally added, or
         * if a later modification deletes the delayed ref without ever
         * inserting the extent into the extent allocation tree.
         * ref->must_insert_reserved is the flag used to record
         * that accounting mods are required.
         *
         * Once we record must_insert_reserved, switch the action to
         * BTRFS_ADD_DELAYED_REF because other special casing is not required.
         */
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                must_insert_reserved = 1;
        else
                must_insert_reserved = 0;

        delayed_refs = &trans->transaction->delayed_refs;

        /* first set the basic ref node struct up */
        atomic_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = count_mod;
        ref->type = 0;
        ref->action = 0;
        ref->is_head = 1;
        ref->in_tree = 1;
        ref->seq = 0;

        head_ref = btrfs_delayed_node_to_head(ref);
        head_ref->must_insert_reserved = must_insert_reserved;
        head_ref->is_data = is_data;

        INIT_LIST_HEAD(&head_ref->cluster);
        mutex_init(&head_ref->mutex);

        trace_btrfs_delayed_ref_head(ref, head_ref, action);

        existing = tree_insert(&delayed_refs->root, &ref->rb_node);

        if (existing) {
                update_existing_head_ref(existing, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
        } else {
                delayed_refs->num_heads++;
                delayed_refs->num_heads_ready++;
                delayed_refs->num_entries++;
                trans->delayed_ref_updates++;
        }
}

/*
 * helper to insert a delayed tree ref into the rbtree.
 */
static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                                         struct btrfs_trans_handle *trans,
                                         struct btrfs_delayed_ref_node *ref,
                                         u64 bytenr, u64 num_bytes, u64 parent,
                                         u64 ref_root, int level, int action,
                                         int for_cow)
{
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_tree_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;

        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;

        delayed_refs = &trans->transaction->delayed_refs;

        /* first set the basic ref node struct up */
        atomic_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = 1;
        ref->action = action;
        ref->is_head = 0;
        ref->in_tree = 1;

        if (need_ref_seq(for_cow, ref_root))
                seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
        ref->seq = seq;

        full_ref = btrfs_delayed_node_to_tree_ref(ref);
        full_ref->parent = parent;
        full_ref->root = ref_root;
        if (parent)
                ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
        else
                ref->type = BTRFS_TREE_BLOCK_REF_KEY;
        full_ref->level = level;

        trace_btrfs_delayed_tree_ref(ref, full_ref, action);

        existing = tree_insert(&delayed_refs->root, &ref->rb_node);

        if (existing) {
                update_existing_ref(trans, delayed_refs, existing, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
        } else {
                delayed_refs->num_entries++;
                trans->delayed_ref_updates++;
        }
}

/*
 * helper to insert a delayed data ref into the rbtree.
 */
static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                                         struct btrfs_trans_handle *trans,
                                         struct btrfs_delayed_ref_node *ref,
                                         u64 bytenr, u64 num_bytes, u64 parent,
                                         u64 ref_root, u64 owner, u64 offset,
                                         int action, int for_cow)
{
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_data_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;

        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;

        delayed_refs = &trans->transaction->delayed_refs;

        /* first set the basic ref node struct up */
        atomic_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = 1;
        ref->action = action;
        ref->is_head = 0;
        ref->in_tree = 1;

        if (need_ref_seq(for_cow, ref_root))
                seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
        ref->seq = seq;

        full_ref = btrfs_delayed_node_to_data_ref(ref);
        full_ref->parent = parent;
        full_ref->root = ref_root;
        if (parent)
                ref->type = BTRFS_SHARED_DATA_REF_KEY;
        else
                ref->type = BTRFS_EXTENT_DATA_REF_KEY;

        full_ref->objectid = owner;
        full_ref->offset = offset;

        trace_btrfs_delayed_data_ref(ref, full_ref, action);

        existing = tree_insert(&delayed_refs->root, &ref->rb_node);

        if (existing) {
                update_existing_ref(trans, delayed_refs, existing, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
        } else {
                delayed_refs->num_entries++;
                trans->delayed_ref_updates++;
        }
}

/*
 * add a delayed tree ref.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes, u64 parent,
                               u64 ref_root, int level, int action,
                               struct btrfs_delayed_extent_op *extent_op,
                               int for_cow)
{
        struct btrfs_delayed_tree_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        BUG_ON(extent_op && extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref) {
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
                return -ENOMEM;
        }

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
                                   num_bytes, action, 0);

        add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
                                   num_bytes, parent, ref_root, level, action,
                                   for_cow);
        spin_unlock(&delayed_refs->lock);
        if (need_ref_seq(for_cow, ref_root))
                btrfs_qgroup_record_ref(trans, &ref->node, extent_op);

        return 0;
}

/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes,
                               u64 parent, u64 ref_root,
                               u64 owner, u64 offset, int action,
                               struct btrfs_delayed_extent_op *extent_op,
                               int for_cow)
{
        struct btrfs_delayed_data_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        BUG_ON(extent_op && !extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref) {
                kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
                return -ENOMEM;
        }

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
                                   num_bytes, action, 1);

        add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
                                   num_bytes, parent, ref_root, owner, offset,
                                   action, for_cow);
        spin_unlock(&delayed_refs->lock);
        if (need_ref_seq(for_cow, ref_root))
                btrfs_qgroup_record_ref(trans, &ref->node, extent_op);

        return 0;
}

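/*
 * queue a head-only delayed ref (BTRFS_UPDATE_DELAYED_HEAD) so that the
 * pending extent_op (key and/or flags update) is applied when the head is
 * run.
 */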
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                struct btrfs_delayed_extent_op *extent_op)
{
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref)
                return -ENOMEM;

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
                                   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
                                   extent_op->is_data);

        spin_unlock(&delayed_refs->lock);
        return 0;
}

/*
 * this does a simple search for the head node for a given extent.
 * It must be called with the delayed ref spinlock held, and it returns
 * the head node if one was found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
{
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        delayed_refs = &trans->transaction->delayed_refs;
        ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
        if (ref)
                return btrfs_delayed_node_to_head(ref);
        return NULL;
}

void btrfs_delayed_ref_exit(void)
{
        if (btrfs_delayed_ref_head_cachep)
                kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
        if (btrfs_delayed_tree_ref_cachep)
                kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
        if (btrfs_delayed_data_ref_cachep)
                kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
        if (btrfs_delayed_extent_op_cachep)
                kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

int btrfs_delayed_ref_init(void)
{
        btrfs_delayed_ref_head_cachep = kmem_cache_create(
                                "btrfs_delayed_ref_head",
                                sizeof(struct btrfs_delayed_ref_head), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_ref_head_cachep)
                goto fail;

        btrfs_delayed_tree_ref_cachep = kmem_cache_create(
                                "btrfs_delayed_tree_ref",
                                sizeof(struct btrfs_delayed_tree_ref), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_tree_ref_cachep)
                goto fail;

        btrfs_delayed_data_ref_cachep = kmem_cache_create(
                                "btrfs_delayed_data_ref",
                                sizeof(struct btrfs_delayed_data_ref), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_data_ref_cachep)
                goto fail;

        btrfs_delayed_extent_op_cachep = kmem_cache_create(
                                "btrfs_delayed_extent_op",
                                sizeof(struct btrfs_delayed_extent_op), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_extent_op_cachep)
                goto fail;

        return 0;
fail:
        btrfs_delayed_ref_exit();
        return -ENOMEM;
}