linux/fs/btrfs/delayed-ref.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2009 Oracle.  All rights reserved.
   4 */
   5
   6#include <linux/sched.h>
   7#include <linux/slab.h>
   8#include <linux/sort.h>
   9#include "ctree.h"
  10#include "delayed-ref.h"
  11#include "transaction.h"
  12#include "qgroup.h"
  13#include "space-info.h"
  14
  15struct kmem_cache *btrfs_delayed_ref_head_cachep;
  16struct kmem_cache *btrfs_delayed_tree_ref_cachep;
  17struct kmem_cache *btrfs_delayed_data_ref_cachep;
  18struct kmem_cache *btrfs_delayed_extent_op_cachep;
  19/*
  20 * delayed back reference update tracking.  For subvolume trees
  21 * we queue up extent allocations and backref maintenance for
  22 * delayed processing.   This avoids deep call chains where we
  23 * add extents in the middle of btrfs_search_slot, and it allows
  24 * us to buffer up frequently modified backrefs in an rb tree instead
  25 * of hammering updates on the extent allocation tree.
  26 */
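/*
 * Rough life cycle of a delayed ref, as a reading aid for the code below
 * (step 3 lives outside this file and is mentioned only for orientation):
 *
 *   1. A caller fills in a struct btrfs_ref and queues it through
 *      btrfs_add_delayed_tree_ref() or btrfs_add_delayed_data_ref().
 *   2. The ref is attached to (or merged into) the per-extent
 *      btrfs_delayed_ref_head kept in delayed_refs->href_root.
 *   3. When the transaction commits (or throttling kicks in), heads are
 *      picked via btrfs_select_ref_head() and the queued modifications are
 *      replayed against the extent tree by btrfs_run_delayed_refs() in
 *      extent-tree.c.
 */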
  27
  28bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
  29{
  30        struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
  31        struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
  32        bool ret = false;
  33        u64 reserved;
  34
  35        spin_lock(&global_rsv->lock);
  36        reserved = global_rsv->reserved;
  37        spin_unlock(&global_rsv->lock);
  38
  39        /*
  40         * Since the global reserve is just kind of magic we don't really want
  41         * to rely on it to save our bacon, so if our size is more than the
  42         * delayed_refs_rsv and the global rsv then it's time to think about
  43         * bailing.
  44         */
  45        spin_lock(&delayed_refs_rsv->lock);
  46        reserved += delayed_refs_rsv->reserved;
  47        if (delayed_refs_rsv->size >= reserved)
  48                ret = true;
  49        spin_unlock(&delayed_refs_rsv->lock);
  50        return ret;
  51}
  52
  53int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
  54{
  55        u64 num_entries =
  56                atomic_read(&trans->transaction->delayed_refs.num_entries);
  57        u64 avg_runtime;
  58        u64 val;
  59
  60        smp_mb();
  61        avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
  62        val = num_entries * avg_runtime;
  63        if (val >= NSEC_PER_SEC)
  64                return 1;
  65        if (val >= NSEC_PER_SEC / 2)
  66                return 2;
  67
  68        return btrfs_check_space_for_delayed_refs(trans->fs_info);
  69}
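/*
 * Worked example of the heuristic above, with illustrative numbers: if
 * avg_delayed_ref_runtime is 1000 ns per entry, then 1,000,000 queued
 * entries give val = 10^9 ns = NSEC_PER_SEC and the first test fires;
 * 500,000 entries give NSEC_PER_SEC / 2 and the second test fires.
 * Below both thresholds the decision falls back to the reservation check
 * in btrfs_check_space_for_delayed_refs().
 */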
  70
  71/**
  72 * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
  73 * @fs_info: the fs_info for our fs.
  74 * @nr: the number of items to drop.
  75 *
  76 * This drops the delayed ref head's count from the delayed refs rsv and frees
  77 * any excess reservation we had.
  78 */
  79void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
  80{
  81        struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
  82        u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, nr);
  83        u64 released = 0;
  84
  85        released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
  86        if (released)
  87                trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
  88                                              0, released, 0);
  89}
  90
  91/*
  92 * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
  93 * @trans: the trans that may have generated delayed refs
  94 *
  95 * This is to be called anytime we may have adjusted trans->delayed_ref_updates;
  96 * it will calculate the additional size and add it to the delayed_refs_rsv.
  97 */
  98void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
  99{
 100        struct btrfs_fs_info *fs_info = trans->fs_info;
 101        struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
 102        u64 num_bytes;
 103
 104        if (!trans->delayed_ref_updates)
 105                return;
 106
 107        num_bytes = btrfs_calc_insert_metadata_size(fs_info,
 108                                                    trans->delayed_ref_updates);
 109        spin_lock(&delayed_rsv->lock);
 110        delayed_rsv->size += num_bytes;
 111        delayed_rsv->full = 0;
 112        spin_unlock(&delayed_rsv->lock);
 113        trans->delayed_ref_updates = 0;
 114}
 115
 116/**
 117 * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
 118 * @fs_info: the fs info for our fs.
 119 * @src: the source block rsv to transfer from.
 120 * @num_bytes: the number of bytes to transfer.
 121 *
 122 * This transfers up to the num_bytes amount from the src rsv to the
 123 * delayed_refs_rsv.  Any extra bytes are returned to the space info.
 124 */
 125void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
 126                                       struct btrfs_block_rsv *src,
 127                                       u64 num_bytes)
 128{
 129        struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
 130        u64 to_free = 0;
 131
 132        spin_lock(&src->lock);
 133        src->reserved -= num_bytes;
 134        src->size -= num_bytes;
 135        spin_unlock(&src->lock);
 136
 137        spin_lock(&delayed_refs_rsv->lock);
 138        if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
 139                u64 delta = delayed_refs_rsv->size -
 140                        delayed_refs_rsv->reserved;
 141                if (num_bytes > delta) {
 142                        to_free = num_bytes - delta;
 143                        num_bytes = delta;
 144                }
 145        } else {
 146                to_free = num_bytes;
 147                num_bytes = 0;
 148        }
 149
 150        if (num_bytes)
 151                delayed_refs_rsv->reserved += num_bytes;
 152        if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
 153                delayed_refs_rsv->full = 1;
 154        spin_unlock(&delayed_refs_rsv->lock);
 155
 156        if (num_bytes)
 157                trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
 158                                              0, num_bytes, 1);
 159        if (to_free)
 160                btrfs_space_info_free_bytes_may_use(fs_info,
 161                                delayed_refs_rsv->space_info, to_free);
 162}
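/*
 * Worked example for the transfer above, with illustrative numbers: if
 * delayed_refs_rsv->size is 100K and ->reserved is 60K, the rsv is short
 * by delta = 40K.  Migrating num_bytes = 50K from @src then moves 40K
 * into ->reserved (marking the rsv full) and hands the remaining 10K back
 * to the space_info via btrfs_space_info_free_bytes_may_use().  If the
 * rsv were already full, all 50K would be returned.
 */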
 163
 164/**
 165 * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
 166 * @fs_info: the fs_info for our fs.
 167 * @flush: control how we can flush for this reservation.
 168 *
 169 * This will refill the delayed block_rsv up to one item's worth of space and
 170 * will return -ENOSPC if we can't make the reservation.
 171 */
 172int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
 173                                  enum btrfs_reserve_flush_enum flush)
 174{
 175        struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
 176        u64 limit = btrfs_calc_insert_metadata_size(fs_info, 1);
 177        u64 num_bytes = 0;
 178        int ret = -ENOSPC;
 179
 180        spin_lock(&block_rsv->lock);
 181        if (block_rsv->reserved < block_rsv->size) {
 182                num_bytes = block_rsv->size - block_rsv->reserved;
 183                num_bytes = min(num_bytes, limit);
 184        }
 185        spin_unlock(&block_rsv->lock);
 186
 187        if (!num_bytes)
 188                return 0;
 189
 190        ret = btrfs_reserve_metadata_bytes(fs_info->extent_root, block_rsv,
 191                                           num_bytes, flush);
 192        if (ret)
 193                return ret;
 194        btrfs_block_rsv_add_bytes(block_rsv, num_bytes, 0);
 195        trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
 196                                      0, num_bytes, 1);
 197        return 0;
 198}
 199
 200/*
 201 * compare two delayed tree backrefs with same bytenr and type
 202 */
 203static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
 204                          struct btrfs_delayed_tree_ref *ref2)
 205{
 206        if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
 207                if (ref1->root < ref2->root)
 208                        return -1;
 209                if (ref1->root > ref2->root)
 210                        return 1;
 211        } else {
 212                if (ref1->parent < ref2->parent)
 213                        return -1;
 214                if (ref1->parent > ref2->parent)
 215                        return 1;
 216        }
 217        return 0;
 218}
 219
 220/*
 221 * compare two delayed data backrefs with same bytenr and type
 222 */
 223static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
 224                          struct btrfs_delayed_data_ref *ref2)
 225{
 226        if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
 227                if (ref1->root < ref2->root)
 228                        return -1;
 229                if (ref1->root > ref2->root)
 230                        return 1;
 231                if (ref1->objectid < ref2->objectid)
 232                        return -1;
 233                if (ref1->objectid > ref2->objectid)
 234                        return 1;
 235                if (ref1->offset < ref2->offset)
 236                        return -1;
 237                if (ref1->offset > ref2->offset)
 238                        return 1;
 239        } else {
 240                if (ref1->parent < ref2->parent)
 241                        return -1;
 242                if (ref1->parent > ref2->parent)
 243                        return 1;
 244        }
 245        return 0;
 246}
 247
 248static int comp_refs(struct btrfs_delayed_ref_node *ref1,
 249                     struct btrfs_delayed_ref_node *ref2,
 250                     bool check_seq)
 251{
 252        int ret = 0;
 253
 254        if (ref1->type < ref2->type)
 255                return -1;
 256        if (ref1->type > ref2->type)
 257                return 1;
 258        if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
 259            ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
 260                ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
 261                                     btrfs_delayed_node_to_tree_ref(ref2));
 262        else
 263                ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
 264                                     btrfs_delayed_node_to_data_ref(ref2));
 265        if (ret)
 266                return ret;
 267        if (check_seq) {
 268                if (ref1->seq < ref2->seq)
 269                        return -1;
 270                if (ref1->seq > ref2->seq)
 271                        return 1;
 272        }
 273        return 0;
 274}
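/*
 * Together the comparators above define a total order on the refs hanging
 * off a single head: first by ref type, then by the type-specific fields
 * (root for keyed refs, parent for shared refs, plus objectid/offset for
 * data refs), and finally, when check_seq is set, by the tree mod log
 * sequence number.  Two BTRFS_TREE_BLOCK_REF_KEY refs for the same root
 * but different seq values therefore compare equal with check_seq == false
 * (making them candidates for merging) and unequal with check_seq == true
 * (so both can coexist in the per-head rbtree).
 */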
 275
 276/* insert a new ref to head ref rbtree */
 277static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
 278                                                   struct rb_node *node)
 279{
 280        struct rb_node **p = &root->rb_root.rb_node;
 281        struct rb_node *parent_node = NULL;
 282        struct btrfs_delayed_ref_head *entry;
 283        struct btrfs_delayed_ref_head *ins;
 284        u64 bytenr;
 285        bool leftmost = true;
 286
 287        ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
 288        bytenr = ins->bytenr;
 289        while (*p) {
 290                parent_node = *p;
 291                entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
 292                                 href_node);
 293
 294                if (bytenr < entry->bytenr) {
 295                        p = &(*p)->rb_left;
 296                } else if (bytenr > entry->bytenr) {
 297                        p = &(*p)->rb_right;
 298                        leftmost = false;
 299                } else {
 300                        return entry;
 301                }
 302        }
 303
 304        rb_link_node(node, parent_node, p);
 305        rb_insert_color_cached(node, root, leftmost);
 306        return NULL;
 307}
 308
 309static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
 310                struct btrfs_delayed_ref_node *ins)
 311{
 312        struct rb_node **p = &root->rb_root.rb_node;
 313        struct rb_node *node = &ins->ref_node;
 314        struct rb_node *parent_node = NULL;
 315        struct btrfs_delayed_ref_node *entry;
 316        bool leftmost = true;
 317
 318        while (*p) {
 319                int comp;
 320
 321                parent_node = *p;
 322                entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
 323                                 ref_node);
 324                comp = comp_refs(ins, entry, true);
 325                if (comp < 0) {
 326                        p = &(*p)->rb_left;
 327                } else if (comp > 0) {
 328                        p = &(*p)->rb_right;
 329                        leftmost = false;
 330                } else {
 331                        return entry;
 332                }
 333        }
 334
 335        rb_link_node(node, parent_node, p);
 336        rb_insert_color_cached(node, root, leftmost);
 337        return NULL;
 338}
 339
 340static struct btrfs_delayed_ref_head *find_first_ref_head(
 341                struct btrfs_delayed_ref_root *dr)
 342{
 343        struct rb_node *n;
 344        struct btrfs_delayed_ref_head *entry;
 345
 346        n = rb_first_cached(&dr->href_root);
 347        if (!n)
 348                return NULL;
 349
 350        entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
 351
 352        return entry;
 353}
 354
 355/*
 356 * Find a head entry based on bytenr. This returns the delayed ref head if it
 357 * was able to find one, or NULL if nothing was in that spot.  If return_bigger
 358 * is given, the next bigger entry is returned if no exact match is found.
 359 */
 360static struct btrfs_delayed_ref_head *find_ref_head(
 361                struct btrfs_delayed_ref_root *dr, u64 bytenr,
 362                bool return_bigger)
 363{
 364        struct rb_root *root = &dr->href_root.rb_root;
 365        struct rb_node *n;
 366        struct btrfs_delayed_ref_head *entry;
 367
 368        n = root->rb_node;
 369        entry = NULL;
 370        while (n) {
 371                entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
 372
 373                if (bytenr < entry->bytenr)
 374                        n = n->rb_left;
 375                else if (bytenr > entry->bytenr)
 376                        n = n->rb_right;
 377                else
 378                        return entry;
 379        }
 380        if (entry && return_bigger) {
 381                if (bytenr > entry->bytenr) {
 382                        n = rb_next(&entry->href_node);
 383                        if (!n)
 384                                return NULL;
 385                        entry = rb_entry(n, struct btrfs_delayed_ref_head,
 386                                         href_node);
 387                }
 388                return entry;
 389        }
 390        return NULL;
 391}
 392
 393int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
 394                           struct btrfs_delayed_ref_head *head)
 395{
 396        lockdep_assert_held(&delayed_refs->lock);
 397        if (mutex_trylock(&head->mutex))
 398                return 0;
 399
 400        refcount_inc(&head->refs);
 401        spin_unlock(&delayed_refs->lock);
 402
 403        mutex_lock(&head->mutex);
 404        spin_lock(&delayed_refs->lock);
 405        if (RB_EMPTY_NODE(&head->href_node)) {
 406                mutex_unlock(&head->mutex);
 407                btrfs_put_delayed_ref_head(head);
 408                return -EAGAIN;
 409        }
 410        btrfs_put_delayed_ref_head(head);
 411        return 0;
 412}
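/*
 * Note on the locking dance above: when the trylock fails we pin the head
 * with a refcount, drop delayed_refs->lock so that whoever holds
 * head->mutex can make progress, and then block on the mutex ourselves.
 * By the time we acquire it the head may already have been run and
 * unlinked from the rbtree (RB_EMPTY_NODE), in which case -EAGAIN tells
 * the caller to repeat the lookup instead of using a stale head.
 */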
 413
 414static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
 415                                    struct btrfs_delayed_ref_root *delayed_refs,
 416                                    struct btrfs_delayed_ref_head *head,
 417                                    struct btrfs_delayed_ref_node *ref)
 418{
 419        lockdep_assert_held(&head->lock);
 420        rb_erase_cached(&ref->ref_node, &head->ref_tree);
 421        RB_CLEAR_NODE(&ref->ref_node);
 422        if (!list_empty(&ref->add_list))
 423                list_del(&ref->add_list);
 424        ref->in_tree = 0;
 425        btrfs_put_delayed_ref(ref);
 426        atomic_dec(&delayed_refs->num_entries);
 427}
 428
 429static bool merge_ref(struct btrfs_trans_handle *trans,
 430                      struct btrfs_delayed_ref_root *delayed_refs,
 431                      struct btrfs_delayed_ref_head *head,
 432                      struct btrfs_delayed_ref_node *ref,
 433                      u64 seq)
 434{
 435        struct btrfs_delayed_ref_node *next;
 436        struct rb_node *node = rb_next(&ref->ref_node);
 437        bool done = false;
 438
 439        while (!done && node) {
 440                int mod;
 441
 442                next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
 443                node = rb_next(node);
 444                if (seq && next->seq >= seq)
 445                        break;
 446                if (comp_refs(ref, next, false))
 447                        break;
 448
 449                if (ref->action == next->action) {
 450                        mod = next->ref_mod;
 451                } else {
 452                        if (ref->ref_mod < next->ref_mod) {
 453                                swap(ref, next);
 454                                done = true;
 455                        }
 456                        mod = -next->ref_mod;
 457                }
 458
 459                drop_delayed_ref(trans, delayed_refs, head, next);
 460                ref->ref_mod += mod;
 461                if (ref->ref_mod == 0) {
 462                        drop_delayed_ref(trans, delayed_refs, head, ref);
 463                        done = true;
 464                } else {
 465                        /*
 466                         * Can't have multiples of the same ref on a tree block.
 467                         */
 468                        WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
 469                                ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
 470                }
 471        }
 472
 473        return done;
 474}
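/*
 * Worked example for merge_ref(): say a tree block picked up a
 * BTRFS_ADD_DELAYED_REF (ref_mod = 1) and later a BTRFS_DROP_DELAYED_REF
 * (ref_mod = 1) for the same root before either was run.  The actions
 * differ and ref->ref_mod is not smaller than next->ref_mod, so
 * mod = -next->ref_mod = -1: next is dropped, ref->ref_mod becomes 0 and
 * ref is dropped too -- the pair cancels out without ever touching the
 * extent tree.
 */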
 475
 476void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
 477                              struct btrfs_delayed_ref_root *delayed_refs,
 478                              struct btrfs_delayed_ref_head *head)
 479{
 480        struct btrfs_fs_info *fs_info = trans->fs_info;
 481        struct btrfs_delayed_ref_node *ref;
 482        struct rb_node *node;
 483        u64 seq = 0;
 484
 485        lockdep_assert_held(&head->lock);
 486
 487        if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
 488                return;
 489
 490        /* We don't have too many refs to merge for data. */
 491        if (head->is_data)
 492                return;
 493
 494        read_lock(&fs_info->tree_mod_log_lock);
 495        if (!list_empty(&fs_info->tree_mod_seq_list)) {
 496                struct seq_list *elem;
 497
 498                elem = list_first_entry(&fs_info->tree_mod_seq_list,
 499                                        struct seq_list, list);
 500                seq = elem->seq;
 501        }
 502        read_unlock(&fs_info->tree_mod_log_lock);
 503
 504again:
 505        for (node = rb_first_cached(&head->ref_tree); node;
 506             node = rb_next(node)) {
 507                ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
 508                if (seq && ref->seq >= seq)
 509                        continue;
 510                if (merge_ref(trans, delayed_refs, head, ref, seq))
 511                        goto again;
 512        }
 513}
 514
 515int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
 516{
 517        struct seq_list *elem;
 518        int ret = 0;
 519
 520        read_lock(&fs_info->tree_mod_log_lock);
 521        if (!list_empty(&fs_info->tree_mod_seq_list)) {
 522                elem = list_first_entry(&fs_info->tree_mod_seq_list,
 523                                        struct seq_list, list);
 524                if (seq >= elem->seq) {
 525                        btrfs_debug(fs_info,
 526                                "holding back delayed_ref %#x.%x, lowest is %#x.%x",
 527                                (u32)(seq >> 32), (u32)seq,
 528                                (u32)(elem->seq >> 32), (u32)elem->seq);
 529                        ret = 1;
 530                }
 531        }
 532
 533        read_unlock(&fs_info->tree_mod_log_lock);
 534        return ret;
 535}
 536
 537struct btrfs_delayed_ref_head *btrfs_select_ref_head(
 538                struct btrfs_delayed_ref_root *delayed_refs)
 539{
 540        struct btrfs_delayed_ref_head *head;
 541
 542again:
 543        head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
 544                             true);
 545        if (!head && delayed_refs->run_delayed_start != 0) {
 546                delayed_refs->run_delayed_start = 0;
 547                head = find_first_ref_head(delayed_refs);
 548        }
 549        if (!head)
 550                return NULL;
 551
 552        while (head->processing) {
 553                struct rb_node *node;
 554
 555                node = rb_next(&head->href_node);
 556                if (!node) {
 557                        if (delayed_refs->run_delayed_start == 0)
 558                                return NULL;
 559                        delayed_refs->run_delayed_start = 0;
 560                        goto again;
 561                }
 562                head = rb_entry(node, struct btrfs_delayed_ref_head,
 563                                href_node);
 564        }
 565
 566        head->processing = 1;
 567        WARN_ON(delayed_refs->num_heads_ready == 0);
 568        delayed_refs->num_heads_ready--;
 569        delayed_refs->run_delayed_start = head->bytenr +
 570                head->num_bytes;
 571        return head;
 572}
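/*
 * btrfs_select_ref_head() effectively keeps a round-robin cursor over the
 * head rbtree: run_delayed_start remembers where the previous selection
 * ended, the search resumes from there, wraps back to the first head once
 * it falls off the end of the tree, and skips any head that another
 * worker has already marked as processing.  The cursor is then advanced
 * past the head that is handed back (bytenr + num_bytes).
 */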
 573
 574void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
 575                           struct btrfs_delayed_ref_head *head)
 576{
 577        lockdep_assert_held(&delayed_refs->lock);
 578        lockdep_assert_held(&head->lock);
 579
 580        rb_erase_cached(&head->href_node, &delayed_refs->href_root);
 581        RB_CLEAR_NODE(&head->href_node);
 582        atomic_dec(&delayed_refs->num_entries);
 583        delayed_refs->num_heads--;
 584        if (head->processing == 0)
 585                delayed_refs->num_heads_ready--;
 586}
 587
 588/*
 589 * Helper to insert the ref_node into the head's ref rbtree, or to merge it
 590 * with an existing ref that compares equal.
 591 * Return 0 for insert.
 592 * Return >0 for merge.
 593 */
 594static int insert_delayed_ref(struct btrfs_trans_handle *trans,
 595                              struct btrfs_delayed_ref_root *root,
 596                              struct btrfs_delayed_ref_head *href,
 597                              struct btrfs_delayed_ref_node *ref)
 598{
 599        struct btrfs_delayed_ref_node *exist;
 600        int mod;
 601        int ret = 0;
 602
 603        spin_lock(&href->lock);
 604        exist = tree_insert(&href->ref_tree, ref);
 605        if (!exist)
 606                goto inserted;
 607
 608        /* Now we are sure we can merge */
 609        ret = 1;
 610        if (exist->action == ref->action) {
 611                mod = ref->ref_mod;
 612        } else {
 613                /* Need to change action */
 614                if (exist->ref_mod < ref->ref_mod) {
 615                        exist->action = ref->action;
 616                        mod = -exist->ref_mod;
 617                        exist->ref_mod = ref->ref_mod;
 618                        if (ref->action == BTRFS_ADD_DELAYED_REF)
 619                                list_add_tail(&exist->add_list,
 620                                              &href->ref_add_list);
 621                        else if (ref->action == BTRFS_DROP_DELAYED_REF) {
 622                                ASSERT(!list_empty(&exist->add_list));
 623                                list_del(&exist->add_list);
 624                        } else {
 625                                ASSERT(0);
 626                        }
 627                } else
 628                        mod = -ref->ref_mod;
 629        }
 630        exist->ref_mod += mod;
 631
 632        /* remove the existing ref if its ref_mod is zero */
 633        if (exist->ref_mod == 0)
 634                drop_delayed_ref(trans, root, href, exist);
 635        spin_unlock(&href->lock);
 636        return ret;
 637inserted:
 638        if (ref->action == BTRFS_ADD_DELAYED_REF)
 639                list_add_tail(&ref->add_list, &href->ref_add_list);
 640        atomic_inc(&root->num_entries);
 641        spin_unlock(&href->lock);
 642        return ret;
 643}
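/*
 * Callers of insert_delayed_ref() depend on the 0 / >0 distinction: on a
 * merge (>0) the passed-in node was never linked into the tree, so the
 * caller returns it to its kmem_cache -- see the "if (ret > 0)" blocks in
 * btrfs_add_delayed_tree_ref() and btrfs_add_delayed_data_ref() below.
 */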
 644
 645/*
 646 * Helper function to update the accounting in the head ref; @existing and
 647 * @update must have the same bytenr.
 648 */
 649static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
 650                         struct btrfs_delayed_ref_head *existing,
 651                         struct btrfs_delayed_ref_head *update,
 652                         int *old_ref_mod_ret)
 653{
 654        struct btrfs_delayed_ref_root *delayed_refs =
 655                &trans->transaction->delayed_refs;
 656        struct btrfs_fs_info *fs_info = trans->fs_info;
 657        int old_ref_mod;
 658
 659        BUG_ON(existing->is_data != update->is_data);
 660
 661        spin_lock(&existing->lock);
 662        if (update->must_insert_reserved) {
 663                /* if the extent was freed and then
 664                 * reallocated before the delayed ref
 665                 * entries were processed, we can end up
 666                 * with an existing head ref without
 667                 * the must_insert_reserved flag set.
 668                 * Set it again here
 669                 */
 670                existing->must_insert_reserved = update->must_insert_reserved;
 671
 672                /*
 673                 * update the num_bytes so we make sure the accounting
 674                 * is done correctly
 675                 */
 676                existing->num_bytes = update->num_bytes;
 677
 678        }
 679
 680        if (update->extent_op) {
 681                if (!existing->extent_op) {
 682                        existing->extent_op = update->extent_op;
 683                } else {
 684                        if (update->extent_op->update_key) {
 685                                memcpy(&existing->extent_op->key,
 686                                       &update->extent_op->key,
 687                                       sizeof(update->extent_op->key));
 688                                existing->extent_op->update_key = true;
 689                        }
 690                        if (update->extent_op->update_flags) {
 691                                existing->extent_op->flags_to_set |=
 692                                        update->extent_op->flags_to_set;
 693                                existing->extent_op->update_flags = true;
 694                        }
 695                        btrfs_free_delayed_extent_op(update->extent_op);
 696                }
 697        }
 698        /*
 699         * Update the reference mod on the head to reflect this new operation.
 700         * We only need the lock for this case because we could be processing it
 701         * currently; for refs we just added we know we're a-ok.
 702         */
 703        old_ref_mod = existing->total_ref_mod;
 704        if (old_ref_mod_ret)
 705                *old_ref_mod_ret = old_ref_mod;
 706        existing->ref_mod += update->ref_mod;
 707        existing->total_ref_mod += update->ref_mod;
 708
 709        /*
 710         * If we are going from a positive ref mod to a negative or vice
 711         * versa we need to make sure to adjust pending_csums accordingly.
 712         */
 713        if (existing->is_data) {
 714                u64 csum_leaves =
 715                        btrfs_csum_bytes_to_leaves(fs_info,
 716                                                   existing->num_bytes);
 717
 718                if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
 719                        delayed_refs->pending_csums -= existing->num_bytes;
 720                        btrfs_delayed_refs_rsv_release(fs_info, csum_leaves);
 721                }
 722                if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
 723                        delayed_refs->pending_csums += existing->num_bytes;
 724                        trans->delayed_ref_updates += csum_leaves;
 725                }
 726        }
 727        spin_unlock(&existing->lock);
 728}
 729
 730static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
 731                                  struct btrfs_qgroup_extent_record *qrecord,
 732                                  u64 bytenr, u64 num_bytes, u64 ref_root,
 733                                  u64 reserved, int action, bool is_data,
 734                                  bool is_system)
 735{
 736        int count_mod = 1;
 737        int must_insert_reserved = 0;
 738
 739        /* If reserved is provided, it must be a data extent. */
 740        BUG_ON(!is_data && reserved);
 741
 742        /*
 743         * The head node stores the sum of all the mods, so dropping a ref
 744         * should drop the sum in the head node by one.
 745         */
 746        if (action == BTRFS_UPDATE_DELAYED_HEAD)
 747                count_mod = 0;
 748        else if (action == BTRFS_DROP_DELAYED_REF)
 749                count_mod = -1;
 750
 751        /*
 752         * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
 753         * accounting when the extent is finally added, or if a later
 754         * modification deletes the delayed ref without ever inserting the
 755         * extent into the extent allocation tree.  ref->must_insert_reserved
 756         * is the flag used to record that accounting mods are required.
 757         *
 758         * Once we record must_insert_reserved, switch the action to
 759         * BTRFS_ADD_DELAYED_REF because other special casing is not required.
 760         */
 761        if (action == BTRFS_ADD_DELAYED_EXTENT)
 762                must_insert_reserved = 1;
 763        else
 764                must_insert_reserved = 0;
 765
 766        refcount_set(&head_ref->refs, 1);
 767        head_ref->bytenr = bytenr;
 768        head_ref->num_bytes = num_bytes;
 769        head_ref->ref_mod = count_mod;
 770        head_ref->must_insert_reserved = must_insert_reserved;
 771        head_ref->is_data = is_data;
 772        head_ref->is_system = is_system;
 773        head_ref->ref_tree = RB_ROOT_CACHED;
 774        INIT_LIST_HEAD(&head_ref->ref_add_list);
 775        RB_CLEAR_NODE(&head_ref->href_node);
 776        head_ref->processing = 0;
 777        head_ref->total_ref_mod = count_mod;
 778        spin_lock_init(&head_ref->lock);
 779        mutex_init(&head_ref->mutex);
 780
 781        if (qrecord) {
 782                if (ref_root && reserved) {
 783                        qrecord->data_rsv = reserved;
 784                        qrecord->data_rsv_refroot = ref_root;
 785                }
 786                qrecord->bytenr = bytenr;
 787                qrecord->num_bytes = num_bytes;
 788                qrecord->old_roots = NULL;
 789        }
 790}
 791
 792/*
 793 * helper function to actually insert a head node into the rbtree.
 794 * this does all the dirty work in terms of maintaining the correct
 795 * overall modification count.
 796 */
 797static noinline struct btrfs_delayed_ref_head *
 798add_delayed_ref_head(struct btrfs_trans_handle *trans,
 799                     struct btrfs_delayed_ref_head *head_ref,
 800                     struct btrfs_qgroup_extent_record *qrecord,
 801                     int action, int *qrecord_inserted_ret,
 802                     int *old_ref_mod, int *new_ref_mod)
 803{
 804        struct btrfs_delayed_ref_head *existing;
 805        struct btrfs_delayed_ref_root *delayed_refs;
 806        int qrecord_inserted = 0;
 807
 808        delayed_refs = &trans->transaction->delayed_refs;
 809
 810        /* Record qgroup extent info if provided */
 811        if (qrecord) {
 812                if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
 813                                        delayed_refs, qrecord))
 814                        kfree(qrecord);
 815                else
 816                        qrecord_inserted = 1;
 817        }
 818
 819        trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
 820
 821        existing = htree_insert(&delayed_refs->href_root,
 822                                &head_ref->href_node);
 823        if (existing) {
 824                update_existing_head_ref(trans, existing, head_ref,
 825                                         old_ref_mod);
 826                /*
 827                 * we've updated the existing ref, free the newly
 828                 * allocated ref
 829                 */
 830                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
 831                head_ref = existing;
 832        } else {
 833                if (old_ref_mod)
 834                        *old_ref_mod = 0;
 835                if (head_ref->is_data && head_ref->ref_mod < 0) {
 836                        delayed_refs->pending_csums += head_ref->num_bytes;
 837                        trans->delayed_ref_updates +=
 838                                btrfs_csum_bytes_to_leaves(trans->fs_info,
 839                                                           head_ref->num_bytes);
 840                }
 841                delayed_refs->num_heads++;
 842                delayed_refs->num_heads_ready++;
 843                atomic_inc(&delayed_refs->num_entries);
 844                trans->delayed_ref_updates++;
 845        }
 846        if (qrecord_inserted_ret)
 847                *qrecord_inserted_ret = qrecord_inserted;
 848        if (new_ref_mod)
 849                *new_ref_mod = head_ref->total_ref_mod;
 850
 851        return head_ref;
 852}
 853
 854/*
 855 * init_delayed_ref_common - Initialize the structure which represents a
 856 *                           modification to an extent.
 857 *
 858 * @fs_info:    Internal to the mounted filesystem mount structure.
 859 *
 860 * @ref:        The structure which is going to be initialized.
 861 *
 862 * @bytenr:     The logical address of the extent for which a modification is
 863 *              going to be recorded.
 864 *
 865 * @num_bytes:  Size of the extent whose modification is being recorded.
 866 *
 867 * @ref_root:   The id of the root where this modification has originated, this
 868 *              can be either one of the well-known metadata trees or the
 869 *              subvolume id which references this extent.
 870 *
 871 * @action:     Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
 872 *              BTRFS_ADD_DELAYED_EXTENT
 873 *
 874 * @ref_type:   Holds the type of the extent which is being recorded, can be
 875 *              one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
 876 *              when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
 877 *              BTRFS_EXTENT_DATA_REF_KEY when recording a data extent
 878 */
 879static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
 880                                    struct btrfs_delayed_ref_node *ref,
 881                                    u64 bytenr, u64 num_bytes, u64 ref_root,
 882                                    int action, u8 ref_type)
 883{
 884        u64 seq = 0;
 885
 886        if (action == BTRFS_ADD_DELAYED_EXTENT)
 887                action = BTRFS_ADD_DELAYED_REF;
 888
 889        if (is_fstree(ref_root))
 890                seq = atomic64_read(&fs_info->tree_mod_seq);
 891
 892        refcount_set(&ref->refs, 1);
 893        ref->bytenr = bytenr;
 894        ref->num_bytes = num_bytes;
 895        ref->ref_mod = 1;
 896        ref->action = action;
 897        ref->is_head = 0;
 898        ref->in_tree = 1;
 899        ref->seq = seq;
 900        ref->type = ref_type;
 901        RB_CLEAR_NODE(&ref->ref_node);
 902        INIT_LIST_HEAD(&ref->add_list);
 903}
 904
 905/*
 906 * add a delayed tree ref.  This does all of the accounting required
 907 * to make sure the delayed ref is eventually processed before this
 908 * transaction commits.
 909 */
 910int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
 911                               struct btrfs_ref *generic_ref,
 912                               struct btrfs_delayed_extent_op *extent_op,
 913                               int *old_ref_mod, int *new_ref_mod)
 914{
 915        struct btrfs_fs_info *fs_info = trans->fs_info;
 916        struct btrfs_delayed_tree_ref *ref;
 917        struct btrfs_delayed_ref_head *head_ref;
 918        struct btrfs_delayed_ref_root *delayed_refs;
 919        struct btrfs_qgroup_extent_record *record = NULL;
 920        int qrecord_inserted;
 921        bool is_system;
 922        int action = generic_ref->action;
 923        int level = generic_ref->tree_ref.level;
 924        int ret;
 925        u64 bytenr = generic_ref->bytenr;
 926        u64 num_bytes = generic_ref->len;
 927        u64 parent = generic_ref->parent;
 928        u8 ref_type;
 929
 930        is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID);
 931
 932        ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
 933        BUG_ON(extent_op && extent_op->is_data);
 934        ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
 935        if (!ref)
 936                return -ENOMEM;
 937
 938        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
 939        if (!head_ref) {
 940                kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
 941                return -ENOMEM;
 942        }
 943
 944        if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
 945            is_fstree(generic_ref->real_root) &&
 946            is_fstree(generic_ref->tree_ref.root) &&
 947            !generic_ref->skip_qgroup) {
 948                record = kzalloc(sizeof(*record), GFP_NOFS);
 949                if (!record) {
 950                        kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
 951                        kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
 952                        return -ENOMEM;
 953                }
 954        }
 955
 956        if (parent)
 957                ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
 958        else
 959                ref_type = BTRFS_TREE_BLOCK_REF_KEY;
 960
 961        init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
 962                                generic_ref->tree_ref.root, action, ref_type);
 963        ref->root = generic_ref->tree_ref.root;
 964        ref->parent = parent;
 965        ref->level = level;
 966
 967        init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
 968                              generic_ref->tree_ref.root, 0, action, false,
 969                              is_system);
 970        head_ref->extent_op = extent_op;
 971
 972        delayed_refs = &trans->transaction->delayed_refs;
 973        spin_lock(&delayed_refs->lock);
 974
 975        /*
 976         * insert both the head node and the new ref without dropping
 977         * the spin lock
 978         */
 979        head_ref = add_delayed_ref_head(trans, head_ref, record,
 980                                        action, &qrecord_inserted,
 981                                        old_ref_mod, new_ref_mod);
 982
 983        ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
 984        spin_unlock(&delayed_refs->lock);
 985
 986        /*
 987         * Need to update the delayed_refs_rsv with any changes we may have
 988         * made.
 989         */
 990        btrfs_update_delayed_refs_rsv(trans);
 991
 992        trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
 993                                   action == BTRFS_ADD_DELAYED_EXTENT ?
 994                                   BTRFS_ADD_DELAYED_REF : action);
 995        if (ret > 0)
 996                kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
 997
 998        if (qrecord_inserted)
 999                btrfs_qgroup_trace_extent_post(fs_info, record);
1000
1001        return 0;
1002}
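/*
 * A minimal sketch of how a caller typically feeds this function.  It
 * assumes the btrfs_init_generic_ref()/btrfs_init_tree_ref() helpers from
 * delayed-ref.h and uses placeholder variables (eb, parent, root, trans);
 * treat the exact calls as illustrative rather than authoritative:
 *
 *	struct btrfs_ref generic_ref = { 0 };
 *	int old_ref_mod, new_ref_mod;
 *	int ret;
 *
 *	btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_REF,
 *			       eb->start, eb->len, parent);
 *	btrfs_init_tree_ref(&generic_ref, btrfs_header_level(eb),
 *			    root->root_key.objectid);
 *	ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL,
 *					 &old_ref_mod, &new_ref_mod);
 */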
1003
1004/*
1005 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
1006 */
1007int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
1008                               struct btrfs_ref *generic_ref,
1009                               u64 reserved, int *old_ref_mod,
1010                               int *new_ref_mod)
1011{
1012        struct btrfs_fs_info *fs_info = trans->fs_info;
1013        struct btrfs_delayed_data_ref *ref;
1014        struct btrfs_delayed_ref_head *head_ref;
1015        struct btrfs_delayed_ref_root *delayed_refs;
1016        struct btrfs_qgroup_extent_record *record = NULL;
1017        int qrecord_inserted;
1018        int action = generic_ref->action;
1019        int ret;
1020        u64 bytenr = generic_ref->bytenr;
1021        u64 num_bytes = generic_ref->len;
1022        u64 parent = generic_ref->parent;
1023        u64 ref_root = generic_ref->data_ref.ref_root;
1024        u64 owner = generic_ref->data_ref.ino;
1025        u64 offset = generic_ref->data_ref.offset;
1026        u8 ref_type;
1027
1028        ASSERT(generic_ref->type == BTRFS_REF_DATA && action);
1029        ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
1030        if (!ref)
1031                return -ENOMEM;
1032
1033        if (parent)
1034                ref_type = BTRFS_SHARED_DATA_REF_KEY;
1035        else
1036                ref_type = BTRFS_EXTENT_DATA_REF_KEY;
1037        init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
1038                                ref_root, action, ref_type);
1039        ref->root = ref_root;
1040        ref->parent = parent;
1041        ref->objectid = owner;
1042        ref->offset = offset;
1043
1044
1045        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
1046        if (!head_ref) {
1047                kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
1048                return -ENOMEM;
1049        }
1050
1051        if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
1052            is_fstree(ref_root) &&
1053            is_fstree(generic_ref->real_root) &&
1054            !generic_ref->skip_qgroup) {
1055                record = kzalloc(sizeof(*record), GFP_NOFS);
1056                if (!record) {
1057                        kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
1058                        kmem_cache_free(btrfs_delayed_ref_head_cachep,
1059                                        head_ref);
1060                        return -ENOMEM;
1061                }
1062        }
1063
1064        init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
1065                              reserved, action, true, false);
1066        head_ref->extent_op = NULL;
1067
1068        delayed_refs = &trans->transaction->delayed_refs;
1069        spin_lock(&delayed_refs->lock);
1070
1071        /*
1072         * insert both the head node and the new ref without dropping
1073         * the spin lock
1074         */
1075        head_ref = add_delayed_ref_head(trans, head_ref, record,
1076                                        action, &qrecord_inserted,
1077                                        old_ref_mod, new_ref_mod);
1078
1079        ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
1080        spin_unlock(&delayed_refs->lock);
1081
1082        /*
1083         * Need to update the delayed_refs_rsv with any changes we may have
1084         * made.
1085         */
1086        btrfs_update_delayed_refs_rsv(trans);
1087
1088        trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
1089                                   action == BTRFS_ADD_DELAYED_EXTENT ?
1090                                   BTRFS_ADD_DELAYED_REF : action);
1091        if (ret > 0)
1092                kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
1093
1094
1095        if (qrecord_inserted)
1096                return btrfs_qgroup_trace_extent_post(fs_info, record);
1097        return 0;
1098}
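/*
 * The data-ref path is set up the same way, except the type-specific part
 * comes from btrfs_init_data_ref() (root id, inode number, file offset)
 * and @reserved can carry a byte count that ends up in the qgroup extent
 * record's data_rsv.  Same caveat as above: helper names and argument
 * order are taken from delayed-ref.h for this kernel version, and the
 * variables (inode, file_offset) are placeholders:
 *
 *	btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_REF,
 *			       bytenr, num_bytes, 0);
 *	btrfs_init_data_ref(&generic_ref, root->root_key.objectid,
 *			    btrfs_ino(inode), file_offset);
 *	ret = btrfs_add_delayed_data_ref(trans, &generic_ref, 0,
 *					 &old_ref_mod, &new_ref_mod);
 */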
1099
1100int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
1101                                u64 bytenr, u64 num_bytes,
1102                                struct btrfs_delayed_extent_op *extent_op)
1103{
1104        struct btrfs_delayed_ref_head *head_ref;
1105        struct btrfs_delayed_ref_root *delayed_refs;
1106
1107        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
1108        if (!head_ref)
1109                return -ENOMEM;
1110
1111        init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
1112                              BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data,
1113                              false);
1114        head_ref->extent_op = extent_op;
1115
1116        delayed_refs = &trans->transaction->delayed_refs;
1117        spin_lock(&delayed_refs->lock);
1118
1119        add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
1120                             NULL, NULL, NULL);
1121
1122        spin_unlock(&delayed_refs->lock);
1123
1124        /*
1125         * Need to update the delayed_refs_rsv with any changes we may have
1126         * made.
1127         */
1128        btrfs_update_delayed_refs_rsv(trans);
1129        return 0;
1130}
1131
1132/*
1133 * This does a simple search for the head node for a given extent.  Returns the
1134 * head node if found, or NULL if not.
1135 */
1136struct btrfs_delayed_ref_head *
1137btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
1138{
1139        lockdep_assert_held(&delayed_refs->lock);
1140
1141        return find_ref_head(delayed_refs, bytenr, false);
1142}
1143
1144void __cold btrfs_delayed_ref_exit(void)
1145{
1146        kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
1147        kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
1148        kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
1149        kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
1150}
1151
1152int __init btrfs_delayed_ref_init(void)
1153{
1154        btrfs_delayed_ref_head_cachep = kmem_cache_create(
1155                                "btrfs_delayed_ref_head",
1156                                sizeof(struct btrfs_delayed_ref_head), 0,
1157                                SLAB_MEM_SPREAD, NULL);
1158        if (!btrfs_delayed_ref_head_cachep)
1159                goto fail;
1160
1161        btrfs_delayed_tree_ref_cachep = kmem_cache_create(
1162                                "btrfs_delayed_tree_ref",
1163                                sizeof(struct btrfs_delayed_tree_ref), 0,
1164                                SLAB_MEM_SPREAD, NULL);
1165        if (!btrfs_delayed_tree_ref_cachep)
1166                goto fail;
1167
1168        btrfs_delayed_data_ref_cachep = kmem_cache_create(
1169                                "btrfs_delayed_data_ref",
1170                                sizeof(struct btrfs_delayed_data_ref), 0,
1171                                SLAB_MEM_SPREAD, NULL);
1172        if (!btrfs_delayed_data_ref_cachep)
1173                goto fail;
1174
1175        btrfs_delayed_extent_op_cachep = kmem_cache_create(
1176                                "btrfs_delayed_extent_op",
1177                                sizeof(struct btrfs_delayed_extent_op), 0,
1178                                SLAB_MEM_SPREAD, NULL);
1179        if (!btrfs_delayed_extent_op_cachep)
1180                goto fail;
1181
1182        return 0;
1183fail:
1184        btrfs_delayed_ref_exit();
1185        return -ENOMEM;
1186}
1187