linux/fs/btrfs/delayed-inode.c
   1/*
   2 * Copyright (C) 2011 Fujitsu.  All rights reserved.
   3 * Written by Miao Xie <miaox@cn.fujitsu.com>
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public
   7 * License v2 as published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12 * General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public
  15 * License along with this program; if not, write to the
  16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   17 * Boston, MA 02111-1307, USA.
  18 */
  19
  20#include <linux/slab.h>
  21#include "delayed-inode.h"
  22#include "disk-io.h"
  23#include "transaction.h"
  24
  25#define BTRFS_DELAYED_WRITEBACK         512
  26#define BTRFS_DELAYED_BACKGROUND        128
  27#define BTRFS_DELAYED_BATCH             16
  28
  29static struct kmem_cache *delayed_node_cache;
  30
  31int __init btrfs_delayed_inode_init(void)
  32{
  33        delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
  34                                        sizeof(struct btrfs_delayed_node),
  35                                        0,
  36                                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
  37                                        NULL);
  38        if (!delayed_node_cache)
  39                return -ENOMEM;
  40        return 0;
  41}
  42
  43void btrfs_delayed_inode_exit(void)
  44{
  45        if (delayed_node_cache)
  46                kmem_cache_destroy(delayed_node_cache);
  47}
  48
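     /*
      * Initialize a freshly allocated delayed node.  The reference count
      * starts at zero; callers take the references they need explicitly.
      */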
  49static inline void btrfs_init_delayed_node(
  50                                struct btrfs_delayed_node *delayed_node,
  51                                struct btrfs_root *root, u64 inode_id)
  52{
  53        delayed_node->root = root;
  54        delayed_node->inode_id = inode_id;
  55        atomic_set(&delayed_node->refs, 0);
  56        delayed_node->count = 0;
  57        delayed_node->in_list = 0;
  58        delayed_node->inode_dirty = 0;
  59        delayed_node->ins_root = RB_ROOT;
  60        delayed_node->del_root = RB_ROOT;
  61        mutex_init(&delayed_node->mutex);
  62        delayed_node->index_cnt = 0;
  63        INIT_LIST_HEAD(&delayed_node->n_list);
  64        INIT_LIST_HEAD(&delayed_node->p_list);
  65        delayed_node->bytes_reserved = 0;
  66        memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item));
  67}
  68
  69static inline int btrfs_is_continuous_delayed_item(
  70                                        struct btrfs_delayed_item *item1,
  71                                        struct btrfs_delayed_item *item2)
  72{
  73        if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
  74            item1->key.objectid == item2->key.objectid &&
  75            item1->key.type == item2->key.type &&
  76            item1->key.offset + 1 == item2->key.offset)
  77                return 1;
  78        return 0;
  79}
  80
  81static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
  82                                                        struct btrfs_root *root)
  83{
  84        return root->fs_info->delayed_root;
  85}
  86
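     /*
      * Find the delayed node of an inode: first try the pointer cached in the
      * btrfs inode, then fall back to the per-root radix tree.  On success a
      * reference is taken for the caller; returns NULL if no delayed node
      * exists for this inode yet.
      */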
  87static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
  88{
  89        struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
  90        struct btrfs_root *root = btrfs_inode->root;
  91        u64 ino = btrfs_ino(inode);
  92        struct btrfs_delayed_node *node;
  93
  94        node = ACCESS_ONCE(btrfs_inode->delayed_node);
  95        if (node) {
  96                atomic_inc(&node->refs);
  97                return node;
  98        }
  99
 100        spin_lock(&root->inode_lock);
 101        node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
 102        if (node) {
 103                if (btrfs_inode->delayed_node) {
 104                        atomic_inc(&node->refs);        /* can be accessed */
 105                        BUG_ON(btrfs_inode->delayed_node != node);
 106                        spin_unlock(&root->inode_lock);
 107                        return node;
 108                }
 109                btrfs_inode->delayed_node = node;
 110                atomic_inc(&node->refs);        /* can be accessed */
 111                atomic_inc(&node->refs);        /* cached in the inode */
 112                spin_unlock(&root->inode_lock);
 113                return node;
 114        }
 115        spin_unlock(&root->inode_lock);
 116
 117        return NULL;
 118}
 119
 120/* Will return either the node or PTR_ERR(-ENOMEM) */
 121static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
 122                                                        struct inode *inode)
 123{
 124        struct btrfs_delayed_node *node;
 125        struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
 126        struct btrfs_root *root = btrfs_inode->root;
 127        u64 ino = btrfs_ino(inode);
 128        int ret;
 129
 130again:
 131        node = btrfs_get_delayed_node(inode);
 132        if (node)
 133                return node;
 134
 135        node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
 136        if (!node)
 137                return ERR_PTR(-ENOMEM);
 138        btrfs_init_delayed_node(node, root, ino);
 139
 140        atomic_inc(&node->refs);        /* cached in the btrfs inode */
 141        atomic_inc(&node->refs);        /* can be accessed */
 142
 143        ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
 144        if (ret) {
 145                kmem_cache_free(delayed_node_cache, node);
 146                return ERR_PTR(ret);
 147        }
 148
 149        spin_lock(&root->inode_lock);
 150        ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
 151        if (ret == -EEXIST) {
 152                kmem_cache_free(delayed_node_cache, node);
 153                spin_unlock(&root->inode_lock);
 154                radix_tree_preload_end();
 155                goto again;
 156        }
 157        btrfs_inode->delayed_node = node;
 158        spin_unlock(&root->inode_lock);
 159        radix_tree_preload_end();
 160
 161        return node;
 162}
 163
 164/*
 165 * Call it when holding delayed_node->mutex
 166 *
 167 * If mod = 1, add this node into the prepared list.
 168 */
 169static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
 170                                     struct btrfs_delayed_node *node,
 171                                     int mod)
 172{
 173        spin_lock(&root->lock);
 174        if (node->in_list) {
 175                if (!list_empty(&node->p_list))
 176                        list_move_tail(&node->p_list, &root->prepare_list);
 177                else if (mod)
 178                        list_add_tail(&node->p_list, &root->prepare_list);
 179        } else {
 180                list_add_tail(&node->n_list, &root->node_list);
 181                list_add_tail(&node->p_list, &root->prepare_list);
 182                atomic_inc(&node->refs);        /* inserted into list */
 183                root->nodes++;
 184                node->in_list = 1;
 185        }
 186        spin_unlock(&root->lock);
 187}
 188
 189/* Call it when holding delayed_node->mutex */
 190static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
 191                                       struct btrfs_delayed_node *node)
 192{
 193        spin_lock(&root->lock);
 194        if (node->in_list) {
 195                root->nodes--;
 196                atomic_dec(&node->refs);        /* not in the list */
 197                list_del_init(&node->n_list);
 198                if (!list_empty(&node->p_list))
 199                        list_del_init(&node->p_list);
 200                node->in_list = 0;
 201        }
 202        spin_unlock(&root->lock);
 203}
 204
 205static struct btrfs_delayed_node *btrfs_first_delayed_node(
 206                        struct btrfs_delayed_root *delayed_root)
 207{
 208        struct list_head *p;
 209        struct btrfs_delayed_node *node = NULL;
 210
 211        spin_lock(&delayed_root->lock);
 212        if (list_empty(&delayed_root->node_list))
 213                goto out;
 214
 215        p = delayed_root->node_list.next;
 216        node = list_entry(p, struct btrfs_delayed_node, n_list);
 217        atomic_inc(&node->refs);
 218out:
 219        spin_unlock(&delayed_root->lock);
 220
 221        return node;
 222}
 223
 224static struct btrfs_delayed_node *btrfs_next_delayed_node(
 225                                                struct btrfs_delayed_node *node)
 226{
 227        struct btrfs_delayed_root *delayed_root;
 228        struct list_head *p;
 229        struct btrfs_delayed_node *next = NULL;
 230
 231        delayed_root = node->root->fs_info->delayed_root;
 232        spin_lock(&delayed_root->lock);
 233        if (!node->in_list) {   /* not in the list */
 234                if (list_empty(&delayed_root->node_list))
 235                        goto out;
 236                p = delayed_root->node_list.next;
 237        } else if (list_is_last(&node->n_list, &delayed_root->node_list))
 238                goto out;
 239        else
 240                p = node->n_list.next;
 241
 242        next = list_entry(p, struct btrfs_delayed_node, n_list);
 243        atomic_inc(&next->refs);
 244out:
 245        spin_unlock(&delayed_root->lock);
 246
 247        return next;
 248}
 249
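     /*
      * Drop one reference on a delayed node.  If the node still has pending
      * items it is (re)queued on the node/prepare lists, otherwise it is
      * dequeued.  When the last reference goes away, the node is removed from
      * the root's radix tree and freed; the recheck of the refcount under
      * root->inode_lock closes the race with a concurrent lookup.
      */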
 250static void __btrfs_release_delayed_node(
 251                                struct btrfs_delayed_node *delayed_node,
 252                                int mod)
 253{
 254        struct btrfs_delayed_root *delayed_root;
 255
 256        if (!delayed_node)
 257                return;
 258
 259        delayed_root = delayed_node->root->fs_info->delayed_root;
 260
 261        mutex_lock(&delayed_node->mutex);
 262        if (delayed_node->count)
 263                btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
 264        else
 265                btrfs_dequeue_delayed_node(delayed_root, delayed_node);
 266        mutex_unlock(&delayed_node->mutex);
 267
 268        if (atomic_dec_and_test(&delayed_node->refs)) {
 269                struct btrfs_root *root = delayed_node->root;
 270                spin_lock(&root->inode_lock);
 271                if (atomic_read(&delayed_node->refs) == 0) {
 272                        radix_tree_delete(&root->delayed_nodes_tree,
 273                                          delayed_node->inode_id);
 274                        kmem_cache_free(delayed_node_cache, delayed_node);
 275                }
 276                spin_unlock(&root->inode_lock);
 277        }
 278}
 279
 280static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
 281{
 282        __btrfs_release_delayed_node(node, 0);
 283}
 284
 285static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
 286                                        struct btrfs_delayed_root *delayed_root)
 287{
 288        struct list_head *p;
 289        struct btrfs_delayed_node *node = NULL;
 290
 291        spin_lock(&delayed_root->lock);
 292        if (list_empty(&delayed_root->prepare_list))
 293                goto out;
 294
 295        p = delayed_root->prepare_list.next;
 296        list_del_init(p);
 297        node = list_entry(p, struct btrfs_delayed_node, p_list);
 298        atomic_inc(&node->refs);
 299out:
 300        spin_unlock(&delayed_root->lock);
 301
 302        return node;
 303}
 304
 305static inline void btrfs_release_prepared_delayed_node(
 306                                        struct btrfs_delayed_node *node)
 307{
 308        __btrfs_release_delayed_node(node, 1);
 309}
 310
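     /*
      * Allocate a delayed item with @data_len bytes of trailing data.  The
      * item starts with a single reference; returns NULL if the allocation
      * fails.
      */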
 311static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
 312{
 313        struct btrfs_delayed_item *item;
 314        item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
 315        if (item) {
 316                item->data_len = data_len;
 317                item->ins_or_del = 0;
 318                item->bytes_reserved = 0;
 319                item->delayed_node = NULL;
 320                atomic_set(&item->refs, 1);
 321        }
 322        return item;
 323}
 324
 325/*
 326 * __btrfs_lookup_delayed_item - look up the delayed item by key
  327 * @root:         the rbtree of the delayed node to search
 328 * @key:          the key to look up
 329 * @prev:         used to store the prev item if the right item isn't found
 330 * @next:         used to store the next item if the right item isn't found
 331 *
 332 * Note: if we don't find the right item, we will return the prev item and
 333 * the next item.
 334 */
 335static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
 336                                struct rb_root *root,
 337                                struct btrfs_key *key,
 338                                struct btrfs_delayed_item **prev,
 339                                struct btrfs_delayed_item **next)
 340{
 341        struct rb_node *node, *prev_node = NULL;
 342        struct btrfs_delayed_item *delayed_item = NULL;
 343        int ret = 0;
 344
 345        node = root->rb_node;
 346
 347        while (node) {
 348                delayed_item = rb_entry(node, struct btrfs_delayed_item,
 349                                        rb_node);
 350                prev_node = node;
 351                ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
 352                if (ret < 0)
 353                        node = node->rb_right;
 354                else if (ret > 0)
 355                        node = node->rb_left;
 356                else
 357                        return delayed_item;
 358        }
 359
 360        if (prev) {
 361                if (!prev_node)
 362                        *prev = NULL;
 363                else if (ret < 0)
 364                        *prev = delayed_item;
 365                else if ((node = rb_prev(prev_node)) != NULL) {
 366                        *prev = rb_entry(node, struct btrfs_delayed_item,
 367                                         rb_node);
 368                } else
 369                        *prev = NULL;
 370        }
 371
 372        if (next) {
 373                if (!prev_node)
 374                        *next = NULL;
 375                else if (ret > 0)
 376                        *next = delayed_item;
 377                else if ((node = rb_next(prev_node)) != NULL) {
 378                        *next = rb_entry(node, struct btrfs_delayed_item,
 379                                         rb_node);
 380                } else
 381                        *next = NULL;
 382        }
 383        return NULL;
 384}
 385
 386static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
 387                                        struct btrfs_delayed_node *delayed_node,
 388                                        struct btrfs_key *key)
 389{
 390        struct btrfs_delayed_item *item;
 391
 392        item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
 393                                           NULL, NULL);
 394        return item;
 395}
 396
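     /*
      * Link a delayed item into the insertion or deletion rbtree of a delayed
      * node, keyed by its btrfs key.  Returns -EEXIST if an item with the
      * same key is already queued.  On success the node's item count and the
      * global delayed item counter are bumped, and index_cnt is advanced for
      * new dir index insertions.
      */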
 397static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
 398                                    struct btrfs_delayed_item *ins,
 399                                    int action)
 400{
 401        struct rb_node **p, *node;
 402        struct rb_node *parent_node = NULL;
 403        struct rb_root *root;
 404        struct btrfs_delayed_item *item;
 405        int cmp;
 406
 407        if (action == BTRFS_DELAYED_INSERTION_ITEM)
 408                root = &delayed_node->ins_root;
 409        else if (action == BTRFS_DELAYED_DELETION_ITEM)
 410                root = &delayed_node->del_root;
 411        else
 412                BUG();
 413        p = &root->rb_node;
 414        node = &ins->rb_node;
 415
 416        while (*p) {
 417                parent_node = *p;
 418                item = rb_entry(parent_node, struct btrfs_delayed_item,
 419                                 rb_node);
 420
 421                cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
 422                if (cmp < 0)
 423                        p = &(*p)->rb_right;
 424                else if (cmp > 0)
 425                        p = &(*p)->rb_left;
 426                else
 427                        return -EEXIST;
 428        }
 429
 430        rb_link_node(node, parent_node, p);
 431        rb_insert_color(node, root);
 432        ins->delayed_node = delayed_node;
 433        ins->ins_or_del = action;
 434
 435        if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
 436            action == BTRFS_DELAYED_INSERTION_ITEM &&
 437            ins->key.offset >= delayed_node->index_cnt)
 438                        delayed_node->index_cnt = ins->key.offset + 1;
 439
 440        delayed_node->count++;
 441        atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
 442        return 0;
 443}
 444
 445static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
 446                                              struct btrfs_delayed_item *item)
 447{
 448        return __btrfs_add_delayed_item(node, item,
 449                                        BTRFS_DELAYED_INSERTION_ITEM);
 450}
 451
 452static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
 453                                             struct btrfs_delayed_item *item)
 454{
 455        return __btrfs_add_delayed_item(node, item,
 456                                        BTRFS_DELAYED_DELETION_ITEM);
 457}
 458
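     /*
      * Account one completed delayed item: bump the sequence counter, drop
      * the global item count and wake up throttled waiters once the backlog
      * falls below BTRFS_DELAYED_BACKGROUND or a batch boundary is crossed.
      */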
 459static void finish_one_item(struct btrfs_delayed_root *delayed_root)
 460{
 461        int seq = atomic_inc_return(&delayed_root->items_seq);
 462        if ((atomic_dec_return(&delayed_root->items) <
 463            BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
 464            waitqueue_active(&delayed_root->wait))
 465                wake_up(&delayed_root->wait);
 466}
 467
 468static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
 469{
 470        struct rb_root *root;
 471        struct btrfs_delayed_root *delayed_root;
 472
 473        delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
 474
 475        BUG_ON(!delayed_root);
 476        BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
 477               delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
 478
 479        if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
 480                root = &delayed_item->delayed_node->ins_root;
 481        else
 482                root = &delayed_item->delayed_node->del_root;
 483
 484        rb_erase(&delayed_item->rb_node, root);
 485        delayed_item->delayed_node->count--;
 486
 487        finish_one_item(delayed_root);
 488}
 489
 490static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
 491{
 492        if (item) {
 493                __btrfs_remove_delayed_item(item);
 494                if (atomic_dec_and_test(&item->refs))
 495                        kfree(item);
 496        }
 497}
 498
 499static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
 500                                        struct btrfs_delayed_node *delayed_node)
 501{
 502        struct rb_node *p;
 503        struct btrfs_delayed_item *item = NULL;
 504
 505        p = rb_first(&delayed_node->ins_root);
 506        if (p)
 507                item = rb_entry(p, struct btrfs_delayed_item, rb_node);
 508
 509        return item;
 510}
 511
 512static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
 513                                        struct btrfs_delayed_node *delayed_node)
 514{
 515        struct rb_node *p;
 516        struct btrfs_delayed_item *item = NULL;
 517
 518        p = rb_first(&delayed_node->del_root);
 519        if (p)
 520                item = rb_entry(p, struct btrfs_delayed_item, rb_node);
 521
 522        return item;
 523}
 524
 525static struct btrfs_delayed_item *__btrfs_next_delayed_item(
 526                                                struct btrfs_delayed_item *item)
 527{
 528        struct rb_node *p;
 529        struct btrfs_delayed_item *next = NULL;
 530
 531        p = rb_next(&item->rb_node);
 532        if (p)
 533                next = rb_entry(p, struct btrfs_delayed_item, rb_node);
 534
 535        return next;
 536}
 537
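     /*
      * Reserve metadata space for one delayed item by migrating it from the
      * transaction's block reservation into the global delayed_block_rsv, so
      * the item can be applied to the tree later on.
      */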
 538static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
 539                                               struct btrfs_root *root,
 540                                               struct btrfs_delayed_item *item)
 541{
 542        struct btrfs_block_rsv *src_rsv;
 543        struct btrfs_block_rsv *dst_rsv;
 544        u64 num_bytes;
 545        int ret;
 546
 547        if (!trans->bytes_reserved)
 548                return 0;
 549
 550        src_rsv = trans->block_rsv;
 551        dst_rsv = &root->fs_info->delayed_block_rsv;
 552
 553        num_bytes = btrfs_calc_trans_metadata_size(root, 1);
 554        ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
 555        if (!ret) {
 556                trace_btrfs_space_reservation(root->fs_info, "delayed_item",
 557                                              item->key.objectid,
 558                                              num_bytes, 1);
 559                item->bytes_reserved = num_bytes;
 560        }
 561
 562        return ret;
 563}
 564
 565static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
 566                                                struct btrfs_delayed_item *item)
 567{
 568        struct btrfs_block_rsv *rsv;
 569
 570        if (!item->bytes_reserved)
 571                return;
 572
 573        rsv = &root->fs_info->delayed_block_rsv;
 574        trace_btrfs_space_reservation(root->fs_info, "delayed_item",
 575                                      item->key.objectid, item->bytes_reserved,
 576                                      0);
 577        btrfs_block_rsv_release(root, rsv,
 578                                item->bytes_reserved);
 579}
 580
 581static int btrfs_delayed_inode_reserve_metadata(
 582                                        struct btrfs_trans_handle *trans,
 583                                        struct btrfs_root *root,
 584                                        struct inode *inode,
 585                                        struct btrfs_delayed_node *node)
 586{
 587        struct btrfs_block_rsv *src_rsv;
 588        struct btrfs_block_rsv *dst_rsv;
 589        u64 num_bytes;
 590        int ret;
 591        bool release = false;
 592
 593        src_rsv = trans->block_rsv;
 594        dst_rsv = &root->fs_info->delayed_block_rsv;
 595
 596        num_bytes = btrfs_calc_trans_metadata_size(root, 1);
 597
 598        /*
  599         * btrfs_dirty_inode will update the inode under btrfs_join_transaction,
  600         * which, for speed, doesn't reserve space.  This is a problem since we
  601         * still need to reserve space for this update, so try to reserve the
  602         * space here.
  603         *
  604         * Now if src_rsv == delalloc_block_rsv we'll let it just steal, since
  605         * that space is already accounted for.
 606         */
 607        if (!src_rsv || (!trans->bytes_reserved &&
 608                         src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
 609                ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
 610                                          BTRFS_RESERVE_NO_FLUSH);
 611                /*
 612                 * Since we're under a transaction reserve_metadata_bytes could
 613                 * try to commit the transaction which will make it return
 614                 * EAGAIN to make us stop the transaction we have, so return
 615                 * ENOSPC instead so that btrfs_dirty_inode knows what to do.
 616                 */
 617                if (ret == -EAGAIN)
 618                        ret = -ENOSPC;
 619                if (!ret) {
 620                        node->bytes_reserved = num_bytes;
 621                        trace_btrfs_space_reservation(root->fs_info,
 622                                                      "delayed_inode",
 623                                                      btrfs_ino(inode),
 624                                                      num_bytes, 1);
 625                }
 626                return ret;
 627        } else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
 628                spin_lock(&BTRFS_I(inode)->lock);
 629                if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
 630                                       &BTRFS_I(inode)->runtime_flags)) {
 631                        spin_unlock(&BTRFS_I(inode)->lock);
 632                        release = true;
 633                        goto migrate;
 634                }
 635                spin_unlock(&BTRFS_I(inode)->lock);
 636
 637                /* Ok we didn't have space pre-reserved.  This shouldn't happen
 638                 * too often but it can happen if we do delalloc to an existing
 639                 * inode which gets dirtied because of the time update, and then
 640                 * isn't touched again until after the transaction commits and
 641                 * then we try to write out the data.  First try to be nice and
  642         * reserve something strictly for us.  If not, be a pain and try
 643                 * to steal from the delalloc block rsv.
 644                 */
 645                ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
 646                                          BTRFS_RESERVE_NO_FLUSH);
 647                if (!ret)
 648                        goto out;
 649
 650                ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
 651                if (!ret)
 652                        goto out;
 653
 654                /*
 655                 * Ok this is a problem, let's just steal from the global rsv
 656                 * since this really shouldn't happen that often.
 657                 */
 658                WARN_ON(1);
 659                ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
 660                                              dst_rsv, num_bytes);
 661                goto out;
 662        }
 663
 664migrate:
 665        ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
 666
 667out:
 668        /*
  669         * Migrate only takes a reservation; it doesn't touch the size of the
  670         * block_rsv.  This is to keep things simple for people who don't
  671         * normally have things migrated from their block rsv.  If they go to
  672         * release their reservation, that will decrease the size as well, so
  673         * if migrate reduced the size we'd end up with a negative size.  But
  674         * for the delalloc_meta_reserved stuff we will only know to drop 1
  675         * reservation, but we could in fact do this reserve/migrate dance
  676         * several times between the time we did the original reservation and
  677         * the time we'd clean it up.  So to take care of this, release the
  678         * space for the meta reservation here.  I think it may be time for a
  679         * documentation page on how block rsvs work.
 680         */
 681        if (!ret) {
 682                trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
 683                                              btrfs_ino(inode), num_bytes, 1);
 684                node->bytes_reserved = num_bytes;
 685        }
 686
 687        if (release) {
 688                trace_btrfs_space_reservation(root->fs_info, "delalloc",
 689                                              btrfs_ino(inode), num_bytes, 0);
 690                btrfs_block_rsv_release(root, src_rsv, num_bytes);
 691        }
 692
 693        return ret;
 694}
 695
 696static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
 697                                                struct btrfs_delayed_node *node)
 698{
 699        struct btrfs_block_rsv *rsv;
 700
 701        if (!node->bytes_reserved)
 702                return;
 703
 704        rsv = &root->fs_info->delayed_block_rsv;
 705        trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
 706                                      node->inode_id, node->bytes_reserved, 0);
 707        btrfs_block_rsv_release(root, rsv,
 708                                node->bytes_reserved);
 709        node->bytes_reserved = 0;
 710}
 711
 712/*
 713 * This helper will insert some continuous items into the same leaf according
 714 * to the free space of the leaf.
 715 */
 716static int btrfs_batch_insert_items(struct btrfs_root *root,
 717                                    struct btrfs_path *path,
 718                                    struct btrfs_delayed_item *item)
 719{
 720        struct btrfs_delayed_item *curr, *next;
 721        int free_space;
 722        int total_data_size = 0, total_size = 0;
 723        struct extent_buffer *leaf;
 724        char *data_ptr;
 725        struct btrfs_key *keys;
 726        u32 *data_size;
 727        struct list_head head;
 728        int slot;
 729        int nitems;
 730        int i;
 731        int ret = 0;
 732
 733        BUG_ON(!path->nodes[0]);
 734
 735        leaf = path->nodes[0];
 736        free_space = btrfs_leaf_free_space(root, leaf);
 737        INIT_LIST_HEAD(&head);
 738
 739        next = item;
 740        nitems = 0;
 741
 742        /*
  743         * count the number of continuous items that we can insert in a batch
 744         */
 745        while (total_size + next->data_len + sizeof(struct btrfs_item) <=
 746               free_space) {
 747                total_data_size += next->data_len;
 748                total_size += next->data_len + sizeof(struct btrfs_item);
 749                list_add_tail(&next->tree_list, &head);
 750                nitems++;
 751
 752                curr = next;
 753                next = __btrfs_next_delayed_item(curr);
 754                if (!next)
 755                        break;
 756
 757                if (!btrfs_is_continuous_delayed_item(curr, next))
 758                        break;
 759        }
 760
 761        if (!nitems) {
 762                ret = 0;
 763                goto out;
 764        }
 765
 766        /*
  767         * we need to allocate some memory space, but it might cause the task
 768         * to sleep, so we set all locked nodes in the path to blocking locks
 769         * first.
 770         */
 771        btrfs_set_path_blocking(path);
 772
 773        keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
 774        if (!keys) {
 775                ret = -ENOMEM;
 776                goto out;
 777        }
 778
 779        data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
 780        if (!data_size) {
 781                ret = -ENOMEM;
 782                goto error;
 783        }
 784
 785        /* get keys of all the delayed items */
 786        i = 0;
 787        list_for_each_entry(next, &head, tree_list) {
 788                keys[i] = next->key;
 789                data_size[i] = next->data_len;
 790                i++;
 791        }
 792
  793        /* reset all the locked nodes in the path to spinning locks. */
 794        btrfs_clear_path_blocking(path, NULL, 0);
 795
 796        /* insert the keys of the items */
 797        setup_items_for_insert(root, path, keys, data_size,
 798                               total_data_size, total_size, nitems);
 799
 800        /* insert the dir index items */
 801        slot = path->slots[0];
 802        list_for_each_entry_safe(curr, next, &head, tree_list) {
 803                data_ptr = btrfs_item_ptr(leaf, slot, char);
 804                write_extent_buffer(leaf, &curr->data,
 805                                    (unsigned long)data_ptr,
 806                                    curr->data_len);
 807                slot++;
 808
 809                btrfs_delayed_item_release_metadata(root, curr);
 810
 811                list_del(&curr->tree_list);
 812                btrfs_release_delayed_item(curr);
 813        }
 814
 815error:
 816        kfree(data_size);
 817        kfree(keys);
 818out:
 819        return ret;
 820}
 821
 822/*
  823 * This helper only handles simple insertions that don't need to extend the
  824 * item for new data, such as directory name index insertion and inode insertion.
 825 */
 826static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
 827                                     struct btrfs_root *root,
 828                                     struct btrfs_path *path,
 829                                     struct btrfs_delayed_item *delayed_item)
 830{
 831        struct extent_buffer *leaf;
 832        char *ptr;
 833        int ret;
 834
 835        ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
 836                                      delayed_item->data_len);
 837        if (ret < 0 && ret != -EEXIST)
 838                return ret;
 839
 840        leaf = path->nodes[0];
 841
 842        ptr = btrfs_item_ptr(leaf, path->slots[0], char);
 843
 844        write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
 845                            delayed_item->data_len);
 846        btrfs_mark_buffer_dirty(leaf);
 847
 848        btrfs_delayed_item_release_metadata(root, delayed_item);
 849        return 0;
 850}
 851
 852/*
 853 * we insert an item first, then if there are some continuous items, we try
 854 * to insert those items into the same leaf.
 855 */
 856static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
 857                                      struct btrfs_path *path,
 858                                      struct btrfs_root *root,
 859                                      struct btrfs_delayed_node *node)
 860{
 861        struct btrfs_delayed_item *curr, *prev;
 862        int ret = 0;
 863
 864do_again:
 865        mutex_lock(&node->mutex);
 866        curr = __btrfs_first_delayed_insertion_item(node);
 867        if (!curr)
 868                goto insert_end;
 869
 870        ret = btrfs_insert_delayed_item(trans, root, path, curr);
 871        if (ret < 0) {
 872                btrfs_release_path(path);
 873                goto insert_end;
 874        }
 875
 876        prev = curr;
 877        curr = __btrfs_next_delayed_item(prev);
 878        if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
 879                /* insert the continuous items into the same leaf */
 880                path->slots[0]++;
 881                btrfs_batch_insert_items(root, path, curr);
 882        }
 883        btrfs_release_delayed_item(prev);
 884        btrfs_mark_buffer_dirty(path->nodes[0]);
 885
 886        btrfs_release_path(path);
 887        mutex_unlock(&node->mutex);
 888        goto do_again;
 889
 890insert_end:
 891        mutex_unlock(&node->mutex);
 892        return ret;
 893}
 894
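     /*
      * Starting at path->slots[0], collect the consecutive dir index items in
      * the leaf that match the queued delayed deletion items, delete them
      * from the tree in one go, and release the corresponding delayed items
      * and their reservations.
      */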
 895static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
 896                                    struct btrfs_root *root,
 897                                    struct btrfs_path *path,
 898                                    struct btrfs_delayed_item *item)
 899{
 900        struct btrfs_delayed_item *curr, *next;
 901        struct extent_buffer *leaf;
 902        struct btrfs_key key;
 903        struct list_head head;
 904        int nitems, i, last_item;
 905        int ret = 0;
 906
 907        BUG_ON(!path->nodes[0]);
 908
 909        leaf = path->nodes[0];
 910
 911        i = path->slots[0];
 912        last_item = btrfs_header_nritems(leaf) - 1;
 913        if (i > last_item)
 914                return -ENOENT; /* FIXME: Is errno suitable? */
 915
 916        next = item;
 917        INIT_LIST_HEAD(&head);
 918        btrfs_item_key_to_cpu(leaf, &key, i);
 919        nitems = 0;
 920        /*
  921         * count the number of dir index items that we can delete in a batch
 922         */
 923        while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
 924                list_add_tail(&next->tree_list, &head);
 925                nitems++;
 926
 927                curr = next;
 928                next = __btrfs_next_delayed_item(curr);
 929                if (!next)
 930                        break;
 931
 932                if (!btrfs_is_continuous_delayed_item(curr, next))
 933                        break;
 934
 935                i++;
 936                if (i > last_item)
 937                        break;
 938                btrfs_item_key_to_cpu(leaf, &key, i);
 939        }
 940
 941        if (!nitems)
 942                return 0;
 943
 944        ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
 945        if (ret)
 946                goto out;
 947
 948        list_for_each_entry_safe(curr, next, &head, tree_list) {
 949                btrfs_delayed_item_release_metadata(root, curr);
 950                list_del(&curr->tree_list);
 951                btrfs_release_delayed_item(curr);
 952        }
 953
 954out:
 955        return ret;
 956}
 957
 958static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
 959                                      struct btrfs_path *path,
 960                                      struct btrfs_root *root,
 961                                      struct btrfs_delayed_node *node)
 962{
 963        struct btrfs_delayed_item *curr, *prev;
 964        int ret = 0;
 965
 966do_again:
 967        mutex_lock(&node->mutex);
 968        curr = __btrfs_first_delayed_deletion_item(node);
 969        if (!curr)
 970                goto delete_fail;
 971
 972        ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
 973        if (ret < 0)
 974                goto delete_fail;
 975        else if (ret > 0) {
 976                /*
  977                 * can't find the item which the delayed item points to, so the
  978                 * delayed item is stale, just drop it.
 979                 */
 980                prev = curr;
 981                curr = __btrfs_next_delayed_item(prev);
 982                btrfs_release_delayed_item(prev);
 983                ret = 0;
 984                btrfs_release_path(path);
 985                if (curr) {
 986                        mutex_unlock(&node->mutex);
 987                        goto do_again;
 988                } else
 989                        goto delete_fail;
 990        }
 991
 992        btrfs_batch_delete_items(trans, root, path, curr);
 993        btrfs_release_path(path);
 994        mutex_unlock(&node->mutex);
 995        goto do_again;
 996
 997delete_fail:
 998        btrfs_release_path(path);
 999        mutex_unlock(&node->mutex);
1000        return ret;
1001}
1002
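     /*
      * Clear the dirty-inode state of a delayed node and account the finished
      * inode item via finish_one_item().
      */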
1003static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1004{
1005        struct btrfs_delayed_root *delayed_root;
1006
1007        if (delayed_node && delayed_node->inode_dirty) {
1008                BUG_ON(!delayed_node->root);
1009                delayed_node->inode_dirty = 0;
1010                delayed_node->count--;
1011
1012                delayed_root = delayed_node->root->fs_info->delayed_root;
1013                finish_one_item(delayed_root);
1014        }
1015}
1016
1017static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1018                                        struct btrfs_root *root,
1019                                        struct btrfs_path *path,
1020                                        struct btrfs_delayed_node *node)
1021{
1022        struct btrfs_key key;
1023        struct btrfs_inode_item *inode_item;
1024        struct extent_buffer *leaf;
1025        int ret;
1026
1027        key.objectid = node->inode_id;
1028        btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1029        key.offset = 0;
1030
1031        ret = btrfs_lookup_inode(trans, root, path, &key, 1);
1032        if (ret > 0) {
1033                btrfs_release_path(path);
1034                return -ENOENT;
1035        } else if (ret < 0) {
1036                return ret;
1037        }
1038
1039        btrfs_unlock_up_safe(path, 1);
1040        leaf = path->nodes[0];
1041        inode_item = btrfs_item_ptr(leaf, path->slots[0],
1042                                    struct btrfs_inode_item);
1043        write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1044                            sizeof(struct btrfs_inode_item));
1045        btrfs_mark_buffer_dirty(leaf);
1046        btrfs_release_path(path);
1047
1048        btrfs_delayed_inode_release_metadata(root, node);
1049        btrfs_release_delayed_inode(node);
1050
1051        return 0;
1052}
1053
1054static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1055                                             struct btrfs_root *root,
1056                                             struct btrfs_path *path,
1057                                             struct btrfs_delayed_node *node)
1058{
1059        int ret;
1060
1061        mutex_lock(&node->mutex);
1062        if (!node->inode_dirty) {
1063                mutex_unlock(&node->mutex);
1064                return 0;
1065        }
1066
1067        ret = __btrfs_update_delayed_inode(trans, root, path, node);
1068        mutex_unlock(&node->mutex);
1069        return ret;
1070}
1071
1072static inline int
1073__btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1074                                   struct btrfs_path *path,
1075                                   struct btrfs_delayed_node *node)
1076{
1077        int ret;
1078
1079        ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1080        if (ret)
1081                return ret;
1082
1083        ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1084        if (ret)
1085                return ret;
1086
1087        ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1088        return ret;
1089}
1090
1091/*
1092 * Called when committing the transaction.
1093 * Returns 0 on success.
 1094 * Returns < 0 on error, with the transaction aborted and any outstanding
 1095 * delayed items cleaned up.
1096 */
1097static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1098                                     struct btrfs_root *root, int nr)
1099{
1100        struct btrfs_delayed_root *delayed_root;
1101        struct btrfs_delayed_node *curr_node, *prev_node;
1102        struct btrfs_path *path;
1103        struct btrfs_block_rsv *block_rsv;
1104        int ret = 0;
1105        bool count = (nr > 0);
1106
1107        if (trans->aborted)
1108                return -EIO;
1109
1110        path = btrfs_alloc_path();
1111        if (!path)
1112                return -ENOMEM;
1113        path->leave_spinning = 1;
1114
1115        block_rsv = trans->block_rsv;
1116        trans->block_rsv = &root->fs_info->delayed_block_rsv;
1117
1118        delayed_root = btrfs_get_delayed_root(root);
1119
1120        curr_node = btrfs_first_delayed_node(delayed_root);
1121        while (curr_node && (!count || (count && nr--))) {
1122                ret = __btrfs_commit_inode_delayed_items(trans, path,
1123                                                         curr_node);
1124                if (ret) {
1125                        btrfs_release_delayed_node(curr_node);
1126                        curr_node = NULL;
1127                        btrfs_abort_transaction(trans, root, ret);
1128                        break;
1129                }
1130
1131                prev_node = curr_node;
1132                curr_node = btrfs_next_delayed_node(curr_node);
1133                btrfs_release_delayed_node(prev_node);
1134        }
1135
1136        if (curr_node)
1137                btrfs_release_delayed_node(curr_node);
1138        btrfs_free_path(path);
1139        trans->block_rsv = block_rsv;
1140
1141        return ret;
1142}
1143
1144int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1145                            struct btrfs_root *root)
1146{
1147        return __btrfs_run_delayed_items(trans, root, -1);
1148}
1149
1150int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
1151                               struct btrfs_root *root, int nr)
1152{
1153        return __btrfs_run_delayed_items(trans, root, nr);
1154}
1155
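     /*
      * Flush all delayed items of one inode (insertions, deletions and the
      * inode item update) in the context of the given transaction, using the
      * delayed_block_rsv for the metadata reservations.
      */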
1156int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1157                                     struct inode *inode)
1158{
1159        struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1160        struct btrfs_path *path;
1161        struct btrfs_block_rsv *block_rsv;
1162        int ret;
1163
1164        if (!delayed_node)
1165                return 0;
1166
1167        mutex_lock(&delayed_node->mutex);
1168        if (!delayed_node->count) {
1169                mutex_unlock(&delayed_node->mutex);
1170                btrfs_release_delayed_node(delayed_node);
1171                return 0;
1172        }
1173        mutex_unlock(&delayed_node->mutex);
1174
1175        path = btrfs_alloc_path();
1176        if (!path)
1177                return -ENOMEM;
1178        path->leave_spinning = 1;
1179
1180        block_rsv = trans->block_rsv;
1181        trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1182
1183        ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1184
1185        btrfs_release_delayed_node(delayed_node);
1186        btrfs_free_path(path);
1187        trans->block_rsv = block_rsv;
1188
1189        return ret;
1190}
1191
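     /*
      * Write back only the delayed inode item of @inode.  This joins a
      * transaction of its own, so it can be called without a transaction
      * handle; delayed insertion/deletion items are left untouched.
      */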
1192int btrfs_commit_inode_delayed_inode(struct inode *inode)
1193{
1194        struct btrfs_trans_handle *trans;
1195        struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1196        struct btrfs_path *path;
1197        struct btrfs_block_rsv *block_rsv;
1198        int ret;
1199
1200        if (!delayed_node)
1201                return 0;
1202
1203        mutex_lock(&delayed_node->mutex);
1204        if (!delayed_node->inode_dirty) {
1205                mutex_unlock(&delayed_node->mutex);
1206                btrfs_release_delayed_node(delayed_node);
1207                return 0;
1208        }
1209        mutex_unlock(&delayed_node->mutex);
1210
1211        trans = btrfs_join_transaction(delayed_node->root);
1212        if (IS_ERR(trans)) {
1213                ret = PTR_ERR(trans);
1214                goto out;
1215        }
1216
1217        path = btrfs_alloc_path();
1218        if (!path) {
1219                ret = -ENOMEM;
1220                goto trans_out;
1221        }
1222        path->leave_spinning = 1;
1223
1224        block_rsv = trans->block_rsv;
1225        trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1226
1227        mutex_lock(&delayed_node->mutex);
1228        if (delayed_node->inode_dirty)
1229                ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1230                                                   path, delayed_node);
1231        else
1232                ret = 0;
1233        mutex_unlock(&delayed_node->mutex);
1234
1235        btrfs_free_path(path);
1236        trans->block_rsv = block_rsv;
1237trans_out:
1238        btrfs_end_transaction(trans, delayed_node->root);
1239        btrfs_btree_balance_dirty(delayed_node->root);
1240out:
1241        btrfs_release_delayed_node(delayed_node);
1242
1243        return ret;
1244}
1245
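     /*
      * Detach the delayed node cached in the btrfs inode and drop the
      * reference that the inode held on it.
      */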
1246void btrfs_remove_delayed_node(struct inode *inode)
1247{
1248        struct btrfs_delayed_node *delayed_node;
1249
1250        delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
1251        if (!delayed_node)
1252                return;
1253
1254        BTRFS_I(inode)->delayed_node = NULL;
1255        btrfs_release_delayed_node(delayed_node);
1256}
1257
1258struct btrfs_async_delayed_work {
1259        struct btrfs_delayed_root *delayed_root;
1260        int nr;
1261        struct btrfs_work work;
1262};
1263
1264static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1265{
1266        struct btrfs_async_delayed_work *async_work;
1267        struct btrfs_delayed_root *delayed_root;
1268        struct btrfs_trans_handle *trans;
1269        struct btrfs_path *path;
1270        struct btrfs_delayed_node *delayed_node = NULL;
1271        struct btrfs_root *root;
1272        struct btrfs_block_rsv *block_rsv;
1273        int total_done = 0;
1274
1275        async_work = container_of(work, struct btrfs_async_delayed_work, work);
1276        delayed_root = async_work->delayed_root;
1277
1278        path = btrfs_alloc_path();
1279        if (!path)
1280                goto out;
1281
1282again:
1283        if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
1284                goto free_path;
1285
1286        delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1287        if (!delayed_node)
1288                goto free_path;
1289
1290        path->leave_spinning = 1;
1291        root = delayed_node->root;
1292
1293        trans = btrfs_join_transaction(root);
1294        if (IS_ERR(trans))
1295                goto release_path;
1296
1297        block_rsv = trans->block_rsv;
1298        trans->block_rsv = &root->fs_info->delayed_block_rsv;
1299
1300        __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1301        /*
 1302         * Maybe new delayed items have been inserted, so we need to requeue
 1303         * the work. Besides that, we must dequeue the empty delayed nodes
 1304         * to avoid the race between delayed items balance and the worker.
 1305         * The race goes like this:
1306         *      Task1                           Worker thread
1307         *                                      count == 0, needn't requeue
1308         *                                        also needn't insert the
1309         *                                        delayed node into prepare
1310         *                                        list again.
1311         *      add lots of delayed items
1312         *      queue the delayed node
1313         *        already in the list,
1314         *        and not in the prepare
1315         *        list, it means the delayed
1316         *        node is being dealt with
1317         *        by the worker.
1318         *      do delayed items balance
1319         *        the delayed node is being
1320         *        dealt with by the worker
1321         *        now, just wait.
 1322         *                                      the worker goes idle.
 1323         * Task1 will sleep until the transaction is committed.
1324         */
1325        mutex_lock(&delayed_node->mutex);
1326        btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);
1327        mutex_unlock(&delayed_node->mutex);
1328
1329        trans->block_rsv = block_rsv;
1330        btrfs_end_transaction_dmeta(trans, root);
1331        btrfs_btree_balance_dirty_nodelay(root);
1332
1333release_path:
1334        btrfs_release_path(path);
1335        total_done++;
1336
1337        btrfs_release_prepared_delayed_node(delayed_node);
1338        if (async_work->nr == 0 || total_done < async_work->nr)
1339                goto again;
1340
1341free_path:
1342        btrfs_free_path(path);
1343out:
1344        wake_up(&delayed_root->wait);
1345        kfree(async_work);
1346}
1347
1348
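     /*
      * Kick off background processing of delayed nodes if the number of
      * delayed items is above BTRFS_DELAYED_BACKGROUND.  The async worker
      * flushes prepared nodes until the backlog drops below half the
      * background threshold, or, if @nr is non-zero, until @nr nodes have
      * been processed.
      */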
1349static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1350                                     struct btrfs_root *root, int nr)
1351{
1352        struct btrfs_async_delayed_work *async_work;
1353
1354        if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1355                return 0;
1356
1357        async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1358        if (!async_work)
1359                return -ENOMEM;
1360
1361        async_work->delayed_root = delayed_root;
1362        async_work->work.func = btrfs_async_run_delayed_root;
1363        async_work->work.flags = 0;
1364        async_work->nr = nr;
1365
1366        btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work);
1367        return 0;
1368}
1369
1370void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1371{
1372        struct btrfs_delayed_root *delayed_root;
1373        delayed_root = btrfs_get_delayed_root(root);
1374        WARN_ON(btrfs_first_delayed_node(delayed_root));
1375}
1376
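     /*
      * Return 1 if at least @count delayed items have been finished since
      * @seq was sampled from items_seq (or the sequence counter wrapped),
      * 0 otherwise.
      */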
1377static int refs_newer(struct btrfs_delayed_root *delayed_root,
1378                      int seq, int count)
1379{
1380        int val = atomic_read(&delayed_root->items_seq);
1381
1382        if (val < seq || val >= seq + count)
1383                return 1;
1384        return 0;
1385}
1386
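     /*
      * Throttle delayed item producers: once the backlog exceeds
      * BTRFS_DELAYED_BACKGROUND, kick the async worker; if it has grown past
      * BTRFS_DELAYED_WRITEBACK, also wait until a batch has been processed or
      * the backlog shrinks below the background threshold again.
      */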
1387void btrfs_balance_delayed_items(struct btrfs_root *root)
1388{
1389        struct btrfs_delayed_root *delayed_root;
1390        int seq;
1391
1392        delayed_root = btrfs_get_delayed_root(root);
1393
1394        if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1395                return;
1396
1397        seq = atomic_read(&delayed_root->items_seq);
1398
1399        if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1400                int ret;
1401                DEFINE_WAIT(__wait);
1402
1403                ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
1404                if (ret)
1405                        return;
1406
1407                while (1) {
1408                        prepare_to_wait(&delayed_root->wait, &__wait,
1409                                        TASK_INTERRUPTIBLE);
1410
1411                        if (refs_newer(delayed_root, seq,
1412                                       BTRFS_DELAYED_BATCH) ||
1413                            atomic_read(&delayed_root->items) <
1414                            BTRFS_DELAYED_BACKGROUND) {
1415                                break;
1416                        }
1417                        if (!signal_pending(current))
1418                                schedule();
1419                        else
1420                                break;
1421                }
1422                finish_wait(&delayed_root->wait, &__wait);
1423        }
1424
1425        btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
1426}
1427
1428/* Will return 0 or -ENOMEM */
1429int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1430                                   struct btrfs_root *root, const char *name,
1431                                   int name_len, struct inode *dir,
1432                                   struct btrfs_disk_key *disk_key, u8 type,
1433                                   u64 index)
1434{
1435        struct btrfs_delayed_node *delayed_node;
1436        struct btrfs_delayed_item *delayed_item;
1437        struct btrfs_dir_item *dir_item;
1438        int ret;
1439
1440        delayed_node = btrfs_get_or_create_delayed_node(dir);
1441        if (IS_ERR(delayed_node))
1442                return PTR_ERR(delayed_node);
1443
1444        delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1445        if (!delayed_item) {
1446                ret = -ENOMEM;
1447                goto release_node;
1448        }
1449
1450        delayed_item->key.objectid = btrfs_ino(dir);
1451        btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
1452        delayed_item->key.offset = index;
1453
1454        dir_item = (struct btrfs_dir_item *)delayed_item->data;
1455        dir_item->location = *disk_key;
1456        dir_item->transid = cpu_to_le64(trans->transid);
1457        dir_item->data_len = 0;
1458        dir_item->name_len = cpu_to_le16(name_len);
1459        dir_item->type = type;
1460        memcpy((char *)(dir_item + 1), name, name_len);
1461
1462        ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
1463        /*
 1464         * we have reserved enough space when we started the transaction,
 1465         * so a metadata reservation failure here is impossible
1466         */
1467        BUG_ON(ret);
1468
1469
1470        mutex_lock(&delayed_node->mutex);
1471        ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1472        if (unlikely(ret)) {
 1473                printk(KERN_ERR "err add delayed dir index item (name: %s) into "
 1474                                "the insertion tree of the delayed node "
 1475                                "(root id: %llu, inode id: %llu, errno: %d)\n",
1476                                name,
1477                                (unsigned long long)delayed_node->root->objectid,
1478                                (unsigned long long)delayed_node->inode_id,
1479                                ret);
1480                BUG();
1481        }
1482        mutex_unlock(&delayed_node->mutex);
1483
1484release_node:
1485        btrfs_release_delayed_node(delayed_node);
1486        return ret;
1487}
1488
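/*
 * Try to cancel a dir index insertion that is still pending in the delayed
 * node.  If an insertion item matching @key is found, release its metadata
 * reservation and drop it, then return 0; return 1 if no such item exists.
 */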
1489static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
1490                                               struct btrfs_delayed_node *node,
1491                                               struct btrfs_key *key)
1492{
1493        struct btrfs_delayed_item *item;
1494
1495        mutex_lock(&node->mutex);
1496        item = __btrfs_lookup_delayed_insertion_item(node, key);
1497        if (!item) {
1498                mutex_unlock(&node->mutex);
1499                return 1;
1500        }
1501
1502        btrfs_delayed_item_release_metadata(root, item);
1503        btrfs_release_delayed_item(item);
1504        mutex_unlock(&node->mutex);
1505        return 0;
1506}
1507
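/*
 * Record the deletion of a dir index item in the delayed node of @dir.  If
 * the matching insertion is still pending, the two simply cancel out;
 * otherwise a deletion item is queued and flushed to the fs tree later.
 *
 * Will return 0 or -ENOMEM.
 */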
1508int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1509                                   struct btrfs_root *root, struct inode *dir,
1510                                   u64 index)
1511{
1512        struct btrfs_delayed_node *node;
1513        struct btrfs_delayed_item *item;
1514        struct btrfs_key item_key;
1515        int ret;
1516
1517        node = btrfs_get_or_create_delayed_node(dir);
1518        if (IS_ERR(node))
1519                return PTR_ERR(node);
1520
1521        item_key.objectid = btrfs_ino(dir);
1522        btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY);
1523        item_key.offset = index;
1524
1525        ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
1526        if (!ret)
1527                goto end;
1528
1529        item = btrfs_alloc_delayed_item(0);
1530        if (!item) {
1531                ret = -ENOMEM;
1532                goto end;
1533        }
1534
1535        item->key = item_key;
1536
1537        ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
1538        /*
1539         * We reserved enough space when we started the transaction, so a
1540         * metadata reservation failure here should be impossible.
1541         */
1542        BUG_ON(ret);
1543
1544        mutex_lock(&node->mutex);
1545        ret = __btrfs_add_delayed_deletion_item(node, item);
1546        if (unlikely(ret)) {
1547                printk(KERN_ERR "error adding delayed dir index item (index: %llu) "
1548                                "into the deletion tree of the delayed node "
1549                                "(root id: %llu, inode id: %llu, errno: %d)\n",
1550                                (unsigned long long)index,
1551                                (unsigned long long)node->root->objectid,
1552                                (unsigned long long)node->inode_id,
1553                                ret);
1554                BUG();
1555        }
1556        mutex_unlock(&node->mutex);
1557end:
1558        btrfs_release_delayed_node(node);
1559        return ret;
1560}
1561
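/*
 * Copy the dir index counter cached in the delayed node into the in-memory
 * btrfs inode.  Returns -ENOENT if the inode has no delayed node and -EINVAL
 * if the delayed node has not recorded an index counter yet.
 */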
1562int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1563{
1564        struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1565
1566        if (!delayed_node)
1567                return -ENOENT;
1568
1569        /*
1570         * We hold the i_mutex of this directory, so no new directory index
1571         * can be added to the delayed node and index_cnt cannot be updated
1572         * underneath us.  There is no need to lock the delayed node.
1573         */
1574        if (!delayed_node->index_cnt) {
1575                btrfs_release_delayed_node(delayed_node);
1576                return -EINVAL;
1577        }
1578
1579        BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
1580        btrfs_release_delayed_node(delayed_node);
1581        return 0;
1582}
1583
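/*
 * Collect the pending insertion and deletion items of @inode on @ins_list
 * and @del_list for readdir, taking an extra reference on every item so
 * they stay around while the directory is being read.
 */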
1584void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
1585                             struct list_head *del_list)
1586{
1587        struct btrfs_delayed_node *delayed_node;
1588        struct btrfs_delayed_item *item;
1589
1590        delayed_node = btrfs_get_delayed_node(inode);
1591        if (!delayed_node)
1592                return;
1593
1594        mutex_lock(&delayed_node->mutex);
1595        item = __btrfs_first_delayed_insertion_item(delayed_node);
1596        while (item) {
1597                atomic_inc(&item->refs);
1598                list_add_tail(&item->readdir_list, ins_list);
1599                item = __btrfs_next_delayed_item(item);
1600        }
1601
1602        item = __btrfs_first_delayed_deletion_item(delayed_node);
1603        while (item) {
1604                atomic_inc(&item->refs);
1605                list_add_tail(&item->readdir_list, del_list);
1606                item = __btrfs_next_delayed_item(item);
1607        }
1608        mutex_unlock(&delayed_node->mutex);
1609        /*
1610         * This delayed node is still cached in the btrfs inode, so refs
1611         * must be > 1 now, and we needn't check whether it is about to be
1612         * freed.
1613         *
1614         * Besides that, this function is only used for readdir, and no
1615         * delayed items are inserted or deleted during that time, so we
1616         * needn't requeue or dequeue this delayed node either.
1617         */
1618        atomic_dec(&delayed_node->refs);
1619}
1620
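/*
 * Drop the references taken by btrfs_get_delayed_items() and free any item
 * whose reference count reaches zero.
 */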
1621void btrfs_put_delayed_items(struct list_head *ins_list,
1622                             struct list_head *del_list)
1623{
1624        struct btrfs_delayed_item *curr, *next;
1625
1626        list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1627                list_del(&curr->readdir_list);
1628                if (atomic_dec_and_test(&curr->refs))
1629                        kfree(curr);
1630        }
1631
1632        list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1633                list_del(&curr->readdir_list);
1634                if (atomic_dec_and_test(&curr->refs))
1635                        kfree(curr);
1636        }
1637}
1638
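/*
 * Return 1 if the dir index @index has a pending delayed deletion and should
 * therefore be skipped by readdir.  Entries on @del_list are sorted by index,
 * and entries at or below @index are consumed as the list is walked.
 */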
1639int btrfs_should_delete_dir_index(struct list_head *del_list,
1640                                  u64 index)
1641{
1642        struct btrfs_delayed_item *curr, *next;
1643        int ret;
1644
1645        if (list_empty(del_list))
1646                return 0;
1647
1648        list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1649                if (curr->key.offset > index)
1650                        break;
1651
1652                list_del(&curr->readdir_list);
1653                ret = (curr->key.offset == index);
1654
1655                if (atomic_dec_and_test(&curr->refs))
1656                        kfree(curr);
1657
1658                if (ret)
1659                        return 1;
1662        }
1663        return 0;
1664}
1665
1666/*
1667 * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1668 */
1670int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1671                                    struct list_head *ins_list)
1672{
1673        struct btrfs_dir_item *di;
1674        struct btrfs_delayed_item *curr, *next;
1675        struct btrfs_key location;
1676        char *name;
1677        int name_len;
1678        int over = 0;
1679        unsigned char d_type;
1680
1681        if (list_empty(ins_list))
1682                return 0;
1683
1684        /*
1685         * The data of a delayed item never changes once it has been queued,
1686         * so we needn't lock the items.  And since we hold the i_mutex of
1687         * the directory, nobody can delete any directory index now.
1688         */
1689        list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1690                list_del(&curr->readdir_list);
1691
1692                if (curr->key.offset < ctx->pos) {
1693                        if (atomic_dec_and_test(&curr->refs))
1694                                kfree(curr);
1695                        continue;
1696                }
1697
1698                ctx->pos = curr->key.offset;
1699
1700                di = (struct btrfs_dir_item *)curr->data;
1701                name = (char *)(di + 1);
1702                name_len = le16_to_cpu(di->name_len);
1703
1704                d_type = btrfs_filetype_table[di->type];
1705                btrfs_disk_key_to_cpu(&location, &di->location);
1706
1707                over = !dir_emit(ctx, name, name_len,
1708                               location.objectid, d_type);
1709
1710                if (atomic_dec_and_test(&curr->refs))
1711                        kfree(curr);
1712
1713                if (over)
1714                        return 1;
1715        }
1716        return 0;
1717}
1718
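/*
 * Illustrative sketch (not copied from the actual readdir code) of how the
 * helpers above are meant to be combined by a directory read path; the names
 * ins_list, del_list and found_index are placeholders:
 *
 *	LIST_HEAD(ins_list);
 *	LIST_HEAD(del_list);
 *
 *	btrfs_get_delayed_items(inode, &ins_list, &del_list);
 *	for each dir index item found in the fs tree:
 *		if (btrfs_should_delete_dir_index(&del_list, found_index))
 *			skip the entry;
 *		else
 *			emit the entry as usual;
 *	btrfs_readdir_delayed_dir_index(ctx, &ins_list);
 *	btrfs_put_delayed_items(&ins_list, &del_list);
 */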
1719BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
1720                         generation, 64);
1721BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
1722                         sequence, 64);
1723BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
1724                         transid, 64);
1725BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
1726BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
1727                         nbytes, 64);
1728BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
1729                         block_group, 64);
1730BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
1731BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
1732BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
1733BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
1734BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
1735BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
1736
1737BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
1738BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
1739
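/*
 * Copy the fields of the VFS inode into the stack btrfs_inode_item kept in
 * the delayed node, so that it can be written into the fs tree later.
 */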
1740static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1741                                  struct btrfs_inode_item *inode_item,
1742                                  struct inode *inode)
1743{
1744        btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1745        btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1746        btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1747        btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1748        btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1749        btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1750        btrfs_set_stack_inode_generation(inode_item,
1751                                         BTRFS_I(inode)->generation);
1752        btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
1753        btrfs_set_stack_inode_transid(inode_item, trans->transid);
1754        btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1755        btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1756        btrfs_set_stack_inode_block_group(inode_item, 0);
1757
1758        btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1759                                     inode->i_atime.tv_sec);
1760        btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
1761                                      inode->i_atime.tv_nsec);
1762
1763        btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
1764                                     inode->i_mtime.tv_sec);
1765        btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
1766                                      inode->i_mtime.tv_nsec);
1767
1768        btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
1769                                     inode->i_ctime.tv_sec);
1770        btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
1771                                      inode->i_ctime.tv_nsec);
1772}
1773
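/*
 * Fill the VFS inode from the inode item cached in its delayed node, which
 * is newer than the copy in the fs tree.  Returns -ENOENT if the inode has
 * no delayed node or the cached inode item is not dirty, in which case the
 * caller must read the inode item from the fs tree instead.
 */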
1774int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1775{
1776        struct btrfs_delayed_node *delayed_node;
1777        struct btrfs_inode_item *inode_item;
1778        struct btrfs_timespec *tspec;
1779
1780        delayed_node = btrfs_get_delayed_node(inode);
1781        if (!delayed_node)
1782                return -ENOENT;
1783
1784        mutex_lock(&delayed_node->mutex);
1785        if (!delayed_node->inode_dirty) {
1786                mutex_unlock(&delayed_node->mutex);
1787                btrfs_release_delayed_node(delayed_node);
1788                return -ENOENT;
1789        }
1790
1791        inode_item = &delayed_node->inode_item;
1792
1793        i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1794        i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1795        btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
1796        inode->i_mode = btrfs_stack_inode_mode(inode_item);
1797        set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1798        inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1799        BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1800        inode->i_version = btrfs_stack_inode_sequence(inode_item);
1801        inode->i_rdev = 0;
1802        *rdev = btrfs_stack_inode_rdev(inode_item);
1803        BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1804
1805        tspec = btrfs_inode_atime(inode_item);
1806        inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
1807        inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1808
1809        tspec = btrfs_inode_mtime(inode_item);
1810        inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
1811        inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1812
1813        tspec = btrfs_inode_ctime(inode_item);
1814        inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
1815        inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1816
1817        inode->i_generation = BTRFS_I(inode)->generation;
1818        BTRFS_I(inode)->index_cnt = (u64)-1;
1819
1820        mutex_unlock(&delayed_node->mutex);
1821        btrfs_release_delayed_node(delayed_node);
1822        return 0;
1823}
1824
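/*
 * Record an inode update in the delayed node instead of updating the fs tree
 * immediately.  The first update of an inode reserves the metadata space that
 * the later insertion of the inode item will need.
 */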
1825int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1826                               struct btrfs_root *root, struct inode *inode)
1827{
1828        struct btrfs_delayed_node *delayed_node;
1829        int ret = 0;
1830
1831        delayed_node = btrfs_get_or_create_delayed_node(inode);
1832        if (IS_ERR(delayed_node))
1833                return PTR_ERR(delayed_node);
1834
1835        mutex_lock(&delayed_node->mutex);
1836        if (delayed_node->inode_dirty) {
1837                fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1838                goto release_node;
1839        }
1840
1841        ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
1842                                                   delayed_node);
1843        if (ret)
1844                goto release_node;
1845
1846        fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1847        delayed_node->inode_dirty = 1;
1848        delayed_node->count++;
1849        atomic_inc(&root->fs_info->delayed_root->items);
1850release_node:
1851        mutex_unlock(&delayed_node->mutex);
1852        btrfs_release_delayed_node(delayed_node);
1853        return ret;
1854}
1855
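/*
 * Throw away all pending insertion and deletion items of a delayed node, as
 * well as its dirty inode item if there is one, releasing all the metadata
 * space that was reserved for them.
 */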
1856static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1857{
1858        struct btrfs_root *root = delayed_node->root;
1859        struct btrfs_delayed_item *curr_item, *prev_item;
1860
1861        mutex_lock(&delayed_node->mutex);
1862        curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1863        while (curr_item) {
1864                btrfs_delayed_item_release_metadata(root, curr_item);
1865                prev_item = curr_item;
1866                curr_item = __btrfs_next_delayed_item(prev_item);
1867                btrfs_release_delayed_item(prev_item);
1868        }
1869
1870        curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1871        while (curr_item) {
1872                btrfs_delayed_item_release_metadata(root, curr_item);
1873                prev_item = curr_item;
1874                curr_item = __btrfs_next_delayed_item(prev_item);
1875                btrfs_release_delayed_item(prev_item);
1876        }
1877
1878        if (delayed_node->inode_dirty) {
1879                btrfs_delayed_inode_release_metadata(root, delayed_node);
1880                btrfs_release_delayed_inode(delayed_node);
1881        }
1882        mutex_unlock(&delayed_node->mutex);
1883}
1884
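/*
 * Throw away every delayed item of @inode, used when the inode is going away
 * and its pending delayed updates must be dropped.
 */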
1885void btrfs_kill_delayed_inode_items(struct inode *inode)
1886{
1887        struct btrfs_delayed_node *delayed_node;
1888
1889        delayed_node = btrfs_get_delayed_node(inode);
1890        if (!delayed_node)
1891                return;
1892
1893        __btrfs_kill_delayed_node(delayed_node);
1894        btrfs_release_delayed_node(delayed_node);
1895}
1896
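/*
 * Throw away the delayed items of every inode of @root.  Delayed nodes are
 * looked up in batches from the root's radix tree with only a reference
 * taken under inode_lock, then killed and released outside of the lock.
 */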
1897void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1898{
1899        u64 inode_id = 0;
1900        struct btrfs_delayed_node *delayed_nodes[8];
1901        int i, n;
1902
1903        while (1) {
1904                spin_lock(&root->inode_lock);
1905                n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1906                                           (void **)delayed_nodes, inode_id,
1907                                           ARRAY_SIZE(delayed_nodes));
1908                if (!n) {
1909                        spin_unlock(&root->inode_lock);
1910                        break;
1911                }
1912
1913                inode_id = delayed_nodes[n - 1]->inode_id + 1;
1914
1915                for (i = 0; i < n; i++)
1916                        atomic_inc(&delayed_nodes[i]->refs);
1917                spin_unlock(&root->inode_lock);
1918
1919                for (i = 0; i < n; i++) {
1920                        __btrfs_kill_delayed_node(delayed_nodes[i]);
1921                        btrfs_release_delayed_node(delayed_nodes[i]);
1922                }
1923        }
1924}
1925
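/*
 * Throw away every delayed node still queued on the delayed root, releasing
 * their reserved metadata.  Used by the transaction cleanup code when a
 * transaction is aborted.
 */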
1926void btrfs_destroy_delayed_inodes(struct btrfs_root *root)
1927{
1928        struct btrfs_delayed_root *delayed_root;
1929        struct btrfs_delayed_node *curr_node, *prev_node;
1930
1931        delayed_root = btrfs_get_delayed_root(root);
1932
1933        curr_node = btrfs_first_delayed_node(delayed_root);
1934        while (curr_node) {
1935                __btrfs_kill_delayed_node(curr_node);
1936
1937                prev_node = curr_node;
1938                curr_node = btrfs_next_delayed_node(curr_node);
1939                btrfs_release_delayed_node(prev_node);
1940        }
1941}
1942
1943