linux/fs/btrfs/delayed-inode.c
   1/*
   2 * Copyright (C) 2011 Fujitsu.  All rights reserved.
   3 * Written by Miao Xie <miaox@cn.fujitsu.com>
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public
   7 * License v2 as published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12 * General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public
  15 * License along with this program; if not, write to the
  16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   17 * Boston, MA 02111-1307, USA.
  18 */
  19
  20#include <linux/slab.h>
  21#include "delayed-inode.h"
  22#include "disk-io.h"
  23#include "transaction.h"
  24
  25#define BTRFS_DELAYED_WRITEBACK         512
  26#define BTRFS_DELAYED_BACKGROUND        128
  27#define BTRFS_DELAYED_BATCH             16
  28
  29static struct kmem_cache *delayed_node_cache;
  30
  31int __init btrfs_delayed_inode_init(void)
  32{
  33        delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
  34                                        sizeof(struct btrfs_delayed_node),
  35                                        0,
  36                                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
  37                                        NULL);
  38        if (!delayed_node_cache)
  39                return -ENOMEM;
  40        return 0;
  41}
  42
  43void btrfs_delayed_inode_exit(void)
  44{
  45        if (delayed_node_cache)
  46                kmem_cache_destroy(delayed_node_cache);
  47}
  48
  49static inline void btrfs_init_delayed_node(
  50                                struct btrfs_delayed_node *delayed_node,
  51                                struct btrfs_root *root, u64 inode_id)
  52{
  53        delayed_node->root = root;
  54        delayed_node->inode_id = inode_id;
  55        atomic_set(&delayed_node->refs, 0);
  56        delayed_node->count = 0;
  57        delayed_node->in_list = 0;
  58        delayed_node->inode_dirty = 0;
  59        delayed_node->ins_root = RB_ROOT;
  60        delayed_node->del_root = RB_ROOT;
  61        mutex_init(&delayed_node->mutex);
  62        delayed_node->index_cnt = 0;
  63        INIT_LIST_HEAD(&delayed_node->n_list);
  64        INIT_LIST_HEAD(&delayed_node->p_list);
  65        delayed_node->bytes_reserved = 0;
  66        memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item));
  67}
  68
  69static inline int btrfs_is_continuous_delayed_item(
  70                                        struct btrfs_delayed_item *item1,
  71                                        struct btrfs_delayed_item *item2)
  72{
  73        if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
  74            item1->key.objectid == item2->key.objectid &&
  75            item1->key.type == item2->key.type &&
  76            item1->key.offset + 1 == item2->key.offset)
  77                return 1;
  78        return 0;
  79}
  80
  81static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
  82                                                        struct btrfs_root *root)
  83{
  84        return root->fs_info->delayed_root;
  85}
  86
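/*
 * Look up the delayed node for an inode: check the pointer cached in the
 * btrfs inode first, then fall back to the per-root radix tree and cache the
 * result.  A reference is taken on the node before it is returned; NULL means
 * the inode has no delayed node yet.
 */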
  87static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
  88{
  89        struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
  90        struct btrfs_root *root = btrfs_inode->root;
  91        u64 ino = btrfs_ino(inode);
  92        struct btrfs_delayed_node *node;
  93
  94        node = ACCESS_ONCE(btrfs_inode->delayed_node);
  95        if (node) {
  96                atomic_inc(&node->refs);
  97                return node;
  98        }
  99
 100        spin_lock(&root->inode_lock);
 101        node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
 102        if (node) {
 103                if (btrfs_inode->delayed_node) {
 104                        atomic_inc(&node->refs);        /* can be accessed */
 105                        BUG_ON(btrfs_inode->delayed_node != node);
 106                        spin_unlock(&root->inode_lock);
 107                        return node;
 108                }
 109                btrfs_inode->delayed_node = node;
 110                atomic_inc(&node->refs);        /* can be accessed */
 111                atomic_inc(&node->refs);        /* cached in the inode */
 112                spin_unlock(&root->inode_lock);
 113                return node;
 114        }
 115        spin_unlock(&root->inode_lock);
 116
 117        return NULL;
 118}
 119
 120/* Will return either the node or PTR_ERR(-ENOMEM) */
 121static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
 122                                                        struct inode *inode)
 123{
 124        struct btrfs_delayed_node *node;
 125        struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
 126        struct btrfs_root *root = btrfs_inode->root;
 127        u64 ino = btrfs_ino(inode);
 128        int ret;
 129
 130again:
 131        node = btrfs_get_delayed_node(inode);
 132        if (node)
 133                return node;
 134
 135        node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
 136        if (!node)
 137                return ERR_PTR(-ENOMEM);
 138        btrfs_init_delayed_node(node, root, ino);
 139
 140        atomic_inc(&node->refs);        /* cached in the btrfs inode */
 141        atomic_inc(&node->refs);        /* can be accessed */
 142
 143        ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
 144        if (ret) {
 145                kmem_cache_free(delayed_node_cache, node);
 146                return ERR_PTR(ret);
 147        }
 148
 149        spin_lock(&root->inode_lock);
 150        ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
 151        if (ret == -EEXIST) {
 152                kmem_cache_free(delayed_node_cache, node);
 153                spin_unlock(&root->inode_lock);
 154                radix_tree_preload_end();
 155                goto again;
 156        }
 157        btrfs_inode->delayed_node = node;
 158        spin_unlock(&root->inode_lock);
 159        radix_tree_preload_end();
 160
 161        return node;
 162}
 163
 164/*
 165 * Call it when holding delayed_node->mutex
 166 *
 167 * If mod = 1, add this node into the prepared list.
 168 */
 169static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
 170                                     struct btrfs_delayed_node *node,
 171                                     int mod)
 172{
 173        spin_lock(&root->lock);
 174        if (node->in_list) {
 175                if (!list_empty(&node->p_list))
 176                        list_move_tail(&node->p_list, &root->prepare_list);
 177                else if (mod)
 178                        list_add_tail(&node->p_list, &root->prepare_list);
 179        } else {
 180                list_add_tail(&node->n_list, &root->node_list);
 181                list_add_tail(&node->p_list, &root->prepare_list);
 182                atomic_inc(&node->refs);        /* inserted into list */
 183                root->nodes++;
 184                node->in_list = 1;
 185        }
 186        spin_unlock(&root->lock);
 187}
 188
 189/* Call it when holding delayed_node->mutex */
 190static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
 191                                       struct btrfs_delayed_node *node)
 192{
 193        spin_lock(&root->lock);
 194        if (node->in_list) {
 195                root->nodes--;
 196                atomic_dec(&node->refs);        /* not in the list */
 197                list_del_init(&node->n_list);
 198                if (!list_empty(&node->p_list))
 199                        list_del_init(&node->p_list);
 200                node->in_list = 0;
 201        }
 202        spin_unlock(&root->lock);
 203}
 204
 205static struct btrfs_delayed_node *btrfs_first_delayed_node(
 206                        struct btrfs_delayed_root *delayed_root)
 207{
 208        struct list_head *p;
 209        struct btrfs_delayed_node *node = NULL;
 210
 211        spin_lock(&delayed_root->lock);
 212        if (list_empty(&delayed_root->node_list))
 213                goto out;
 214
 215        p = delayed_root->node_list.next;
 216        node = list_entry(p, struct btrfs_delayed_node, n_list);
 217        atomic_inc(&node->refs);
 218out:
 219        spin_unlock(&delayed_root->lock);
 220
 221        return node;
 222}
 223
 224static struct btrfs_delayed_node *btrfs_next_delayed_node(
 225                                                struct btrfs_delayed_node *node)
 226{
 227        struct btrfs_delayed_root *delayed_root;
 228        struct list_head *p;
 229        struct btrfs_delayed_node *next = NULL;
 230
 231        delayed_root = node->root->fs_info->delayed_root;
 232        spin_lock(&delayed_root->lock);
 233        if (!node->in_list) {   /* not in the list */
 234                if (list_empty(&delayed_root->node_list))
 235                        goto out;
 236                p = delayed_root->node_list.next;
 237        } else if (list_is_last(&node->n_list, &delayed_root->node_list))
 238                goto out;
 239        else
 240                p = node->n_list.next;
 241
 242        next = list_entry(p, struct btrfs_delayed_node, n_list);
 243        atomic_inc(&next->refs);
 244out:
 245        spin_unlock(&delayed_root->lock);
 246
 247        return next;
 248}
 249
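/*
 * Drop a reference on a delayed node.  If it still has pending items it is
 * (re)queued on the delayed root, otherwise it is dequeued.  When the last
 * reference goes away the node is also removed from the radix tree and freed.
 */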
 250static void __btrfs_release_delayed_node(
 251                                struct btrfs_delayed_node *delayed_node,
 252                                int mod)
 253{
 254        struct btrfs_delayed_root *delayed_root;
 255
 256        if (!delayed_node)
 257                return;
 258
 259        delayed_root = delayed_node->root->fs_info->delayed_root;
 260
 261        mutex_lock(&delayed_node->mutex);
 262        if (delayed_node->count)
 263                btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
 264        else
 265                btrfs_dequeue_delayed_node(delayed_root, delayed_node);
 266        mutex_unlock(&delayed_node->mutex);
 267
 268        if (atomic_dec_and_test(&delayed_node->refs)) {
 269                struct btrfs_root *root = delayed_node->root;
 270                spin_lock(&root->inode_lock);
 271                if (atomic_read(&delayed_node->refs) == 0) {
 272                        radix_tree_delete(&root->delayed_nodes_tree,
 273                                          delayed_node->inode_id);
 274                        kmem_cache_free(delayed_node_cache, delayed_node);
 275                }
 276                spin_unlock(&root->inode_lock);
 277        }
 278}
 279
 280static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
 281{
 282        __btrfs_release_delayed_node(node, 0);
 283}
 284
 285static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
 286                                        struct btrfs_delayed_root *delayed_root)
 287{
 288        struct list_head *p;
 289        struct btrfs_delayed_node *node = NULL;
 290
 291        spin_lock(&delayed_root->lock);
 292        if (list_empty(&delayed_root->prepare_list))
 293                goto out;
 294
 295        p = delayed_root->prepare_list.next;
 296        list_del_init(p);
 297        node = list_entry(p, struct btrfs_delayed_node, p_list);
 298        atomic_inc(&node->refs);
 299out:
 300        spin_unlock(&delayed_root->lock);
 301
 302        return node;
 303}
 304
 305static inline void btrfs_release_prepared_delayed_node(
 306                                        struct btrfs_delayed_node *node)
 307{
 308        __btrfs_release_delayed_node(node, 1);
 309}
 310
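/*
 * Allocate a delayed item with data_len bytes of payload appended.  The
 * payload (item->data) holds the on-disk item body, e.g. a btrfs_dir_item
 * followed by the name for a dir index insertion.
 */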
 311static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
 312{
 313        struct btrfs_delayed_item *item;
 314        item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
 315        if (item) {
 316                item->data_len = data_len;
 317                item->ins_or_del = 0;
 318                item->bytes_reserved = 0;
 319                item->delayed_node = NULL;
 320                atomic_set(&item->refs, 1);
 321        }
 322        return item;
 323}
 324
 325/*
 326 * __btrfs_lookup_delayed_item - look up the delayed item by key
  327 * @root:         the rb-tree root to search (ins_root or del_root)
 328 * @key:          the key to look up
 329 * @prev:         used to store the prev item if the right item isn't found
 330 * @next:         used to store the next item if the right item isn't found
 331 *
 332 * Note: if we don't find the right item, we will return the prev item and
 333 * the next item.
 334 */
 335static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
 336                                struct rb_root *root,
 337                                struct btrfs_key *key,
 338                                struct btrfs_delayed_item **prev,
 339                                struct btrfs_delayed_item **next)
 340{
 341        struct rb_node *node, *prev_node = NULL;
 342        struct btrfs_delayed_item *delayed_item = NULL;
 343        int ret = 0;
 344
 345        node = root->rb_node;
 346
 347        while (node) {
 348                delayed_item = rb_entry(node, struct btrfs_delayed_item,
 349                                        rb_node);
 350                prev_node = node;
 351                ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
 352                if (ret < 0)
 353                        node = node->rb_right;
 354                else if (ret > 0)
 355                        node = node->rb_left;
 356                else
 357                        return delayed_item;
 358        }
 359
 360        if (prev) {
 361                if (!prev_node)
 362                        *prev = NULL;
 363                else if (ret < 0)
 364                        *prev = delayed_item;
 365                else if ((node = rb_prev(prev_node)) != NULL) {
 366                        *prev = rb_entry(node, struct btrfs_delayed_item,
 367                                         rb_node);
 368                } else
 369                        *prev = NULL;
 370        }
 371
 372        if (next) {
 373                if (!prev_node)
 374                        *next = NULL;
 375                else if (ret > 0)
 376                        *next = delayed_item;
 377                else if ((node = rb_next(prev_node)) != NULL) {
 378                        *next = rb_entry(node, struct btrfs_delayed_item,
 379                                         rb_node);
 380                } else
 381                        *next = NULL;
 382        }
 383        return NULL;
 384}
 385
 386static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
 387                                        struct btrfs_delayed_node *delayed_node,
 388                                        struct btrfs_key *key)
 389{
 390        struct btrfs_delayed_item *item;
 391
 392        item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
 393                                           NULL, NULL);
 394        return item;
 395}
 396
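/*
 * Link a delayed item into the insertion or deletion rb-tree of the delayed
 * node, keyed by its btrfs key.  Returns -EEXIST if an item with the same key
 * is already queued.  Dir index insertions also push index_cnt forward, and
 * every queued item is counted in the node and in the global delayed root.
 */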
 397static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
 398                                    struct btrfs_delayed_item *ins,
 399                                    int action)
 400{
 401        struct rb_node **p, *node;
 402        struct rb_node *parent_node = NULL;
 403        struct rb_root *root;
 404        struct btrfs_delayed_item *item;
 405        int cmp;
 406
 407        if (action == BTRFS_DELAYED_INSERTION_ITEM)
 408                root = &delayed_node->ins_root;
 409        else if (action == BTRFS_DELAYED_DELETION_ITEM)
 410                root = &delayed_node->del_root;
 411        else
 412                BUG();
 413        p = &root->rb_node;
 414        node = &ins->rb_node;
 415
 416        while (*p) {
 417                parent_node = *p;
 418                item = rb_entry(parent_node, struct btrfs_delayed_item,
 419                                 rb_node);
 420
 421                cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
 422                if (cmp < 0)
 423                        p = &(*p)->rb_right;
 424                else if (cmp > 0)
 425                        p = &(*p)->rb_left;
 426                else
 427                        return -EEXIST;
 428        }
 429
 430        rb_link_node(node, parent_node, p);
 431        rb_insert_color(node, root);
 432        ins->delayed_node = delayed_node;
 433        ins->ins_or_del = action;
 434
 435        if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
 436            action == BTRFS_DELAYED_INSERTION_ITEM &&
 437            ins->key.offset >= delayed_node->index_cnt)
 438                        delayed_node->index_cnt = ins->key.offset + 1;
 439
 440        delayed_node->count++;
 441        atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
 442        return 0;
 443}
 444
 445static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
 446                                              struct btrfs_delayed_item *item)
 447{
 448        return __btrfs_add_delayed_item(node, item,
 449                                        BTRFS_DELAYED_INSERTION_ITEM);
 450}
 451
 452static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
 453                                             struct btrfs_delayed_item *item)
 454{
 455        return __btrfs_add_delayed_item(node, item,
 456                                        BTRFS_DELAYED_DELETION_ITEM);
 457}
 458
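/*
 * Account one finished delayed item: bump items_seq, drop the global item
 * count and wake up anyone waiting on the delayed root once the backlog falls
 * below BTRFS_DELAYED_BACKGROUND or a whole batch has completed.
 */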
 459static void finish_one_item(struct btrfs_delayed_root *delayed_root)
 460{
 461        int seq = atomic_inc_return(&delayed_root->items_seq);
 462        if ((atomic_dec_return(&delayed_root->items) <
 463            BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
 464            waitqueue_active(&delayed_root->wait))
 465                wake_up(&delayed_root->wait);
 466}
 467
 468static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
 469{
 470        struct rb_root *root;
 471        struct btrfs_delayed_root *delayed_root;
 472
 473        delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
 474
 475        BUG_ON(!delayed_root);
 476        BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
 477               delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
 478
 479        if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
 480                root = &delayed_item->delayed_node->ins_root;
 481        else
 482                root = &delayed_item->delayed_node->del_root;
 483
 484        rb_erase(&delayed_item->rb_node, root);
 485        delayed_item->delayed_node->count--;
 486
 487        finish_one_item(delayed_root);
 488}
 489
 490static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
 491{
 492        if (item) {
 493                __btrfs_remove_delayed_item(item);
 494                if (atomic_dec_and_test(&item->refs))
 495                        kfree(item);
 496        }
 497}
 498
 499static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
 500                                        struct btrfs_delayed_node *delayed_node)
 501{
 502        struct rb_node *p;
 503        struct btrfs_delayed_item *item = NULL;
 504
 505        p = rb_first(&delayed_node->ins_root);
 506        if (p)
 507                item = rb_entry(p, struct btrfs_delayed_item, rb_node);
 508
 509        return item;
 510}
 511
 512static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
 513                                        struct btrfs_delayed_node *delayed_node)
 514{
 515        struct rb_node *p;
 516        struct btrfs_delayed_item *item = NULL;
 517
 518        p = rb_first(&delayed_node->del_root);
 519        if (p)
 520                item = rb_entry(p, struct btrfs_delayed_item, rb_node);
 521
 522        return item;
 523}
 524
 525static struct btrfs_delayed_item *__btrfs_next_delayed_item(
 526                                                struct btrfs_delayed_item *item)
 527{
 528        struct rb_node *p;
 529        struct btrfs_delayed_item *next = NULL;
 530
 531        p = rb_next(&item->rb_node);
 532        if (p)
 533                next = rb_entry(p, struct btrfs_delayed_item, rb_node);
 534
 535        return next;
 536}
 537
 538static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
 539                                                   u64 root_id)
 540{
 541        struct btrfs_key root_key;
 542
 543        if (root->objectid == root_id)
 544                return root;
 545
 546        root_key.objectid = root_id;
 547        root_key.type = BTRFS_ROOT_ITEM_KEY;
 548        root_key.offset = (u64)-1;
 549        return btrfs_read_fs_root_no_name(root->fs_info, &root_key);
 550}
 551
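/*
 * Reserve metadata space for one delayed item by migrating it from the
 * transaction's block_rsv into the global delayed_block_rsv.  Transactions
 * that reserved no bytes themselves are left alone.
 */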
 552static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
 553                                               struct btrfs_root *root,
 554                                               struct btrfs_delayed_item *item)
 555{
 556        struct btrfs_block_rsv *src_rsv;
 557        struct btrfs_block_rsv *dst_rsv;
 558        u64 num_bytes;
 559        int ret;
 560
 561        if (!trans->bytes_reserved)
 562                return 0;
 563
 564        src_rsv = trans->block_rsv;
 565        dst_rsv = &root->fs_info->delayed_block_rsv;
 566
 567        num_bytes = btrfs_calc_trans_metadata_size(root, 1);
 568        ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
 569        if (!ret) {
 570                trace_btrfs_space_reservation(root->fs_info, "delayed_item",
 571                                              item->key.objectid,
 572                                              num_bytes, 1);
 573                item->bytes_reserved = num_bytes;
 574        }
 575
 576        return ret;
 577}
 578
 579static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
 580                                                struct btrfs_delayed_item *item)
 581{
 582        struct btrfs_block_rsv *rsv;
 583
 584        if (!item->bytes_reserved)
 585                return;
 586
 587        rsv = &root->fs_info->delayed_block_rsv;
 588        trace_btrfs_space_reservation(root->fs_info, "delayed_item",
 589                                      item->key.objectid, item->bytes_reserved,
 590                                      0);
 591        btrfs_block_rsv_release(root, rsv,
 592                                item->bytes_reserved);
 593}
 594
 595static int btrfs_delayed_inode_reserve_metadata(
 596                                        struct btrfs_trans_handle *trans,
 597                                        struct btrfs_root *root,
 598                                        struct inode *inode,
 599                                        struct btrfs_delayed_node *node)
 600{
 601        struct btrfs_block_rsv *src_rsv;
 602        struct btrfs_block_rsv *dst_rsv;
 603        u64 num_bytes;
 604        int ret;
 605        bool release = false;
 606
 607        src_rsv = trans->block_rsv;
 608        dst_rsv = &root->fs_info->delayed_block_rsv;
 609
 610        num_bytes = btrfs_calc_trans_metadata_size(root, 1);
 611
 612        /*
 613         * btrfs_dirty_inode will update the inode under btrfs_join_transaction
 614         * which doesn't reserve space for speed.  This is a problem since we
 615         * still need to reserve space for this update, so try to reserve the
 616         * space.
 617         *
 618         * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
 619         * we're accounted for.
 620         */
 621        if (!src_rsv || (!trans->bytes_reserved &&
 622                         src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
 623                ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
 624                                          BTRFS_RESERVE_NO_FLUSH);
 625                /*
 626                 * Since we're under a transaction reserve_metadata_bytes could
 627                 * try to commit the transaction which will make it return
 628                 * EAGAIN to make us stop the transaction we have, so return
 629                 * ENOSPC instead so that btrfs_dirty_inode knows what to do.
 630                 */
 631                if (ret == -EAGAIN)
 632                        ret = -ENOSPC;
 633                if (!ret) {
 634                        node->bytes_reserved = num_bytes;
 635                        trace_btrfs_space_reservation(root->fs_info,
 636                                                      "delayed_inode",
 637                                                      btrfs_ino(inode),
 638                                                      num_bytes, 1);
 639                }
 640                return ret;
 641        } else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
 642                spin_lock(&BTRFS_I(inode)->lock);
 643                if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
 644                                       &BTRFS_I(inode)->runtime_flags)) {
 645                        spin_unlock(&BTRFS_I(inode)->lock);
 646                        release = true;
 647                        goto migrate;
 648                }
 649                spin_unlock(&BTRFS_I(inode)->lock);
 650
 651                /* Ok we didn't have space pre-reserved.  This shouldn't happen
 652                 * too often but it can happen if we do delalloc to an existing
 653                 * inode which gets dirtied because of the time update, and then
 654                 * isn't touched again until after the transaction commits and
 655                 * then we try to write out the data.  First try to be nice and
 656                 * reserve something strictly for us.  If not be a pain and try
 657                 * to steal from the delalloc block rsv.
 658                 */
 659                ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
 660                                          BTRFS_RESERVE_NO_FLUSH);
 661                if (!ret)
 662                        goto out;
 663
 664                ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
 665                if (!ret)
 666                        goto out;
 667
 668                /*
 669                 * Ok this is a problem, let's just steal from the global rsv
 670                 * since this really shouldn't happen that often.
 671                 */
 672                WARN_ON(1);
 673                ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
 674                                              dst_rsv, num_bytes);
 675                goto out;
 676        }
 677
 678migrate:
 679        ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
 680
 681out:
 682        /*
 683         * Migrate only takes a reservation, it doesn't touch the size of the
  684         * block_rsv.  This keeps things simple for callers that don't normally have
  685         * things migrated from their block rsv.  If they go to release their
 686         * reservation, that will decrease the size as well, so if migrate
 687         * reduced size we'd end up with a negative size.  But for the
 688         * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
 689         * but we could in fact do this reserve/migrate dance several times
 690         * between the time we did the original reservation and we'd clean it
 691         * up.  So to take care of this, release the space for the meta
 692         * reservation here.  I think it may be time for a documentation page on
  693         * how block rsvs work.
 694         */
 695        if (!ret) {
 696                trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
 697                                              btrfs_ino(inode), num_bytes, 1);
 698                node->bytes_reserved = num_bytes;
 699        }
 700
 701        if (release) {
 702                trace_btrfs_space_reservation(root->fs_info, "delalloc",
 703                                              btrfs_ino(inode), num_bytes, 0);
 704                btrfs_block_rsv_release(root, src_rsv, num_bytes);
 705        }
 706
 707        return ret;
 708}
 709
 710static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
 711                                                struct btrfs_delayed_node *node)
 712{
 713        struct btrfs_block_rsv *rsv;
 714
 715        if (!node->bytes_reserved)
 716                return;
 717
 718        rsv = &root->fs_info->delayed_block_rsv;
 719        trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
 720                                      node->inode_id, node->bytes_reserved, 0);
 721        btrfs_block_rsv_release(root, rsv,
 722                                node->bytes_reserved);
 723        node->bytes_reserved = 0;
 724}
 725
 726/*
 727 * This helper will insert some continuous items into the same leaf according
 728 * to the free space of the leaf.
 729 */
 730static int btrfs_batch_insert_items(struct btrfs_root *root,
 731                                    struct btrfs_path *path,
 732                                    struct btrfs_delayed_item *item)
 733{
 734        struct btrfs_delayed_item *curr, *next;
 735        int free_space;
 736        int total_data_size = 0, total_size = 0;
 737        struct extent_buffer *leaf;
 738        char *data_ptr;
 739        struct btrfs_key *keys;
 740        u32 *data_size;
 741        struct list_head head;
 742        int slot;
 743        int nitems;
 744        int i;
 745        int ret = 0;
 746
 747        BUG_ON(!path->nodes[0]);
 748
 749        leaf = path->nodes[0];
 750        free_space = btrfs_leaf_free_space(root, leaf);
 751        INIT_LIST_HEAD(&head);
 752
 753        next = item;
 754        nitems = 0;
 755
 756        /*
  757         * count the number of continuous items that we can insert in a batch
 758         */
 759        while (total_size + next->data_len + sizeof(struct btrfs_item) <=
 760               free_space) {
 761                total_data_size += next->data_len;
 762                total_size += next->data_len + sizeof(struct btrfs_item);
 763                list_add_tail(&next->tree_list, &head);
 764                nitems++;
 765
 766                curr = next;
 767                next = __btrfs_next_delayed_item(curr);
 768                if (!next)
 769                        break;
 770
 771                if (!btrfs_is_continuous_delayed_item(curr, next))
 772                        break;
 773        }
 774
 775        if (!nitems) {
 776                ret = 0;
 777                goto out;
 778        }
 779
 780        /*
  781         * we need to allocate some memory space, but that might cause the task
  782         * to sleep, so we set all the locked nodes in the path to blocking locks
 783         * first.
 784         */
 785        btrfs_set_path_blocking(path);
 786
 787        keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
 788        if (!keys) {
 789                ret = -ENOMEM;
 790                goto out;
 791        }
 792
 793        data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
 794        if (!data_size) {
 795                ret = -ENOMEM;
 796                goto error;
 797        }
 798
 799        /* get keys of all the delayed items */
 800        i = 0;
 801        list_for_each_entry(next, &head, tree_list) {
 802                keys[i] = next->key;
 803                data_size[i] = next->data_len;
 804                i++;
 805        }
 806
  807        /* reset all the locked nodes in the path to spinning locks. */
 808        btrfs_clear_path_blocking(path, NULL, 0);
 809
 810        /* insert the keys of the items */
 811        setup_items_for_insert(root, path, keys, data_size,
 812                               total_data_size, total_size, nitems);
 813
 814        /* insert the dir index items */
 815        slot = path->slots[0];
 816        list_for_each_entry_safe(curr, next, &head, tree_list) {
 817                data_ptr = btrfs_item_ptr(leaf, slot, char);
 818                write_extent_buffer(leaf, &curr->data,
 819                                    (unsigned long)data_ptr,
 820                                    curr->data_len);
 821                slot++;
 822
 823                btrfs_delayed_item_release_metadata(root, curr);
 824
 825                list_del(&curr->tree_list);
 826                btrfs_release_delayed_item(curr);
 827        }
 828
 829error:
 830        kfree(data_size);
 831        kfree(keys);
 832out:
 833        return ret;
 834}
 835
 836/*
  837 * This helper handles simple insertions that don't need to extend an existing
  838 * item with new data, such as directory name index and inode insertions.
 839 */
 840static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
 841                                     struct btrfs_root *root,
 842                                     struct btrfs_path *path,
 843                                     struct btrfs_delayed_item *delayed_item)
 844{
 845        struct extent_buffer *leaf;
 846        char *ptr;
 847        int ret;
 848
 849        ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
 850                                      delayed_item->data_len);
 851        if (ret < 0 && ret != -EEXIST)
 852                return ret;
 853
 854        leaf = path->nodes[0];
 855
 856        ptr = btrfs_item_ptr(leaf, path->slots[0], char);
 857
 858        write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
 859                            delayed_item->data_len);
 860        btrfs_mark_buffer_dirty(leaf);
 861
 862        btrfs_delayed_item_release_metadata(root, delayed_item);
 863        return 0;
 864}
 865
 866/*
 867 * we insert an item first, then if there are some continuous items, we try
 868 * to insert those items into the same leaf.
 869 */
 870static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
 871                                      struct btrfs_path *path,
 872                                      struct btrfs_root *root,
 873                                      struct btrfs_delayed_node *node)
 874{
 875        struct btrfs_delayed_item *curr, *prev;
 876        int ret = 0;
 877
 878do_again:
 879        mutex_lock(&node->mutex);
 880        curr = __btrfs_first_delayed_insertion_item(node);
 881        if (!curr)
 882                goto insert_end;
 883
 884        ret = btrfs_insert_delayed_item(trans, root, path, curr);
 885        if (ret < 0) {
 886                btrfs_release_path(path);
 887                goto insert_end;
 888        }
 889
 890        prev = curr;
 891        curr = __btrfs_next_delayed_item(prev);
 892        if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
 893                /* insert the continuous items into the same leaf */
 894                path->slots[0]++;
 895                btrfs_batch_insert_items(root, path, curr);
 896        }
 897        btrfs_release_delayed_item(prev);
 898        btrfs_mark_buffer_dirty(path->nodes[0]);
 899
 900        btrfs_release_path(path);
 901        mutex_unlock(&node->mutex);
 902        goto do_again;
 903
 904insert_end:
 905        mutex_unlock(&node->mutex);
 906        return ret;
 907}
 908
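/*
 * Delete a run of contiguous dir index items in one go: starting at
 * path->slots[0], collect the delayed deletion items whose keys match the
 * leaf, remove them with btrfs_del_items() and release them.
 */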
 909static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
 910                                    struct btrfs_root *root,
 911                                    struct btrfs_path *path,
 912                                    struct btrfs_delayed_item *item)
 913{
 914        struct btrfs_delayed_item *curr, *next;
 915        struct extent_buffer *leaf;
 916        struct btrfs_key key;
 917        struct list_head head;
 918        int nitems, i, last_item;
 919        int ret = 0;
 920
 921        BUG_ON(!path->nodes[0]);
 922
 923        leaf = path->nodes[0];
 924
 925        i = path->slots[0];
 926        last_item = btrfs_header_nritems(leaf) - 1;
 927        if (i > last_item)
 928                return -ENOENT; /* FIXME: Is errno suitable? */
 929
 930        next = item;
 931        INIT_LIST_HEAD(&head);
 932        btrfs_item_key_to_cpu(leaf, &key, i);
 933        nitems = 0;
 934        /*
  935         * count the number of dir index items that we can delete in a batch
 936         */
 937        while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
 938                list_add_tail(&next->tree_list, &head);
 939                nitems++;
 940
 941                curr = next;
 942                next = __btrfs_next_delayed_item(curr);
 943                if (!next)
 944                        break;
 945
 946                if (!btrfs_is_continuous_delayed_item(curr, next))
 947                        break;
 948
 949                i++;
 950                if (i > last_item)
 951                        break;
 952                btrfs_item_key_to_cpu(leaf, &key, i);
 953        }
 954
 955        if (!nitems)
 956                return 0;
 957
 958        ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
 959        if (ret)
 960                goto out;
 961
 962        list_for_each_entry_safe(curr, next, &head, tree_list) {
 963                btrfs_delayed_item_release_metadata(root, curr);
 964                list_del(&curr->tree_list);
 965                btrfs_release_delayed_item(curr);
 966        }
 967
 968out:
 969        return ret;
 970}
 971
 972static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
 973                                      struct btrfs_path *path,
 974                                      struct btrfs_root *root,
 975                                      struct btrfs_delayed_node *node)
 976{
 977        struct btrfs_delayed_item *curr, *prev;
 978        int ret = 0;
 979
 980do_again:
 981        mutex_lock(&node->mutex);
 982        curr = __btrfs_first_delayed_deletion_item(node);
 983        if (!curr)
 984                goto delete_fail;
 985
 986        ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
 987        if (ret < 0)
 988                goto delete_fail;
 989        else if (ret > 0) {
 990                /*
 991                 * can't find the item which the node points to, so this node
 992                 * is invalid, just drop it.
 993                 */
 994                prev = curr;
 995                curr = __btrfs_next_delayed_item(prev);
 996                btrfs_release_delayed_item(prev);
 997                ret = 0;
 998                btrfs_release_path(path);
 999                if (curr) {
1000                        mutex_unlock(&node->mutex);
1001                        goto do_again;
1002                } else
1003                        goto delete_fail;
1004        }
1005
1006        btrfs_batch_delete_items(trans, root, path, curr);
1007        btrfs_release_path(path);
1008        mutex_unlock(&node->mutex);
1009        goto do_again;
1010
1011delete_fail:
1012        btrfs_release_path(path);
1013        mutex_unlock(&node->mutex);
1014        return ret;
1015}
1016
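/*
 * The delayed inode item has been dealt with: clear the dirty flag, drop its
 * contribution to the node's item count and account it as a finished item.
 */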
1017static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1018{
1019        struct btrfs_delayed_root *delayed_root;
1020
1021        if (delayed_node && delayed_node->inode_dirty) {
1022                BUG_ON(!delayed_node->root);
1023                delayed_node->inode_dirty = 0;
1024                delayed_node->count--;
1025
1026                delayed_root = delayed_node->root->fs_info->delayed_root;
1027                finish_one_item(delayed_root);
1028        }
1029}
1030
1031static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1032                                        struct btrfs_root *root,
1033                                        struct btrfs_path *path,
1034                                        struct btrfs_delayed_node *node)
1035{
1036        struct btrfs_key key;
1037        struct btrfs_inode_item *inode_item;
1038        struct extent_buffer *leaf;
1039        int ret;
1040
1041        key.objectid = node->inode_id;
1042        btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1043        key.offset = 0;
1044
1045        ret = btrfs_lookup_inode(trans, root, path, &key, 1);
1046        if (ret > 0) {
1047                btrfs_release_path(path);
1048                return -ENOENT;
1049        } else if (ret < 0) {
1050                return ret;
1051        }
1052
1053        btrfs_unlock_up_safe(path, 1);
1054        leaf = path->nodes[0];
1055        inode_item = btrfs_item_ptr(leaf, path->slots[0],
1056                                    struct btrfs_inode_item);
1057        write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1058                            sizeof(struct btrfs_inode_item));
1059        btrfs_mark_buffer_dirty(leaf);
1060        btrfs_release_path(path);
1061
1062        btrfs_delayed_inode_release_metadata(root, node);
1063        btrfs_release_delayed_inode(node);
1064
1065        return 0;
1066}
1067
1068static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1069                                             struct btrfs_root *root,
1070                                             struct btrfs_path *path,
1071                                             struct btrfs_delayed_node *node)
1072{
1073        int ret;
1074
1075        mutex_lock(&node->mutex);
1076        if (!node->inode_dirty) {
1077                mutex_unlock(&node->mutex);
1078                return 0;
1079        }
1080
1081        ret = __btrfs_update_delayed_inode(trans, root, path, node);
1082        mutex_unlock(&node->mutex);
1083        return ret;
1084}
1085
1086static inline int
1087__btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1088                                   struct btrfs_path *path,
1089                                   struct btrfs_delayed_node *node)
1090{
1091        int ret;
1092
1093        ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1094        if (ret)
1095                return ret;
1096
1097        ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1098        if (ret)
1099                return ret;
1100
1101        ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1102        return ret;
1103}
1104
1105/*
1106 * Called when committing the transaction.
1107 * Returns 0 on success.
1108 * Returns < 0 on error and returns with an aborted transaction with any
1109 * outstanding delayed items cleaned up.
1110 */
1111static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1112                                     struct btrfs_root *root, int nr)
1113{
1114        struct btrfs_delayed_root *delayed_root;
1115        struct btrfs_delayed_node *curr_node, *prev_node;
1116        struct btrfs_path *path;
1117        struct btrfs_block_rsv *block_rsv;
1118        int ret = 0;
1119        bool count = (nr > 0);
1120
1121        if (trans->aborted)
1122                return -EIO;
1123
1124        path = btrfs_alloc_path();
1125        if (!path)
1126                return -ENOMEM;
1127        path->leave_spinning = 1;
1128
1129        block_rsv = trans->block_rsv;
1130        trans->block_rsv = &root->fs_info->delayed_block_rsv;
1131
1132        delayed_root = btrfs_get_delayed_root(root);
1133
1134        curr_node = btrfs_first_delayed_node(delayed_root);
1135        while (curr_node && (!count || (count && nr--))) {
1136                ret = __btrfs_commit_inode_delayed_items(trans, path,
1137                                                         curr_node);
1138                if (ret) {
1139                        btrfs_release_delayed_node(curr_node);
1140                        curr_node = NULL;
1141                        btrfs_abort_transaction(trans, root, ret);
1142                        break;
1143                }
1144
1145                prev_node = curr_node;
1146                curr_node = btrfs_next_delayed_node(curr_node);
1147                btrfs_release_delayed_node(prev_node);
1148        }
1149
1150        if (curr_node)
1151                btrfs_release_delayed_node(curr_node);
1152        btrfs_free_path(path);
1153        trans->block_rsv = block_rsv;
1154
1155        return ret;
1156}
1157
1158int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1159                            struct btrfs_root *root)
1160{
1161        return __btrfs_run_delayed_items(trans, root, -1);
1162}
1163
1164int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
1165                               struct btrfs_root *root, int nr)
1166{
1167        return __btrfs_run_delayed_items(trans, root, nr);
1168}
1169
1170int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1171                                     struct inode *inode)
1172{
1173        struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1174        struct btrfs_path *path;
1175        struct btrfs_block_rsv *block_rsv;
1176        int ret;
1177
1178        if (!delayed_node)
1179                return 0;
1180
1181        mutex_lock(&delayed_node->mutex);
1182        if (!delayed_node->count) {
1183                mutex_unlock(&delayed_node->mutex);
1184                btrfs_release_delayed_node(delayed_node);
1185                return 0;
1186        }
1187        mutex_unlock(&delayed_node->mutex);
1188
1189        path = btrfs_alloc_path();
1190        if (!path)
1191                return -ENOMEM;
1192        path->leave_spinning = 1;
1193
1194        block_rsv = trans->block_rsv;
1195        trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1196
1197        ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1198
1199        btrfs_release_delayed_node(delayed_node);
1200        btrfs_free_path(path);
1201        trans->block_rsv = block_rsv;
1202
1203        return ret;
1204}
1205
1206int btrfs_commit_inode_delayed_inode(struct inode *inode)
1207{
1208        struct btrfs_trans_handle *trans;
1209        struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1210        struct btrfs_path *path;
1211        struct btrfs_block_rsv *block_rsv;
1212        int ret;
1213
1214        if (!delayed_node)
1215                return 0;
1216
1217        mutex_lock(&delayed_node->mutex);
1218        if (!delayed_node->inode_dirty) {
1219                mutex_unlock(&delayed_node->mutex);
1220                btrfs_release_delayed_node(delayed_node);
1221                return 0;
1222        }
1223        mutex_unlock(&delayed_node->mutex);
1224
1225        trans = btrfs_join_transaction(delayed_node->root);
1226        if (IS_ERR(trans)) {
1227                ret = PTR_ERR(trans);
1228                goto out;
1229        }
1230
1231        path = btrfs_alloc_path();
1232        if (!path) {
1233                ret = -ENOMEM;
1234                goto trans_out;
1235        }
1236        path->leave_spinning = 1;
1237
1238        block_rsv = trans->block_rsv;
1239        trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1240
1241        mutex_lock(&delayed_node->mutex);
1242        if (delayed_node->inode_dirty)
1243                ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1244                                                   path, delayed_node);
1245        else
1246                ret = 0;
1247        mutex_unlock(&delayed_node->mutex);
1248
1249        btrfs_free_path(path);
1250        trans->block_rsv = block_rsv;
1251trans_out:
1252        btrfs_end_transaction(trans, delayed_node->root);
1253        btrfs_btree_balance_dirty(delayed_node->root);
1254out:
1255        btrfs_release_delayed_node(delayed_node);
1256
1257        return ret;
1258}
1259
1260void btrfs_remove_delayed_node(struct inode *inode)
1261{
1262        struct btrfs_delayed_node *delayed_node;
1263
1264        delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
1265        if (!delayed_node)
1266                return;
1267
1268        BTRFS_I(inode)->delayed_node = NULL;
1269        btrfs_release_delayed_node(delayed_node);
1270}
1271
1272struct btrfs_async_delayed_work {
1273        struct btrfs_delayed_root *delayed_root;
1274        int nr;
1275        struct btrfs_work work;
1276};
1277
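/*
 * Worker callback: repeatedly take the first node off the prepare list, join
 * a transaction and flush the node's insertion items, deletion items and
 * inode update.  It stops after async_work->nr nodes (0 means no limit) or
 * once the backlog drops below half of BTRFS_DELAYED_BACKGROUND.
 */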
1278static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1279{
1280        struct btrfs_async_delayed_work *async_work;
1281        struct btrfs_delayed_root *delayed_root;
1282        struct btrfs_trans_handle *trans;
1283        struct btrfs_path *path;
1284        struct btrfs_delayed_node *delayed_node = NULL;
1285        struct btrfs_root *root;
1286        struct btrfs_block_rsv *block_rsv;
1287        int total_done = 0;
1288
1289        async_work = container_of(work, struct btrfs_async_delayed_work, work);
1290        delayed_root = async_work->delayed_root;
1291
1292        path = btrfs_alloc_path();
1293        if (!path)
1294                goto out;
1295
1296again:
1297        if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
1298                goto free_path;
1299
1300        delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1301        if (!delayed_node)
1302                goto free_path;
1303
1304        path->leave_spinning = 1;
1305        root = delayed_node->root;
1306
1307        trans = btrfs_join_transaction(root);
1308        if (IS_ERR(trans))
1309                goto release_path;
1310
1311        block_rsv = trans->block_rsv;
1312        trans->block_rsv = &root->fs_info->delayed_block_rsv;
1313
1314        __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1315        /*
 1316         * Maybe new delayed items have been inserted, so we need to requeue
1317         * the work. Besides that, we must dequeue the empty delayed nodes
1318         * to avoid the race between delayed items balance and the worker.
1319         * The race like this:
1320         *      Task1                           Worker thread
1321         *                                      count == 0, needn't requeue
1322         *                                        also needn't insert the
1323         *                                        delayed node into prepare
1324         *                                        list again.
1325         *      add lots of delayed items
1326         *      queue the delayed node
1327         *        already in the list,
1328         *        and not in the prepare
1329         *        list, it means the delayed
1330         *        node is being dealt with
1331         *        by the worker.
1332         *      do delayed items balance
1333         *        the delayed node is being
1334         *        dealt with by the worker
1335         *        now, just wait.
 1336         *                                      the worker goes idle.
 1337         * Task1 will sleep until the transaction is committed.
1338         */
1339        mutex_lock(&delayed_node->mutex);
1340        btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);
1341        mutex_unlock(&delayed_node->mutex);
1342
1343        trans->block_rsv = block_rsv;
1344        btrfs_end_transaction_dmeta(trans, root);
1345        btrfs_btree_balance_dirty_nodelay(root);
1346
1347release_path:
1348        btrfs_release_path(path);
1349        total_done++;
1350
1351        btrfs_release_prepared_delayed_node(delayed_node);
1352        if (async_work->nr == 0 || total_done < async_work->nr)
1353                goto again;
1354
1355free_path:
1356        btrfs_free_path(path);
1357out:
1358        wake_up(&delayed_root->wait);
1359        kfree(async_work);
1360}
1361
1362
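/*
 * Queue one async worker to flush up to nr delayed nodes (nr == 0 means no
 * limit).  Does nothing while the number of queued delayed items is still
 * below BTRFS_DELAYED_BACKGROUND.
 */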
1363static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1364                                     struct btrfs_root *root, int nr)
1365{
1366        struct btrfs_async_delayed_work *async_work;
1367
1368        if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1369                return 0;
1370
1371        async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1372        if (!async_work)
1373                return -ENOMEM;
1374
1375        async_work->delayed_root = delayed_root;
1376        async_work->work.func = btrfs_async_run_delayed_root;
1377        async_work->work.flags = 0;
1378        async_work->nr = nr;
1379
1380        btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work);
1381        return 0;
1382}
1383
1384void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1385{
1386        struct btrfs_delayed_root *delayed_root;
1387        delayed_root = btrfs_get_delayed_root(root);
1388        WARN_ON(btrfs_first_delayed_node(delayed_root));
1389}
1390
1391static int refs_newer(struct btrfs_delayed_root *delayed_root,
1392                      int seq, int count)
1393{
1394        int val = atomic_read(&delayed_root->items_seq);
1395
1396        if (val < seq || val >= seq + count)
1397                return 1;
1398        return 0;
1399}
1400
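/*
 * Keep the delayed item backlog under control: once it exceeds
 * BTRFS_DELAYED_BACKGROUND an async flush is kicked off, and above
 * BTRFS_DELAYED_WRITEBACK the caller also waits until a batch has been
 * processed or the backlog shrinks again.
 */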
1401void btrfs_balance_delayed_items(struct btrfs_root *root)
1402{
1403        struct btrfs_delayed_root *delayed_root;
1404        int seq;
1405
1406        delayed_root = btrfs_get_delayed_root(root);
1407
1408        if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1409                return;
1410
1411        seq = atomic_read(&delayed_root->items_seq);
1412
1413        if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1414                int ret;
1415                DEFINE_WAIT(__wait);
1416
1417                ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
1418                if (ret)
1419                        return;
1420
1421                while (1) {
1422                        prepare_to_wait(&delayed_root->wait, &__wait,
1423                                        TASK_INTERRUPTIBLE);
1424
1425                        if (refs_newer(delayed_root, seq,
1426                                       BTRFS_DELAYED_BATCH) ||
1427                            atomic_read(&delayed_root->items) <
1428                            BTRFS_DELAYED_BACKGROUND) {
1429                                break;
1430                        }
1431                        if (!signal_pending(current))
1432                                schedule();
1433                        else
1434                                break;
1435                }
1436                finish_wait(&delayed_root->wait, &__wait);
1437        }
1438
1439        btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
1440}
1441
1442/* Will return 0 or -ENOMEM */
1443int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1444                                   struct btrfs_root *root, const char *name,
1445                                   int name_len, struct inode *dir,
1446                                   struct btrfs_disk_key *disk_key, u8 type,
1447                                   u64 index)
1448{
1449        struct btrfs_delayed_node *delayed_node;
1450        struct btrfs_delayed_item *delayed_item;
1451        struct btrfs_dir_item *dir_item;
1452        int ret;
1453
1454        delayed_node = btrfs_get_or_create_delayed_node(dir);
1455        if (IS_ERR(delayed_node))
1456                return PTR_ERR(delayed_node);
1457
1458        delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1459        if (!delayed_item) {
1460                ret = -ENOMEM;
1461                goto release_node;
1462        }
1463
1464        delayed_item->key.objectid = btrfs_ino(dir);
1465        btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
1466        delayed_item->key.offset = index;
1467
1468        dir_item = (struct btrfs_dir_item *)delayed_item->data;
1469        dir_item->location = *disk_key;
1470        dir_item->transid = cpu_to_le64(trans->transid);
1471        dir_item->data_len = 0;
1472        dir_item->name_len = cpu_to_le16(name_len);
1473        dir_item->type = type;
1474        memcpy((char *)(dir_item + 1), name, name_len);
1475
1476        ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
1477        /*
1478         * we have reserved enough space when we start a new transaction,
 1479         * so a metadata reservation failure is impossible.
1480         */
1481        BUG_ON(ret);
1482
1483
1484        mutex_lock(&delayed_node->mutex);
1485        ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1486        if (unlikely(ret)) {
1487                printk(KERN_ERR "error adding delayed dir index item (name: %s) "
1488                                "into the insertion tree of the delayed node "
1489                                "(root id: %llu, inode id: %llu, errno: %d)\n",
1490                                name,
1491                                (unsigned long long)delayed_node->root->objectid,
1492                                (unsigned long long)delayed_node->inode_id,
1493                                ret);
1494                BUG();
1495        }
1496        mutex_unlock(&delayed_node->mutex);
1497
1498release_node:
1499        btrfs_release_delayed_node(delayed_node);
1500        return ret;
1501}
1502
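    /*
     * If the dir index item being deleted is still sitting in the delayed
     * node's insertion tree, drop it there and release its metadata
     * reservation.  Returns 0 if it was found and dropped, 1 otherwise.
     */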
1503static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
1504                                               struct btrfs_delayed_node *node,
1505                                               struct btrfs_key *key)
1506{
1507        struct btrfs_delayed_item *item;
1508
1509        mutex_lock(&node->mutex);
1510        item = __btrfs_lookup_delayed_insertion_item(node, key);
1511        if (!item) {
1512                mutex_unlock(&node->mutex);
1513                return 1;
1514        }
1515
1516        btrfs_delayed_item_release_metadata(root, item);
1517        btrfs_release_delayed_item(item);
1518        mutex_unlock(&node->mutex);
1519        return 0;
1520}
1521
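    /*
     * Queue the deletion of dir index @index of @dir.  If a matching
     * insertion is still pending in the delayed node it is simply
     * cancelled; otherwise a delayed deletion item is added.
     */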
1522int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1523                                   struct btrfs_root *root, struct inode *dir,
1524                                   u64 index)
1525{
1526        struct btrfs_delayed_node *node;
1527        struct btrfs_delayed_item *item;
1528        struct btrfs_key item_key;
1529        int ret;
1530
1531        node = btrfs_get_or_create_delayed_node(dir);
1532        if (IS_ERR(node))
1533                return PTR_ERR(node);
1534
1535        item_key.objectid = btrfs_ino(dir);
1536        btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY);
1537        item_key.offset = index;
1538
1539        ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
1540        if (!ret)
1541                goto end;
1542
1543        item = btrfs_alloc_delayed_item(0);
1544        if (!item) {
1545                ret = -ENOMEM;
1546                goto end;
1547        }
1548
1549        item->key = item_key;
1550
1551        ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
1552        /*
1553         * We reserved enough space when we started a new transaction,
1554         * so a metadata reservation failure here should be impossible.
1555         */
1556        BUG_ON(ret);
1557
1558        mutex_lock(&node->mutex);
1559        ret = __btrfs_add_delayed_deletion_item(node, item);
1560        if (unlikely(ret)) {
1561                printk(KERN_ERR "error adding delayed dir index item (index: %llu) "
1562                                "into the deletion tree of the delayed node "
1563                                "(root id: %llu, inode id: %llu, errno: %d)\n",
1564                                (unsigned long long)index,
1565                                (unsigned long long)node->root->objectid,
1566                                (unsigned long long)node->inode_id,
1567                                ret);
1568                BUG();
1569        }
1570        mutex_unlock(&node->mutex);
1571end:
1572        btrfs_release_delayed_node(node);
1573        return ret;
1574}
1575
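    /*
     * Copy the directory index counter cached in the delayed node into
     * BTRFS_I(inode)->index_cnt.  Returns -ENOENT if the inode has no
     * delayed node and -EINVAL if the delayed node has no cached counter.
     */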
1576int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1577{
1578        struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1579
1580        if (!delayed_node)
1581                return -ENOENT;
1582
1583        /*
1584         * Since we hold the i_mutex of this directory, no new directory
1585         * index can be added to the delayed node and index_cnt cannot be
1586         * updated while we are here, so we need not lock the delayed node.
1587         */
1588        if (!delayed_node->index_cnt) {
1589                btrfs_release_delayed_node(delayed_node);
1590                return -EINVAL;
1591        }
1592
1593        BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
1594        btrfs_release_delayed_node(delayed_node);
1595        return 0;
1596}
1597
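    /*
     * Grab a reference to every delayed insertion and deletion item of
     * @inode and queue them on @ins_list and @del_list for readdir.
     */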
1598void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
1599                             struct list_head *del_list)
1600{
1601        struct btrfs_delayed_node *delayed_node;
1602        struct btrfs_delayed_item *item;
1603
1604        delayed_node = btrfs_get_delayed_node(inode);
1605        if (!delayed_node)
1606                return;
1607
1608        mutex_lock(&delayed_node->mutex);
1609        item = __btrfs_first_delayed_insertion_item(delayed_node);
1610        while (item) {
1611                atomic_inc(&item->refs);
1612                list_add_tail(&item->readdir_list, ins_list);
1613                item = __btrfs_next_delayed_item(item);
1614        }
1615
1616        item = __btrfs_first_delayed_deletion_item(delayed_node);
1617        while (item) {
1618                atomic_inc(&item->refs);
1619                list_add_tail(&item->readdir_list, del_list);
1620                item = __btrfs_next_delayed_item(item);
1621        }
1622        mutex_unlock(&delayed_node->mutex);
1623        /*
1624         * This delayed node is still cached in the btrfs inode, so refs
1625         * must be > 1 now, and we need not check whether it is about to
1626         * be freed.
1627         *
1628         * Besides that, this function is only used for readdir, and no
1629         * delayed items are inserted or deleted during that time, so we
1630         * need not requeue or dequeue this delayed node either.
1631         */
1632        atomic_dec(&delayed_node->refs);
1633}
1634
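    /*
     * Drop the references taken by btrfs_get_delayed_items() and free any
     * item whose reference count reaches zero.
     */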
1635void btrfs_put_delayed_items(struct list_head *ins_list,
1636                             struct list_head *del_list)
1637{
1638        struct btrfs_delayed_item *curr, *next;
1639
1640        list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1641                list_del(&curr->readdir_list);
1642                if (atomic_dec_and_test(&curr->refs))
1643                        kfree(curr);
1644        }
1645
1646        list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1647                list_del(&curr->readdir_list);
1648                if (atomic_dec_and_test(&curr->refs))
1649                        kfree(curr);
1650        }
1651}
1652
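    /*
     * Return 1 if @index matches a delayed deletion item, i.e. readdir
     * must skip it.  Entries with a smaller offset are dropped from
     * @del_list as we walk past them.
     */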
1653int btrfs_should_delete_dir_index(struct list_head *del_list,
1654                                  u64 index)
1655{
1656        struct btrfs_delayed_item *curr, *next;
1657        int ret;
1658
1659        if (list_empty(del_list))
1660                return 0;
1661
1662        list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1663                if (curr->key.offset > index)
1664                        break;
1665
1666                list_del(&curr->readdir_list);
1667                ret = (curr->key.offset == index);
1668
1669                if (atomic_dec_and_test(&curr->refs))
1670                        kfree(curr);
1671
1672                if (ret)
1673                        return 1;
1674                else
1675                        continue;
1676        }
1677        return 0;
1678}
1679
1680/*
1681 * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1682 * and emit each pending directory index entry via the given filldir callback.
1683 */
1684int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
1685                                    filldir_t filldir,
1686                                    struct list_head *ins_list)
1687{
1688        struct btrfs_dir_item *di;
1689        struct btrfs_delayed_item *curr, *next;
1690        struct btrfs_key location;
1691        char *name;
1692        int name_len;
1693        int over = 0;
1694        unsigned char d_type;
1695
1696        if (list_empty(ins_list))
1697                return 0;
1698
1699        /*
1700         * The data of a delayed item never changes, so we need not lock
1701         * the items.  And since we hold the i_mutex of the directory,
1702         * nobody can delete any directory index now.
1703         */
1704        list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1705                list_del(&curr->readdir_list);
1706
1707                if (curr->key.offset < filp->f_pos) {
1708                        if (atomic_dec_and_test(&curr->refs))
1709                                kfree(curr);
1710                        continue;
1711                }
1712
1713                filp->f_pos = curr->key.offset;
1714
1715                di = (struct btrfs_dir_item *)curr->data;
1716                name = (char *)(di + 1);
1717                name_len = le16_to_cpu(di->name_len);
1718
1719                d_type = btrfs_filetype_table[di->type];
1720                btrfs_disk_key_to_cpu(&location, &di->location);
1721
1722                over = filldir(dirent, name, name_len, curr->key.offset,
1723                               location.objectid, d_type);
1724
1725                if (atomic_dec_and_test(&curr->refs))
1726                        kfree(curr);
1727
1728                if (over)
1729                        return 1;
1730        }
1731        return 0;
1732}
1733
1734BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
1735                         generation, 64);
1736BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
1737                         sequence, 64);
1738BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
1739                         transid, 64);
1740BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
1741BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
1742                         nbytes, 64);
1743BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
1744                         block_group, 64);
1745BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
1746BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
1747BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
1748BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
1749BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
1750BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
1751
1752BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
1753BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
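
    /*
     * The BTRFS_SETGET_STACK_FUNCS() invocations above generate helpers
     * that read and write the little-endian on-disk fields of an in-memory
     * structure.  As a rough sketch (see the macro definition in ctree.h
     * for the authoritative version), the "stack_inode_size" invocation
     * expands to something like:
     *
     *        static inline u64 btrfs_stack_inode_size(struct btrfs_inode_item *s)
     *        {
     *                return le64_to_cpu(s->size);
     *        }
     *
     *        static inline void btrfs_set_stack_inode_size(struct btrfs_inode_item *s,
     *                                                      u64 val)
     *        {
     *                s->size = cpu_to_le64(val);
     *        }
     */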
1754
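    /*
     * Copy the fields of the VFS inode (and of BTRFS_I(inode)) into the
     * in-memory btrfs_inode_item, converting them to disk byte order.
     */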
1755static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1756                                  struct btrfs_inode_item *inode_item,
1757                                  struct inode *inode)
1758{
1759        btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1760        btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1761        btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1762        btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1763        btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1764        btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1765        btrfs_set_stack_inode_generation(inode_item,
1766                                         BTRFS_I(inode)->generation);
1767        btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
1768        btrfs_set_stack_inode_transid(inode_item, trans->transid);
1769        btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1770        btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1771        btrfs_set_stack_inode_block_group(inode_item, 0);
1772
1773        btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1774                                     inode->i_atime.tv_sec);
1775        btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
1776                                      inode->i_atime.tv_nsec);
1777
1778        btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
1779                                     inode->i_mtime.tv_sec);
1780        btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
1781                                      inode->i_mtime.tv_nsec);
1782
1783        btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
1784                                     inode->i_ctime.tv_sec);
1785        btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
1786                                      inode->i_ctime.tv_nsec);
1787}
1788
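    /*
     * If @inode has a delayed node with a dirty, not yet flushed copy of
     * the inode item, fill the VFS inode from that cached copy and return
     * 0; otherwise return -ENOENT.
     */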
1789int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1790{
1791        struct btrfs_delayed_node *delayed_node;
1792        struct btrfs_inode_item *inode_item;
1793        struct btrfs_timespec *tspec;
1794
1795        delayed_node = btrfs_get_delayed_node(inode);
1796        if (!delayed_node)
1797                return -ENOENT;
1798
1799        mutex_lock(&delayed_node->mutex);
1800        if (!delayed_node->inode_dirty) {
1801                mutex_unlock(&delayed_node->mutex);
1802                btrfs_release_delayed_node(delayed_node);
1803                return -ENOENT;
1804        }
1805
1806        inode_item = &delayed_node->inode_item;
1807
1808        i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1809        i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1810        btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
1811        inode->i_mode = btrfs_stack_inode_mode(inode_item);
1812        set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1813        inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1814        BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1815        inode->i_version = btrfs_stack_inode_sequence(inode_item);
1816        inode->i_rdev = 0;
1817        *rdev = btrfs_stack_inode_rdev(inode_item);
1818        BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1819
1820        tspec = btrfs_inode_atime(inode_item);
1821        inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
1822        inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1823
1824        tspec = btrfs_inode_mtime(inode_item);
1825        inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
1826        inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1827
1828        tspec = btrfs_inode_ctime(inode_item);
1829        inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
1830        inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1831
1832        inode->i_generation = BTRFS_I(inode)->generation;
1833        BTRFS_I(inode)->index_cnt = (u64)-1;
1834
1835        mutex_unlock(&delayed_node->mutex);
1836        btrfs_release_delayed_node(delayed_node);
1837        return 0;
1838}
1839
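    /*
     * Record an inode update in the delayed node instead of updating the
     * inode item in the fs tree immediately.  Metadata is reserved and the
     * node is marked dirty only on the first update; later updates simply
     * refresh the cached inode item.
     */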
1840int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1841                               struct btrfs_root *root, struct inode *inode)
1842{
1843        struct btrfs_delayed_node *delayed_node;
1844        int ret = 0;
1845
1846        delayed_node = btrfs_get_or_create_delayed_node(inode);
1847        if (IS_ERR(delayed_node))
1848                return PTR_ERR(delayed_node);
1849
1850        mutex_lock(&delayed_node->mutex);
1851        if (delayed_node->inode_dirty) {
1852                fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1853                goto release_node;
1854        }
1855
1856        ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
1857                                                   delayed_node);
1858        if (ret)
1859                goto release_node;
1860
1861        fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1862        delayed_node->inode_dirty = 1;
1863        delayed_node->count++;
1864        atomic_inc(&root->fs_info->delayed_root->items);
1865release_node:
1866        mutex_unlock(&delayed_node->mutex);
1867        btrfs_release_delayed_node(delayed_node);
1868        return ret;
1869}
1870
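    /*
     * Throw away everything a delayed node still holds: release the
     * metadata reservations of all pending insertion and deletion items,
     * drop the items themselves, and clear the dirty inode state.
     */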
1871static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1872{
1873        struct btrfs_root *root = delayed_node->root;
1874        struct btrfs_delayed_item *curr_item, *prev_item;
1875
1876        mutex_lock(&delayed_node->mutex);
1877        curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1878        while (curr_item) {
1879                btrfs_delayed_item_release_metadata(root, curr_item);
1880                prev_item = curr_item;
1881                curr_item = __btrfs_next_delayed_item(prev_item);
1882                btrfs_release_delayed_item(prev_item);
1883        }
1884
1885        curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1886        while (curr_item) {
1887                btrfs_delayed_item_release_metadata(root, curr_item);
1888                prev_item = curr_item;
1889                curr_item = __btrfs_next_delayed_item(prev_item);
1890                btrfs_release_delayed_item(prev_item);
1891        }
1892
1893        if (delayed_node->inode_dirty) {
1894                btrfs_delayed_inode_release_metadata(root, delayed_node);
1895                btrfs_release_delayed_inode(delayed_node);
1896        }
1897        mutex_unlock(&delayed_node->mutex);
1898}
1899
1900void btrfs_kill_delayed_inode_items(struct inode *inode)
1901{
1902        struct btrfs_delayed_node *delayed_node;
1903
1904        delayed_node = btrfs_get_delayed_node(inode);
1905        if (!delayed_node)
1906                return;
1907
1908        __btrfs_kill_delayed_node(delayed_node);
1909        btrfs_release_delayed_node(delayed_node);
1910}
1911
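    /*
     * Walk the whole delayed_nodes_tree of @root in batches of up to
     * eight nodes, take a reference on each node found and kill it.
     */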
1912void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1913{
1914        u64 inode_id = 0;
1915        struct btrfs_delayed_node *delayed_nodes[8];
1916        int i, n;
1917
1918        while (1) {
1919                spin_lock(&root->inode_lock);
1920                n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1921                                           (void **)delayed_nodes, inode_id,
1922                                           ARRAY_SIZE(delayed_nodes));
1923                if (!n) {
1924                        spin_unlock(&root->inode_lock);
1925                        break;
1926                }
1927
1928                inode_id = delayed_nodes[n - 1]->inode_id + 1;
1929
1930                for (i = 0; i < n; i++)
1931                        atomic_inc(&delayed_nodes[i]->refs);
1932                spin_unlock(&root->inode_lock);
1933
1934                for (i = 0; i < n; i++) {
1935                        __btrfs_kill_delayed_node(delayed_nodes[i]);
1936                        btrfs_release_delayed_node(delayed_nodes[i]);
1937                }
1938        }
1939}
1940
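    /*
     * Kill every delayed node that is still queued on the delayed root.
     */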
1941void btrfs_destroy_delayed_inodes(struct btrfs_root *root)
1942{
1943        struct btrfs_delayed_root *delayed_root;
1944        struct btrfs_delayed_node *curr_node, *prev_node;
1945
1946        delayed_root = btrfs_get_delayed_root(root);
1947
1948        curr_node = btrfs_first_delayed_node(delayed_root);
1949        while (curr_node) {
1950                __btrfs_kill_delayed_node(curr_node);
1951
1952                prev_node = curr_node;
1953                curr_node = btrfs_next_delayed_node(curr_node);
1954                btrfs_release_delayed_node(prev_node);
1955        }
1956}
1957
1958