linux/fs/btrfs/inode-map.c
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>

#include "ctree.h"
#include "disk-io.h"
#include "free-space-cache.h"
#include "inode-map.h"
#include "transaction.h"

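/*
 * Scan the commit root of this fs tree for gaps between inode items and
 * feed the unused inode numbers into the free-ino cache. This runs as a
 * background kthread so that inode allocation does not have to wait for
 * the full scan; the scan backs off whenever a transaction commit is in
 * progress or the scheduler wants the CPU back.
 */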
static int caching_kthread(void *data)
{
        struct btrfs_root *root = data;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct btrfs_key key;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        u64 last = (u64)-1;
        int slot;
        int ret;

        if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        /* Since the commit root is read-only, we can safely skip locking. */
        path->skip_locking = 1;
        path->search_commit_root = 1;
        path->reada = READA_FORWARD;

        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.offset = 0;
        key.type = BTRFS_INODE_ITEM_KEY;
again:
        /* need to make sure the commit_root doesn't disappear */
        down_read(&fs_info->commit_root_sem);

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto out;

        while (1) {
                if (btrfs_fs_closing(fs_info))
                        goto out;

                leaf = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0)
                                goto out;
                        else if (ret > 0)
                                break;

                        if (need_resched() ||
                            btrfs_transaction_in_commit(fs_info)) {
                                leaf = path->nodes[0];

                                if (WARN_ON(btrfs_header_nritems(leaf) == 0))
                                        break;
                                /*
                                 * Save the key so we can advance
                                 * in the next search.
                                 */
                                btrfs_item_key_to_cpu(leaf, &key, 0);
                                btrfs_release_path(path);
                                root->ino_cache_progress = last;
                                up_read(&fs_info->commit_root_sem);
                                schedule_timeout(1);
                                goto again;
                        } else
                                continue;
                }

                btrfs_item_key_to_cpu(leaf, &key, slot);

                if (key.type != BTRFS_INODE_ITEM_KEY)
                        goto next;

                if (key.objectid >= root->highest_objectid)
                        break;

                if (last != (u64)-1 && last + 1 != key.objectid) {
                        __btrfs_add_free_space(ctl, last + 1,
                                               key.objectid - last - 1);
                        wake_up(&root->ino_cache_wait);
                }

                last = key.objectid;
next:
                path->slots[0]++;
        }

        if (last < root->highest_objectid - 1) {
                __btrfs_add_free_space(ctl, last + 1,
                                       root->highest_objectid - last - 1);
        }

        spin_lock(&root->ino_cache_lock);
        root->ino_cache_state = BTRFS_CACHE_FINISHED;
        spin_unlock(&root->ino_cache_lock);

        root->ino_cache_progress = (u64)-1;
        btrfs_unpin_free_ino(root);
out:
        wake_up(&root->ino_cache_wait);
        up_read(&fs_info->commit_root_sem);

        btrfs_free_path(path);

        return ret;
}

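/*
 * Kick off caching for this root: try to load a previously saved cache
 * from disk first; if that fails, pre-seed the cache with the range
 * [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID] and start the caching
 * kthread to fill in the gaps below highest_ino.
 */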
static void start_caching(struct btrfs_root *root)
{
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct task_struct *tsk;
        int ret;
        u64 objectid;

        if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
                return;

        spin_lock(&root->ino_cache_lock);
        if (root->ino_cache_state != BTRFS_CACHE_NO) {
                spin_unlock(&root->ino_cache_lock);
                return;
        }

        root->ino_cache_state = BTRFS_CACHE_STARTED;
        spin_unlock(&root->ino_cache_lock);

        ret = load_free_ino_cache(root->fs_info, root);
        if (ret == 1) {
                spin_lock(&root->ino_cache_lock);
                root->ino_cache_state = BTRFS_CACHE_FINISHED;
                spin_unlock(&root->ino_cache_lock);
                return;
        }

        /*
         * It can be quite time-consuming to fill the cache by searching
         * through the fs tree, and this can keep the ino allocation path
         * waiting. Therefore at start we quickly find out the highest
         * inode number and we know we can use inode numbers which fall in
         * [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID].
         */
        ret = btrfs_find_free_objectid(root, &objectid);
        if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
                __btrfs_add_free_space(ctl, objectid,
                                       BTRFS_LAST_FREE_OBJECTID - objectid + 1);
        }

        tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu",
                          root->root_key.objectid);
        if (IS_ERR(tsk)) {
                btrfs_warn(root->fs_info, "failed to start inode caching task");
                btrfs_clear_pending_and_info(root->fs_info, INODE_MAP_CACHE,
                                "disabling inode map caching");
        }
}

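/*
 * Allocate an inode number for a new inode. This falls back to the plain
 * highest_objectid counter when the inode map cache is disabled; otherwise
 * it waits until the cache has a number to hand out or is known to be
 * exhausted.
 */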
int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
{
        if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
                return btrfs_find_free_objectid(root, objectid);

again:
        *objectid = btrfs_find_ino_for_alloc(root);

        if (*objectid != 0)
                return 0;

        start_caching(root);

        wait_event(root->ino_cache_wait,
                   root->ino_cache_state == BTRFS_CACHE_FINISHED ||
                   root->free_ino_ctl->free_space > 0);

        if (root->ino_cache_state == BTRFS_CACHE_FINISHED &&
            root->free_ino_ctl->free_space == 0)
                return -ENOSPC;
        else
                goto again;
}

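/*
 * Return an inode number to the cache, e.g. when inode creation fails.
 * The number is parked in the pinned tree first and only moved to the
 * free_ino tree at transaction commit (see btrfs_unpin_free_ino()), so
 * that it cannot clash with what the caching kthread is still reading
 * from the commit root.
 */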
void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
{
        struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;

        if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
                return;
again:
        if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
                __btrfs_add_free_space(pinned, objectid, 1);
        } else {
                down_write(&root->fs_info->commit_root_sem);
                spin_lock(&root->ino_cache_lock);
                if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
                        spin_unlock(&root->ino_cache_lock);
                        up_write(&root->fs_info->commit_root_sem);
                        goto again;
                }
                spin_unlock(&root->ino_cache_lock);

                start_caching(root);

                __btrfs_add_free_space(pinned, objectid, 1);

                up_write(&root->fs_info->commit_root_sem);
        }
}

/*
 * When a transaction is committed, we move the inode numbers that are
 * smaller than root->ino_cache_progress from the pinned tree to the
 * free_ino tree; the others are just dropped, because the commit root we
 * were searching has changed.
 *
 * Must be called with root->fs_info->commit_root_sem held.
 */
void btrfs_unpin_free_ino(struct btrfs_root *root)
{
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
        spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock;
        struct btrfs_free_space *info;
        struct rb_node *n;
        u64 count;

        if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
                return;

        while (1) {
                bool add_to_ctl = true;

                spin_lock(rbroot_lock);
                n = rb_first(rbroot);
                if (!n) {
                        spin_unlock(rbroot_lock);
                        break;
                }

                info = rb_entry(n, struct btrfs_free_space, offset_index);
                BUG_ON(info->bitmap); /* Logic error */

                if (info->offset > root->ino_cache_progress)
                        add_to_ctl = false;
                else if (info->offset + info->bytes > root->ino_cache_progress)
                        count = root->ino_cache_progress - info->offset + 1;
                else
                        count = info->bytes;

                rb_erase(&info->offset_index, rbroot);
                spin_unlock(rbroot_lock);
                if (add_to_ctl)
                        __btrfs_add_free_space(ctl, info->offset, count);
                kmem_cache_free(btrfs_free_space_cachep, info);
        }
}

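/*
 * Thresholds that control when the free-ino cache falls back from extent
 * entries to bitmaps: INIT_THRESHOLD caps the initial number of extent
 * entries at 16K worth of memory, and INODES_PER_BITMAP is how many inode
 * numbers a single page-sized bitmap can track.
 */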
#define INIT_THRESHOLD  ((SZ_32K / 2) / sizeof(struct btrfs_free_space))
#define INODES_PER_BITMAP (PAGE_SIZE * 8)

/*
 * The goal is to keep the memory used by the free_ino tree from exceeding
 * what we would use if we stored the same information in bitmaps only.
 */
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
        struct btrfs_free_space *info;
        struct rb_node *n;
        int max_ino;
        int max_bitmaps;

        n = rb_last(&ctl->free_space_offset);
        if (!n) {
                ctl->extents_thresh = INIT_THRESHOLD;
                return;
        }
        info = rb_entry(n, struct btrfs_free_space, offset_index);

        /*
         * Find the maximum inode number in the filesystem. Note we
         * ignore the fact that this can be a bitmap, because we are
         * not doing precise calculation.
         */
        max_ino = info->bytes - 1;

        max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;
        if (max_bitmaps <= ctl->total_bitmaps) {
                ctl->extents_thresh = 0;
                return;
        }

        ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
                                PAGE_SIZE / sizeof(*info);
}

/*
 * We don't fall back to bitmaps if we are below the extents threshold or
 * if this chunk of inode numbers is a big one.
 */
static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
                       struct btrfs_free_space *info)
{
        if (ctl->free_extents < ctl->extents_thresh ||
            info->bytes > INODES_PER_BITMAP / 10)
                return false;

        return true;
}

static const struct btrfs_free_space_op free_ino_op = {
        .recalc_thresholds      = recalculate_thresholds,
        .use_bitmap             = use_bitmap,
};

static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
{
}

static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
                              struct btrfs_free_space *info)
{
        /*
         * We always use extents for two reasons:
         *
         * - The pinned tree is only used while the caching work is in
         *   progress.
         * - It keeps the code simpler. See btrfs_unpin_free_ino().
         */
        return false;
}

static const struct btrfs_free_space_op pinned_free_ino_op = {
        .recalc_thresholds      = pinned_recalc_thresholds,
        .use_bitmap             = pinned_use_bitmap,
};

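/*
 * Set up the two free-space ctls backing the inode map: free_ino_ctl hands
 * out cached inode numbers, while free_ino_pinned holds numbers returned
 * during the current transaction. Both work in units of a single inode
 * number.
 */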
void btrfs_init_free_ino_ctl(struct btrfs_root *root)
{
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;

        spin_lock_init(&ctl->tree_lock);
        ctl->unit = 1;
        ctl->start = 0;
        ctl->private = NULL;
        ctl->op = &free_ino_op;
        INIT_LIST_HEAD(&ctl->trimming_ranges);
        mutex_init(&ctl->cache_writeout_mutex);

        /*
         * Initially we allow the tree to use 16K of RAM to cache chunks of
         * inode numbers before we resort to bitmaps. This is somewhat
         * arbitrary, but it will be adjusted at runtime.
         */
        ctl->extents_thresh = INIT_THRESHOLD;

        spin_lock_init(&pinned->tree_lock);
        pinned->unit = 1;
        pinned->start = 0;
        pinned->private = NULL;
        pinned->extents_thresh = 0;
        pinned->op = &pinned_free_ino_op;
}

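/*
 * Write the in-memory free-ino cache out to its cache inode so that it can
 * be reloaded on the next mount. This is called during transaction commit;
 * it reserves its own metadata space and restores the caller's block_rsv
 * before returning.
 */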
int btrfs_save_ino_cache(struct btrfs_root *root,
                         struct btrfs_trans_handle *trans)
{
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct btrfs_path *path;
        struct inode *inode;
        struct btrfs_block_rsv *rsv;
        u64 num_bytes;
        u64 alloc_hint = 0;
        int ret;
        int prealloc;
        bool retry = false;

        /* Only the fs tree and subvolume/snapshot trees need the ino cache. */
        if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
            (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
             root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
                return 0;

        /* Don't save inode cache if we are deleting this root */
        if (btrfs_root_refs(&root->root_item) == 0)
                return 0;

        if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        rsv = trans->block_rsv;
        trans->block_rsv = &root->fs_info->trans_block_rsv;

        num_bytes = trans->bytes_reserved;
        /*
         * 1 item for inode item insertion if needed
         * 4 items for inode item update (in the worst case)
         * 1 item for slack space if we need to do truncation
         * 1 item for the free space object
         * 3 items for pre-allocation
         */
        trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 10);
        ret = btrfs_block_rsv_add(root, trans->block_rsv,
                                  trans->bytes_reserved,
                                  BTRFS_RESERVE_NO_FLUSH);
        if (ret)
                goto out;
        trace_btrfs_space_reservation(root->fs_info, "ino_cache",
                                      trans->transid, trans->bytes_reserved, 1);
again:
        inode = lookup_free_ino_inode(root, path);
        if (IS_ERR(inode) && (PTR_ERR(inode) != -ENOENT || retry)) {
                ret = PTR_ERR(inode);
                goto out_release;
        }

        if (IS_ERR(inode)) {
                BUG_ON(retry); /* Logic error */
                retry = true;

                ret = create_free_ino_inode(root, trans, path);
                if (ret)
                        goto out_release;
                goto again;
        }

        BTRFS_I(inode)->generation = 0;
        ret = btrfs_update_inode(trans, root, inode);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
                goto out_put;
        }

        if (i_size_read(inode) > 0) {
                ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
                if (ret) {
                        if (ret != -ENOSPC)
                                btrfs_abort_transaction(trans, ret);
                        goto out_put;
                }
        }

        spin_lock(&root->ino_cache_lock);
        if (root->ino_cache_state != BTRFS_CACHE_FINISHED) {
                ret = -1;
                spin_unlock(&root->ino_cache_lock);
                goto out_put;
        }
        spin_unlock(&root->ino_cache_lock);

        spin_lock(&ctl->tree_lock);
        prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
        prealloc = ALIGN(prealloc, PAGE_SIZE);
        prealloc += ctl->total_bitmaps * PAGE_SIZE;
        spin_unlock(&ctl->tree_lock);

        /* Just to make sure we have enough space */
        prealloc += 8 * PAGE_SIZE;

        ret = btrfs_delalloc_reserve_space(inode, 0, prealloc);
        if (ret)
                goto out_put;

        ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
                                              prealloc, prealloc, &alloc_hint);
        if (ret) {
                btrfs_delalloc_release_metadata(inode, prealloc);
                goto out_put;
        }

        ret = btrfs_write_out_ino_cache(root, trans, path, inode);
out_put:
        iput(inode);
out_release:
        trace_btrfs_space_reservation(root->fs_info, "ino_cache",
                                      trans->transid, trans->bytes_reserved, 0);
        btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
out:
        trans->block_rsv = rsv;
        trans->bytes_reserved = num_bytes;

        btrfs_free_path(path);
        return ret;
}

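/*
 * Find the largest objectid currently in use in this tree by looking at
 * the last key before BTRFS_LAST_FREE_OBJECTID. The result is clamped to
 * at least BTRFS_FIRST_FREE_OBJECTID - 1 so that the next allocation
 * starts within the valid range.
 */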
int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
{
        struct btrfs_path *path;
        int ret;
        struct extent_buffer *l;
        struct btrfs_key search_key;
        struct btrfs_key found_key;
        int slot;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
        search_key.type = -1;
        search_key.offset = (u64)-1;
        ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
        if (ret < 0)
                goto error;
        BUG_ON(ret == 0); /* Corruption */
        if (path->slots[0] > 0) {
                slot = path->slots[0] - 1;
                l = path->nodes[0];
                btrfs_item_key_to_cpu(l, &found_key, slot);
                *objectid = max_t(u64, found_key.objectid,
                                  BTRFS_FIRST_FREE_OBJECTID - 1);
        } else {
                *objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
        }
        ret = 0;
error:
        btrfs_free_path(path);
        return ret;
}

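/*
 * Hand out the next never-used objectid by bumping highest_objectid under
 * objectid_mutex. This is the fallback path used when the inode map cache
 * is disabled or not yet populated.
 */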
int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
{
        int ret;

        mutex_lock(&root->objectid_mutex);

        if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
                btrfs_warn(root->fs_info,
                           "the objectid of root %llu reaches its highest value",
                           root->root_key.objectid);
                ret = -ENOSPC;
                goto out;
        }

        *objectid = ++root->highest_objectid;
        ret = 0;
out:
        mutex_unlock(&root->objectid_mutex);
        return ret;
}