linux/fs/btrfs/inode-map.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2007 Oracle.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16 * Boston, MA 021110-1307, USA.
  17 */
  18
  19#include <linux/delay.h>
  20#include <linux/kthread.h>
  21#include <linux/pagemap.h>
  22
  23#include "ctree.h"
  24#include "disk-io.h"
  25#include "free-space-cache.h"
  26#include "inode-map.h"
  27#include "transaction.h"
  28
  29static int caching_kthread(void *data)
  30{
  31        struct btrfs_root *root = data;
  32        struct btrfs_fs_info *fs_info = root->fs_info;
  33        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
  34        struct btrfs_key key;
  35        struct btrfs_path *path;
  36        struct extent_buffer *leaf;
  37        u64 last = (u64)-1;
  38        int slot;
  39        int ret;
  40
  41        if (!btrfs_test_opt(root, INODE_MAP_CACHE))
  42                return 0;
  43
  44        path = btrfs_alloc_path();
  45        if (!path)
  46                return -ENOMEM;
  47
  48        /* Since the commit root is read-only, we can safely skip locking. */
  49        path->skip_locking = 1;
  50        path->search_commit_root = 1;
  51        path->reada = 2;
  52
  53        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
  54        key.offset = 0;
  55        key.type = BTRFS_INODE_ITEM_KEY;
  56again:
  57        /* need to make sure the commit_root doesn't disappear */
  58        down_read(&fs_info->commit_root_sem);
  59
  60        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
  61        if (ret < 0)
  62                goto out;
  63
  64        while (1) {
  65                if (btrfs_fs_closing(fs_info))
  66                        goto out;
  67
  68                leaf = path->nodes[0];
  69                slot = path->slots[0];
  70                if (slot >= btrfs_header_nritems(leaf)) {
  71                        ret = btrfs_next_leaf(root, path);
  72                        if (ret < 0)
  73                                goto out;
  74                        else if (ret > 0)
  75                                break;
  76
  77                        if (need_resched() ||
  78                            btrfs_transaction_in_commit(fs_info)) {
  79                                leaf = path->nodes[0];
  80
  81                                if (WARN_ON(btrfs_header_nritems(leaf) == 0))
  82                                        break;
  83
  84                                /*
  85                                 * Save the key so we can advances forward
  86                                 * in the next search.
  87                                 */
  88                                btrfs_item_key_to_cpu(leaf, &key, 0);
  89                                btrfs_release_path(path);
  90                                root->ino_cache_progress = last;
  91                                up_read(&fs_info->commit_root_sem);
  92                                schedule_timeout(1);
  93                                goto again;
  94                        } else
  95                                continue;
  96                }
  97
  98                btrfs_item_key_to_cpu(leaf, &key, slot);
  99
 100                if (key.type != BTRFS_INODE_ITEM_KEY)
 101                        goto next;
 102
 103                if (key.objectid >= root->highest_objectid)
 104                        break;
 105
 106                if (last != (u64)-1 && last + 1 != key.objectid) {
 107                        __btrfs_add_free_space(ctl, last + 1,
 108                                               key.objectid - last - 1);
 109                        wake_up(&root->ino_cache_wait);
 110                }
 111
 112                last = key.objectid;
 113next:
 114                path->slots[0]++;
 115        }
 116
 117        if (last < root->highest_objectid - 1) {
 118                __btrfs_add_free_space(ctl, last + 1,
 119                                       root->highest_objectid - last - 1);
 120        }
 121
 122        spin_lock(&root->ino_cache_lock);
 123        root->ino_cache_state = BTRFS_CACHE_FINISHED;
 124        spin_unlock(&root->ino_cache_lock);
 125
 126        root->ino_cache_progress = (u64)-1;
 127        btrfs_unpin_free_ino(root);
 128out:
 129        wake_up(&root->ino_cache_wait);
 130        up_read(&fs_info->commit_root_sem);
 131
 132        btrfs_free_path(path);
 133
 134        return ret;
 135}
 136
 137static void start_caching(struct btrfs_root *root)
 138{
 139        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
 140        struct task_struct *tsk;
 141        int ret;
 142        u64 objectid;
 143
 144        if (!btrfs_test_opt(root, INODE_MAP_CACHE))
 145                return;
 146
 147        spin_lock(&root->ino_cache_lock);
 148        if (root->ino_cache_state != BTRFS_CACHE_NO) {
 149                spin_unlock(&root->ino_cache_lock);
 150                return;
 151        }
 152
 153        root->ino_cache_state = BTRFS_CACHE_STARTED;
 154        spin_unlock(&root->ino_cache_lock);
 155
 156        ret = load_free_ino_cache(root->fs_info, root);
 157        if (ret == 1) {
 158                spin_lock(&root->ino_cache_lock);
 159                root->ino_cache_state = BTRFS_CACHE_FINISHED;
 160                spin_unlock(&root->ino_cache_lock);
 161                return;
 162        }
 163
 164        /*
 165         * It can be quite time-consuming to fill the cache by searching
 166         * through the extent tree, and this can keep ino allocation path
 167         * waiting. Therefore at start we quickly find out the highest
 168         * inode number and we know we can use inode numbers which fall in
 169         * [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID].
 170         */
 171        ret = btrfs_find_free_objectid(root, &objectid);
 172        if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
 173                __btrfs_add_free_space(ctl, objectid,
 174                                       BTRFS_LAST_FREE_OBJECTID - objectid + 1);
 175        }
 176
 177        tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu",
 178                          root->root_key.objectid);
 179        if (IS_ERR(tsk)) {
 180                btrfs_warn(root->fs_info, "failed to start inode caching task");
 181                btrfs_clear_and_info(root, CHANGE_INODE_CACHE,
 182                                "disabling inode map caching");
 183        }
 184}
 185
 186int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
 187{
 188        if (!btrfs_test_opt(root, INODE_MAP_CACHE))
 189                return btrfs_find_free_objectid(root, objectid);
 190
 191again:
 192        *objectid = btrfs_find_ino_for_alloc(root);
 193
 194        if (*objectid != 0)
 195                return 0;
 196
 197        start_caching(root);
 198
 199        wait_event(root->ino_cache_wait,
 200                   root->ino_cache_state == BTRFS_CACHE_FINISHED ||
 201                   root->free_ino_ctl->free_space > 0);
 202
 203        if (root->ino_cache_state == BTRFS_CACHE_FINISHED &&
 204            root->free_ino_ctl->free_space == 0)
 205                return -ENOSPC;
 206        else
 207                goto again;
 208}
 209
 210void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
 211{
 212        struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
 213
 214        if (!btrfs_test_opt(root, INODE_MAP_CACHE))
 215                return;
 216again:
 217        if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
 218                __btrfs_add_free_space(pinned, objectid, 1);
 219        } else {
 220                down_write(&root->fs_info->commit_root_sem);
 221                spin_lock(&root->ino_cache_lock);
 222                if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
 223                        spin_unlock(&root->ino_cache_lock);
 224                        up_write(&root->fs_info->commit_root_sem);
 225                        goto again;
 226                }
 227                spin_unlock(&root->ino_cache_lock);
 228
 229                start_caching(root);
 230
 231                __btrfs_add_free_space(pinned, objectid, 1);
 232
 233                up_write(&root->fs_info->commit_root_sem);
 234        }
 235}
 236
 237/*
 238 * When a transaction is committed, we'll move those inode numbers which are
 239 * smaller than root->ino_cache_progress from pinned tree to free_ino tree, and
 240 * others will just be dropped, because the commit root we were searching has
 241 * changed.
 242 *
 243 * Must be called with root->fs_info->commit_root_sem held
 244 */
 245void btrfs_unpin_free_ino(struct btrfs_root *root)
 246{
 247        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
 248        struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
 249        struct btrfs_free_space *info;
 250        struct rb_node *n;
 251        u64 count;
 252
 253        if (!btrfs_test_opt(root, INODE_MAP_CACHE))
 254                return;
 255
 256        while (1) {
 257                n = rb_first(rbroot);
 258                if (!n)
 259                        break;
 260
 261                info = rb_entry(n, struct btrfs_free_space, offset_index);
 262                BUG_ON(info->bitmap); /* Logic error */
 263
 264                if (info->offset > root->ino_cache_progress)
 265                        goto free;
 266                else if (info->offset + info->bytes > root->ino_cache_progress)
 267                        count = root->ino_cache_progress - info->offset + 1;
 268                else
 269                        count = info->bytes;
 270
 271                __btrfs_add_free_space(ctl, info->offset, count);
 272free:
 273                rb_erase(&info->offset_index, rbroot);
 274                kfree(info);
 275        }
 276}
 277
 278#define INIT_THRESHOLD  (((1024 * 32) / 2) / sizeof(struct btrfs_free_space))
 279#define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8)
 280
 281/*
 282 * The goal is to keep the memory used by the free_ino tree won't
 283 * exceed the memory if we use bitmaps only.
 284 */
 285static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
 286{
 287        struct btrfs_free_space *info;
 288        struct rb_node *n;
 289        int max_ino;
 290        int max_bitmaps;
 291
 292        n = rb_last(&ctl->free_space_offset);
 293        if (!n) {
 294                ctl->extents_thresh = INIT_THRESHOLD;
 295                return;
 296        }
 297        info = rb_entry(n, struct btrfs_free_space, offset_index);
 298
 299        /*
 300         * Find the maximum inode number in the filesystem. Note we
 301         * ignore the fact that this can be a bitmap, because we are
 302         * not doing precise calculation.
 303         */
 304        max_ino = info->bytes - 1;
 305
 306        max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;
 307        if (max_bitmaps <= ctl->total_bitmaps) {
 308                ctl->extents_thresh = 0;
 309                return;
 310        }
 311
 312        ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
 313                                PAGE_CACHE_SIZE / sizeof(*info);
 314}
 315
 316/*
 317 * We don't fall back to bitmap, if we are below the extents threshold
 318 * or this chunk of inode numbers is a big one.
 319 */
 320static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
 321                       struct btrfs_free_space *info)
 322{
 323        if (ctl->free_extents < ctl->extents_thresh ||
 324            info->bytes > INODES_PER_BITMAP / 10)
 325                return false;
 326
 327        return true;
 328}
 329
 330static struct btrfs_free_space_op free_ino_op = {
 331        .recalc_thresholds      = recalculate_thresholds,
 332        .use_bitmap             = use_bitmap,
 333};
 334
 335static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
 336{
 337}
 338
 339static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
 340                              struct btrfs_free_space *info)
 341{
 342        /*
 343         * We always use extents for two reasons:
 344         *
 345         * - The pinned tree is only used during the process of caching
 346         *   work.
 347         * - Make code simpler. See btrfs_unpin_free_ino().
 348         */
 349        return false;
 350}
 351
 352static struct btrfs_free_space_op pinned_free_ino_op = {
 353        .recalc_thresholds      = pinned_recalc_thresholds,
 354        .use_bitmap             = pinned_use_bitmap,
 355};
 356
 357void btrfs_init_free_ino_ctl(struct btrfs_root *root)
 358{
 359        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
 360        struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
 361
 362        spin_lock_init(&ctl->tree_lock);
 363        ctl->unit = 1;
 364        ctl->start = 0;
 365        ctl->private = NULL;
 366        ctl->op = &free_ino_op;
 367
 368        /*
 369         * Initially we allow to use 16K of ram to cache chunks of
 370         * inode numbers before we resort to bitmaps. This is somewhat
 371         * arbitrary, but it will be adjusted in runtime.
 372         */
 373        ctl->extents_thresh = INIT_THRESHOLD;
 374
 375        spin_lock_init(&pinned->tree_lock);
 376        pinned->unit = 1;
 377        pinned->start = 0;
 378        pinned->private = NULL;
 379        pinned->extents_thresh = 0;
 380        pinned->op = &pinned_free_ino_op;
 381}
 382
 383int btrfs_save_ino_cache(struct btrfs_root *root,
 384                         struct btrfs_trans_handle *trans)
 385{
 386        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
 387        struct btrfs_path *path;
 388        struct inode *inode;
 389        struct btrfs_block_rsv *rsv;
 390        u64 num_bytes;
 391        u64 alloc_hint = 0;
 392        int ret;
 393        int prealloc;
 394        bool retry = false;
 395
 396        /* only fs tree and subvol/snap needs ino cache */
 397        if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
 398            (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
 399             root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
 400                return 0;
 401
 402        /* Don't save inode cache if we are deleting this root */
 403        if (btrfs_root_refs(&root->root_item) == 0)
 404                return 0;
 405
 406        if (!btrfs_test_opt(root, INODE_MAP_CACHE))
 407                return 0;
 408
 409        path = btrfs_alloc_path();
 410        if (!path)
 411                return -ENOMEM;
 412
 413        rsv = trans->block_rsv;
 414        trans->block_rsv = &root->fs_info->trans_block_rsv;
 415
 416        num_bytes = trans->bytes_reserved;
 417        /*
 418         * 1 item for inode item insertion if need
 419         * 4 items for inode item update (in the worst case)
 420         * 1 items for slack space if we need do truncation
 421         * 1 item for free space object
 422         * 3 items for pre-allocation
 423         */
 424        trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 10);
 425        ret = btrfs_block_rsv_add(root, trans->block_rsv,
 426                                  trans->bytes_reserved,
 427                                  BTRFS_RESERVE_NO_FLUSH);
 428        if (ret)
 429                goto out;
 430        trace_btrfs_space_reservation(root->fs_info, "ino_cache",
 431                                      trans->transid, trans->bytes_reserved, 1);
 432again:
 433        inode = lookup_free_ino_inode(root, path);
 434        if (IS_ERR(inode) && (PTR_ERR(inode) != -ENOENT || retry)) {
 435                ret = PTR_ERR(inode);
 436                goto out_release;
 437        }
 438
 439        if (IS_ERR(inode)) {
 440                BUG_ON(retry); /* Logic error */
 441                retry = true;
 442
 443                ret = create_free_ino_inode(root, trans, path);
 444                if (ret)
 445                        goto out_release;
 446                goto again;
 447        }
 448
 449        BTRFS_I(inode)->generation = 0;
 450        ret = btrfs_update_inode(trans, root, inode);
 451        if (ret) {
 452                btrfs_abort_transaction(trans, root, ret);
 453                goto out_put;
 454        }
 455
 456        if (i_size_read(inode) > 0) {
 457                ret = btrfs_truncate_free_space_cache(root, trans, inode);
 458                if (ret) {
 459                        if (ret != -ENOSPC)
 460                                btrfs_abort_transaction(trans, root, ret);
 461                        goto out_put;
 462                }
 463        }
 464
 465        spin_lock(&root->ino_cache_lock);
 466        if (root->ino_cache_state != BTRFS_CACHE_FINISHED) {
 467                ret = -1;
 468                spin_unlock(&root->ino_cache_lock);
 469                goto out_put;
 470        }
 471        spin_unlock(&root->ino_cache_lock);
 472
 473        spin_lock(&ctl->tree_lock);
 474        prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
 475        prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
 476        prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
 477        spin_unlock(&ctl->tree_lock);
 478
 479        /* Just to make sure we have enough space */
 480        prealloc += 8 * PAGE_CACHE_SIZE;
 481
 482        ret = btrfs_delalloc_reserve_space(inode, prealloc);
 483        if (ret)
 484                goto out_put;
 485
 486        ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
 487                                              prealloc, prealloc, &alloc_hint);
 488        if (ret) {
 489                btrfs_delalloc_release_space(inode, prealloc);
 490                goto out_put;
 491        }
 492        btrfs_free_reserved_data_space(inode, prealloc);
 493
 494        ret = btrfs_write_out_ino_cache(root, trans, path, inode);
 495out_put:
 496        iput(inode);
 497out_release:
 498        trace_btrfs_space_reservation(root->fs_info, "ino_cache",
 499                                      trans->transid, trans->bytes_reserved, 0);
 500        btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
 501out:
 502        trans->block_rsv = rsv;
 503        trans->bytes_reserved = num_bytes;
 504
 505        btrfs_free_path(path);
 506        return ret;
 507}
 508
 509static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
 510{
 511        struct btrfs_path *path;
 512        int ret;
 513        struct extent_buffer *l;
 514        struct btrfs_key search_key;
 515        struct btrfs_key found_key;
 516        int slot;
 517
 518        path = btrfs_alloc_path();
 519        if (!path)
 520                return -ENOMEM;
 521
 522        search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
 523        search_key.type = -1;
 524        search_key.offset = (u64)-1;
 525        ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
 526        if (ret < 0)
 527                goto error;
 528        BUG_ON(ret == 0); /* Corruption */
 529        if (path->slots[0] > 0) {
 530                slot = path->slots[0] - 1;
 531                l = path->nodes[0];
 532                btrfs_item_key_to_cpu(l, &found_key, slot);
 533                *objectid = max_t(u64, found_key.objectid,
 534                                  BTRFS_FIRST_FREE_OBJECTID - 1);
 535        } else {
 536                *objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
 537        }
 538        ret = 0;
 539error:
 540        btrfs_free_path(path);
 541        return ret;
 542}
 543
 544int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
 545{
 546        int ret;
 547        mutex_lock(&root->objectid_mutex);
 548
 549        if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
 550                ret = btrfs_find_highest_objectid(root,
 551                                                  &root->highest_objectid);
 552                if (ret)
 553                        goto out;
 554        }
 555
 556        if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
 557                ret = -ENOSPC;
 558                goto out;
 559        }
 560
 561        *objectid = ++root->highest_objectid;
 562        ret = 0;
 563out:
 564        mutex_unlock(&root->objectid_mutex);
 565        return ret;
 566}
 567